Compare commits

...

24 Commits

Author SHA1 Message Date
Niwaka
358d5e1d63 fix: support alter table ~ add ~ custom_type (#5165) 2024-12-15 09:05:29 +00:00
Yingwen
579059d99f ci: use 4xlarge for nightly build (#5158) 2024-12-13 12:53:11 +00:00
localhost
53d55c0b6b fix: loki write row len error (#5161) 2024-12-13 10:10:59 +00:00
Yingwen
bef6896280 docs: Add index panels to standalone grafana dashboard (#5140)
* docs: Add index panels to standalone grafana dashboard

* docs: fix flush/compaction op
2024-12-13 08:17:49 +00:00
Yohan Wal
4b4c6dbb66 refactor: cache inverted index with fixed-size page (#5114)
* feat: cache inverted index by page instead of file

* fix: add unit test and fix bugs

* chore: typo

* chore: ci

* fix: math

* chore: apply review comments

* chore: renames

* test: add unit test for index key calculation

* refactor: use ReadableSize

* feat: add config for inverted index page size

* chore: update config file

* refactor: handle multiple range read and fix some related bugs

* fix: add config

* test: turn to a fs reader to match behaviors of object store
2024-12-13 07:34:24 +00:00
localhost
e8e9526738 chore: pipeline dryrun api can currently receive pipeline raw content (#5142)
* chore: pipeline dryrun api can currently receive pipeline raw content

* chore: remove dryrun v1 and add test

* chore: change dryrun pipeline api body schema

* chore: remove useless struct PipelineInfo

* chore: update PipelineDryrunParams doc

* chore: increase code readability

* chore: add some comment for pipeline dryrun test

* Apply suggestions from code review

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

* chore: format code

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2024-12-12 11:47:21 +00:00
Yingwen
fee75a1fad feat: collect reader metrics from prune reader (#5152) 2024-12-12 11:27:22 +00:00
localhost
b8a78b7838 chore: decide tag column in log api following table schema if table exists (#5138)
* chore: decide tag column in log api following table schema if table exists

* chore: add more test for greptime_identity pipeline

* chore: change pipeline get_table function signature

* chore: change identity_pipeline_inner tag_column_names type
2024-12-12 09:01:21 +00:00
Weny Xu
2137c53274 feat(index): add file_size_hint for remote blob reader (#5147)
feat(index): add file_size_hint for remote blob reader
2024-12-12 04:45:40 +00:00
Yohan Wal
03ad6e2a8d feat(fuzz): add alter table options for alter fuzzer (#5074)
* feat(fuzz): add set table options to alter fuzzer

* chore: clippy is happy, I'm sad

* chore: happy ci happy

* fix: unit test

* feat(fuzz): add unset table options to alter fuzzer

* fix: unit test

* feat(fuzz): add table option validator

* fix: make clippy happy

* chore: add comments

* chore: apply review comments

* fix: unit test

* feat(fuzz): add more ttl options

* fix: #5108

* chore: add comments

* chore: add comments
2024-12-12 04:21:38 +00:00
Weny Xu
d53fbcb936 feat: introduce PuffinMetadataCache (#5148)
* feat: introduce `PuffinMetadataCache`

* refactor: remove too_many_arguments

* chore: fmt toml
2024-12-12 04:09:36 +00:00
Weny Xu
8c1959c580 feat: add prefetch support to InvertedIndexFooterReader for reduced I/O time (#5146)
* feat: add prefetch support to `InvertedIndeFooterReader`

* chore: correct struct name

* chore: apply suggestions from CR
2024-12-12 03:49:54 +00:00
Weny Xu
e2a41ccaec feat: add prefetch support to PuffinFileFooterReader for reduced I/O time (#5145)
* feat: introduce `PuffinFileFooterReader`

* refactor: remove `SyncReader` trait and impl

* refactor: replace `FooterParser` with `PuffinFileFooterReader`

* chore: remove unused errors
2024-12-12 03:13:36 +00:00
Niwaka
a8012147ab feat: support push down IN filter (#5129)
* feat: support push down IN filter

* chore: move tests to prune.sql
2024-12-11 13:46:23 +00:00
Ruihang Xia
60f8dbf7f0 feat: implement v1/sql/parse endpoint to parse GreptimeDB's SQL dialect (#5144)
* derive ser/de

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl method

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove deserialize

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-11 13:33:54 +00:00
ZonaHe
9da2e17d0e feat: update dashboard to v0.7.2 (#5141)
Co-authored-by: sunchanglong <sunchanglong@users.noreply.github.com>
2024-12-11 12:47:59 +00:00
Yohan Wal
1a8e77a480 test: part of parser test migrated from duckdb (#5125)
* test: update test

* fix: fix test
2024-12-11 09:28:13 +00:00
Zhenchi
e1e39993f7 feat(vector): add scalar add function (#5119)
* refactor: extract implicit conversion helper functions of vector

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(vector): add scalar add function

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix fmt

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-11 09:25:56 +00:00
Lei, HUANG
a30d918df2 perf: avoid cache during compaction (#5135)
* Revert "refactor: Avoid wrapping Option for CacheManagerRef (#4996)"

This reverts commit 42bf7e9965.

* fix: memory usage during log ingestion

* fix: fmt
2024-12-11 08:24:41 +00:00
dennis zhuang
2c4ac76754 feat: adjust WAL purge default configurations (#5107)
* feat: adjust WAL purge default configurations

* fix: config

* feat: change raft engine file_size default to 128Mib
2024-12-11 08:08:05 +00:00
jeremyhi
a6893aad42 chore: set store_key_prefix for all kvbackend (#5132) 2024-12-11 08:04:02 +00:00
discord9
d91517688a chore: fix aws_lc not-in-dependency-tree check in CI (#5121)
* chore: fix aws_lc check in CI

* chore: update lock file
2024-12-11 07:02:03 +00:00
shuiyisong
3d1b8c4fac chore: add /ready api for health checking (#5124)
* chore: add ready endpoint for health checking

* chore: add test
2024-12-11 02:56:48 +00:00
Yingwen
7c69ca0502 chore: bump main branch version to 0.12 (#5133)
chore: bump version to v0.12.0
2024-12-10 13:10:37 +00:00
132 changed files with 5046 additions and 2602 deletions

View File

@@ -1,2 +1,3 @@
native-tls
openssl
aws-lc-sys

View File

@@ -269,13 +269,6 @@ jobs:
- name: Install cargo-gc-bin
shell: bash
run: cargo install cargo-gc-bin
- name: Check aws-lc-sys will not build
shell: bash
run: |
if cargo tree -i aws-lc-sys -e features | grep -q aws-lc-sys; then
echo "Found aws-lc-sys, which has compilation problems on older gcc versions. Please replace it with ring until its building experience improves."
exit 1
fi
- name: Build greptime bianry
shell: bash
# `cargo gc` will invoke `cargo build` with specified args

View File

@@ -12,7 +12,7 @@ on:
linux_amd64_runner:
type: choice
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.2xlarge-amd64
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-20.04
- ubuntu-20.04-8-cores
@@ -27,7 +27,7 @@ on:
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.2xlarge-arm64
default: ec2-c6g.4xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G

View File

@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.11.0
NEXT_RELEASE_VERSION: v0.12.0
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:

Cargo.lock (generated, 212 changed lines)
View File

@@ -188,7 +188,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]]
name = "api"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-base",
"common-decimal",
@@ -749,7 +749,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -798,21 +798,6 @@ dependencies = [
"cc",
]
[[package]]
name = "aws-lc-sys"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3ddc4a5b231dd6958b140ff3151b6412b3f4321fab354f399eec8f14b06df62"
dependencies = [
"bindgen 0.69.4",
"cc",
"cmake",
"dunce",
"fs_extra",
"libc",
"paste",
]
[[package]]
name = "axum"
version = "0.6.20"
@@ -993,29 +978,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bindgen"
version = "0.69.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
dependencies = [
"bitflags 2.6.0",
"cexpr",
"clang-sys",
"itertools 0.12.1",
"lazy_static",
"lazycell",
"log",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash 1.1.0",
"shlex",
"syn 2.0.90",
"which",
]
[[package]]
name = "bindgen"
version = "0.70.1"
@@ -1340,7 +1302,7 @@ dependencies = [
[[package]]
name = "cache"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"catalog",
"common-error",
@@ -1348,7 +1310,7 @@ dependencies = [
"common-meta",
"moka",
"snafu 0.8.5",
"substrait 0.11.0",
"substrait 0.12.0",
]
[[package]]
@@ -1375,7 +1337,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow",
@@ -1714,7 +1676,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "cli"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"auth",
@@ -1758,7 +1720,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"temp-env",
"tempfile",
@@ -1768,7 +1730,7 @@ dependencies = [
[[package]]
name = "client"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -1797,7 +1759,7 @@ dependencies = [
"rand",
"serde_json",
"snafu 0.8.5",
"substrait 0.11.0",
"substrait 0.12.0",
"substrait 0.37.3",
"tokio",
"tokio-stream",
@@ -1838,7 +1800,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"auth",
@@ -1898,7 +1860,7 @@ dependencies = [
"similar-asserts",
"snafu 0.8.5",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"temp-env",
"tempfile",
@@ -1944,7 +1906,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"anymap2",
"async-trait",
@@ -1965,7 +1927,7 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"chrono",
"common-error",
@@ -1976,7 +1938,7 @@ dependencies = [
[[package]]
name = "common-config"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-base",
"common-error",
@@ -1999,7 +1961,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arrow",
"arrow-schema",
@@ -2036,7 +1998,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"bigdecimal 0.4.5",
"common-error",
@@ -2049,7 +2011,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"snafu 0.8.5",
"strum 0.25.0",
@@ -2058,7 +2020,7 @@ dependencies = [
[[package]]
name = "common-frontend"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -2073,7 +2035,7 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"approx 0.5.1",
@@ -2118,7 +2080,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"common-runtime",
@@ -2135,7 +2097,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -2161,7 +2123,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"common-base",
@@ -2180,7 +2142,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arc-swap",
"common-query",
@@ -2194,7 +2156,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-error",
"common-macro",
@@ -2207,7 +2169,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"anymap2",
"api",
@@ -2264,7 +2226,7 @@ dependencies = [
[[package]]
name = "common-options"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-grpc",
"humantime-serde",
@@ -2273,11 +2235,11 @@ dependencies = [
[[package]]
name = "common-plugins"
version = "0.11.0"
version = "0.12.0"
[[package]]
name = "common-pprof"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-error",
"common-macro",
@@ -2289,7 +2251,7 @@ dependencies = [
[[package]]
name = "common-procedure"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-stream",
"async-trait",
@@ -2316,7 +2278,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"common-procedure",
@@ -2324,7 +2286,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -2350,7 +2312,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arc-swap",
"common-error",
@@ -2369,7 +2331,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -2399,7 +2361,7 @@ dependencies = [
[[package]]
name = "common-telemetry"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"atty",
"backtrace",
@@ -2427,7 +2389,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"client",
"common-query",
@@ -2439,7 +2401,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arrow",
"chrono",
@@ -2457,7 +2419,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"build-data",
"const_format",
@@ -2467,7 +2429,7 @@ dependencies = [
[[package]]
name = "common-wal"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-base",
"common-error",
@@ -3276,7 +3238,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -3327,7 +3289,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tokio",
"toml 0.8.19",
@@ -3336,7 +3298,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arrow",
"arrow-array",
@@ -3719,12 +3681,6 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
[[package]]
name = "dunce"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
[[package]]
name = "duration-str"
version = "0.11.2"
@@ -3954,7 +3910,7 @@ dependencies = [
[[package]]
name = "file-engine"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -4071,7 +4027,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
[[package]]
name = "flow"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow",
@@ -4128,7 +4084,7 @@ dependencies = [
"snafu 0.8.5",
"store-api",
"strum 0.25.0",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tokio",
"tonic 0.11.0",
@@ -4175,7 +4131,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "frontend"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -4314,12 +4270,6 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsevent-sys"
version = "4.1.0"
@@ -5315,7 +5265,7 @@ dependencies = [
[[package]]
name = "index"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -5955,12 +5905,6 @@ dependencies = [
"spin 0.9.8",
]
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "levenshtein_automata"
version = "0.2.1"
@@ -6150,7 +6094,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "log-query"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"chrono",
"common-error",
@@ -6161,7 +6105,7 @@ dependencies = [
[[package]]
name = "log-store"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-stream",
"async-trait",
@@ -6482,7 +6426,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -6509,7 +6453,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -6588,7 +6532,7 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
@@ -6692,13 +6636,14 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
"async-channel 1.9.0",
"async-stream",
"async-trait",
"bytemuck",
"bytes",
"common-base",
"common-config",
@@ -6966,7 +6911,7 @@ checksum = "06f19e4cfa0ab5a76b627cec2d81331c49b034988eaf302c3bafeada684eadef"
dependencies = [
"base64 0.21.7",
"bigdecimal 0.4.5",
"bindgen 0.70.1",
"bindgen",
"bitflags 2.6.0",
"bitvec",
"btoi",
@@ -7005,7 +6950,7 @@ checksum = "478b0ff3f7d67b79da2b96f56f334431aef65e15ba4b29dd74a4236e29582bdc"
dependencies = [
"base64 0.21.7",
"bigdecimal 0.4.5",
"bindgen 0.70.1",
"bindgen",
"bitflags 2.6.0",
"bitvec",
"btoi",
@@ -7404,7 +7349,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"anyhow",
"bytes",
@@ -7657,7 +7602,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -7705,7 +7650,7 @@ dependencies = [
"sql",
"sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tokio",
"tokio-util",
@@ -7955,7 +7900,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -8241,7 +8186,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pipeline"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -8404,7 +8349,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"auth",
"clap 4.5.19",
@@ -8681,7 +8626,7 @@ dependencies = [
[[package]]
name = "promql"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"async-trait",
@@ -8919,7 +8864,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-compression 0.4.13",
"async-trait",
@@ -8939,6 +8884,7 @@ dependencies = [
"lz4_flex 0.11.3",
"moka",
"pin-project",
"prometheus",
"serde",
"serde_json",
"sha2",
@@ -9043,7 +8989,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -9110,7 +9056,7 @@ dependencies = [
"stats-cli",
"store-api",
"streaming-stats",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tokio",
"tokio-stream",
@@ -10572,7 +10518,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "script"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -10866,7 +10812,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -10876,7 +10822,6 @@ dependencies = [
"arrow-schema",
"async-trait",
"auth",
"aws-lc-sys",
"axum",
"axum-macros",
"base64 0.21.7",
@@ -10979,7 +10924,7 @@ dependencies = [
[[package]]
name = "session"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -11327,7 +11272,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"chrono",
@@ -11352,6 +11297,7 @@ dependencies = [
"jsonb",
"lazy_static",
"regex",
"serde",
"serde_json",
"snafu 0.8.5",
"sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
@@ -11390,7 +11336,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -11428,6 +11374,7 @@ dependencies = [
"lazy_static",
"log",
"regex",
"serde",
"sqlparser 0.45.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sqlparser_derive 0.2.2 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
]
@@ -11610,7 +11557,7 @@ dependencies = [
[[package]]
name = "store-api"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
@@ -11781,7 +11728,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"bytes",
@@ -11980,7 +11927,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -12246,11 +12193,12 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "tests-fuzz"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arbitrary",
"async-trait",
"chrono",
"common-base",
"common-error",
"common-macro",
"common-query",
@@ -12288,7 +12236,7 @@ dependencies = [
[[package]]
name = "tests-integration"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -12352,7 +12300,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tempfile",
"time",

View File

@@ -68,7 +68,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.11.0"
version = "0.12.0"
edition = "2021"
license = "Apache-2.0"
@@ -180,6 +180,7 @@ sysinfo = "0.30"
# on branch v0.44.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [
"visitor",
"serde",
] }
strum = { version = "0.25", features = ["derive"] }
tempfile = "3"

View File

@@ -13,11 +13,11 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `default_timezone` | String | Unset | The default timezone of the server. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -61,9 +61,9 @@
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -150,6 +150,7 @@
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
@@ -286,12 +287,12 @@
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
| `store_addrs` | Array | -- | Store server address default to etcd store. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `backend` | String | `EtcdStore` | The datastore for meta server. |
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `backend` | String | `EtcdStore` | The datastore for meta server. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -356,7 +357,6 @@
| `node_id` | Integer | Unset | The datanode identifier and should be unique in the cluster. |
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
@@ -364,6 +364,7 @@
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
| `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
| `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
@@ -398,9 +399,9 @@
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -475,6 +476,9 @@
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `auto` | Memory threshold for performing an external sort during index creation.<br/>- `auto`: automatically determine the threshold based on the system memory size (default)<br/>- `unlimited`: no memory limit<br/>- `[size]` e.g. `64MB`: fixed memory threshold |
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |

View File

@@ -13,9 +13,6 @@ require_lease_before_startup = false
## By default, it provides services after all regions have been initialized.
init_regions_in_background = false
## Enable telemetry to collect anonymous usage data.
enable_telemetry = true
## Parallelism of initializing regions.
init_regions_parallelism = 16
@@ -42,6 +39,8 @@ rpc_max_recv_message_size = "512MB"
## @toml2docs:none-default
rpc_max_send_message_size = "512MB"
## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true
## The HTTP server options.
[http]
@@ -143,15 +142,15 @@ dir = "/tmp/greptimedb/wal"
## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
file_size = "256MB"
file_size = "128MB"
## The threshold of the WAL size to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "4GB"
purge_threshold = "1GB"
## The interval to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "10m"
purge_interval = "1m"
## The read batch size.
## **It's only used when the provider is `raft_engine`**.
@@ -544,6 +543,15 @@ mem_threshold_on_create = "auto"
## Deprecated, use `region_engine.mito.index.aux_path` instead.
intermediate_path = ""
## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"
## Cache size for inverted index content.
content_cache_size = "128MiB"
## Page size for inverted index content cache.
content_cache_page_size = "8MiB"
## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]

View File

@@ -10,6 +10,12 @@ server_addr = "127.0.0.1:3002"
## Store server address default to etcd store.
store_addrs = ["127.0.0.1:2379"]
## If it's not empty, the metasrv will store all data with this key prefix.
store_key_prefix = ""
## The datastore for meta server.
backend = "EtcdStore"
## Datanode selector type.
## - `round_robin` (default value)
## - `lease_based`
@@ -20,20 +26,14 @@ selector = "round_robin"
## Store data in memory.
use_memory_store = false
## Whether to enable greptimedb telemetry.
enable_telemetry = true
## If it's not empty, the metasrv will store all data with this key prefix.
store_key_prefix = ""
## Whether to enable region failover.
## This feature is only available on GreptimeDB running on cluster mode and
## - Using Remote WAL
## - Using shared storage (e.g., s3).
enable_region_failover = false
## The datastore for meta server.
backend = "EtcdStore"
## Whether to enable greptimedb telemetry. Enabled by default.
#+ enable_telemetry = true
## The runtime options.
#+ [runtime]

View File

@@ -1,9 +1,6 @@
## The running mode of the datanode. It can be `standalone` or `distributed`.
mode = "standalone"
## Enable telemetry to collect anonymous usage data.
enable_telemetry = true
## The default timezone of the server.
## @toml2docs:none-default
default_timezone = "UTC"
@@ -18,6 +15,9 @@ init_regions_parallelism = 16
## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0
## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true
## The runtime options.
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
@@ -147,15 +147,15 @@ dir = "/tmp/greptimedb/wal"
## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
file_size = "256MB"
file_size = "128MB"
## The threshold of the WAL size to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "4GB"
purge_threshold = "1GB"
## The interval to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "10m"
purge_interval = "1m"
## The read batch size.
## **It's only used when the provider is `raft_engine`**.
@@ -588,6 +588,9 @@ metadata_cache_size = "64MiB"
## Cache size for inverted index content.
content_cache_size = "128MiB"
## Page size for inverted index content cache.
content_cache_page_size = "8MiB"
## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]
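To make the new `content_cache_page_size` option above more concrete, here is a small, hypothetical sketch of fixed-size page caching: instead of caching a whole inverted index file, a requested byte range is mapped to the 8MiB pages that cover it, so every cache entry stays bounded. The key arithmetic below is an illustration only, not the engine's exact cache-key scheme.

```rust
// Hypothetical illustration of fixed-size page caching for inverted index
// content: a byte range is mapped to the page indexes that cover it, so cache
// entries are bounded by `content_cache_page_size` instead of whole files.
fn pages_for_range(offset: u64, size: u64, page_size: u64) -> std::ops::Range<u64> {
    let first_page = offset / page_size;
    let last_page = (offset + size).div_ceil(page_size); // exclusive upper bound
    first_page..last_page
}

fn main() {
    // 8MiB, matching the default `content_cache_page_size`.
    let page_size: u64 = 8 * 1024 * 1024;
    // A 10MiB read starting at offset 12MiB touches pages 1 and 2 (0-based).
    let pages = pages_for_range(12 << 20, 10 << 20, page_size);
    assert_eq!(pages, 1..3);
}
```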

File diff suppressed because it is too large.

View File

@@ -36,6 +36,11 @@ pub struct Metadata {
/// `RangeReader` reads a range of bytes from a source.
#[async_trait]
pub trait RangeReader: Send + Unpin {
/// Sets the file size hint for the reader.
///
/// It's used to optimize the reading process by reducing the number of remote requests.
fn with_file_size_hint(&mut self, file_size_hint: u64);
/// Returns the metadata of the source.
async fn metadata(&mut self) -> io::Result<Metadata>;
@@ -70,6 +75,10 @@ pub trait RangeReader: Send + Unpin {
#[async_trait]
impl<R: ?Sized + RangeReader> RangeReader for &mut R {
fn with_file_size_hint(&mut self, file_size_hint: u64) {
(*self).with_file_size_hint(file_size_hint)
}
async fn metadata(&mut self) -> io::Result<Metadata> {
(*self).metadata().await
}
@@ -186,15 +195,17 @@ impl<R: RangeReader + 'static> AsyncRead for AsyncReadAdapter<R> {
#[async_trait]
impl RangeReader for Vec<u8> {
fn with_file_size_hint(&mut self, _file_size_hint: u64) {
// do nothing
}
async fn metadata(&mut self) -> io::Result<Metadata> {
Ok(Metadata {
content_length: self.len() as u64,
})
}
async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
range.end = range.end.min(self.len() as u64);
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]);
Ok(bytes)
}
@@ -222,6 +233,10 @@ impl FileReader {
#[async_trait]
impl RangeReader for FileReader {
fn with_file_size_hint(&mut self, _file_size_hint: u64) {
// do nothing
}
async fn metadata(&mut self) -> io::Result<Metadata> {
Ok(Metadata {
content_length: self.content_length,

View File

@@ -19,7 +19,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE;
pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE;
pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE;
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Default)]
pub struct ReadableSize(pub u64);
impl ReadableSize {

View File

@@ -15,6 +15,7 @@
mod convert;
mod distance;
pub(crate) mod impl_conv;
mod scalar_add;
use std::sync::Arc;
@@ -32,5 +33,8 @@ impl VectorFunction {
registry.register(Arc::new(distance::CosDistanceFunction));
registry.register(Arc::new(distance::DotProductFunction));
registry.register(Arc::new(distance::L2SqDistanceFunction));
// scalar calculation
registry.register(Arc::new(scalar_add::ScalarAddFunction));
}
}

View File

@@ -109,7 +109,6 @@ pub fn parse_veclit_from_strlit(s: &str) -> Result<Vec<f32>> {
})
}
#[allow(unused)]
/// Convert a vector literal to a binary literal.
pub fn veclit_to_binlit(vec: &[f32]) -> Vec<u8> {
if cfg!(target_endian = "little") {

View File

@@ -0,0 +1,173 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
use nalgebra::DVectorView;
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::helper;
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
const NAME: &str = "vec_scalar_add";
/// Adds a scalar to each element of a vector.
///
/// # Example
///
/// ```sql
/// SELECT vec_to_string(vec_scalar_add(1, "[1, 2, 3]")) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | [2,3,4] |
/// +---------+
///
/// -- Negative scalar to simulate subtraction
/// SELECT vec_to_string(vec_scalar_add(-1, "[1, 2, 3]")) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | [0,1,2] |
/// +---------+
/// ```
#[derive(Debug, Clone, Default)]
pub struct ScalarAddFunction;
impl Function for ScalarAddFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::binary_datatype())
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![ConcreteDataType::float64_datatype()],
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::binary_datatype(),
],
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let arg0 = &columns[0];
let arg1 = &columns[1];
let len = arg0.len();
let mut result = BinaryVectorBuilder::with_capacity(len);
if len == 0 {
return Ok(result.to_vector());
}
let arg1_const = as_veclit_if_const(arg1)?;
for i in 0..len {
let arg0 = arg0.get(i).as_f64_lossy();
let Some(arg0) = arg0 else {
result.push_null();
continue;
};
let arg1 = match arg1_const.as_ref() {
Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
None => as_veclit(arg1.get_ref(i))?,
};
let Some(arg1) = arg1 else {
result.push_null();
continue;
};
let vec = DVectorView::from_slice(&arg1, arg1.len());
let vec_res = vec.add_scalar(arg0 as _);
let veclit = vec_res.as_slice();
let binlit = veclit_to_binlit(veclit);
result.push(Some(&binlit));
}
Ok(result.to_vector())
}
}
impl Display for ScalarAddFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::vectors::{Float32Vector, StringVector};
use super::*;
#[test]
fn test_scalar_add() {
let func = ScalarAddFunction;
let input0 = Arc::new(Float32Vector::from(vec![
Some(1.0),
Some(-1.0),
None,
Some(3.0),
]));
let input1 = Arc::new(StringVector::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
Some("[4.0,5.0,6.0]".to_string()),
Some("[7.0,8.0,9.0]".to_string()),
None,
]));
let result = func
.eval(FunctionContext::default(), &[input0, input1])
.unwrap();
let result = result.as_ref();
assert_eq!(result.len(), 4);
assert_eq!(
result.get_ref(0).as_binary().unwrap(),
Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice())
);
assert_eq!(
result.get_ref(1).as_binary().unwrap(),
Some(veclit_to_binlit(&[3.0, 4.0, 5.0]).as_slice())
);
assert!(result.get_ref(2).is_null());
assert!(result.get_ref(3).is_null());
}
}

View File

@@ -49,9 +49,9 @@ impl Default for RaftEngineConfig {
fn default() -> Self {
Self {
dir: None,
file_size: ReadableSize::mb(256),
purge_threshold: ReadableSize::gb(4),
purge_interval: Duration::from_secs(600),
file_size: ReadableSize::mb(128),
purge_threshold: ReadableSize::gb(1),
purge_interval: Duration::from_secs(60),
read_batch_size: 128,
sync_write: false,
enable_log_recycle: true,

View File

@@ -19,14 +19,16 @@ use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use client::Output;
use common_error::ext::BoxedError;
use pipeline::pipeline_operator::PipelineOperator;
use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion};
use servers::error::{
AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult,
};
use servers::interceptor::{LogIngestInterceptor, LogIngestInterceptorRef};
use servers::query_handler::PipelineHandler;
use session::context::QueryContextRef;
use session::context::{QueryContext, QueryContextRef};
use snafu::ResultExt;
use table::Table;
use crate::instance::Instance;
@@ -84,6 +86,22 @@ impl PipelineHandler for Instance {
.await
.context(PipelineSnafu)
}
async fn get_table(
&self,
table: &str,
query_ctx: &QueryContext,
) -> std::result::Result<Option<Arc<Table>>, catalog::error::Error> {
let catalog = query_ctx.current_catalog();
let schema = query_ctx.current_schema();
self.catalog_manager
.table(catalog, &schema, table, None)
.await
}
fn build_pipeline(&self, pipeline: &str) -> ServerResult<Pipeline<GreptimeTransformer>> {
PipelineOperator::build_pipeline(pipeline).context(PipelineSnafu)
}
}
impl Instance {

View File

@@ -26,14 +26,6 @@ use crate::inverted_index::search::predicate::Predicate;
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Failed to seek"))]
Seek {
#[snafu(source)]
error: IoError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to read"))]
Read {
#[snafu(source)]
@@ -76,6 +68,18 @@ pub enum Error {
location: Location,
},
#[snafu(display("Blob size too small"))]
BlobSizeTooSmall {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid footer payload size"))]
InvalidFooterPayloadSize {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unexpected inverted index footer payload size, max: {max_payload_size}, actual: {actual_payload_size}"))]
UnexpectedFooterPayloadSize {
max_payload_size: u64,
@@ -215,8 +219,7 @@ impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
use Error::*;
match self {
Seek { .. }
| Read { .. }
Read { .. }
| Write { .. }
| Flush { .. }
| Close { .. }
@@ -229,7 +232,9 @@ impl ErrorExt for Error {
| KeysApplierUnexpectedPredicates { .. }
| CommonIo { .. }
| UnknownIntermediateCodecMagic { .. }
| FstCompile { .. } => StatusCode::Unexpected,
| FstCompile { .. }
| InvalidFooterPayloadSize { .. }
| BlobSizeTooSmall { .. } => StatusCode::Unexpected,
ParseRegex { .. }
| ParseDFA { .. }

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::Range;
use std::sync::Arc;
use async_trait::async_trait;
@@ -30,23 +31,23 @@ mod footer;
#[mockall::automock]
#[async_trait]
pub trait InvertedIndexReader: Send {
/// Reads all data to dest.
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize>;
/// Seeks to given offset and reads data with exact size as provided.
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
/// Reads the bytes in the given ranges.
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Vec<u8>>>;
/// Retrieves metadata of all inverted indices stored within the blob.
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>>;
/// Retrieves the finite state transducer (FST) map from the given offset and size.
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
let fst_data = self.seek_read(offset, size).await?;
let fst_data = self.range_read(offset, size).await?;
FstMap::new(fst_data).context(DecodeFstSnafu)
}
/// Retrieves the bitmap from the given offset and size.
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
self.seek_read(offset, size).await.map(BitVec::from_vec)
self.range_read(offset, size).await.map(BitVec::from_vec)
}
}

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::Range;
use std::sync::Arc;
use async_trait::async_trait;
@@ -19,8 +20,9 @@ use common_base::range_read::RangeReader;
use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::{ensure, ResultExt};
use super::footer::DEFAULT_PREFETCH_SIZE;
use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu};
use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader;
use crate::inverted_index::format::reader::footer::InvertedIndexFooterReader;
use crate::inverted_index::format::reader::InvertedIndexReader;
use crate::inverted_index::format::MIN_BLOB_SIZE;
@@ -49,16 +51,7 @@ impl<R> InvertedIndexBlobReader<R> {
#[async_trait]
impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize> {
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
self.source
.read_into(0..metadata.content_length, dest)
.await
.context(CommonIoSnafu)?;
Ok(metadata.content_length as usize)
}
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
let buf = self
.source
.read(offset..offset + size as u64)
@@ -67,12 +60,18 @@ impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
Ok(buf.into())
}
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Vec<u8>>> {
let bufs = self.source.read_vec(ranges).await.context(CommonIoSnafu)?;
Ok(bufs.into_iter().map(|buf| buf.into()).collect())
}
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
let blob_size = metadata.content_length;
Self::validate_blob_size(blob_size)?;
let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size);
let mut footer_reader = InvertedIndexFooterReader::new(&mut self.source, blob_size)
.with_prefetch_size(DEFAULT_PREFETCH_SIZE);
footer_reader.metadata().await.map(Arc::new)
}
}

View File

@@ -18,53 +18,88 @@ use prost::Message;
use snafu::{ensure, ResultExt};
use crate::inverted_index::error::{
CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu,
UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu,
BlobSizeTooSmallSnafu, CommonIoSnafu, DecodeProtoSnafu, InvalidFooterPayloadSizeSnafu, Result,
UnexpectedFooterPayloadSizeSnafu, UnexpectedOffsetSizeSnafu,
UnexpectedZeroSegmentRowCountSnafu,
};
use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE;
/// InvertedIndeFooterReader is for reading the footer section of the blob.
pub struct InvertedIndeFooterReader<R> {
pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB
/// InvertedIndexFooterReader is for reading the footer section of the blob.
pub struct InvertedIndexFooterReader<R> {
source: R,
blob_size: u64,
prefetch_size: Option<u64>,
}
impl<R> InvertedIndeFooterReader<R> {
impl<R> InvertedIndexFooterReader<R> {
pub fn new(source: R, blob_size: u64) -> Self {
Self { source, blob_size }
Self {
source,
blob_size,
prefetch_size: None,
}
}
/// Set the prefetch size for the footer reader.
pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self {
self.prefetch_size = Some(prefetch_size.max(FOOTER_PAYLOAD_SIZE_SIZE));
self
}
pub fn prefetch_size(&self) -> u64 {
self.prefetch_size.unwrap_or(FOOTER_PAYLOAD_SIZE_SIZE)
}
}
impl<R: RangeReader> InvertedIndeFooterReader<R> {
impl<R: RangeReader> InvertedIndexFooterReader<R> {
pub async fn metadata(&mut self) -> Result<InvertedIndexMetas> {
let payload_size = self.read_payload_size().await?;
let metas = self.read_payload(payload_size).await?;
Ok(metas)
}
ensure!(
self.blob_size >= FOOTER_PAYLOAD_SIZE_SIZE,
BlobSizeTooSmallSnafu
);
async fn read_payload_size(&mut self) -> Result<u64> {
let mut size_buf = [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize];
let end = self.blob_size;
let start = end - FOOTER_PAYLOAD_SIZE_SIZE;
self.source
.read_into(start..end, &mut &mut size_buf[..])
let footer_start = self.blob_size.saturating_sub(self.prefetch_size());
let suffix = self
.source
.read(footer_start..self.blob_size)
.await
.context(CommonIoSnafu)?;
let suffix_len = suffix.len();
let length = u32::from_le_bytes(Self::read_tailing_four_bytes(&suffix)?) as u64;
self.validate_payload_size(length)?;
let payload_size = u32::from_le_bytes(size_buf) as u64;
self.validate_payload_size(payload_size)?;
let footer_size = FOOTER_PAYLOAD_SIZE_SIZE;
Ok(payload_size)
// Did not fetch the entire file metadata in the initial read, need to make a second request.
if length > suffix_len as u64 - footer_size {
let metadata_start = self.blob_size - length - footer_size;
let meta = self
.source
.read(metadata_start..self.blob_size - footer_size)
.await
.context(CommonIoSnafu)?;
self.parse_payload(&meta, length)
} else {
let metadata_start = self.blob_size - length - footer_size - footer_start;
let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize];
self.parse_payload(meta, length)
}
}
async fn read_payload(&mut self, payload_size: u64) -> Result<InvertedIndexMetas> {
let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE;
let start = end - payload_size;
let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?;
fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> {
let suffix_len = suffix.len();
ensure!(suffix_len >= 4, InvalidFooterPayloadSizeSnafu);
let mut bytes = [0; 4];
bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]);
let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?;
Ok(bytes)
}
fn parse_payload(&mut self, bytes: &[u8], payload_size: u64) -> Result<InvertedIndexMetas> {
let metas = InvertedIndexMetas::decode(bytes).context(DecodeProtoSnafu)?;
self.validate_metas(&metas, payload_size)?;
Ok(metas)
}
@@ -113,9 +148,12 @@ impl<R: RangeReader> InvertedIndeFooterReader<R> {
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use prost::Message;
use super::*;
use crate::inverted_index::error::Error;
fn create_test_payload(meta: InvertedIndexMeta) -> Vec<u8> {
let mut metas = InvertedIndexMetas {
@@ -141,14 +179,18 @@ mod tests {
let mut payload_buf = create_test_payload(meta);
let blob_size = payload_buf.len() as u64;
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
let payload_size = reader.read_payload_size().await.unwrap();
let metas = reader.read_payload(payload_size).await.unwrap();
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
if prefetch > 0 {
reader = reader.with_prefetch_size(prefetch);
}
assert_eq!(metas.metas.len(), 1);
let index_meta = &metas.metas.get("test").unwrap();
assert_eq!(index_meta.name, "test");
let metas = reader.metadata().await.unwrap();
assert_eq!(metas.metas.len(), 1);
let index_meta = &metas.metas.get("test").unwrap();
assert_eq!(index_meta.name, "test");
}
}
#[tokio::test]
@@ -157,14 +199,20 @@ mod tests {
name: "test".to_string(),
..Default::default()
};
let mut payload_buf = create_test_payload(meta);
payload_buf.push(0xff); // Add an extra byte to corrupt the footer
let blob_size = payload_buf.len() as u64;
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
let payload_size_result = reader.read_payload_size().await;
assert!(payload_size_result.is_err());
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
let blob_size = payload_buf.len() as u64;
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
if prefetch > 0 {
reader = reader.with_prefetch_size(prefetch);
}
let result = reader.metadata().await;
assert_matches!(result, Err(Error::UnexpectedFooterPayloadSize { .. }));
}
}
#[tokio::test]
@@ -178,10 +226,15 @@ mod tests {
let mut payload_buf = create_test_payload(meta);
let blob_size = payload_buf.len() as u64;
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
let payload_size = reader.read_payload_size().await.unwrap();
let payload_result = reader.read_payload(payload_size).await;
assert!(payload_result.is_err());
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
if prefetch > 0 {
reader = reader.with_prefetch_size(prefetch);
}
let result = reader.metadata().await;
assert_matches!(result, Err(Error::UnexpectedOffsetSize { .. }));
}
}
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
#![feature(iter_partition_in_place)]
#![feature(assert_matches)]
pub mod fulltext_index;
pub mod inverted_index;

View File

@@ -206,43 +206,41 @@ pub async fn metasrv_builder(
plugins: Plugins,
kv_backend: Option<KvBackendRef>,
) -> Result<MetasrvBuilder> {
let (kv_backend, election) = match (kv_backend, &opts.backend) {
let (mut kv_backend, election) = match (kv_backend, &opts.backend) {
(Some(kv_backend), _) => (kv_backend, None),
(None, BackendImpl::MemoryStore) => (Arc::new(MemoryKvBackend::new()) as _, None),
(None, BackendImpl::EtcdStore) => {
let etcd_client = create_etcd_client(opts).await?;
let kv_backend = {
let etcd_backend =
EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
if !opts.store_key_prefix.is_empty() {
Arc::new(ChrootKvBackend::new(
opts.store_key_prefix.clone().into_bytes(),
etcd_backend,
))
} else {
etcd_backend
}
};
(
kv_backend,
Some(
EtcdElection::with_etcd_client(
&opts.server_addr,
etcd_client.clone(),
opts.store_key_prefix.clone(),
)
.await?,
),
let kv_backend = EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
let election = EtcdElection::with_etcd_client(
&opts.server_addr,
etcd_client,
opts.store_key_prefix.clone(),
)
.await?;
(kv_backend, Some(election))
}
#[cfg(feature = "pg_kvbackend")]
(None, BackendImpl::PostgresStore) => {
let pg_client = create_postgres_client(opts).await?;
let kv_backend = PgStore::with_pg_client(pg_client).await.unwrap();
// TODO(jeremy, weny): implement election for postgres
(kv_backend, None)
}
};
if !opts.store_key_prefix.is_empty() {
info!(
"using chroot kv backend with prefix: {prefix}",
prefix = opts.store_key_prefix
);
kv_backend = Arc::new(ChrootKvBackend::new(
opts.store_key_prefix.clone().into_bytes(),
kv_backend,
))
}
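// With the prefix handling hoisted out of the match above, the chroot wrapper now applies to
// whichever backend was selected (memory, etcd, or postgres), not only to the etcd store.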
let in_memory = Arc::new(MemoryKvBackend::new()) as ResettableKvBackendRef;
let selector = match opts.selector {

View File

@@ -470,6 +470,10 @@ impl Metasrv {
});
}
} else {
warn!(
"Ensure only one instance of Metasrv is running, as there is no election service."
);
if let Err(e) = self.wal_options_allocator.start().await {
error!(e; "Failed to start wal options allocator");
}

View File

@@ -17,6 +17,7 @@ aquamarine.workspace = true
async-channel = "1.9"
async-stream.workspace = true
async-trait = "0.1"
bytemuck.workspace = true
bytes.workspace = true
common-base.workspace = true
common-config.workspace = true

View File

@@ -32,6 +32,7 @@ use moka::notification::RemovalCause;
use moka::sync::Cache;
use parquet::column::page::Page;
use parquet::file::metadata::ParquetMetaData;
use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef};
use store_api::storage::{ConcreteDataType, RegionId, TimeSeriesRowSelector};
use crate::cache::cache_size::parquet_meta_size;
@@ -68,6 +69,8 @@ pub struct CacheManager {
write_cache: Option<WriteCacheRef>,
/// Cache for inverted index.
index_cache: Option<InvertedIndexCacheRef>,
/// Puffin metadata cache.
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
/// Cache for time series selectors.
selector_result_cache: Option<SelectorResultCache>,
}
@@ -217,6 +220,10 @@ impl CacheManager {
pub(crate) fn index_cache(&self) -> Option<&InvertedIndexCacheRef> {
self.index_cache.as_ref()
}
pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
self.puffin_metadata_cache.as_ref()
}
}
/// Increases selector cache miss metrics.
@@ -237,6 +244,8 @@ pub struct CacheManagerBuilder {
page_cache_size: u64,
index_metadata_size: u64,
index_content_size: u64,
index_content_page_size: u64,
puffin_metadata_size: u64,
write_cache: Option<WriteCacheRef>,
selector_result_cache_size: u64,
}
@@ -278,6 +287,18 @@ impl CacheManagerBuilder {
self
}
/// Sets page size for index content.
pub fn index_content_page_size(mut self, bytes: u64) -> Self {
self.index_content_page_size = bytes;
self
}
/// Sets cache size for puffin metadata.
pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
self.puffin_metadata_size = bytes;
self
}
/// Sets selector result cache size.
pub fn selector_result_cache_size(mut self, bytes: u64) -> Self {
self.selector_result_cache_size = bytes;
@@ -338,8 +359,13 @@ impl CacheManagerBuilder {
})
.build()
});
let inverted_index_cache =
InvertedIndexCache::new(self.index_metadata_size, self.index_content_size);
let inverted_index_cache = InvertedIndexCache::new(
self.index_metadata_size,
self.index_content_size,
self.index_content_page_size,
);
let puffin_metadata_cache =
PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
Cache::builder()
.max_capacity(self.selector_result_cache_size)
@@ -361,6 +387,7 @@ impl CacheManagerBuilder {
page_cache,
write_cache: self.write_cache,
index_cache: Some(Arc::new(inverted_index_cache)),
puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)),
selector_result_cache,
}
}

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::Range;
use std::sync::Arc;
use api::v1::index::InvertedIndexMetas;
@@ -34,14 +35,16 @@ const INDEX_CONTENT_TYPE: &str = "index_content";
/// Inverted index blob reader with cache.
pub struct CachedInvertedIndexBlobReader<R> {
file_id: FileId,
file_size: u64,
inner: R,
cache: InvertedIndexCacheRef,
}
impl<R> CachedInvertedIndexBlobReader<R> {
pub fn new(file_id: FileId, inner: R, cache: InvertedIndexCacheRef) -> Self {
pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
Self {
file_id,
file_size,
inner,
cache,
}
@@ -59,43 +62,77 @@ where
offset: u64,
size: u32,
) -> index::inverted_index::error::Result<Vec<u8>> {
let range = offset as usize..(offset + size as u64) as usize;
if let Some(cached) = self.cache.get_index(IndexKey {
file_id: self.file_id,
}) {
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
Ok(cached[range].to_vec())
} else {
let mut all_data = Vec::with_capacity(1024 * 1024);
self.inner.read_all(&mut all_data).await?;
let result = all_data[range].to_vec();
self.cache.put_index(
IndexKey {
file_id: self.file_id,
},
Arc::new(all_data),
);
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
Ok(result)
let keys =
IndexDataPageKey::generate_page_keys(self.file_id, offset, size, self.cache.page_size);
// Size is 0, return empty data.
if keys.is_empty() {
return Ok(Vec::new());
}
// TODO: Can be replaced by a non-contiguous structure like opendal::Buffer.
let mut data = Vec::with_capacity(keys.len());
data.resize(keys.len(), Arc::new(Vec::new()));
let mut cache_miss_range = vec![];
let mut cache_miss_idx = vec![];
let last_index = keys.len() - 1;
// TODO: Avoid copying as much as possible.
for (i, index) in keys.clone().into_iter().enumerate() {
match self.cache.get_index(&index) {
Some(page) => {
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
data[i] = page;
}
None => {
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
let base_offset = index.page_id * self.cache.page_size;
let pruned_size = if i == last_index {
prune_size(&keys, self.file_size, self.cache.page_size)
} else {
self.cache.page_size
};
cache_miss_range.push(base_offset..base_offset + pruned_size);
cache_miss_idx.push(i);
}
}
}
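// Pages found in the cache were filled in place above; the missing pages are fetched below
// with a single `read_vec` call and then inserted into the cache.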
if !cache_miss_range.is_empty() {
let pages = self.inner.read_vec(&cache_miss_range).await?;
for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) {
let page = Arc::new(page);
let key = keys[i].clone();
data[i] = page.clone();
self.cache.put_index(key, page.clone());
}
}
let mut result = Vec::with_capacity(size as usize);
data.iter().enumerate().for_each(|(i, page)| {
let range = if i == 0 {
IndexDataPageKey::calculate_first_page_range(offset, size, self.cache.page_size)
} else if i == last_index {
IndexDataPageKey::calculate_last_page_range(offset, size, self.cache.page_size)
} else {
0..self.cache.page_size as usize
};
result.extend_from_slice(&page[range]);
});
Ok(result)
}
}
#[async_trait]
impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobReader<R> {
async fn read_all(
&mut self,
dest: &mut Vec<u8>,
) -> index::inverted_index::error::Result<usize> {
self.inner.read_all(dest).await
}
async fn seek_read(
async fn range_read(
&mut self,
offset: u64,
size: u32,
) -> index::inverted_index::error::Result<Vec<u8>> {
self.inner.seek_read(offset, size).await
self.inner.range_read(offset, size).await
}
async fn read_vec(
&mut self,
ranges: &[Range<u64>],
) -> index::inverted_index::error::Result<Vec<Vec<u8>>> {
self.inner.read_vec(ranges).await
}
async fn metadata(&mut self) -> index::inverted_index::error::Result<Arc<InvertedIndexMetas>> {
@@ -130,22 +167,81 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IndexKey {
pub struct IndexMetadataKey {
file_id: FileId,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IndexDataPageKey {
file_id: FileId,
page_id: u64,
}
impl IndexDataPageKey {
/// Converts an offset to a page ID based on the page size.
fn calculate_page_id(offset: u64, page_size: u64) -> u64 {
offset / page_size
}
/// Calculates the total number of pages that a given size spans, starting from a specific offset.
fn calculate_page_count(offset: u64, size: u32, page_size: u64) -> u32 {
let start_page = Self::calculate_page_id(offset, page_size);
let end_page = Self::calculate_page_id(offset + (size as u64) - 1, page_size);
(end_page + 1 - start_page) as u32
}
/// Computes the byte range in the first page based on the offset and size.
/// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the first page range is 1000..4096.
fn calculate_first_page_range(offset: u64, size: u32, page_size: u64) -> Range<usize> {
let start = (offset % page_size) as usize;
let end = if size > page_size as u32 - start as u32 {
page_size as usize
} else {
start + size as usize
};
start..end
}
/// Computes the byte range in the last page based on the offset and size.
/// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the last page range is 0..1904.
fn calculate_last_page_range(offset: u64, size: u32, page_size: u64) -> Range<usize> {
let offset = offset as usize;
let size = size as usize;
let page_size = page_size as usize;
if (offset + size) % page_size == 0 {
0..page_size
} else {
0..((offset + size) % page_size)
}
}
/// Generates a vector of `IndexDataPageKey`s for the pages that a given offset and size span.
fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec<Self> {
let start_page = Self::calculate_page_id(offset, page_size);
let total_pages = Self::calculate_page_count(offset, size, page_size);
(0..total_pages)
.map(|i| Self {
file_id,
page_id: start_page + i as u64,
})
.collect()
}
}
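// Illustrative sketch (assumes it sits in this module, mirroring the doc comments above):
// reading 5000 bytes at offset 1000 with a 4096-byte page spans pages 0 and 1, taking
// 1000..4096 from the first page and 0..1904 from the last one.
#[cfg(test)]
#[test]
fn page_key_ranges_example() {
    let keys = IndexDataPageKey::generate_page_keys(FileId::random(), 1000, 5000, 4096);
    assert_eq!(keys.len(), 2);
    assert_eq!(
        IndexDataPageKey::calculate_first_page_range(1000, 5000, 4096),
        1000..4096
    );
    assert_eq!(
        IndexDataPageKey::calculate_last_page_range(1000, 5000, 4096),
        0..1904
    );
}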
pub type InvertedIndexCacheRef = Arc<InvertedIndexCache>;
pub struct InvertedIndexCache {
/// Cache for inverted index metadata
index_metadata: moka::sync::Cache<IndexKey, Arc<InvertedIndexMetas>>,
index_metadata: moka::sync::Cache<IndexMetadataKey, Arc<InvertedIndexMetas>>,
/// Cache for inverted index content.
index: moka::sync::Cache<IndexKey, Arc<Vec<u8>>>,
index: moka::sync::Cache<IndexDataPageKey, Arc<Vec<u8>>>,
// Page size for index content.
page_size: u64,
}
impl InvertedIndexCache {
/// Creates `InvertedIndexCache` with provided `index_metadata_cap` and `index_content_cap`.
pub fn new(index_metadata_cap: u64, index_content_cap: u64) -> Self {
pub fn new(index_metadata_cap: u64, index_content_cap: u64, page_size: u64) -> Self {
common_telemetry::debug!("Building InvertedIndexCache with metadata size: {index_metadata_cap}, content size: {index_content_cap}");
let index_metadata = moka::sync::CacheBuilder::new(index_metadata_cap)
.name("inverted_index_metadata")
@@ -170,29 +266,29 @@ impl InvertedIndexCache {
Self {
index_metadata,
index: index_cache,
page_size,
}
}
}
impl InvertedIndexCache {
pub fn get_index_metadata(&self, file_id: FileId) -> Option<Arc<InvertedIndexMetas>> {
self.index_metadata.get(&IndexKey { file_id })
self.index_metadata.get(&IndexMetadataKey { file_id })
}
pub fn put_index_metadata(&self, file_id: FileId, metadata: Arc<InvertedIndexMetas>) {
let key = IndexKey { file_id };
let key = IndexMetadataKey { file_id };
CACHE_BYTES
.with_label_values(&[INDEX_METADATA_TYPE])
.add(index_metadata_weight(&key, &metadata).into());
self.index_metadata.insert(key, metadata)
}
// todo(hl): align index file content to pages with size like 4096 bytes.
pub fn get_index(&self, key: IndexKey) -> Option<Arc<Vec<u8>>> {
self.index.get(&key)
pub fn get_index(&self, key: &IndexDataPageKey) -> Option<Arc<Vec<u8>>> {
self.index.get(key)
}
pub fn put_index(&self, key: IndexKey, value: Arc<Vec<u8>>) {
pub fn put_index(&self, key: IndexDataPageKey, value: Arc<Vec<u8>>) {
CACHE_BYTES
.with_label_values(&[INDEX_CONTENT_TYPE])
.add(index_content_weight(&key, &value).into());
@@ -201,11 +297,234 @@ impl InvertedIndexCache {
}
/// Calculates weight for index metadata.
fn index_metadata_weight(k: &IndexKey, v: &Arc<InvertedIndexMetas>) -> u32 {
fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc<InvertedIndexMetas>) -> u32 {
(k.file_id.as_bytes().len() + v.encoded_len()) as u32
}
/// Calculates weight for index content.
fn index_content_weight(k: &IndexKey, v: &Arc<Vec<u8>>) -> u32 {
fn index_content_weight(k: &IndexDataPageKey, v: &Arc<Vec<u8>>) -> u32 {
(k.file_id.as_bytes().len() + v.len()) as u32
}
/// Prunes the size of the last page based on the indexes.
/// We have the following cases:
/// 1. The remaining file size is less than the page size: read to the end of the file.
/// 2. Otherwise, read the page size.
fn prune_size(indexes: &[IndexDataPageKey], file_size: u64, page_size: u64) -> u64 {
let last_page_start = indexes.last().map(|i| i.page_id * page_size).unwrap_or(0);
page_size.min(file_size - last_page_start)
}
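// Illustrative sketch (assumes the module-private helpers above are in scope): a read whose
// last page starts at 8192 in a 10_000-byte file is clipped to 10_000 - 8192 = 1808 bytes
// instead of a full 4096-byte page.
#[cfg(test)]
#[test]
fn prune_size_example() {
    let keys = IndexDataPageKey::generate_page_keys(FileId::random(), 8192, 1808, 4096);
    assert_eq!(prune_size(&keys, 10_000, 4096), 1808);
}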
#[cfg(test)]
mod test {
use std::num::NonZeroUsize;
use common_base::BitVec;
use futures::stream;
use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader};
use index::inverted_index::format::writer::{InvertedIndexBlobWriter, InvertedIndexWriter};
use index::inverted_index::Bytes;
use prometheus::register_int_counter_vec;
use rand::{Rng, RngCore};
use super::*;
use crate::sst::index::store::InstrumentedStore;
use crate::test_util::TestEnv;
// Fuzz test for index data page key
#[test]
fn fuzz_index_calculation() {
// randomly generate a large u8 array
let mut rng = rand::thread_rng();
let mut data = vec![0u8; 1024 * 1024];
rng.fill_bytes(&mut data);
let file_id = FileId::random();
for _ in 0..100 {
let offset = rng.gen_range(0..data.len() as u64);
let size = rng.gen_range(0..data.len() as u32 - offset as u32);
let page_size: usize = rng.gen_range(1..1024);
let indexes =
IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64);
let page_num = indexes.len();
let mut read = Vec::with_capacity(size as usize);
let last_index = indexes.len() - 1;
for (i, key) in indexes.into_iter().enumerate() {
let start = key.page_id as usize * page_size;
let page = if start + page_size < data.len() {
&data[start..start + page_size]
} else {
&data[start..]
};
let range = if i == 0 {
// first page range
IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64)
} else if i == last_index {
// last page range. when the first page is the last page, the range is not used.
IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64)
} else {
0..page_size
};
read.extend_from_slice(&page[range]);
}
let expected_range = offset as usize..(offset + size as u64) as usize;
if read != data.get(expected_range).unwrap() {
panic!(
"fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nfirst page range: {:?}, last page range: {:?}, page num: {}",
offset, size, page_size, read.len(), size as usize,
IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64),
IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64), page_num
);
}
}
}
fn unpack(fst_value: u64) -> [u32; 2] {
bytemuck::cast::<u64, [u32; 2]>(fst_value)
}
async fn create_inverted_index_blob() -> Vec<u8> {
let mut blob = Vec::new();
let mut writer = InvertedIndexBlobWriter::new(&mut blob);
writer
.add_index(
"tag0".to_string(),
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
Box::new(stream::iter(vec![
Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))),
Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))),
Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))),
])),
)
.await
.unwrap();
writer
.add_index(
"tag1".to_string(),
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
Box::new(stream::iter(vec![
Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))),
Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))),
Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))),
])),
)
.await
.unwrap();
writer
.finish(8, NonZeroUsize::new(1).unwrap())
.await
.unwrap();
blob
}
#[tokio::test]
async fn test_inverted_index_cache() {
let blob = create_inverted_index_blob().await;
// Init a test range reader in local fs.
let mut env = TestEnv::new();
let file_size = blob.len() as u64;
let store = env.init_object_store_manager();
let temp_path = "data";
store.write(temp_path, blob).await.unwrap();
let store = InstrumentedStore::new(store);
let metric =
register_int_counter_vec!("test_bytes", "a counter for test", &["test"]).unwrap();
let counter = metric.with_label_values(&["test"]);
let range_reader = store
.range_reader("data", &counter, &counter)
.await
.unwrap();
let reader = InvertedIndexBlobReader::new(range_reader);
let mut cached_reader = CachedInvertedIndexBlobReader::new(
FileId::random(),
file_size,
reader,
Arc::new(InvertedIndexCache::new(8192, 8192, 50)),
);
let metadata = cached_reader.metadata().await.unwrap();
assert_eq!(metadata.total_row_count, 8);
assert_eq!(metadata.segment_row_count, 1);
assert_eq!(metadata.metas.len(), 2);
// tag0
let tag0 = metadata.metas.get("tag0").unwrap();
let stats0 = tag0.stats.as_ref().unwrap();
assert_eq!(stats0.distinct_count, 3);
assert_eq!(stats0.null_count, 1);
assert_eq!(stats0.min_value, Bytes::from("a"));
assert_eq!(stats0.max_value, Bytes::from("c"));
let fst0 = cached_reader
.fst(
tag0.base_offset + tag0.relative_fst_offset as u64,
tag0.fst_size,
)
.await
.unwrap();
assert_eq!(fst0.len(), 3);
let [offset, size] = unpack(fst0.get(b"a").unwrap());
let bitmap = cached_reader
.bitmap(tag0.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
let [offset, size] = unpack(fst0.get(b"b").unwrap());
let bitmap = cached_reader
.bitmap(tag0.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
let [offset, size] = unpack(fst0.get(b"c").unwrap());
let bitmap = cached_reader
.bitmap(tag0.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
// tag1
let tag1 = metadata.metas.get("tag1").unwrap();
let stats1 = tag1.stats.as_ref().unwrap();
assert_eq!(stats1.distinct_count, 3);
assert_eq!(stats1.null_count, 1);
assert_eq!(stats1.min_value, Bytes::from("x"));
assert_eq!(stats1.max_value, Bytes::from("z"));
let fst1 = cached_reader
.fst(
tag1.base_offset + tag1.relative_fst_offset as u64,
tag1.fst_size,
)
.await
.unwrap();
assert_eq!(fst1.len(), 3);
let [offset, size] = unpack(fst1.get(b"x").unwrap());
let bitmap = cached_reader
.bitmap(tag1.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
let [offset, size] = unpack(fst1.get(b"y").unwrap());
let bitmap = cached_reader
.bitmap(tag1.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
let [offset, size] = unpack(fst1.get(b"z").unwrap());
let bitmap = cached_reader
.bitmap(tag1.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
// fuzz test
let mut rng = rand::thread_rng();
for _ in 0..100 {
let offset = rng.gen_range(0..file_size);
let size = rng.gen_range(0..file_size as u32 - offset as u32);
let expected = cached_reader.range_read(offset, size).await.unwrap();
let read = cached_reader.get_or_load(offset, size).await.unwrap();
assert_eq!(read, expected);
}
}
}

View File

@@ -501,7 +501,7 @@ mod tests {
// Read metadata from write cache
let builder = ParquetReaderBuilder::new(data_home, handle.clone(), mock_store.clone())
.cache(cache_manager.clone());
.cache(Some(cache_manager.clone()));
let reader = builder.build().await.unwrap();
// Check parquet metadata

View File

@@ -570,7 +570,6 @@ pub struct SerializedCompactionOutput {
struct CompactionSstReaderBuilder<'a> {
metadata: RegionMetadataRef,
sst_layer: AccessLayerRef,
cache: CacheManagerRef,
inputs: &'a [FileHandle],
append_mode: bool,
filter_deleted: bool,
@@ -584,7 +583,7 @@ impl<'a> CompactionSstReaderBuilder<'a> {
let mut scan_input = ScanInput::new(self.sst_layer, ProjectionMapper::all(&self.metadata)?)
.with_files(self.inputs.to_vec())
.with_append_mode(self.append_mode)
.with_cache(self.cache)
.with_cache(None)
.with_filter_deleted(self.filter_deleted)
// We ignore file not found error during compaction.
.with_ignore_file_not_found(true)

View File

@@ -296,7 +296,6 @@ impl Compactor for DefaultCompactor {
let reader = CompactionSstReaderBuilder {
metadata: region_metadata.clone(),
sst_layer: sst_layer.clone(),
cache: cache_manager.clone(),
inputs: &output.inputs,
append_mode,
filter_deleted: output.filter_deleted,

View File

@@ -304,6 +304,9 @@ pub struct IndexConfig {
/// Write buffer size for creating the index.
pub write_buffer_size: ReadableSize,
/// Cache size for metadata of puffin files. Set it to 0 to disable the cache.
pub metadata_cache_size: ReadableSize,
}
impl Default for IndexConfig {
@@ -312,6 +315,7 @@ impl Default for IndexConfig {
aux_path: String::new(),
staging_size: ReadableSize::gb(2),
write_buffer_size: ReadableSize::mb(8),
metadata_cache_size: ReadableSize::mb(64),
}
}
}
@@ -412,6 +416,8 @@ pub struct InvertedIndexConfig {
pub metadata_cache_size: ReadableSize,
/// Cache size for inverted index content. Set it to 0 to disable the cache.
pub content_cache_size: ReadableSize,
/// Page size for inverted index content.
pub content_cache_page_size: ReadableSize,
}
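// The two cache settings work together: `content_cache_page_size` is the granularity at which
// inverted index content is read and cached (see `CachedInvertedIndexBlobReader` above), while
// `content_cache_size` caps the total bytes kept in that cache.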
impl InvertedIndexConfig {
@@ -437,6 +443,7 @@ impl Default for InvertedIndexConfig {
intermediate_path: String::new(),
metadata_cache_size: ReadableSize::mb(64),
content_cache_size: ReadableSize::mb(128),
content_cache_page_size: ReadableSize::mb(8),
};
if let Some(sys_memory) = common_config::utils::get_sys_total_memory() {

View File

@@ -424,12 +424,16 @@ impl EngineInner {
// Get cache.
let cache_manager = self.workers.cache_manager();
let scan_region =
ScanRegion::new(version, region.access_layer.clone(), request, cache_manager)
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
.with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
.with_start_time(query_start);
let scan_region = ScanRegion::new(
version,
region.access_layer.clone(),
request,
Some(cache_manager),
)
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
.with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
.with_start_time(query_start);
Ok(scan_region)
}

View File

@@ -893,6 +893,14 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to read file metadata"))]
Metadata {
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -965,7 +973,8 @@ impl ErrorExt for Error {
| CreateDir { .. }
| ReadDataPart { .. }
| CorruptedEntry { .. }
| BuildEntry { .. } => StatusCode::Internal,
| BuildEntry { .. }
| Metadata { .. } => StatusCode::Internal,
OpenRegion { source, .. } => source.status_code(),

View File

@@ -27,7 +27,7 @@ use crate::cache::{
use crate::error::Result;
use crate::read::{Batch, BatchReader, BoxedBatchReader};
use crate::sst::file::FileId;
use crate::sst::parquet::reader::RowGroupReader;
use crate::sst::parquet::reader::{ReaderMetrics, RowGroupReader};
/// Reader to keep the last row for each time series.
/// It assumes that batches from the input reader are
@@ -86,7 +86,7 @@ impl RowGroupLastRowCachedReader {
pub(crate) fn new(
file_id: FileId,
row_group_idx: usize,
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
row_group_reader: RowGroupReader,
) -> Self {
let key = SelectorResultKey {
@@ -95,6 +95,9 @@ impl RowGroupLastRowCachedReader {
selector: TimeSeriesRowSelector::LastRow,
};
let Some(cache_manager) = cache_manager else {
return Self::new_miss(key, row_group_reader, None);
};
if let Some(value) = cache_manager.get_selector_result(&key) {
let schema_matches = value.projection
== row_group_reader
@@ -105,10 +108,18 @@ impl RowGroupLastRowCachedReader {
// Schema matches, use cache batches.
Self::new_hit(value)
} else {
Self::new_miss(key, row_group_reader, cache_manager)
Self::new_miss(key, row_group_reader, Some(cache_manager))
}
} else {
Self::new_miss(key, row_group_reader, cache_manager)
Self::new_miss(key, row_group_reader, Some(cache_manager))
}
}
/// Gets the underlying reader metrics if uncached.
pub(crate) fn metrics(&self) -> Option<&ReaderMetrics> {
match self {
RowGroupLastRowCachedReader::Hit(_) => None,
RowGroupLastRowCachedReader::Miss(reader) => Some(reader.metrics()),
}
}
@@ -122,7 +133,7 @@ impl RowGroupLastRowCachedReader {
fn new_miss(
key: SelectorResultKey,
row_group_reader: RowGroupReader,
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
) -> Self {
selector_result_cache_miss();
Self::Miss(RowGroupLastRowReader::new(
@@ -167,13 +178,17 @@ pub(crate) struct RowGroupLastRowReader {
reader: RowGroupReader,
selector: LastRowSelector,
yielded_batches: Vec<Batch>,
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
/// Index buffer to take a new batch from the last row.
take_index: UInt32Vector,
}
impl RowGroupLastRowReader {
fn new(key: SelectorResultKey, reader: RowGroupReader, cache_manager: CacheManagerRef) -> Self {
fn new(
key: SelectorResultKey,
reader: RowGroupReader,
cache_manager: Option<CacheManagerRef>,
) -> Self {
Self {
key,
reader,
@@ -213,6 +228,9 @@ impl RowGroupLastRowReader {
// we always expect that row groups yield batches.
return;
}
let Some(cache) = &self.cache_manager else {
return;
};
let value = Arc::new(SelectorResultValue {
result: std::mem::take(&mut self.yielded_batches),
projection: self
@@ -222,7 +240,11 @@ impl RowGroupLastRowReader {
.projection_indices()
.to_vec(),
});
self.cache_manager.put_selector_result(self.key, value);
cache.put_selector_result(self.key, value);
}
fn metrics(&self) -> &ReaderMetrics {
self.reader.metrics()
}
}

View File

@@ -171,7 +171,7 @@ impl ProjectionMapper {
pub(crate) fn convert(
&self,
batch: &Batch,
cache_manager: &CacheManager,
cache_manager: Option<&CacheManager>,
) -> common_recordbatch::error::Result<RecordBatch> {
debug_assert_eq!(self.batch_fields.len(), batch.fields().len());
debug_assert!(self
@@ -204,12 +204,15 @@ impl ProjectionMapper {
match index {
BatchIndex::Tag(idx) => {
let value = &pk_values[*idx];
let vector = repeated_vector_with_cache(
&column_schema.data_type,
value,
num_rows,
cache_manager,
)?;
let vector = match cache_manager {
Some(cache) => repeated_vector_with_cache(
&column_schema.data_type,
value,
num_rows,
cache,
)?,
None => new_repeated_vector(&column_schema.data_type, value, num_rows)?,
};
columns.push(vector);
}
BatchIndex::Timestamp => {
@@ -357,7 +360,7 @@ mod tests {
// With vector cache.
let cache = CacheManager::builder().vector_cache_size(1024).build();
let batch = new_batch(0, &[1, 2], &[(3, 3), (4, 4)], 3);
let record_batch = mapper.convert(&batch, &cache).unwrap();
let record_batch = mapper.convert(&batch, Some(&cache)).unwrap();
let expect = "\
+---------------------+----+----+----+----+
| ts | k0 | k1 | v0 | v1 |
@@ -377,7 +380,7 @@ mod tests {
assert!(cache
.get_repeated_vector(&ConcreteDataType::int64_datatype(), &Value::Int64(3))
.is_none());
let record_batch = mapper.convert(&batch, &cache).unwrap();
let record_batch = mapper.convert(&batch, Some(&cache)).unwrap();
assert_eq!(expect, print_record_batch(record_batch));
}
@@ -398,8 +401,7 @@ mod tests {
);
let batch = new_batch(0, &[1, 2], &[(4, 4)], 3);
let cache = CacheManager::builder().vector_cache_size(1024).build();
let record_batch = mapper.convert(&batch, &cache).unwrap();
let record_batch = mapper.convert(&batch, None).unwrap();
let expect = "\
+----+----+
| v1 | k0 |

View File

@@ -72,11 +72,21 @@ impl PruneReader {
self.source = source;
}
pub(crate) fn metrics(&mut self) -> &ReaderMetrics {
/// Merges this reader's metrics with the inner reader's metrics and returns the result.
pub(crate) fn metrics(&self) -> ReaderMetrics {
let mut metrics = self.metrics.clone();
match &self.source {
Source::RowGroup(r) => r.metrics(),
Source::LastRow(_) => &self.metrics,
Source::RowGroup(r) => {
metrics.merge_from(r.metrics());
}
Source::LastRow(r) => {
if let Some(inner_metrics) = r.metrics() {
metrics.merge_from(inner_metrics);
}
}
}
metrics
}
pub(crate) async fn next_batch(&mut self) -> Result<Option<Batch>> {

View File

@@ -112,7 +112,7 @@ impl RangeMeta {
Self::push_unordered_file_ranges(
input.memtables.len(),
&input.files,
&input.cache_manager,
input.cache_manager.as_deref(),
&mut ranges,
);
@@ -203,15 +203,16 @@ impl RangeMeta {
fn push_unordered_file_ranges(
num_memtables: usize,
files: &[FileHandle],
cache: &CacheManager,
cache: Option<&CacheManager>,
ranges: &mut Vec<RangeMeta>,
) {
// For append mode, we can parallelize reading row groups.
for (i, file) in files.iter().enumerate() {
let file_index = num_memtables + i;
// Get parquet meta from the cache.
let parquet_meta =
cache.get_parquet_meta_data_from_mem_cache(file.region_id(), file.file_id());
let parquet_meta = cache.and_then(|c| {
c.get_parquet_meta_data_from_mem_cache(file.region_id(), file.file_id())
});
if let Some(parquet_meta) = parquet_meta {
// Scans each row group.
for row_group_index in 0..file.meta_ref().num_row_groups {

View File

@@ -167,7 +167,7 @@ pub(crate) struct ScanRegion {
/// Scan request.
request: ScanRequest,
/// Cache.
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
/// Capacity of the channel to send data from parallel scan tasks to the main task.
parallel_scan_channel_size: usize,
/// Whether to ignore inverted index.
@@ -184,7 +184,7 @@ impl ScanRegion {
version: VersionRef,
access_layer: AccessLayerRef,
request: ScanRequest,
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
) -> ScanRegion {
ScanRegion {
version,
@@ -401,18 +401,27 @@ impl ScanRegion {
}
let file_cache = || -> Option<FileCacheRef> {
let write_cache = self.cache_manager.write_cache()?;
let cache_manager = self.cache_manager.as_ref()?;
let write_cache = cache_manager.write_cache()?;
let file_cache = write_cache.file_cache();
Some(file_cache)
}();
let index_cache = self.cache_manager.index_cache().cloned();
let index_cache = self
.cache_manager
.as_ref()
.and_then(|c| c.index_cache())
.cloned();
let puffin_metadata_cache = self
.cache_manager
.as_ref()
.and_then(|c| c.puffin_metadata_cache())
.cloned();
InvertedIndexApplierBuilder::new(
self.access_layer.region_dir().to_string(),
self.access_layer.object_store().clone(),
file_cache,
index_cache,
self.version.metadata.as_ref(),
self.version.metadata.inverted_indexed_column_ids(
self.version
@@ -424,6 +433,9 @@ impl ScanRegion {
),
self.access_layer.puffin_manager_factory().clone(),
)
.with_file_cache(file_cache)
.with_index_cache(index_cache)
.with_puffin_metadata_cache(puffin_metadata_cache)
.build(&self.request.filters)
.inspect_err(|err| warn!(err; "Failed to build inverted index applier"))
.ok()
@@ -477,7 +489,7 @@ pub(crate) struct ScanInput {
/// Handles to SST files to scan.
pub(crate) files: Vec<FileHandle>,
/// Cache.
pub(crate) cache_manager: CacheManagerRef,
pub(crate) cache_manager: Option<CacheManagerRef>,
/// Ignores file not found error.
ignore_file_not_found: bool,
/// Capacity of the channel to send data from parallel scan tasks to the main task.
@@ -508,7 +520,7 @@ impl ScanInput {
predicate: None,
memtables: Vec::new(),
files: Vec::new(),
cache_manager: CacheManagerRef::default(),
cache_manager: None,
ignore_file_not_found: false,
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
inverted_index_applier: None,
@@ -551,7 +563,7 @@ impl ScanInput {
/// Sets cache for this query.
#[must_use]
pub(crate) fn with_cache(mut self, cache: CacheManagerRef) -> Self {
pub(crate) fn with_cache(mut self, cache: Option<CacheManagerRef>) -> Self {
self.cache_manager = cache;
self
}

View File

@@ -181,8 +181,9 @@ pub(crate) fn scan_file_ranges(
}
yield batch;
}
if let Source::PruneReader(mut reader) = source {
reader_metrics.merge_from(reader.metrics());
if let Source::PruneReader(reader) = source {
let prune_metrics = reader.metrics();
reader_metrics.merge_from(&prune_metrics);
}
}

View File

@@ -257,7 +257,7 @@ impl SeqScan {
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let cache = &stream_ctx.input.cache_manager;
let cache = stream_ctx.input.cache_manager.as_deref();
let mut metrics = ScannerMetrics::default();
let mut fetch_start = Instant::now();
#[cfg(debug_assertions)]

View File

@@ -149,7 +149,7 @@ impl UnorderedScan {
let stream = try_stream! {
part_metrics.on_first_poll();
let cache = &stream_ctx.input.cache_manager;
let cache = stream_ctx.input.cache_manager.as_deref();
let range_builder_list = Arc::new(RangeBuilderList::new(
stream_ctx.input.num_memtables(),
stream_ctx.input.num_files(),

View File

@@ -146,12 +146,33 @@ pub enum IndexType {
}
impl FileMeta {
/// Returns true if the file has an inverted index
pub fn inverted_index_available(&self) -> bool {
self.available_indexes.contains(&IndexType::InvertedIndex)
}
/// Returns true if the file has a fulltext index
pub fn fulltext_index_available(&self) -> bool {
self.available_indexes.contains(&IndexType::FulltextIndex)
}
/// Returns the size of the inverted index file
pub fn inverted_index_size(&self) -> Option<u64> {
if self.available_indexes.len() == 1 && self.inverted_index_available() {
Some(self.index_file_size)
} else {
None
}
}
/// Returns the size of the fulltext index file
pub fn fulltext_index_size(&self) -> Option<u64> {
if self.available_indexes.len() == 1 && self.fulltext_index_available() {
Some(self.index_file_size)
} else {
None
}
}
}
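// A hedged reading of the `len() == 1` guard above: `index_file_size` appears to cover the
// whole index file shared by all index types, so a per-index size is only reported when a
// single index type is present. Downstream it is used as an optional `file_size_hint`, and
// `None` simply means "no hint".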
/// Handle to a SST file.

View File

@@ -18,7 +18,7 @@ pub(crate) mod intermediate;
pub(crate) mod inverted_index;
pub(crate) mod puffin_manager;
mod statistics;
mod store;
pub(crate) mod store;
use std::num::NonZeroUsize;

View File

@@ -16,19 +16,23 @@ pub mod builder;
use std::sync::Arc;
use common_base::range_read::RangeReader;
use common_telemetry::warn;
use index::inverted_index::format::reader::InvertedIndexBlobReader;
use index::inverted_index::search::index_apply::{
ApplyOutput, IndexApplier, IndexNotFoundStrategy, SearchContext,
};
use object_store::ObjectStore;
use puffin::puffin_manager::cache::PuffinMetadataCacheRef;
use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader};
use snafu::ResultExt;
use store_api::storage::RegionId;
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
use crate::cache::index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef};
use crate::error::{ApplyInvertedIndexSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result};
use crate::error::{
ApplyInvertedIndexSnafu, MetadataSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result,
};
use crate::metrics::{INDEX_APPLY_ELAPSED, INDEX_APPLY_MEMORY_USAGE};
use crate::sst::file::FileId;
use crate::sst::index::inverted_index::INDEX_BLOB_TYPE;
@@ -60,6 +64,9 @@ pub(crate) struct InvertedIndexApplier {
/// In-memory cache for inverted index.
inverted_index_cache: Option<InvertedIndexCacheRef>,
/// Puffin metadata cache.
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
}
pub(crate) type InvertedIndexApplierRef = Arc<InvertedIndexApplier>;
@@ -70,8 +77,6 @@ impl InvertedIndexApplier {
region_dir: String,
region_id: RegionId,
store: ObjectStore,
file_cache: Option<FileCacheRef>,
index_cache: Option<InvertedIndexCacheRef>,
index_applier: Box<dyn IndexApplier>,
puffin_manager_factory: PuffinManagerFactory,
) -> Self {
@@ -81,15 +86,37 @@ impl InvertedIndexApplier {
region_dir,
region_id,
store,
file_cache,
file_cache: None,
index_applier,
puffin_manager_factory,
inverted_index_cache: index_cache,
inverted_index_cache: None,
puffin_metadata_cache: None,
}
}
/// Sets the file cache.
pub fn with_file_cache(mut self, file_cache: Option<FileCacheRef>) -> Self {
self.file_cache = file_cache;
self
}
/// Sets the index cache.
pub fn with_index_cache(mut self, index_cache: Option<InvertedIndexCacheRef>) -> Self {
self.inverted_index_cache = index_cache;
self
}
/// Sets the puffin metadata cache.
pub fn with_puffin_metadata_cache(
mut self,
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
) -> Self {
self.puffin_metadata_cache = puffin_metadata_cache;
self
}
/// Applies predicates to the provided SST file id and returns the relevant row group ids
pub async fn apply(&self, file_id: FileId) -> Result<ApplyOutput> {
pub async fn apply(&self, file_id: FileId, file_size_hint: Option<u64>) -> Result<ApplyOutput> {
let _timer = INDEX_APPLY_ELAPSED
.with_label_values(&[TYPE_INVERTED_INDEX])
.start_timer();
@@ -99,19 +126,25 @@ impl InvertedIndexApplier {
index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty,
};
let blob = match self.cached_blob_reader(file_id).await {
let mut blob = match self.cached_blob_reader(file_id).await {
Ok(Some(puffin_reader)) => puffin_reader,
other => {
if let Err(err) = other {
warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.")
}
self.remote_blob_reader(file_id).await?
self.remote_blob_reader(file_id, file_size_hint).await?
}
};
if let Some(index_cache) = &self.inverted_index_cache {
let file_size = if let Some(file_size) = file_size_hint {
file_size
} else {
blob.metadata().await.context(MetadataSnafu)?.content_length
};
let mut index_reader = CachedInvertedIndexBlobReader::new(
file_id,
file_size,
InvertedIndexBlobReader::new(blob),
index_cache.clone(),
);
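// The hint from `FileMeta::inverted_index_size` (when available) saves a `blob.metadata()`
// round trip here; without it the content length is fetched from the blob itself.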
@@ -156,13 +189,22 @@ impl InvertedIndexApplier {
}
/// Creates a blob reader from the remote index file.
async fn remote_blob_reader(&self, file_id: FileId) -> Result<BlobReader> {
let puffin_manager = self.puffin_manager_factory.build(self.store.clone());
async fn remote_blob_reader(
&self,
file_id: FileId,
file_size_hint: Option<u64>,
) -> Result<BlobReader> {
let puffin_manager = self
.puffin_manager_factory
.build(self.store.clone())
.with_puffin_metadata_cache(self.puffin_metadata_cache.clone());
let file_path = location::index_file_path(&self.region_dir, file_id);
puffin_manager
.reader(&file_path)
.await
.context(PuffinBuildReaderSnafu)?
.with_file_size_hint(file_size_hint)
.blob(INDEX_BLOB_TYPE)
.await
.context(PuffinReadBlobSnafu)?
@@ -219,12 +261,10 @@ mod tests {
region_dir.clone(),
RegionId::new(0, 0),
object_store,
None,
None,
Box::new(mock_index_applier),
puffin_manager_factory,
);
let output = sst_index_applier.apply(file_id).await.unwrap();
let output = sst_index_applier.apply(file_id, None).await.unwrap();
assert_eq!(
output,
ApplyOutput {
@@ -261,12 +301,10 @@ mod tests {
region_dir.clone(),
RegionId::new(0, 0),
object_store,
None,
None,
Box::new(mock_index_applier),
puffin_manager_factory,
);
let res = sst_index_applier.apply(file_id).await;
let res = sst_index_applier.apply(file_id, None).await;
assert!(format!("{:?}", res.unwrap_err()).contains("Blob not found"));
}
}

View File

@@ -28,6 +28,7 @@ use datatypes::value::Value;
use index::inverted_index::search::index_apply::PredicatesIndexApplier;
use index::inverted_index::search::predicate::Predicate;
use object_store::ObjectStore;
use puffin::puffin_manager::cache::PuffinMetadataCacheRef;
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadata;
use store_api::storage::ColumnId;
@@ -65,6 +66,9 @@ pub(crate) struct InvertedIndexApplierBuilder<'a> {
/// Cache for inverted index.
index_cache: Option<InvertedIndexCacheRef>,
/// Cache for puffin metadata.
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
}
impl<'a> InvertedIndexApplierBuilder<'a> {
@@ -72,8 +76,6 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
pub fn new(
region_dir: String,
object_store: ObjectStore,
file_cache: Option<FileCacheRef>,
index_cache: Option<InvertedIndexCacheRef>,
metadata: &'a RegionMetadata,
indexed_column_ids: HashSet<ColumnId>,
puffin_manager_factory: PuffinManagerFactory,
@@ -81,15 +83,37 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
Self {
region_dir,
object_store,
file_cache,
metadata,
indexed_column_ids,
output: HashMap::default(),
index_cache,
puffin_manager_factory,
file_cache: None,
index_cache: None,
puffin_metadata_cache: None,
}
}
/// Sets the file cache.
pub fn with_file_cache(mut self, file_cache: Option<FileCacheRef>) -> Self {
self.file_cache = file_cache;
self
}
/// Sets the puffin metadata cache.
pub fn with_puffin_metadata_cache(
mut self,
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
) -> Self {
self.puffin_metadata_cache = puffin_metadata_cache;
self
}
/// Sets the index cache.
pub fn with_index_cache(mut self, index_cache: Option<InvertedIndexCacheRef>) -> Self {
self.index_cache = index_cache;
self
}
/// Consumes the builder to construct an [`InvertedIndexApplier`], optionally returned based on
/// the expressions provided. If no predicates match, returns `None`.
pub fn build(mut self, exprs: &[Expr]) -> Result<Option<InvertedIndexApplier>> {
@@ -108,15 +132,18 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
.collect();
let applier = PredicatesIndexApplier::try_from(predicates);
Ok(Some(InvertedIndexApplier::new(
self.region_dir,
self.metadata.region_id,
self.object_store,
self.file_cache,
self.index_cache,
Box::new(applier.context(BuildIndexApplierSnafu)?),
self.puffin_manager_factory,
)))
Ok(Some(
InvertedIndexApplier::new(
self.region_dir,
self.metadata.region_id,
self.object_store,
Box::new(applier.context(BuildIndexApplierSnafu)?),
self.puffin_manager_factory,
)
.with_file_cache(self.file_cache)
.with_puffin_metadata_cache(self.puffin_metadata_cache)
.with_index_cache(self.index_cache),
))
}
/// Recursively traverses expressions to collect predicates.
@@ -322,8 +349,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,

View File

@@ -75,8 +75,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -118,8 +116,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -144,8 +140,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -187,8 +181,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -214,8 +206,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,

View File

@@ -231,8 +231,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -260,8 +258,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -280,8 +276,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -315,8 +309,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,

View File

@@ -137,8 +137,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -175,8 +173,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -204,8 +200,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -224,8 +218,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -244,8 +236,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -303,8 +293,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -341,8 +329,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,

View File

@@ -68,8 +68,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -101,8 +99,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -126,8 +122,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -159,8 +153,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -186,8 +178,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,

View File

@@ -62,8 +62,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -91,8 +89,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -120,8 +116,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,
@@ -142,8 +136,6 @@ mod tests {
let mut builder = InvertedIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
None,
None,
&metadata,
HashSet::from_iter([1, 2, 3]),
facotry,

View File

@@ -310,12 +310,14 @@ mod tests {
use futures::future::BoxFuture;
use object_store::services::Memory;
use object_store::ObjectStore;
use puffin::puffin_manager::cache::PuffinMetadataCache;
use puffin::puffin_manager::PuffinManager;
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
use store_api::storage::RegionId;
use super::*;
use crate::cache::index::InvertedIndexCache;
use crate::metrics::CACHE_BYTES;
use crate::read::BatchColumn;
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
@@ -446,22 +448,23 @@ mod tests {
move |expr| {
let _d = &d;
let cache = Arc::new(InvertedIndexCache::new(10, 10));
let cache = Arc::new(InvertedIndexCache::new(10, 10, 100));
let puffin_metadata_cache = Arc::new(PuffinMetadataCache::new(10, &CACHE_BYTES));
let applier = InvertedIndexApplierBuilder::new(
region_dir.clone(),
object_store.clone(),
None,
Some(cache),
&region_metadata,
indexed_column_ids.clone(),
factory.clone(),
)
.with_index_cache(Some(cache))
.with_puffin_metadata_cache(Some(puffin_metadata_cache))
.build(&[expr])
.unwrap()
.unwrap();
Box::pin(async move {
applier
.apply(sst_file_id)
.apply(sst_file_id, None)
.await
.unwrap()
.matched_segment_ids

View File

@@ -68,6 +68,7 @@ impl InstrumentedStore {
path: path.to_string(),
read_byte_count,
read_count,
file_size_hint: None,
})
}
@@ -262,15 +263,27 @@ pub(crate) struct InstrumentedRangeReader<'a> {
path: String,
read_byte_count: &'a IntCounter,
read_count: &'a IntCounter,
file_size_hint: Option<u64>,
}
#[async_trait]
impl RangeReader for InstrumentedRangeReader<'_> {
fn with_file_size_hint(&mut self, file_size_hint: u64) {
self.file_size_hint = Some(file_size_hint);
}
async fn metadata(&mut self) -> io::Result<Metadata> {
let stat = self.store.stat(&self.path).await?;
Ok(Metadata {
content_length: stat.content_length(),
})
match self.file_size_hint {
Some(file_size_hint) => Ok(Metadata {
content_length: file_size_hint,
}),
None => {
let stat = self.store.stat(&self.path).await?;
Ok(Metadata {
content_length: stat.content_length(),
})
}
}
}
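// Illustrative usage (hypothetical caller; `known_file_size` is assumed): supplying the size
// up front lets `metadata()` answer from the hint instead of issuing an extra `stat` call.
//
// let mut reader = store.range_reader(path, &read_bytes, &read_count).await?;
// reader.with_file_size_hint(known_file_size);
// let len = reader.metadata().await?.content_length; // == known_file_size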
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {

View File

@@ -195,11 +195,11 @@ mod tests {
.unwrap();
// Enable page cache.
let cache = Arc::new(
let cache = Some(Arc::new(
CacheManager::builder()
.page_cache_size(64 * 1024 * 1024)
.build(),
);
));
let builder = ParquetReaderBuilder::new(FILE_DIR.to_string(), handle.clone(), object_store)
.cache(cache.clone());
for _ in 0..3 {
@@ -219,15 +219,15 @@ mod tests {
// Doesn't have compressed page cached.
let page_key = PageKey::new_compressed(metadata.region_id, handle.file_id(), 0, 0);
assert!(cache.get_pages(&page_key).is_none());
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none());
// Cache 4 row groups.
for i in 0..4 {
let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), i, 0);
assert!(cache.get_pages(&page_key).is_some());
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_some());
}
let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), 5, 0);
assert!(cache.get_pages(&page_key).is_none());
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none());
}
#[tokio::test]

View File

@@ -82,7 +82,7 @@ pub struct ParquetReaderBuilder {
/// can contain columns not in the parquet file.
projection: Option<Vec<ColumnId>>,
/// Manager that caches SST data.
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
/// Index appliers.
inverted_index_applier: Option<InvertedIndexApplierRef>,
fulltext_index_applier: Option<FulltextIndexApplierRef>,
@@ -106,7 +106,7 @@ impl ParquetReaderBuilder {
predicate: None,
time_range: None,
projection: None,
cache_manager: CacheManagerRef::default(),
cache_manager: None,
inverted_index_applier: None,
fulltext_index_applier: None,
expected_metadata: None,
@@ -138,7 +138,7 @@ impl ParquetReaderBuilder {
/// Attaches the cache to the builder.
#[must_use]
pub fn cache(mut self, cache: CacheManagerRef) -> ParquetReaderBuilder {
pub fn cache(mut self, cache: Option<CacheManagerRef>) -> ParquetReaderBuilder {
self.cache_manager = cache;
self
}
@@ -313,12 +313,10 @@ impl ParquetReaderBuilder {
let region_id = self.file_handle.region_id();
let file_id = self.file_handle.file_id();
// Tries to get from global cache.
if let Some(metadata) = self
.cache_manager
.get_parquet_meta_data(region_id, file_id)
.await
{
return Ok(metadata);
if let Some(manager) = &self.cache_manager {
if let Some(metadata) = manager.get_parquet_meta_data(region_id, file_id).await {
return Ok(metadata);
}
}
// Cache miss, load metadata directly.
@@ -326,11 +324,13 @@ impl ParquetReaderBuilder {
let metadata = metadata_loader.load().await?;
let metadata = Arc::new(metadata);
// Cache the metadata.
self.cache_manager.put_parquet_meta_data(
self.file_handle.region_id(),
self.file_handle.file_id(),
metadata.clone(),
);
if let Some(cache) = &self.cache_manager {
cache.put_parquet_meta_data(
self.file_handle.region_id(),
self.file_handle.file_id(),
metadata.clone(),
);
}
Ok(metadata)
}
@@ -475,8 +475,11 @@ impl ParquetReaderBuilder {
if !self.file_handle.meta_ref().inverted_index_available() {
return false;
}
let apply_output = match index_applier.apply(self.file_handle.file_id()).await {
let file_size_hint = self.file_handle.meta_ref().inverted_index_size();
let apply_output = match index_applier
.apply(self.file_handle.file_id(), file_size_hint)
.await
{
Ok(output) => output,
Err(err) => {
if cfg!(any(test, feature = "test")) {
@@ -846,7 +849,7 @@ pub(crate) struct RowGroupReaderBuilder {
/// Field levels to read.
field_levels: FieldLevels,
/// Cache.
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
}
impl RowGroupReaderBuilder {
@@ -864,7 +867,7 @@ impl RowGroupReaderBuilder {
&self.parquet_meta
}
pub(crate) fn cache_manager(&self) -> &CacheManagerRef {
pub(crate) fn cache_manager(&self) -> &Option<CacheManagerRef> {
&self.cache_manager
}
@@ -915,10 +918,10 @@ enum ReaderState {
impl ReaderState {
/// Returns the metrics of the reader.
fn metrics(&mut self) -> &ReaderMetrics {
fn metrics(&self) -> ReaderMetrics {
match self {
ReaderState::Readable(reader) => reader.metrics(),
ReaderState::Exhausted(m) => m,
ReaderState::Exhausted(m) => m.clone(),
}
}
}

View File

@@ -48,7 +48,7 @@ pub struct InMemoryRowGroup<'a> {
region_id: RegionId,
file_id: FileId,
row_group_idx: usize,
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
/// Row group level cached pages for each column.
///
/// These pages are uncompressed pages of a row group.
@@ -69,7 +69,7 @@ impl<'a> InMemoryRowGroup<'a> {
file_id: FileId,
parquet_meta: &'a ParquetMetaData,
row_group_idx: usize,
cache_manager: CacheManagerRef,
cache_manager: Option<CacheManagerRef>,
file_path: &'a str,
object_store: ObjectStore,
) -> Self {
@@ -208,18 +208,19 @@ impl<'a> InMemoryRowGroup<'a> {
};
let column = self.metadata.column(idx);
if !cache_uncompressed_pages(column) {
// For columns that have multiple uncompressed pages, we only cache the compressed page
// to save memory.
let page_key = PageKey::new_compressed(
self.region_id,
self.file_id,
self.row_group_idx,
idx,
);
self.cache_manager
.put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone())));
if let Some(cache) = &self.cache_manager {
if !cache_uncompressed_pages(column) {
// For columns that have multiple uncompressed pages, we only cache the compressed page
// to save memory.
let page_key = PageKey::new_compressed(
self.region_id,
self.file_id,
self.row_group_idx,
idx,
);
cache
.put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone())));
}
}
*chunk = Some(Arc::new(ColumnChunkData::Dense {
@@ -241,6 +242,9 @@ impl<'a> InMemoryRowGroup<'a> {
.enumerate()
.filter(|(idx, chunk)| chunk.is_none() && projection.leaf_included(*idx))
.for_each(|(idx, chunk)| {
let Some(cache) = &self.cache_manager else {
return;
};
let column = self.metadata.column(idx);
if cache_uncompressed_pages(column) {
// Fetches uncompressed pages for the row group.
@@ -250,7 +254,7 @@ impl<'a> InMemoryRowGroup<'a> {
self.row_group_idx,
idx,
);
self.column_uncompressed_pages[idx] = self.cache_manager.get_pages(&page_key);
self.column_uncompressed_pages[idx] = cache.get_pages(&page_key);
} else {
// Fetches the compressed page from the cache.
let page_key = PageKey::new_compressed(
@@ -260,7 +264,7 @@ impl<'a> InMemoryRowGroup<'a> {
idx,
);
*chunk = self.cache_manager.get_pages(&page_key).map(|page_value| {
*chunk = cache.get_pages(&page_key).map(|page_value| {
Arc::new(ColumnChunkData::Dense {
offset: column.byte_range().0 as usize,
data: page_value.compressed.clone(),
@@ -296,7 +300,7 @@ impl<'a> InMemoryRowGroup<'a> {
key: IndexKey,
ranges: &[Range<u64>],
) -> Option<Vec<Bytes>> {
if let Some(cache) = self.cache_manager.write_cache() {
if let Some(cache) = self.cache_manager.as_ref()?.write_cache() {
return cache.file_cache().read_ranges(key, ranges).await;
}
None
@@ -327,6 +331,10 @@ impl<'a> InMemoryRowGroup<'a> {
}
};
let Some(cache) = &self.cache_manager else {
return Ok(Box::new(page_reader));
};
let column = self.metadata.column(i);
if cache_uncompressed_pages(column) {
// This column uses the row group level page cache.
@@ -335,7 +343,7 @@ impl<'a> InMemoryRowGroup<'a> {
let page_value = Arc::new(PageValue::new_row_group(pages));
let page_key =
PageKey::new_uncompressed(self.region_id, self.file_id, self.row_group_idx, i);
self.cache_manager.put_pages(page_key, page_value.clone());
cache.put_pages(page_key, page_value.clone());
return Ok(Box::new(RowGroupCachedReader::new(&page_value.row_group)));
}
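
Making the cache manager optional means every lookup and put in this file degrades to a no-op when no `CacheManager` is configured. A minimal sketch of the pattern, using only the `get_pages` call shown above (the helper name is hypothetical, not part of the change):

fn lookup_pages(cache: &Option<CacheManagerRef>, key: &PageKey) -> Option<Arc<PageValue>> {
    // `None` skips the cache entirely; the caller then falls back to fetching
    // and decoding the pages from object storage.
    cache.as_ref().and_then(|c| c.get_pages(key))
}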

View File

@@ -35,8 +35,7 @@ use api::v1::{OpType, Row, Rows, SemanticType};
use common_base::readable_size::ReadableSize;
use common_base::Plugins;
use common_datasource::compression::CompressionType;
use common_meta::cache::{new_schema_cache, new_table_info_cache, new_table_schema_cache};
use common_meta::key::schema_name::{SchemaName, SchemaNameValue};
use common_meta::cache::{new_schema_cache, new_table_schema_cache};
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::KvBackendRef;
@@ -49,7 +48,7 @@ use datatypes::schema::ColumnSchema;
use log_store::kafka::log_store::KafkaLogStore;
use log_store::raft_engine::log_store::RaftEngineLogStore;
use log_store::test_util::log_store_util;
use moka::future::{Cache, CacheBuilder};
use moka::future::CacheBuilder;
use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
use object_store::services::Fs;
use object_store::ObjectStore;

View File

@@ -170,6 +170,8 @@ impl WorkerGroup {
.selector_result_cache_size(config.selector_result_cache_size.as_bytes())
.index_metadata_size(config.inverted_index.metadata_cache_size.as_bytes())
.index_content_size(config.inverted_index.content_cache_size.as_bytes())
.index_content_page_size(config.inverted_index.content_cache_page_size.as_bytes())
.puffin_metadata_size(config.index.metadata_cache_size.as_bytes())
.write_cache(write_cache)
.build(),
);

View File

@@ -223,7 +223,7 @@ transform:
type: uint32
"#;
parse(&Content::Yaml(pipeline_yaml.into())).unwrap()
parse(&Content::Yaml(pipeline_yaml)).unwrap()
}
fn criterion_benchmark(c: &mut Criterion) {

View File

@@ -37,9 +37,9 @@ const PROCESSORS: &str = "processors";
const TRANSFORM: &str = "transform";
const TRANSFORMS: &str = "transforms";
pub enum Content {
Json(String),
Yaml(String),
pub enum Content<'a> {
Json(&'a str),
Yaml(&'a str),
}
pub fn parse<T>(input: &Content) -> Result<Pipeline<T>>
@@ -379,8 +379,7 @@ transform:
- field: field2
type: uint32
"#;
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(&["my_field"].to_vec(), pipeline.required_keys());
@@ -432,8 +431,7 @@ transform:
- field: ts
type: timestamp, ns
index: time"#;
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_str.into())).unwrap();
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_str)).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline
.prepare(serde_json::Value::String(message), &mut payload)
@@ -509,8 +507,7 @@ transform:
type: uint32
"#;
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(&["my_field"].to_vec(), pipeline.required_keys());
@@ -554,8 +551,7 @@ transform:
index: time
"#;
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
let schema = pipeline.schemas().clone();
let mut result = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut result).unwrap();

View File

@@ -15,6 +15,7 @@
pub mod coerce;
use std::collections::HashSet;
use std::sync::Arc;
use ahash::HashMap;
use api::helper::proto_value_type;
@@ -367,20 +368,15 @@ fn json_value_to_row(
Ok(Row { values: row })
}
/// Identity pipeline for Greptime
/// This pipeline converts the input JSON array to Greptime Rows.
/// 1. The pipeline adds a default timestamp column to the schema
/// 2. The pipeline does not resolve NULL values
/// 3. The pipeline assumes that the JSON format is fixed
/// 4. The pipeline returns an error if the same column's datatype is mismatched
/// 5. The pipeline analyzes the schema of each JSON record and merges them to get the final schema.
pub fn identity_pipeline(array: Vec<serde_json::Value>) -> Result<Rows> {
fn identity_pipeline_inner<'a>(
array: Vec<serde_json::Value>,
tag_column_names: Option<impl Iterator<Item = &'a String>>,
) -> Result<Rows> {
let mut rows = Vec::with_capacity(array.len());
let mut schema = SchemaInfo::default();
let mut schema_info = SchemaInfo::default();
for value in array {
if let serde_json::Value::Object(map) = value {
let row = json_value_to_row(&mut schema, map)?;
let row = json_value_to_row(&mut schema_info, map)?;
rows.push(row);
}
}
@@ -395,7 +391,7 @@ pub fn identity_pipeline(array: Vec<serde_json::Value>) -> Result<Rows> {
let ts = GreptimeValue {
value_data: Some(ValueData::TimestampNanosecondValue(ns)),
};
let column_count = schema.schema.len();
let column_count = schema_info.schema.len();
for row in rows.iter_mut() {
let diff = column_count - row.values.len();
for _ in 0..diff {
@@ -403,15 +399,49 @@ pub fn identity_pipeline(array: Vec<serde_json::Value>) -> Result<Rows> {
}
row.values.push(ts.clone());
}
schema.schema.push(greptime_timestamp_schema);
schema_info.schema.push(greptime_timestamp_schema);
// set the semantic type of the row key column to Tag
if let Some(tag_column_names) = tag_column_names {
tag_column_names.for_each(|tag_column_name| {
if let Some(index) = schema_info.index.get(tag_column_name) {
schema_info.schema[*index].semantic_type = SemanticType::Tag as i32;
}
});
}
Ok(Rows {
schema: schema.schema,
schema: schema_info.schema,
rows,
})
}
/// Identity pipeline for Greptime
/// This pipeline converts the input JSON array to Greptime Rows.
/// The `table` parameter is used to set the semantic type of the row key columns to Tag.
/// 1. The pipeline adds a default timestamp column to the schema
/// 2. The pipeline does not resolve NULL values
/// 3. The pipeline assumes that the JSON format is fixed
/// 4. The pipeline returns an error if the same column's datatype is mismatched
/// 5. The pipeline analyzes the schema of each JSON record and merges them to get the final schema.
pub fn identity_pipeline(
array: Vec<serde_json::Value>,
table: Option<Arc<table::Table>>,
) -> Result<Rows> {
match table {
Some(table) => {
let table_info = table.table_info();
let tag_column_names = table_info.meta.row_key_column_names();
identity_pipeline_inner(array, Some(tag_column_names))
}
None => identity_pipeline_inner(array, None::<std::iter::Empty<&String>>),
}
}
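
A hedged usage sketch of the new signature (variable names are illustrative): a caller that has resolved the target table passes it so the table's row key columns are written as tags, while a caller without one passes `None`.

// `json_records`: Vec<serde_json::Value>, `table`: Arc<table::Table>.
let tagged = identity_pipeline(json_records.clone(), Some(table))?;
// Without a table, columns keep their default (Field) semantic type.
let untagged = identity_pipeline(json_records, None)?;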
#[cfg(test)]
mod tests {
use api::v1::SemanticType;
use crate::etl::transform::transformer::greptime::identity_pipeline_inner;
use crate::identity_pipeline;
#[test]
@@ -437,7 +467,7 @@ mod tests {
"gaga": "gaga"
}),
];
let rows = identity_pipeline(array);
let rows = identity_pipeline(array, None);
assert!(rows.is_err());
assert_eq!(
rows.err().unwrap().to_string(),
@@ -465,7 +495,7 @@ mod tests {
"gaga": "gaga"
}),
];
let rows = identity_pipeline(array);
let rows = identity_pipeline(array, None);
assert!(rows.is_err());
assert_eq!(
rows.err().unwrap().to_string(),
@@ -493,7 +523,7 @@ mod tests {
"gaga": "gaga"
}),
];
let rows = identity_pipeline(array);
let rows = identity_pipeline(array, None);
assert!(rows.is_ok());
let rows = rows.unwrap();
assert_eq!(rows.schema.len(), 8);
@@ -501,5 +531,58 @@ mod tests {
assert_eq!(8, rows.rows[0].values.len());
assert_eq!(8, rows.rows[1].values.len());
}
{
let array = vec![
serde_json::json!({
"woshinull": null,
"name": "Alice",
"age": 20,
"is_student": true,
"score": 99.5,
"hobbies": "reading",
"address": "Beijing",
}),
serde_json::json!({
"name": "Bob",
"age": 21,
"is_student": false,
"score": 88.5,
"hobbies": "swimming",
"address": "Shanghai",
"gaga": "gaga"
}),
];
let tag_column_names = ["name".to_string(), "address".to_string()];
let rows = identity_pipeline_inner(array, Some(tag_column_names.iter()));
assert!(rows.is_ok());
let rows = rows.unwrap();
assert_eq!(rows.schema.len(), 8);
assert_eq!(rows.rows.len(), 2);
assert_eq!(8, rows.rows[0].values.len());
assert_eq!(8, rows.rows[1].values.len());
assert_eq!(
rows.schema
.iter()
.find(|x| x.column_name == "name")
.unwrap()
.semantic_type,
SemanticType::Tag as i32
);
assert_eq!(
rows.schema
.iter()
.find(|x| x.column_name == "address")
.unwrap()
.semantic_type,
SemanticType::Tag as i32
);
assert_eq!(
rows.schema
.iter()
.filter(|x| x.semantic_type == SemanticType::Tag as i32)
.count(),
2
);
}
}
}

View File

@@ -243,4 +243,9 @@ impl PipelineOperator {
})
.await
}
/// Compile a pipeline.
pub fn build_pipeline(pipeline: &str) -> Result<Pipeline<GreptimeTransformer>> {
PipelineTable::compile_pipeline(pipeline)
}
}

View File

@@ -203,7 +203,7 @@ impl PipelineTable {
/// Compile a pipeline from a string.
pub fn compile_pipeline(pipeline: &str) -> Result<Pipeline<GreptimeTransformer>> {
let yaml_content = Content::Yaml(pipeline.into());
let yaml_content = Content::Yaml(pipeline);
parse::<GreptimeTransformer>(&yaml_content).context(CompilePipelineSnafu)
}

View File

@@ -19,7 +19,7 @@ use pipeline::{parse, Content, GreptimeTransformer, Pipeline};
pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
let yaml_content = Content::Yaml(pipeline_yaml.into());
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline<GreptimeTransformer> =
parse(&yaml_content).expect("failed to parse pipeline");
let mut result = pipeline.init_intermediate_state();

View File

@@ -270,7 +270,7 @@ transform:
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
let yaml_content = pipeline::Content::Yaml(pipeline_yaml.into());
let yaml_content = pipeline::Content::Yaml(pipeline_yaml);
let pipeline: pipeline::Pipeline<pipeline::GreptimeTransformer> =
pipeline::parse(&yaml_content).expect("failed to parse pipeline");
let mut result = pipeline.init_intermediate_state();

View File

@@ -417,7 +417,7 @@ transform:
.map(|(_, d)| GreptimeValue { value_data: d })
.collect::<Vec<GreptimeValue>>();
let yaml_content = Content::Yaml(pipeline_yaml.into());
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline<GreptimeTransformer> =
parse(&yaml_content).expect("failed to parse pipeline");
let mut stats = pipeline.init_intermediate_state();
@@ -487,7 +487,7 @@ transform:
type: json
"#;
let yaml_content = Content::Yaml(pipeline_yaml.into());
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
let mut status = pipeline.init_intermediate_state();
@@ -592,7 +592,7 @@ transform:
type: json
"#;
let yaml_content = Content::Yaml(pipeline_yaml.into());
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
let mut status = pipeline.init_intermediate_state();
@@ -655,7 +655,7 @@ transform:
index: timestamp
"#;
let yaml_content = Content::Yaml(pipeline_yaml.into());
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
let mut status = pipeline.init_intermediate_state();
@@ -691,7 +691,7 @@ transform:
- message
type: string
"#;
let yaml_content = Content::Yaml(pipeline_yaml.into());
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
let mut status = pipeline.init_intermediate_state();

View File

@@ -25,6 +25,7 @@ futures.workspace = true
lz4_flex = "0.11"
moka = { workspace = true, features = ["future", "sync"] }
pin-project.workspace = true
prometheus.workspace = true
serde.workspace = true
serde_json.workspace = true
sha2 = "0.10.8"

View File

@@ -68,6 +68,20 @@ pub struct BlobMetadata {
pub properties: HashMap<String, String>,
}
impl BlobMetadata {
/// Calculates the memory usage of the blob metadata in bytes.
pub fn memory_usage(&self) -> usize {
self.blob_type.len()
+ self.input_fields.len() * std::mem::size_of::<i32>()
+ self
.properties
.iter()
.map(|(k, v)| k.len() + v.len())
.sum::<usize>()
+ std::mem::size_of::<Self>()
}
}
/// Compression codec used to compress the blob
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]

View File

@@ -25,14 +25,6 @@ use snafu::{Location, Snafu};
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Failed to seek"))]
Seek {
#[snafu(source)]
error: IoError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to read"))]
Read {
#[snafu(source)]
@@ -119,14 +111,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to convert bytes to integer"))]
BytesToInteger {
#[snafu(source)]
error: std::array::TryFromSliceError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unsupported decompression: {}", decompression))]
UnsupportedDecompression {
decompression: String,
@@ -150,17 +134,15 @@ pub enum Error {
location: Location,
},
#[snafu(display("Parse stage not match, expected: {}, actual: {}", expected, actual))]
ParseStageNotMatch {
expected: String,
actual: String,
#[snafu(display("Unexpected footer payload size: {}", size))]
UnexpectedFooterPayloadSize {
size: i32,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unexpected footer payload size: {}", size))]
UnexpectedFooterPayloadSize {
size: i32,
#[snafu(display("Invalid puffin footer"))]
InvalidPuffinFooter {
#[snafu(implicit)]
location: Location,
},
@@ -177,20 +159,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Invalid blob offset: {}, location: {:?}", offset, location))]
InvalidBlobOffset {
offset: i64,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid blob area end: {}, location: {:?}", offset, location))]
InvalidBlobAreaEnd {
offset: u64,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to compress lz4"))]
Lz4Compression {
#[snafu(source)]
@@ -262,8 +230,7 @@ impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
use Error::*;
match self {
Seek { .. }
| Read { .. }
Read { .. }
| MagicNotMatched { .. }
| DeserializeJson { .. }
| Write { .. }
@@ -275,18 +242,15 @@ impl ErrorExt for Error {
| Remove { .. }
| Rename { .. }
| SerializeJson { .. }
| BytesToInteger { .. }
| ParseStageNotMatch { .. }
| UnexpectedFooterPayloadSize { .. }
| UnexpectedPuffinFileSize { .. }
| InvalidBlobOffset { .. }
| InvalidBlobAreaEnd { .. }
| Lz4Compression { .. }
| Lz4Decompression { .. }
| BlobNotFound { .. }
| BlobIndexOutOfBound { .. }
| FileKeyNotMatch { .. }
| WalkDir { .. } => StatusCode::Unexpected,
| WalkDir { .. }
| InvalidPuffinFooter { .. } => StatusCode::Unexpected,
UnsupportedCompression { .. } | UnsupportedDecompression { .. } => {
StatusCode::Unsupported

View File

@@ -21,21 +21,9 @@ use common_base::range_read::RangeReader;
use crate::blob_metadata::BlobMetadata;
use crate::error::Result;
pub use crate::file_format::reader::file::PuffinFileReader;
pub use crate::file_format::reader::footer::PuffinFileFooterReader;
use crate::file_metadata::FileMetadata;
/// `SyncReader` defines a synchronous reader for puffin data.
pub trait SyncReader<'a> {
type Reader: std::io::Read + std::io::Seek;
/// Fetches the FileMetadata.
fn metadata(&'a mut self) -> Result<FileMetadata>;
/// Reads particular blob data based on given metadata.
///
/// Data read from the reader is compressed leaving the caller to decompress the data.
fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result<Self::Reader>;
}
/// `AsyncReader` defines an asynchronous reader for puffin data.
#[async_trait]
pub trait AsyncReader<'a> {

View File

@@ -12,20 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::io::{self, SeekFrom};
use async_trait::async_trait;
use common_base::range_read::RangeReader;
use snafu::{ensure, ResultExt};
use crate::blob_metadata::BlobMetadata;
use crate::error::{
MagicNotMatchedSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedPuffinFileSizeSnafu,
UnsupportedDecompressionSnafu,
};
use crate::file_format::reader::footer::FooterParser;
use crate::file_format::reader::{AsyncReader, SyncReader};
use crate::file_format::{MAGIC, MAGIC_SIZE, MIN_FILE_SIZE};
use crate::error::{ReadSnafu, Result, UnexpectedPuffinFileSizeSnafu};
use crate::file_format::reader::footer::DEFAULT_PREFETCH_SIZE;
use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader};
use crate::file_format::MIN_FILE_SIZE;
use crate::file_metadata::FileMetadata;
use crate::partial_reader::PartialReader;
@@ -51,6 +46,11 @@ impl<R> PuffinFileReader<R> {
}
}
pub fn with_metadata(mut self, metadata: Option<FileMetadata>) -> Self {
self.metadata = metadata;
self
}
fn validate_file_size(file_size: u64) -> Result<()> {
ensure!(
file_size >= MIN_FILE_SIZE,
@@ -72,45 +72,6 @@ impl<R> PuffinFileReader<R> {
}
}
impl<'a, R: io::Read + io::Seek + 'a> SyncReader<'a> for PuffinFileReader<R> {
type Reader = PartialReader<&'a mut R>;
fn metadata(&mut self) -> Result<FileMetadata> {
if let Some(metadata) = &self.metadata {
return Ok(metadata.clone());
}
// check the magic
let mut magic = [0; MAGIC_SIZE as usize];
self.source.read_exact(&mut magic).context(ReadSnafu)?;
ensure!(magic == MAGIC, MagicNotMatchedSnafu);
let file_size = self.get_file_size_sync()?;
// parse the footer
let metadata = FooterParser::new(&mut self.source, file_size).parse_sync()?;
self.metadata = Some(metadata.clone());
Ok(metadata)
}
fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result<Self::Reader> {
// TODO(zhongzc): support decompression
let compression = blob_metadata.compression_codec.as_ref();
ensure!(
compression.is_none(),
UnsupportedDecompressionSnafu {
decompression: compression.unwrap().to_string()
}
);
Ok(PartialReader::new(
&mut self.source,
blob_metadata.offset as _,
blob_metadata.length as _,
))
}
}
#[async_trait]
impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader<R> {
type Reader = PartialReader<&'a mut R>;
@@ -119,17 +80,10 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader<R> {
if let Some(metadata) = &self.metadata {
return Ok(metadata.clone());
}
// check the magic
let magic = self.source.read(0..MAGIC_SIZE).await.context(ReadSnafu)?;
ensure!(*magic == MAGIC, MagicNotMatchedSnafu);
let file_size = self.get_file_size_async().await?;
// parse the footer
let metadata = FooterParser::new(&mut self.source, file_size)
.parse_async()
.await?;
let mut reader = PuffinFileFooterReader::new(&mut self.source, file_size)
.with_prefetch_size(DEFAULT_PREFETCH_SIZE);
let metadata = reader.metadata().await?;
self.metadata = Some(metadata.clone());
Ok(metadata)
}
@@ -143,14 +97,6 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader<R> {
}
}
impl<R: io::Read + io::Seek> PuffinFileReader<R> {
fn get_file_size_sync(&mut self) -> Result<u64> {
let file_size = self.source.seek(SeekFrom::End(0)).context(SeekSnafu)?;
Self::validate_file_size(file_size)?;
Ok(file_size)
}
}
impl<R: RangeReader> PuffinFileReader<R> {
async fn get_file_size_async(&mut self) -> Result<u64> {
let file_size = self

View File

@@ -12,240 +12,98 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::io::{self, Cursor, SeekFrom};
use std::io::Cursor;
use common_base::range_read::RangeReader;
use snafu::{ensure, ResultExt};
use crate::error::{
BytesToIntegerSnafu, DeserializeJsonSnafu, InvalidBlobAreaEndSnafu, InvalidBlobOffsetSnafu,
Lz4DecompressionSnafu, MagicNotMatchedSnafu, ParseStageNotMatchSnafu, ReadSnafu, Result,
SeekSnafu, UnexpectedFooterPayloadSizeSnafu,
DeserializeJsonSnafu, InvalidPuffinFooterSnafu, Lz4DecompressionSnafu, MagicNotMatchedSnafu,
ReadSnafu, Result, UnexpectedFooterPayloadSizeSnafu,
};
use crate::file_format::{Flags, FLAGS_SIZE, MAGIC, MAGIC_SIZE, MIN_FILE_SIZE, PAYLOAD_SIZE_SIZE};
use crate::file_metadata::FileMetadata;
/// Parser for the footer of a Puffin data file
/// The default prefetch size for the footer reader.
pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB
/// Reader for the footer of a Puffin data file
///
/// The footer has a specific layout that needs to be read and parsed to
/// extract metadata about the file, which is encapsulated in the [`FileMetadata`] type.
///
/// This reader supports prefetching, allowing for more efficient reading
/// of the footer by fetching additional data ahead of time.
///
/// ```text
/// Footer layout: HeadMagic Payload PayloadSize Flags FootMagic
/// [4] [?] [4] [4] [4]
/// ```
pub struct FooterParser<R> {
// The underlying IO source
pub struct PuffinFileFooterReader<R> {
/// The source of the puffin file
source: R,
// The size of the file, used for calculating offsets to read from
/// The content length of the puffin file
file_size: u64,
/// The prefetch footer size
prefetch_size: Option<u64>,
}
impl<R> FooterParser<R> {
pub fn new(source: R, file_size: u64) -> Self {
Self { source, file_size }
}
}
impl<R: io::Read + io::Seek> FooterParser<R> {
/// Parses the footer from the IO source in a synchronous manner.
pub fn parse_sync(&mut self) -> Result<FileMetadata> {
let mut parser = StageParser::new(self.file_size);
let mut buf = vec![];
while let Some(byte_to_read) = parser.next_to_read() {
self.source
.seek(SeekFrom::Start(byte_to_read.offset))
.context(SeekSnafu)?;
let size = byte_to_read.size as usize;
buf.resize(size, 0);
let buf = &mut buf[..size];
self.source.read_exact(buf).context(ReadSnafu)?;
parser.consume_bytes(buf)?;
impl<'a, R: RangeReader + 'a> PuffinFileFooterReader<R> {
pub fn new(source: R, content_len: u64) -> Self {
Self {
source,
file_size: content_len,
prefetch_size: None,
}
parser.finish()
}
}
impl<R: RangeReader> FooterParser<R> {
/// Parses the footer from the IO source in an asynchronous manner.
pub async fn parse_async(&mut self) -> Result<FileMetadata> {
let mut parser = StageParser::new(self.file_size);
fn prefetch_size(&self) -> u64 {
self.prefetch_size.unwrap_or(MIN_FILE_SIZE)
}
let mut buf = vec![];
while let Some(byte_to_read) = parser.next_to_read() {
buf.clear();
let range = byte_to_read.offset..byte_to_read.offset + byte_to_read.size;
self.source
.read_into(range, &mut buf)
pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self {
self.prefetch_size = Some(prefetch_size.max(MIN_FILE_SIZE));
self
}
pub async fn metadata(&'a mut self) -> Result<FileMetadata> {
// Note: prefetch > content_len is allowed, since we're using saturating_sub.
let footer_start = self.file_size.saturating_sub(self.prefetch_size());
let suffix = self
.source
.read(footer_start..self.file_size)
.await
.context(ReadSnafu)?;
let suffix_len = suffix.len();
// check the magic
let magic = Self::read_tailing_four_bytes(&suffix)?;
ensure!(magic == MAGIC, MagicNotMatchedSnafu);
let flags = self.decode_flags(&suffix[..suffix_len - MAGIC_SIZE as usize])?;
let length = self.decode_payload_size(
&suffix[..suffix_len - MAGIC_SIZE as usize - FLAGS_SIZE as usize],
)?;
let footer_size = PAYLOAD_SIZE_SIZE + FLAGS_SIZE + MAGIC_SIZE;
// Did not fetch the entire file metadata in the initial read, need to make a second request.
if length > suffix_len as u64 - footer_size {
let metadata_start = self.file_size - length - footer_size;
let meta = self
.source
.read(metadata_start..self.file_size - footer_size)
.await
.context(ReadSnafu)?;
parser.consume_bytes(&buf)?;
}
parser.finish()
}
}
/// The internal stages of parsing the footer.
/// This enum allows the StageParser to keep track of which part
/// of the footer needs to be parsed next.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParseStage {
FootMagic,
Flags,
PayloadSize,
Payload,
HeadMagic,
Done,
}
/// Manages the parsing process of the file's footer.
struct StageParser {
/// Current stage in the parsing sequence of the footer.
stage: ParseStage,
/// Total file size; used for calculating offsets to read from.
file_size: u64,
/// Flags from the footer, set when the `Flags` field is parsed.
flags: Flags,
/// Size of the footer's payload, set when the `PayloadSize` is parsed.
payload_size: u64,
/// Metadata from the footer's payload, set when the `Payload` is parsed.
metadata: Option<FileMetadata>,
}
/// Represents a read operation that needs to be performed, including the
/// offset from the start of the file and the number of bytes to read.
struct BytesToRead {
offset: u64,
size: u64,
}
impl StageParser {
fn new(file_size: u64) -> Self {
Self {
stage: ParseStage::FootMagic,
file_size,
payload_size: 0,
flags: Flags::empty(),
metadata: None,
self.parse_payload(&flags, &meta)
} else {
let metadata_start = self.file_size - length - footer_size - footer_start;
let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize];
self.parse_payload(&flags, meta)
}
}
/// Determines the next segment of bytes to read based on the current parsing stage.
/// This method returns information like the offset and size of the next read,
/// or None if parsing is complete.
fn next_to_read(&self) -> Option<BytesToRead> {
if self.stage == ParseStage::Done {
return None;
}
let btr = match self.stage {
ParseStage::FootMagic => BytesToRead {
offset: self.foot_magic_offset(),
size: MAGIC_SIZE,
},
ParseStage::Flags => BytesToRead {
offset: self.flags_offset(),
size: FLAGS_SIZE,
},
ParseStage::PayloadSize => BytesToRead {
offset: self.payload_size_offset(),
size: PAYLOAD_SIZE_SIZE,
},
ParseStage::Payload => BytesToRead {
offset: self.payload_offset(),
size: self.payload_size,
},
ParseStage::HeadMagic => BytesToRead {
offset: self.head_magic_offset(),
size: MAGIC_SIZE,
},
ParseStage::Done => unreachable!(),
};
Some(btr)
}
/// Processes the bytes that have been read according to the current parsing stage
/// and advances the parsing stage. It ensures the correct sequence of bytes is
/// encountered and stores the necessary information in the `StageParser`.
fn consume_bytes(&mut self, bytes: &[u8]) -> Result<()> {
match self.stage {
ParseStage::FootMagic => {
ensure!(bytes == MAGIC, MagicNotMatchedSnafu);
self.stage = ParseStage::Flags;
}
ParseStage::Flags => {
self.flags = Self::parse_flags(bytes)?;
self.stage = ParseStage::PayloadSize;
}
ParseStage::PayloadSize => {
self.payload_size = Self::parse_payload_size(bytes)?;
self.validate_payload_size()?;
self.stage = ParseStage::Payload;
}
ParseStage::Payload => {
self.metadata = Some(self.parse_payload(bytes)?);
self.validate_metadata()?;
self.stage = ParseStage::HeadMagic;
}
ParseStage::HeadMagic => {
ensure!(bytes == MAGIC, MagicNotMatchedSnafu);
self.stage = ParseStage::Done;
}
ParseStage::Done => unreachable!(),
}
Ok(())
}
/// Finalizes the parsing process, ensuring all stages are complete, and returns
/// the parsed `FileMetadata`. It converts the raw footer payload into structured data.
fn finish(self) -> Result<FileMetadata> {
ensure!(
self.stage == ParseStage::Done,
ParseStageNotMatchSnafu {
expected: format!("{:?}", ParseStage::Done),
actual: format!("{:?}", self.stage),
}
);
Ok(self.metadata.unwrap())
}
fn parse_flags(bytes: &[u8]) -> Result<Flags> {
let n = u32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?);
Ok(Flags::from_bits_truncate(n))
}
fn parse_payload_size(bytes: &[u8]) -> Result<u64> {
let n = i32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?);
ensure!(n >= 0, UnexpectedFooterPayloadSizeSnafu { size: n });
Ok(n as u64)
}
fn validate_payload_size(&self) -> Result<()> {
ensure!(
self.payload_size <= self.file_size - MIN_FILE_SIZE,
UnexpectedFooterPayloadSizeSnafu {
size: self.payload_size as i32
}
);
Ok(())
}
fn parse_payload(&self, bytes: &[u8]) -> Result<FileMetadata> {
if self.flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) {
fn parse_payload(&self, flags: &Flags, bytes: &[u8]) -> Result<FileMetadata> {
if flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) {
let decoder = lz4_flex::frame::FrameDecoder::new(Cursor::new(bytes));
let res = serde_json::from_reader(decoder).context(Lz4DecompressionSnafu)?;
Ok(res)
@@ -254,54 +112,35 @@ impl StageParser {
}
}
fn validate_metadata(&self) -> Result<()> {
let metadata = self.metadata.as_ref().expect("metadata is not set");
fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> {
let suffix_len = suffix.len();
ensure!(suffix_len >= 4, InvalidPuffinFooterSnafu);
let mut bytes = [0; 4];
bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]);
let mut next_blob_offset = MAGIC_SIZE;
// check blob offsets
for blob in &metadata.blobs {
ensure!(
blob.offset as u64 == next_blob_offset,
InvalidBlobOffsetSnafu {
offset: blob.offset
}
);
next_blob_offset += blob.length as u64;
}
Ok(bytes)
}
fn decode_flags(&self, suffix: &[u8]) -> Result<Flags> {
let flags = u32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?);
Ok(Flags::from_bits_truncate(flags))
}
fn decode_payload_size(&self, suffix: &[u8]) -> Result<u64> {
let payload_size = i32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?);
let blob_area_end = metadata
.blobs
.last()
.map_or(MAGIC_SIZE, |b| (b.offset + b.length) as u64);
ensure!(
blob_area_end == self.head_magic_offset(),
InvalidBlobAreaEndSnafu {
offset: blob_area_end
payload_size >= 0,
UnexpectedFooterPayloadSizeSnafu { size: payload_size }
);
let payload_size = payload_size as u64;
ensure!(
payload_size <= self.file_size - MIN_FILE_SIZE,
UnexpectedFooterPayloadSizeSnafu {
size: self.file_size as i32
}
);
Ok(())
}
fn foot_magic_offset(&self) -> u64 {
self.file_size - MAGIC_SIZE
}
fn flags_offset(&self) -> u64 {
self.file_size - MAGIC_SIZE - FLAGS_SIZE
}
fn payload_size_offset(&self) -> u64 {
self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE
}
fn payload_offset(&self) -> u64 {
// `validate_payload_size` ensures that this subtraction will not overflow
self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size
}
fn head_magic_offset(&self) -> u64 {
// `validate_payload_size` ensures that this subtraction will not overflow
self.file_size - MAGIC_SIZE * 2 - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size
Ok(payload_size)
}
}
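
For clarity, a small sketch of the offsets implied by the footer layout above for a file of `file_size` bytes whose footer payload is `payload_size` bytes; the constants are the ones imported from `crate::file_format`. A single prefetch covers the whole footer whenever `prefetch_size >= payload_size + PAYLOAD_SIZE_SIZE + FLAGS_SIZE + MAGIC_SIZE`; otherwise the reader issues one extra read for the payload.

// Footer layout: HeadMagic Payload PayloadSize Flags FootMagic
//                   [4]      [?]       [4]      [4]     [4]
fn footer_offsets(file_size: u64, payload_size: u64) -> (u64, u64, u64, u64, u64) {
    let foot_magic = file_size - MAGIC_SIZE;
    let flags = foot_magic - FLAGS_SIZE;
    let payload_size_field = flags - PAYLOAD_SIZE_SIZE;
    let payload = payload_size_field - payload_size;
    let head_magic = payload - MAGIC_SIZE;
    (head_magic, payload, payload_size_field, flags, foot_magic)
}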

View File

@@ -33,6 +33,22 @@ pub struct FileMetadata {
pub properties: HashMap<String, String>,
}
impl FileMetadata {
/// Calculates the memory usage of the file metadata in bytes.
pub fn memory_usage(&self) -> usize {
self.blobs
.iter()
.map(|blob| blob.memory_usage())
.sum::<usize>()
+ self
.properties
.iter()
.map(|(k, v)| k.len() + v.len())
.sum::<usize>()
+ std::mem::size_of::<Self>()
}
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;

View File

@@ -23,6 +23,10 @@ use crate::partial_reader::PartialReader;
#[async_trait]
impl<R: RangeReader> RangeReader for PartialReader<R> {
fn with_file_size_hint(&mut self, _file_size_hint: u64) {
// do nothing
}
async fn metadata(&mut self) -> io::Result<Metadata> {
Ok(Metadata {
content_length: self.size,

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod cache;
pub mod file_accessor;
pub mod fs_puffin_manager;
pub mod stager;
@@ -72,11 +73,12 @@ pub struct PutOptions {
/// The `PuffinReader` trait provides methods for reading blobs and directories from a Puffin file.
#[async_trait]
#[auto_impl::auto_impl(Arc)]
pub trait PuffinReader {
type Blob: BlobGuard;
type Dir: DirGuard;
fn with_file_size_hint(self, file_size_hint: Option<u64>) -> Self;
/// Reads a blob from the Puffin file.
///
/// The returned `BlobGuard` is used to access the blob data.

View File

@@ -0,0 +1,60 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use prometheus::IntGaugeVec;
use crate::file_metadata::FileMetadata;
/// Metrics for index metadata.
const PUFFIN_METADATA_TYPE: &str = "puffin_metadata";
pub type PuffinMetadataCacheRef = Arc<PuffinMetadataCache>;
/// A cache for storing the metadata of the index files.
pub struct PuffinMetadataCache {
cache: moka::sync::Cache<String, Arc<FileMetadata>>,
}
fn puffin_metadata_weight(k: &String, v: &Arc<FileMetadata>) -> u32 {
(k.as_bytes().len() + v.memory_usage()) as u32
}
impl PuffinMetadataCache {
pub fn new(capacity: u64, cache_bytes: &'static IntGaugeVec) -> Self {
common_telemetry::debug!("Building PuffinMetadataCache with capacity: {capacity}");
Self {
cache: moka::sync::CacheBuilder::new(capacity)
.name("puffin_metadata")
.weigher(puffin_metadata_weight)
.eviction_listener(|k, v, _cause| {
let size = puffin_metadata_weight(&k, &v);
cache_bytes
.with_label_values(&[PUFFIN_METADATA_TYPE])
.sub(size.into());
})
.build(),
}
}
/// Gets the metadata from the cache.
pub fn get_metadata(&self, file_id: &str) -> Option<Arc<FileMetadata>> {
self.cache.get(file_id)
}
/// Puts the metadata into the cache.
pub fn put_metadata(&self, file_id: String, metadata: Arc<FileMetadata>) {
self.cache.insert(file_id, metadata);
}
}
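
A minimal usage sketch, assuming a caller-supplied gauge vector for the byte accounting; the metric name and capacity below are illustrative, not the values wired up in mito2.

use std::sync::Arc;

use lazy_static::lazy_static;
use prometheus::{register_int_gauge_vec, IntGaugeVec};

lazy_static! {
    static ref INDEX_CACHE_BYTES: IntGaugeVec =
        register_int_gauge_vec!("index_cache_bytes", "cached index bytes", &["type"]).unwrap();
}

fn cache_usage(metadata: Arc<FileMetadata>) {
    // 64 MiB capacity; entries are weighted by `puffin_metadata_weight`.
    let cache = PuffinMetadataCache::new(64 * 1024 * 1024, &INDEX_CACHE_BYTES);
    cache.put_metadata("region-dir/index.puffin".to_string(), metadata);
    assert!(cache.get_metadata("region-dir/index.puffin").is_some());
}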

View File

@@ -21,6 +21,7 @@ pub use reader::FsPuffinReader;
pub use writer::FsPuffinWriter;
use crate::error::Result;
use crate::puffin_manager::cache::PuffinMetadataCacheRef;
use crate::puffin_manager::file_accessor::PuffinFileAccessor;
use crate::puffin_manager::stager::Stager;
use crate::puffin_manager::PuffinManager;
@@ -31,16 +32,29 @@ pub struct FsPuffinManager<S, F> {
stager: S,
/// The puffin file accessor.
puffin_file_accessor: F,
/// The puffin metadata cache.
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
}
impl<S, F> FsPuffinManager<S, F> {
/// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`.
/// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`,
/// and optionally with a `puffin_metadata_cache`.
pub fn new(stager: S, puffin_file_accessor: F) -> Self {
Self {
stager,
puffin_file_accessor,
puffin_metadata_cache: None,
}
}
/// Sets the puffin metadata cache.
pub fn with_puffin_metadata_cache(
mut self,
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
) -> Self {
self.puffin_metadata_cache = puffin_metadata_cache;
self
}
}
#[async_trait]
@@ -57,6 +71,7 @@ where
puffin_file_name.to_string(),
self.stager.clone(),
self.puffin_file_accessor.clone(),
self.puffin_metadata_cache.clone(),
))
}
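
How a caller might wire the new pieces together, assuming `PuffinManager::reader` is async and returns a `Result` as above; the file name, blob key, and `file_size` are placeholders.

let manager = FsPuffinManager::new(stager, accessor)
    .with_puffin_metadata_cache(Some(puffin_metadata_cache));
let reader = manager
    .reader("index.puffin")
    .await?
    // The hint lets the footer reader avoid an extra request to learn the file size.
    .with_file_size_hint(Some(file_size));
let blob = reader.blob("some-blob-key").await?;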

View File

@@ -14,6 +14,7 @@
use std::io;
use std::ops::Range;
use std::sync::Arc;
use async_compression::futures::bufread::ZstdDecoder;
use async_trait::async_trait;
@@ -23,12 +24,14 @@ use futures::io::BufReader;
use futures::{AsyncRead, AsyncWrite};
use snafu::{ensure, OptionExt, ResultExt};
use super::PuffinMetadataCacheRef;
use crate::blob_metadata::{BlobMetadata, CompressionCodec};
use crate::error::{
BlobIndexOutOfBoundSnafu, BlobNotFoundSnafu, DeserializeJsonSnafu, FileKeyNotMatchSnafu,
MetadataSnafu, ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu,
};
use crate::file_format::reader::{AsyncReader, PuffinFileReader};
use crate::file_metadata::FileMetadata;
use crate::partial_reader::PartialReader;
use crate::puffin_manager::file_accessor::PuffinFileAccessor;
use crate::puffin_manager::fs_puffin_manager::dir_meta::DirMetadata;
@@ -40,19 +43,32 @@ pub struct FsPuffinReader<S, F> {
/// The name of the puffin file.
puffin_file_name: String,
/// The file size hint.
file_size_hint: Option<u64>,
/// The stager.
stager: S,
/// The puffin file accessor.
puffin_file_accessor: F,
/// The puffin file metadata cache.
puffin_file_metadata_cache: Option<PuffinMetadataCacheRef>,
}
impl<S, F> FsPuffinReader<S, F> {
pub(crate) fn new(puffin_file_name: String, stager: S, puffin_file_accessor: F) -> Self {
pub(crate) fn new(
puffin_file_name: String,
stager: S,
puffin_file_accessor: F,
puffin_file_metadata_cache: Option<PuffinMetadataCacheRef>,
) -> Self {
Self {
puffin_file_name,
file_size_hint: None,
stager,
puffin_file_accessor,
puffin_file_metadata_cache,
}
}
}
@@ -66,20 +82,28 @@ where
type Blob = Either<RandomReadBlob<F>, S::Blob>;
type Dir = S::Dir;
fn with_file_size_hint(mut self, file_size_hint: Option<u64>) -> Self {
self.file_size_hint = file_size_hint;
self
}
async fn blob(&self, key: &str) -> Result<Self::Blob> {
let reader = self
let mut reader = self
.puffin_file_accessor
.reader(&self.puffin_file_name)
.await?;
if let Some(file_size_hint) = self.file_size_hint {
reader.with_file_size_hint(file_size_hint);
}
let mut file = PuffinFileReader::new(reader);
// TODO(zhongzc): cache the metadata.
let metadata = file.metadata().await?;
let metadata = self.get_puffin_file_metadata(&mut file).await?;
let blob_metadata = metadata
.blobs
.into_iter()
.iter()
.find(|m| m.blob_type == key)
.context(BlobNotFoundSnafu { blob: key })?;
.context(BlobNotFoundSnafu { blob: key })?
.clone();
let blob = if blob_metadata.compression_codec.is_none() {
// If the blob is not compressed, we can directly read it from the puffin file.
@@ -133,6 +157,23 @@ where
S: Stager,
F: PuffinFileAccessor + Clone,
{
async fn get_puffin_file_metadata(
&self,
reader: &mut PuffinFileReader<F::Reader>,
) -> Result<Arc<FileMetadata>> {
if let Some(cache) = self.puffin_file_metadata_cache.as_ref() {
if let Some(metadata) = cache.get_metadata(&self.puffin_file_name) {
return Ok(metadata);
}
}
let metadata = Arc::new(reader.metadata().await?);
if let Some(cache) = self.puffin_file_metadata_cache.as_ref() {
cache.put_metadata(self.puffin_file_name.to_string(), metadata.clone());
}
Ok(metadata)
}
async fn init_blob_to_stager(
reader: PuffinFileReader<F::Reader>,
blob_metadata: BlobMetadata,
@@ -274,6 +315,13 @@ where
A: RangeReader,
B: RangeReader,
{
fn with_file_size_hint(&mut self, file_size_hint: u64) {
match self {
Either::L(a) => a.with_file_size_hint(file_size_hint),
Either::R(b) => b.with_file_size_hint(file_size_hint),
}
}
async fn metadata(&mut self) -> io::Result<Metadata> {
match self {
Either::L(a) => a.metadata().await,

View File

@@ -13,26 +13,14 @@
// limitations under the License.
use std::collections::HashMap;
use std::fs::File;
use std::io::{Cursor, Read};
use std::vec;
use common_base::range_read::{FileReader, RangeReader};
use futures::io::Cursor as AsyncCursor;
use crate::file_format::reader::{AsyncReader, PuffinFileReader, SyncReader};
use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter, SyncWriter};
#[test]
fn test_read_empty_puffin_sync() {
let path = "src/tests/resources/empty-puffin-uncompressed.puffin";
let file = File::open(path).unwrap();
let mut reader = PuffinFileReader::new(file);
let metadata = reader.metadata().unwrap();
assert_eq!(metadata.properties.len(), 0);
assert_eq!(metadata.blobs.len(), 0);
}
use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader, PuffinFileReader};
use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter};
use crate::file_metadata::FileMetadata;
#[tokio::test]
async fn test_read_empty_puffin_async() {
@@ -45,39 +33,37 @@ async fn test_read_empty_puffin_async() {
assert_eq!(metadata.blobs.len(), 0);
}
#[test]
fn test_sample_metric_data_puffin_sync() {
let path = "src/tests/resources/sample-metric-data-uncompressed.puffin";
async fn test_read_puffin_file_metadata(
path: &str,
file_size: u64,
expected_metadata: FileMetadata,
) {
for prefetch_size in [0, file_size / 2, file_size, file_size + 10] {
let reader = FileReader::new(path).await.unwrap();
let mut footer_reader = PuffinFileFooterReader::new(reader, file_size);
if prefetch_size > 0 {
footer_reader = footer_reader.with_prefetch_size(prefetch_size);
}
let metadata = footer_reader.metadata().await.unwrap();
assert_eq!(metadata.properties, expected_metadata.properties);
assert_eq!(metadata.blobs, expected_metadata.blobs);
}
}
let file = File::open(path).unwrap();
let mut reader = PuffinFileReader::new(file);
let metadata = reader.metadata().unwrap();
#[tokio::test]
async fn test_read_puffin_file_metadata_async() {
let paths = vec![
"src/tests/resources/empty-puffin-uncompressed.puffin",
"src/tests/resources/sample-metric-data-uncompressed.puffin",
];
for path in paths {
let mut reader = FileReader::new(path).await.unwrap();
let file_size = reader.metadata().await.unwrap().content_length;
let mut reader = PuffinFileReader::new(reader);
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.properties.len(), 1);
assert_eq!(
metadata.properties.get("created-by"),
Some(&"Test 1234".to_string())
);
assert_eq!(metadata.blobs.len(), 2);
assert_eq!(metadata.blobs[0].blob_type, "some-blob");
assert_eq!(metadata.blobs[0].offset, 4);
assert_eq!(metadata.blobs[0].length, 9);
assert_eq!(metadata.blobs[1].blob_type, "some-other-blob");
assert_eq!(metadata.blobs[1].offset, 13);
assert_eq!(metadata.blobs[1].length, 83);
let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap();
let mut buf = String::new();
some_blob.read_to_string(&mut buf).unwrap();
assert_eq!(buf, "abcdefghi");
let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap();
let mut buf = Vec::new();
some_other_blob.read_to_end(&mut buf).unwrap();
let expected = include_bytes!("tests/resources/sample-metric-data.blob");
assert_eq!(buf, expected);
test_read_puffin_file_metadata(path, file_size, metadata).await;
}
}
#[tokio::test]
@@ -113,38 +99,6 @@ async fn test_sample_metric_data_puffin_async() {
assert_eq!(buf, expected);
}
#[test]
fn test_writer_reader_with_empty_sync() {
fn test_writer_reader_with_empty_sync(footer_compressed: bool) {
let mut buf = Cursor::new(vec![]);
let mut writer = PuffinFileWriter::new(&mut buf);
writer.set_properties(HashMap::from([(
"created-by".to_string(),
"Test 1234".to_string(),
)]));
writer.set_footer_lz4_compressed(footer_compressed);
let written_bytes = writer.finish().unwrap();
assert!(written_bytes > 0);
let mut buf = Cursor::new(buf.into_inner());
let mut reader = PuffinFileReader::new(&mut buf);
let metadata = reader.metadata().unwrap();
assert_eq!(metadata.properties.len(), 1);
assert_eq!(
metadata.properties.get("created-by"),
Some(&"Test 1234".to_string())
);
assert_eq!(metadata.blobs.len(), 0);
}
test_writer_reader_with_empty_sync(false);
test_writer_reader_with_empty_sync(true);
}
#[tokio::test]
async fn test_writer_reader_empty_async() {
async fn test_writer_reader_empty_async(footer_compressed: bool) {
@@ -176,76 +130,6 @@ async fn test_writer_reader_empty_async() {
test_writer_reader_empty_async(true).await;
}
#[test]
fn test_writer_reader_sync() {
fn test_writer_reader_sync(footer_compressed: bool) {
let mut buf = Cursor::new(vec![]);
let mut writer = PuffinFileWriter::new(&mut buf);
let blob1 = "abcdefghi";
writer
.add_blob(Blob {
compressed_data: Cursor::new(&blob1),
blob_type: "some-blob".to_string(),
properties: Default::default(),
compression_codec: None,
})
.unwrap();
let blob2 = include_bytes!("tests/resources/sample-metric-data.blob");
writer
.add_blob(Blob {
compressed_data: Cursor::new(&blob2),
blob_type: "some-other-blob".to_string(),
properties: Default::default(),
compression_codec: None,
})
.unwrap();
writer.set_properties(HashMap::from([(
"created-by".to_string(),
"Test 1234".to_string(),
)]));
writer.set_footer_lz4_compressed(footer_compressed);
let written_bytes = writer.finish().unwrap();
assert!(written_bytes > 0);
let mut buf = Cursor::new(buf.into_inner());
let mut reader = PuffinFileReader::new(&mut buf);
let metadata = reader.metadata().unwrap();
assert_eq!(metadata.properties.len(), 1);
assert_eq!(
metadata.properties.get("created-by"),
Some(&"Test 1234".to_string())
);
assert_eq!(metadata.blobs.len(), 2);
assert_eq!(metadata.blobs[0].blob_type, "some-blob");
assert_eq!(metadata.blobs[0].offset, 4);
assert_eq!(metadata.blobs[0].length, 9);
assert_eq!(metadata.blobs[1].blob_type, "some-other-blob");
assert_eq!(metadata.blobs[1].offset, 13);
assert_eq!(metadata.blobs[1].length, 83);
let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap();
let mut buf = String::new();
some_blob.read_to_string(&mut buf).unwrap();
assert_eq!(buf, blob1);
let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap();
let mut buf = Vec::new();
some_other_blob.read_to_end(&mut buf).unwrap();
assert_eq!(buf, blob2);
}
test_writer_reader_sync(false);
test_writer_reader_sync(true);
}
#[tokio::test]
async fn test_writer_reader_async() {
async fn test_writer_reader_async(footer_compressed: bool) {

View File

@@ -146,6 +146,7 @@ impl Categorizer {
| Expr::Between(_)
| Expr::Sort(_)
| Expr::Exists(_)
| Expr::InList(_)
| Expr::ScalarFunction(_) => Commutativity::Commutative,
Expr::Like(_)
@@ -157,7 +158,6 @@ impl Categorizer {
| Expr::TryCast(_)
| Expr::AggregateFunction(_)
| Expr::WindowFunction(_)
| Expr::InList(_)
| Expr::InSubquery(_)
| Expr::ScalarSubquery(_)
| Expr::Wildcard { .. } => Commutativity::Unimplemented,

View File

@@ -139,11 +139,6 @@ tokio-test = "0.4"
[target.'cfg(unix)'.dev-dependencies]
pprof = { version = "0.13", features = ["criterion", "flamegraph"] }
[target.'cfg(windows)'.dependencies]
aws-lc-sys = { version = "0.21.0", features = [
"prebuilt-nasm",
] } # use prebuilt nasm on windows per https://github.com/aws/aws-lc-rs/blob/main/aws-lc-sys/README.md#use-of-prebuilt-nasm-objects
[build-dependencies]
common-version.workspace = true

View File

@@ -1 +1 @@
v0.7.1
v0.7.2

View File

@@ -189,6 +189,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to parse query"))]
FailedToParseQuery {
#[snafu(implicit)]
location: Location,
source: sql::error::Error,
},
#[snafu(display("Failed to parse InfluxDB line protocol"))]
InfluxdbLineProtocol {
#[snafu(implicit)]
@@ -651,7 +658,8 @@ impl ErrorExt for Error {
| OpenTelemetryLog { .. }
| UnsupportedJsonDataTypeForTag { .. }
| InvalidTableName { .. }
| PrepareStatementNotFound { .. } => StatusCode::InvalidArguments,
| PrepareStatementNotFound { .. }
| FailedToParseQuery { .. } => StatusCode::InvalidArguments,
Catalog { source, .. } => source.status_code(),
RowWriter { source, .. } => source.status_code(),

View File

@@ -638,10 +638,15 @@ impl HttpServer {
router.clone()
};
router = router.route(
"/health",
routing::get(handler::health).post(handler::health),
);
router = router
.route(
"/health",
routing::get(handler::health).post(handler::health),
)
.route(
"/ready",
routing::get(handler::health).post(handler::health),
);
router = router.route("/status", routing::get(handler::status));
@@ -750,6 +755,10 @@ impl HttpServer {
fn route_sql<S>(api_state: ApiState) -> Router<S> {
Router::new()
.route("/sql", routing::get(handler::sql).post(handler::sql))
.route(
"/sql/parse",
routing::get(handler::sql_parse).post(handler::sql_parse),
)
.route(
"/promql",
routing::get(handler::promql).post(handler::promql),

View File

@@ -38,7 +38,7 @@ use lazy_static::lazy_static;
use loki_api::prost_types::Timestamp;
use pipeline::error::PipelineTransformSnafu;
use pipeline::util::to_pipeline_version;
use pipeline::PipelineVersion;
use pipeline::{GreptimeTransformer, PipelineVersion};
use prost::Message;
use serde::{Deserialize, Serialize};
use serde_json::{Deserializer, Map, Value};
@@ -46,8 +46,8 @@ use session::context::{Channel, QueryContext, QueryContextRef};
use snafu::{ensure, OptionExt, ResultExt};
use crate::error::{
DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu, ParseJsonSnafu,
PipelineSnafu, Result, UnsupportedContentTypeSnafu,
CatalogSnafu, DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu,
ParseJsonSnafu, PipelineSnafu, Result, UnsupportedContentTypeSnafu,
};
use crate::http::extractor::LogTableName;
use crate::http::header::CONTENT_TYPE_PROTOBUF_STR;
@@ -276,39 +276,11 @@ fn transform_ndjson_array_factory(
})
}
#[axum_macros::debug_handler]
pub async fn pipeline_dryrun(
State(log_state): State<LogState>,
Query(query_params): Query<LogIngesterQueryParams>,
Extension(mut query_ctx): Extension<QueryContext>,
TypedHeader(content_type): TypedHeader<ContentType>,
payload: String,
/// Dryrun pipeline with given data
fn dryrun_pipeline_inner(
value: Vec<Value>,
pipeline: &pipeline::Pipeline<GreptimeTransformer>,
) -> Result<Response> {
let handler = log_state.log_handler;
let pipeline_name = query_params.pipeline_name.context(InvalidParameterSnafu {
reason: "pipeline_name is required",
})?;
let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?;
let ignore_errors = query_params.ignore_errors.unwrap_or(false);
let value = extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?;
ensure!(
value.len() <= 10,
InvalidParameterSnafu {
reason: "too many rows for dryrun",
}
);
query_ctx.set_channel(Channel::Http);
let query_ctx = Arc::new(query_ctx);
let pipeline = handler
.get_pipeline(&pipeline_name, version, query_ctx.clone())
.await?;
let mut intermediate_state = pipeline.init_intermediate_state();
let mut results = Vec::with_capacity(value.len());
@@ -387,6 +359,110 @@ pub async fn pipeline_dryrun(
Ok(Json(result).into_response())
}
/// Parameters for dry-running a pipeline with the given data.
/// `pipeline_name` and `pipeline_version` identify a pipeline stored in the db;
/// `pipeline` supplies raw pipeline content directly.
/// `data` is the input to run through the pipeline and
/// may be a list of strings or a list of objects.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct PipelineDryrunParams {
pub pipeline_name: Option<String>,
pub pipeline_version: Option<String>,
pub pipeline: Option<String>,
pub data: Vec<Value>,
}
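
A hedged example of an accepted request body for each mode, deserialized into the struct above; the pipeline name and data values are illustrative.

// Mode 1: reference a pipeline already stored in the db by name (and optional version).
let by_name = r#"{
    "pipeline_name": "my_pipeline",
    "data": [{"message": "hello", "time": "2024-12-12 10:00:00"}]
}"#;
// Mode 2: supply raw pipeline content directly, without storing it first.
let by_content = r#"{
    "pipeline": "<YAML pipeline definition>",
    "data": ["plain text line"]
}"#;
let params: PipelineDryrunParams = serde_json::from_str(by_name).unwrap();
assert!(params.pipeline_name.is_some() && params.data.len() <= 10);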
/// Check that the payload is valid JSON and contains either `pipeline` or `pipeline_name`
/// together with `data`.
/// Returns `Some` if valid, `None` if invalid.
fn check_pipeline_dryrun_params_valid(payload: &str) -> Option<PipelineDryrunParams> {
match serde_json::from_str::<PipelineDryrunParams>(payload) {
// the payload provides `pipeline` or `pipeline_name` together with `data`
Ok(params) if params.pipeline.is_some() || params.pipeline_name.is_some() => Some(params),
// either `pipeline_name` or `pipeline` is required
Ok(_) => None,
// invalid json
Err(_) => None,
}
}
/// Check if the pipeline_name exists
fn check_pipeline_name_exists(pipeline_name: Option<String>) -> Result<String> {
pipeline_name.context(InvalidParameterSnafu {
reason: "pipeline_name is required",
})
}
/// Check that the data length is at most 10
fn check_data_valid(data_len: usize) -> Result<()> {
ensure!(
data_len <= 10,
InvalidParameterSnafu {
reason: "data is required",
}
);
Ok(())
}
#[axum_macros::debug_handler]
pub async fn pipeline_dryrun(
State(log_state): State<LogState>,
Query(query_params): Query<LogIngesterQueryParams>,
Extension(mut query_ctx): Extension<QueryContext>,
TypedHeader(content_type): TypedHeader<ContentType>,
payload: String,
) -> Result<Response> {
let handler = log_state.log_handler;
match check_pipeline_dryrun_params_valid(&payload) {
Some(params) => {
let data = params.data;
check_data_valid(data.len())?;
match params.pipeline {
None => {
let version =
to_pipeline_version(params.pipeline_version).context(PipelineSnafu)?;
let pipeline_name = check_pipeline_name_exists(params.pipeline_name)?;
let pipeline = handler
.get_pipeline(&pipeline_name, version, Arc::new(query_ctx))
.await?;
dryrun_pipeline_inner(data, &pipeline)
}
Some(pipeline) => {
let pipeline = handler.build_pipeline(&pipeline)?;
dryrun_pipeline_inner(data, &pipeline)
}
}
}
None => {
// This path is for backward compatibility with the previous dry run code
// where the payload is just data (JSON or plain text) and the pipeline name
// is specified using query param.
let pipeline_name = check_pipeline_name_exists(query_params.pipeline_name)?;
let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?;
let ignore_errors = query_params.ignore_errors.unwrap_or(false);
let value =
extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?;
check_data_valid(value.len())?;
query_ctx.set_channel(Channel::Http);
let query_ctx = Arc::new(query_ctx);
let pipeline = handler
.get_pipeline(&pipeline_name, version, query_ctx.clone())
.await?;
dryrun_pipeline_inner(value, &pipeline)
}
}
}
#[axum_macros::debug_handler]
pub async fn loki_ingest(
State(log_state): State<LogState>,
@@ -438,8 +514,8 @@ pub async fn loki_ingest(
let line = entry.line;
// create and init row
let mut row = Vec::with_capacity(schemas.capacity());
for _ in 0..row.capacity() {
let mut row = Vec::with_capacity(schemas.len());
for _ in 0..schemas.len() {
row.push(GreptimeValue { value_data: None });
}
// insert ts and line
@@ -612,10 +688,15 @@ async fn ingest_logs_inner(
let mut results = Vec::with_capacity(pipeline_data.len());
let transformed_data: Rows;
if pipeline_name == GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME {
let rows = pipeline::identity_pipeline(pipeline_data)
let table = state
.get_table(&table_name, &query_ctx)
.await
.context(CatalogSnafu)?;
let rows = pipeline::identity_pipeline(pipeline_data, table)
.context(PipelineTransformSnafu)
.context(PipelineSnafu)?;
transformed_data = rows;
transformed_data = rows
} else {
let pipeline = state
.get_pipeline(&pipeline_name, version, query_ctx.clone())

View File

@@ -30,8 +30,13 @@ use query::parser::{PromQuery, DEFAULT_LOOKBACK_STRING};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use session::context::{Channel, QueryContext, QueryContextRef};
use snafu::ResultExt;
use sql::dialect::GreptimeDbDialect;
use sql::parser::{ParseOptions, ParserContext};
use sql::statements::statement::Statement;
use super::header::collect_plan_metrics;
use crate::error::{FailedToParseQuerySnafu, InvalidQuerySnafu, Result};
use crate::http::result::arrow_result::ArrowResponse;
use crate::http::result::csv_result::CsvResponse;
use crate::http::result::error_result::ErrorResponse;
@@ -146,10 +151,31 @@ pub async fn sql(
resp.with_execution_time(start.elapsed().as_millis() as u64)
}
/// Handler to parse sql
#[axum_macros::debug_handler]
#[tracing::instrument(skip_all, fields(protocol = "http", request_type = "sql"))]
pub async fn sql_parse(
Query(query_params): Query<SqlQuery>,
Form(form_params): Form<SqlQuery>,
) -> Result<Json<Vec<Statement>>> {
let Some(sql) = query_params.sql.or(form_params.sql) else {
return InvalidQuerySnafu {
reason: "sql parameter is required.",
}
.fail();
};
let stmts =
ParserContext::create_with_dialect(&sql, &GreptimeDbDialect {}, ParseOptions::default())
.context(FailedToParseQuerySnafu)?;
Ok(stmts.into())
}
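
A sketch of what the handler produces for a simple query, calling the same parser directly (error handling elided); the `Serialize` derives added to the statement types further down are what allow the `Json<Vec<Statement>>` response. The table name is illustrative.

let stmts = ParserContext::create_with_dialect(
    "SELECT * FROM monitor LIMIT 1",
    &GreptimeDbDialect {},
    ParseOptions::default(),
)
.unwrap();
assert_eq!(stmts.len(), 1);
// The handler returns this as JSON.
let body = serde_json::to_string(&stmts).unwrap();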
/// Create a response from query result
pub async fn from_output(
outputs: Vec<crate::error::Result<Output>>,
) -> Result<(Vec<GreptimeQueryOutput>, HashMap<String, Value>), ErrorResponse> {
) -> std::result::Result<(Vec<GreptimeQueryOutput>, HashMap<String, Value>), ErrorResponse> {
// TODO(sunng87): this api response structure cannot represent error well.
// It hides successful execution results from error response
let mut results = Vec::with_capacity(outputs.len());

View File

@@ -39,7 +39,7 @@ use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequ
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion, PipelineWay};
use serde_json::Value;
use session::context::QueryContextRef;
use session::context::{QueryContext, QueryContextRef};
use crate::error::Result;
use crate::influxdb::InfluxdbRequest;
@@ -164,4 +164,13 @@ pub trait PipelineHandler {
version: PipelineVersion,
query_ctx: QueryContextRef,
) -> Result<Option<()>>;
async fn get_table(
&self,
table: &str,
query_ctx: &QueryContext,
) -> std::result::Result<Option<Arc<table::Table>>, catalog::error::Error>;
/// Build a pipeline from a string.
fn build_pipeline(&self, pipeline: &str) -> Result<Pipeline<GreptimeTransformer>>;
}

View File

@@ -30,6 +30,7 @@ itertools.workspace = true
jsonb.workspace = true
lazy_static.workspace = true
regex.workspace = true
serde.workspace = true
serde_json.workspace = true
snafu.workspace = true
sqlparser.workspace = true

View File

@@ -14,12 +14,13 @@
use std::fmt::Display;
use serde::Serialize;
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::Function;
/// `ADMIN` statement to execute some administration commands.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub enum Admin {
/// Run an admin function.
Func(Function),

View File

@@ -18,10 +18,11 @@ use api::v1;
use common_query::AddColumnLocation;
use datatypes::schema::FulltextOptions;
use itertools::Itertools;
use serde::Serialize;
use sqlparser::ast::{ColumnDef, DataType, Ident, ObjectName, TableConstraint};
use sqlparser_derive::{Visit, VisitMut};
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct AlterTable {
pub table_name: ObjectName,
pub alter_operation: AlterTableOperation,
@@ -56,7 +57,7 @@ impl Display for AlterTable {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub enum AlterTableOperation {
/// `ADD <table_constraint>`
AddConstraint(TableConstraint),
@@ -71,29 +72,20 @@ pub enum AlterTableOperation {
target_type: DataType,
},
/// `SET <table attrs key> = <table attr value>`
SetTableOptions {
options: Vec<KeyValueOption>,
},
UnsetTableOptions {
keys: Vec<String>,
},
SetTableOptions { options: Vec<KeyValueOption> },
/// `UNSET <table attrs key>`
UnsetTableOptions { keys: Vec<String> },
/// `DROP COLUMN <name>`
DropColumn {
name: Ident,
},
DropColumn { name: Ident },
/// `RENAME <new_table_name>`
RenameTable {
new_table_name: String,
},
RenameTable { new_table_name: String },
/// `MODIFY COLUMN <column_name> SET FULLTEXT [WITH <options>]`
SetColumnFulltext {
column_name: Ident,
options: FulltextOptions,
},
/// `MODIFY COLUMN <column_name> UNSET FULLTEXT`
UnsetColumnFulltext {
column_name: Ident,
},
UnsetColumnFulltext { column_name: Ident },
}
impl Display for AlterTableOperation {
@@ -151,7 +143,7 @@ impl Display for AlterTableOperation {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct KeyValueOption {
pub key: String,
pub value: String,
@@ -166,7 +158,7 @@ impl From<KeyValueOption> for v1::Option {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct AlterDatabase {
pub database_name: ObjectName,
pub alter_operation: AlterDatabaseOperation,
@@ -197,7 +189,7 @@ impl Display for AlterDatabase {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub enum AlterDatabaseOperation {
SetDatabaseOption { options: Vec<KeyValueOption> },
UnsetDatabaseOption { keys: Vec<String> },

View File

@@ -14,12 +14,13 @@
use std::fmt::Display;
use serde::Serialize;
use sqlparser::ast::ObjectName;
use sqlparser_derive::{Visit, VisitMut};
use crate::statements::OptionMap;
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub enum Copy {
CopyTable(CopyTable),
CopyDatabase(CopyDatabase),
@@ -34,7 +35,7 @@ impl Display for Copy {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub enum CopyTable {
To(CopyTableArgument),
From(CopyTableArgument),
@@ -65,7 +66,7 @@ impl Display for CopyTable {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub enum CopyDatabase {
To(CopyDatabaseArgument),
From(CopyDatabaseArgument),
@@ -96,7 +97,7 @@ impl Display for CopyDatabase {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct CopyDatabaseArgument {
pub database_name: ObjectName,
pub with: OptionMap,
@@ -104,7 +105,7 @@ pub struct CopyDatabaseArgument {
pub location: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct CopyTableArgument {
pub table_name: ObjectName,
pub with: OptionMap,

View File

@@ -18,6 +18,7 @@ use std::fmt::{Display, Formatter};
use common_catalog::consts::FILE_ENGINE;
use datatypes::schema::FulltextOptions;
use itertools::Itertools;
use serde::Serialize;
use snafu::ResultExt;
use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query};
use sqlparser_derive::{Visit, VisitMut};
@@ -58,7 +59,7 @@ fn format_table_constraint(constraints: &[TableConstraint]) -> String {
}
/// Table constraint for create table statement.
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub enum TableConstraint {
/// Primary key constraint.
PrimaryKey { columns: Vec<Ident> },
@@ -84,7 +85,7 @@ impl Display for TableConstraint {
}
}
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub struct CreateTable {
/// Create if not exists
pub if_not_exists: bool,
@@ -100,7 +101,7 @@ pub struct CreateTable {
}
/// Column definition in `CREATE TABLE` statement.
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub struct Column {
/// `ColumnDef` from `sqlparser::ast`
pub column_def: ColumnDef,
@@ -109,7 +110,7 @@ pub struct Column {
}
/// Column extensions for greptimedb dialect.
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default, Serialize)]
pub struct ColumnExtensions {
/// Fulltext options.
pub fulltext_options: Option<OptionMap>,
@@ -172,7 +173,7 @@ impl ColumnExtensions {
}
}
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub struct Partitions {
pub column_list: Vec<Ident>,
pub exprs: Vec<Expr>,
@@ -244,7 +245,7 @@ impl Display for CreateTable {
}
}
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub struct CreateDatabase {
pub name: ObjectName,
/// Create if not exists
@@ -278,7 +279,7 @@ impl Display for CreateDatabase {
}
}
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub struct CreateExternalTable {
/// Table name
pub name: ObjectName,
@@ -309,7 +310,7 @@ impl Display for CreateExternalTable {
}
}
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub struct CreateTableLike {
/// Table name
pub table_name: ObjectName,
@@ -325,7 +326,7 @@ impl Display for CreateTableLike {
}
}
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub struct CreateFlow {
/// Flow name
pub flow_name: ObjectName,
@@ -367,7 +368,7 @@ impl Display for CreateFlow {
}
/// Create SQL view statement.
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub struct CreateView {
/// View name
pub name: ObjectName,

View File

@@ -14,6 +14,7 @@
use std::fmt::Display;
use serde::Serialize;
use sqlparser::ast::ObjectName;
use sqlparser_derive::{Visit, VisitMut};
@@ -22,7 +23,7 @@ use super::query::Query;
/// Represents a DECLARE CURSOR statement
///
/// This statement will carry a SQL query
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct DeclareCursor {
pub cursor_name: ObjectName,
pub query: Box<Query>,
@@ -35,7 +36,7 @@ impl Display for DeclareCursor {
}
/// Represents a FETCH FROM cursor statement
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct FetchCursor {
pub cursor_name: ObjectName,
pub fetch_size: u64,
@@ -48,7 +49,7 @@ impl Display for FetchCursor {
}
/// Represents a CLOSE cursor statement
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct CloseCursor {
pub cursor_name: ObjectName,
}

View File

@@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::Serialize;
use sqlparser::ast::Statement;
use sqlparser_derive::{Visit, VisitMut};
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct Delete {
pub inner: Statement,
}

View File

@@ -14,11 +14,12 @@
use std::fmt::Display;
use serde::Serialize;
use sqlparser::ast::ObjectName;
use sqlparser_derive::{Visit, VisitMut};
/// SQL structure for `DESCRIBE TABLE`.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct DescribeTable {
name: ObjectName,
}

View File

@@ -14,11 +14,12 @@
use std::fmt::Display;
use serde::Serialize;
use sqlparser::ast::ObjectName;
use sqlparser_derive::{Visit, VisitMut};
/// DROP TABLE statement.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct DropTable {
table_names: Vec<ObjectName>,
@@ -62,7 +63,7 @@ impl Display for DropTable {
}
/// DROP DATABASE statement.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct DropDatabase {
name: ObjectName,
/// drop table if exists
@@ -99,7 +100,7 @@ impl Display for DropDatabase {
}
/// DROP FLOW statement.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct DropFlow {
flow_name: ObjectName,
/// drop flow if exists
@@ -138,7 +139,7 @@ impl Display for DropFlow {
}
/// `DROP VIEW` statement.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct DropView {
// The view name
pub view_name: ObjectName,

View File

@@ -14,13 +14,14 @@
use std::fmt::{Display, Formatter};
use serde::Serialize;
use sqlparser::ast::Statement as SpStatement;
use sqlparser_derive::{Visit, VisitMut};
use crate::error::Error;
/// Explain statement.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct Explain {
pub inner: SpStatement,
}

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::Serialize;
use sqlparser::ast::{ObjectName, Query, SetExpr, Statement, UnaryOperator, Values};
use sqlparser::parser::ParserError;
use sqlparser_derive::{Visit, VisitMut};
@@ -20,7 +21,7 @@ use crate::ast::{Expr, Value};
use crate::error::Result;
use crate::statements::query::Query as GtQuery;
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct Insert {
// Can only be sqlparser::ast::Statement::Insert variant
pub inner: Statement,

View File

@@ -16,14 +16,16 @@ use std::collections::{BTreeMap, HashMap};
use std::ops::ControlFlow;
use common_base::secrets::{ExposeSecret, ExposeSecretMut, SecretString};
use serde::Serialize;
use sqlparser::ast::{Visit, VisitMut, Visitor, VisitorMut};
const REDACTED_OPTIONS: [&str; 2] = ["access_key_id", "secret_access_key"];
/// Options hashmap.
#[derive(Clone, Debug, Default)]
#[derive(Clone, Debug, Default, Serialize)]
pub struct OptionMap {
options: BTreeMap<String, String>,
#[serde(skip_serializing)]
secrets: BTreeMap<String, SecretString>,
}
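
The `#[serde(skip_serializing)]` on `secrets` above is what keeps credentials out of any JSON rendered from a parsed statement. A self-contained illustration with plain serde types (not the real `OptionMap`):

```rust
use serde::Serialize;
use std::collections::BTreeMap;

#[derive(Serialize)]
struct Options {
    options: BTreeMap<String, String>,
    #[serde(skip_serializing)]
    secrets: BTreeMap<String, String>,
}

fn main() {
    let mut opts = Options {
        options: BTreeMap::new(),
        secrets: BTreeMap::new(),
    };
    opts.options.insert("format".into(), "parquet".into());
    opts.secrets.insert("secret_access_key".into(), "do-not-log".into());

    // The skipped field never reaches the output: {"options":{"format":"parquet"}}
    println!("{}", serde_json::to_string(&opts).unwrap());
}
```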

View File

@@ -14,13 +14,14 @@
use std::fmt;
use serde::Serialize;
use sqlparser::ast::Query as SpQuery;
use sqlparser_derive::{Visit, VisitMut};
use crate::error::Error;
/// Query statement instance.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct Query {
pub inner: SpQuery,
}

Some files were not shown because too many files have changed in this diff.