Compare commits

35 Commits

Author SHA1 Message Date
liyang
b4b105ad35 test 2024-08-27 10:11:32 +08:00
liyang
e1d0bb3749 test 2024-08-27 02:07:07 +08:00
liyang
867d6ab600 test: skopeo authentication 2024-08-27 01:19:54 +08:00
liyang
63a442632e fix: failed to get version (#4622) 2024-08-26 15:33:30 +00:00
liyang
d39bafcfbd fix: change toolchain file name (#4621) 2024-08-26 13:04:06 +00:00
liyang
1717445ebe fix: failed to get github sha (#4620) 2024-08-26 11:42:07 +00:00
liyang
55d65da24d ci: add push dev-build images to aws ecr (#4618)
* ci: add push dev-build images to aws ecr

* chore: use toolchain file generation dev-build image tag

* chore: change dev-build version

* Update .github/workflows/release-dev-builder-images.yaml

Co-authored-by: zyy17 <zyylsxm@gmail.com>

---------

Co-authored-by: zyy17 <zyylsxm@gmail.com>
2024-08-26 09:36:55 +00:00
Weny Xu
3297d5f657 feat: allow skipping topic creation (#4616)
* feat: introduce `create_topics` opt

* feat: allow skipping topic creation

* chore: refine docs

* chore: apply suggestions from CR
2024-08-26 08:34:27 +00:00
Ning Sun
d6865911ee feat: add postgres response for trasaction related statements (#4562)
* feat: add postgres fixtures WIP

* feat: implement more postgres fixtures

* feat: add compatibility for transaction/set transaction/show transaction

* fix: improve regex for set transaction
2024-08-26 08:09:21 +00:00
dennis zhuang
63f2463273 feat!: impl admin command (#4600)
* feat: impl admin statement parser

* feat: introduce AsyncFunction and implements it for admin functions

* feat: execute admin functions

* fix: license header

* fix: panic in test

* chore: fixed by code review
2024-08-26 07:53:40 +00:00
Ruihang Xia
da337a9635 perf: acclerate scatter query (#4607)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-26 03:03:30 +00:00
fys
3973d6b01f chore: optimize common_version build (#4611) 2024-08-23 12:36:28 +00:00
discord9
2c731c76ad chore: add stats feature for jemalloc-ctl (#4610) 2024-08-23 11:18:30 +00:00
ozewr
40e7b58c80 feat: refactoring LruCacheLayer with list_with_metakey and concurrent_stat_in_list (#4596)
* use list_with_metakey and concurrent_stat_in_list

* change concurrent in recover_cache like before

* remove stat funcation

* use 8 concurrent

* use const value

* fmt code

* Apply suggestions from code review

---------

Co-authored-by: ozewr <l19ht@google.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-08-23 03:22:00 +00:00
zyy17
5177717f71 refactor: add fallback_to_local region option (#4578)
* refactor: add 'fallback_to_local_compaction' region option

* refactor: use 'fallback_to_local'
2024-08-23 03:09:43 +00:00
Weny Xu
8d61e6fe49 chore: bump rskafka to 75535b (#4608) 2024-08-23 03:05:52 +00:00
Ruihang Xia
a3b8d2fe8f chore: bump rust toolchain to 2024-08-21 (#4606)
* chore: bump rust toolchain to 2024-08-22

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update workflow

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* try 20240606

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-22 15:38:10 +00:00
Ning Sun
863ee073a9 chore: add commerial support section (#4601)
doc: add commerial support section
2024-08-22 12:03:20 +00:00
Weny Xu
25cd61b310 chore: upgrade toolchain to nightly-2024-08-07 (#4549)
* chore: upgrade toolchain to `nightly-2024-08-07`

* chore(ci): upgrade toolchain

* fix: fix unit test
2024-08-22 11:02:18 +00:00
fys
3517c13192 fix: incremental compilation always compile the common-version crate (#4605)
fix: wrong cargo:rerun
2024-08-22 11:00:33 +00:00
Ruihang Xia
b9cedf2c1a perf: optimize series divide algo (#4603)
* perf: optimize series divide algo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove dead code

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-22 09:16:36 +00:00
LFC
883c5bc5b0 refactor: skip checking the existence of the SST files (#4602)
refactor: skip checking the existence of the SST files when region is directly edited
2024-08-22 08:32:27 +00:00
Yingwen
d628079f4c feat: collect filters metrics for scanners (#4591)
* feat: collect filter metrics

* refactor: reuse ReaderFilterMetrics

* feat: record read rows from parquet by type

* feat: unordered scan observe rows

also fix read type

* chore: rename label
2024-08-22 03:22:05 +00:00
Weny Xu
0025fa6ec7 chore: bump opendal version to 0.49 (#4587)
* chore: bump opendal version to 0.49

* chore: apply suggestions from CR

* Update src/object-store/src/util.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-08-22 03:05:36 +00:00
Lanqing Yang
ff04109ee6 docs: add example configs introduced by pg_kvbackend (#4573)
chore: add example configs that introduced after pg_kvbackend
2024-08-22 01:52:02 +00:00
Yingwen
9c1704d4cb docs: move v0.9.1 benchmark report to tsbs dir (#4598)
* docs: move v0.9.1 benchmark report to tsbs dir

* docs: add newlines
2024-08-21 09:31:05 +00:00
Yingwen
a12a905578 chore: disable ttl for write cache by default (#4595)
* chore: remove default write cache ttl

* docs: update example config

* chore: fix ci
2024-08-21 08:38:38 +00:00
shuiyisong
449236360d docs: log benchmark (#4597)
* chore: add log benchmark stuff

* chore: minor update
2024-08-21 07:12:32 +00:00
localhost
bf16422cee fix: pipeline prepare loop break detects a conditional error (#4593) 2024-08-21 06:20:09 +00:00
Ran Joe
9db08dbbe0 refactor(mito2): reduce duplicate IndexOutput struct (#4592)
* refactor(mito2): reduce duplicate IndexOutput struct

* docs(mito2): add index output note
2024-08-20 12:30:17 +00:00
fys
9d885fa0c2 chore: bump tikv-jemalloc* to "0.6" (#4590)
chore: bump tikv-jemalloc* ti "0.6"
2024-08-20 09:08:21 +00:00
Ruihang Xia
b25a2b117e feat: remove sql in error desc (#4589)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-20 06:37:30 +00:00
fys
6fccff4810 chore: keep symbol table in nightly profile (#4588)
chore: keep symbol table in nighly profile
2024-08-20 02:27:31 +00:00
ozewr
30af78700f feat: Implement the Buf to avoid extra memory allocation (#4585)
* feat: Implement the Buf to avoid extra memory allocation

* fmt toml

* fmt code

* mv entry.into_buffer to raw_entry_buffer

* less reuse opendal

* remove todo #4065

* Update src/mito2/src/wal/entry_reader.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* fmt code

---------

Co-authored-by: ozewr <l19ht@google.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-08-19 12:11:08 +00:00
Ruihang Xia
8de11a0e34 perf: set simple filter on primary key columns to exact filter (#4564)
* perf: set simple filter on primary key columns to exact filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix sqlness

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-19 09:07:35 +00:00
157 changed files with 2495 additions and 856 deletions

View File

@@ -17,6 +17,12 @@ inputs:
description: Enable dev mode, only build standard greptime
required: false
default: "false"
image-namespace:
description: Image Namespace
required: true
image-registry:
description: Image Registry
required: true
working-dir:
description: Working directory to build the artifacts
required: false
@@ -31,8 +37,8 @@ runs:
run: |
cd ${{ inputs.working-dir }} && \
make run-it-in-container BUILD_JOBS=4 \
IMAGE_NAMESPACE=i8k6a5e1/greptime \
IMAGE_REGISTRY=public.ecr.aws
IMAGE_NAMESPACE=${{ inputs.image-namespace }} \
IMAGE_REGISTRY=${{ inputs.image-registry }}
- name: Upload sqlness logs
if: ${{ failure() && inputs.disable-run-tests == 'false' }} # Only upload logs when the integration tests failed.
@@ -51,8 +57,8 @@ runs:
artifacts-dir: greptime-linux-${{ inputs.arch }}-pyo3-${{ inputs.version }}
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
image-registry: public.ecr.aws
image-namespace: i8k6a5e1/greptime
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
- name: Build greptime without pyo3
if: ${{ inputs.dev-mode == 'false' }}
@@ -64,8 +70,8 @@ runs:
artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
image-registry: public.ecr.aws
image-namespace: i8k6a5e1/greptime
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
- name: Clean up the target directory # Clean up the target directory for the centos7 base image, or it will still use the objects of last build.
shell: bash
@@ -82,8 +88,8 @@ runs:
artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
image-registry: public.ecr.aws
image-namespace: i8k6a5e1/greptime
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
- name: Build greptime on android base image
uses: ./.github/actions/build-greptime-binary
@@ -94,5 +100,5 @@ runs:
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
build-android-artifacts: true
image-registry: public.ecr.aws
image-namespace: i8k6a5e1/greptime
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}

View File

@@ -13,7 +13,7 @@ on:
name: Build API docs
env:
RUST_TOOLCHAIN: nightly-2024-04-20
RUST_TOOLCHAIN: nightly-2024-06-06
jobs:
apidoc:

View File

@@ -177,6 +177,8 @@ jobs:
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
dev-mode: true # Only build the standard greptime binary.
working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
@@ -206,6 +208,8 @@ jobs:
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
dev-mode: true # Only build the standard greptime binary.
working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
release-images-to-dockerhub:
name: Build and push images to DockerHub

View File

@@ -30,7 +30,7 @@ concurrency:
cancel-in-progress: true
env:
RUST_TOOLCHAIN: nightly-2024-04-20
RUST_TOOLCHAIN: nightly-2024-06-06
jobs:
check-typos-and-docs:

View File

@@ -154,6 +154,8 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
@@ -173,6 +175,8 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
release-images-to-dockerhub:
name: Build and push images to DockerHub

View File

@@ -10,7 +10,7 @@ concurrency:
cancel-in-progress: true
env:
RUST_TOOLCHAIN: nightly-2024-04-20
RUST_TOOLCHAIN: nightly-2024-06-06
permissions:
issues: write

View File

@@ -3,10 +3,6 @@ name: Release dev-builder images
on:
workflow_dispatch: # Allows you to run this workflow manually.
inputs:
version:
description: Version of the dev-builder
required: false
default: latest
release_dev_builder_ubuntu_image:
type: boolean
description: Release dev-builder-ubuntu image
@@ -28,22 +24,103 @@ jobs:
name: Release dev builder images
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
runs-on: ubuntu-20.04-16-cores
outputs:
version: ${{ steps.set-version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Configure build image version
id: set-version
shell: bash
run: |
commitShortSHA=`echo ${{ github.sha }} | cut -c1-8`
buildTime=`date +%Y%m%d%H%M%S`
BUILD_VERSION="$commitShortSHA-$buildTime"
RUST_TOOLCHAIN_VERSION=$(cat rust-toolchain.toml | grep -Eo '[0-9]{4}-[0-9]{2}-[0-9]{2}')
IMAGE_VERSION="${RUST_TOOLCHAIN_VERSION}-${BUILD_VERSION}"
echo "VERSION=${IMAGE_VERSION}" >> $GITHUB_ENV
echo "version=$IMAGE_VERSION" >> $GITHUB_OUTPUT
- name: Build and push dev builder images
uses: ./.github/actions/build-dev-builder-images
with:
version: ${{ inputs.version }}
version: ${{ env.VERSION }}
dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
dockerhub-image-registry-token: ${{ secrets.DOCKERHUB_TOKEN }}
build-dev-builder-ubuntu: ${{ inputs.release_dev_builder_ubuntu_image }}
build-dev-builder-centos: ${{ inputs.release_dev_builder_centos_image }}
build-dev-builder-android: ${{ inputs.release_dev_builder_android_image }}
release-dev-builder-images-ecr:
name: Release dev builder images to AWS ECR
runs-on: ubuntu-20.04
needs: [
release-dev-builder-images
]
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ECR_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_ECR_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.ECR_REGION }}
- name: Login to Amazon ECR
id: login-ecr-public
uses: aws-actions/amazon-ecr-login@v2
env:
AWS_REGION: ${{ vars.ECR_REGION }}
with:
registry-type: public
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:latest
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:latest
release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
name: Release dev builder images to CN region
runs-on: ubuntu-20.04
@@ -51,35 +128,39 @@ jobs:
release-dev-builder-images
]
steps:
- name: Login to AliCloud Container Registry
uses: docker/login-action@v3
with:
registry: ${{ vars.ACR_IMAGE_REGISTRY }}
username: ${{ secrets.ALICLOUD_USERNAME }}
password: ${{ secrets.ALICLOUD_PASSWORD }}
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ inputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ inputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ inputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}

View File

@@ -82,7 +82,7 @@ on:
# Use env variables to control all the release process.
env:
# The arguments of building greptime.
RUST_TOOLCHAIN: nightly-2024-04-20
RUST_TOOLCHAIN: nightly-2024-06-06
CARGO_PROFILE: nightly
# Controls whether to run tests, include unit-test, integration-test and sqlness.
@@ -183,6 +183,8 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
@@ -202,6 +204,8 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
build-macos-artifacts:
name: Build macOS artifacts

Cargo.lock generated
View File

@@ -1954,6 +1954,7 @@ dependencies = [
"statrs",
"store-api",
"table",
"tokio",
]
[[package]]
@@ -4568,9 +4569,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "human-panic"
version = "1.2.3"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f016c89920bbb30951a8405ecacbb4540db5524313b9445736e7e1855cf370"
checksum = "1c5a08ed290eac04006e21e63d32e90086b6182c7cd0452d10f4264def1fec9a"
dependencies = [
"anstream",
"anstyle",
@@ -7010,9 +7011,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "opendal"
version = "0.48.0"
version = "0.49.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "615d41187deea0ea7fab5b48e9afef6ae8fc742fdcfa248846ee3d92ff71e986"
checksum = "39d516adf7db912c38af382c3e92c27cd62fbbc240e630920555d784c2ab1494"
dependencies = [
"anyhow",
"async-trait",
@@ -9170,8 +9171,9 @@ dependencies = [
[[package]]
name = "rsasl"
version = "2.0.2"
source = "git+https://github.com/wenyxu/rsasl.git?rev=06ebb683d5539c3410de4ce9fa37ff9b97e790a4#06ebb683d5539c3410de4ce9fa37ff9b97e790a4"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45035615cdd68c71daac89aef75b130d4b2cad29599966e1b4671f8fbb463559"
dependencies = [
"base64 0.22.1",
"core2",
@@ -9188,9 +9190,8 @@ dependencies = [
[[package]]
name = "rskafka"
version = "0.5.0"
source = "git+https://github.com/WenyXu/rskafka.git?rev=940c6030012c5b746fad819fb72e3325b26e39de#940c6030012c5b746fad819fb72e3325b26e39de"
source = "git+https://github.com/influxdata/rskafka.git?rev=75535b5ad9bae4a5dbb582c82e44dfd81ec10105#75535b5ad9bae4a5dbb582c82e44dfd81ec10105"
dependencies = [
"async-trait",
"bytes",
"chrono",
"crc32c",
@@ -9199,7 +9200,6 @@ dependencies = [
"integer-encoding 4.0.0",
"lz4",
"parking_lot 0.12.3",
"pin-project-lite",
"rand",
"rsasl",
"rustls 0.23.10",
@@ -11800,9 +11800,9 @@ dependencies = [
[[package]]
name = "tikv-jemalloc-ctl"
version = "0.5.4"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "619bfed27d807b54f7f776b9430d4f8060e66ee138a28632ca898584d462c31c"
checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b"
dependencies = [
"libc",
"paste",
@@ -11811,9 +11811,9 @@ dependencies = [
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.4+5.3.0-patched"
version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
checksum = "cd3c60906412afa9c2b5b5a48ca6a5abe5736aec9eb48ad05037a677e52e4e2d"
dependencies = [
"cc",
"libc",
@@ -11821,9 +11821,9 @@ dependencies = [
[[package]]
name = "tikv-jemallocator"
version = "0.5.4"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
checksum = "4cec5ff18518d81584f477e9bfdf957f5bb0979b0bac3af4ca30b5b3ae2d2865"
dependencies = [
"libc",
"tikv-jemalloc-sys",

View File

@@ -77,6 +77,7 @@ clippy.readonly_write_lock = "allow"
rust.unknown_lints = "deny"
# Remove this after https://github.com/PyO3/pyo3/issues/4094
rust.non_local_definitions = "allow"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
[workspace.dependencies]
# We turn off default-features for some dependencies here so the workspaces which inherit them can
@@ -151,8 +152,7 @@ reqwest = { version = "0.12", default-features = false, features = [
"stream",
"multipart",
] }
# SCRAM-SHA-512 requires https://github.com/dequbed/rsasl/pull/48, https://github.com/influxdata/rskafka/pull/247
rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "940c6030012c5b746fad819fb72e3325b26e39de", features = [
rskafka = { git = "https://github.com/influxdata/rskafka.git", rev = "75535b5ad9bae4a5dbb582c82e44dfd81ec10105", features = [
"transport-tls",
] }
rstest = "0.21"
@@ -251,7 +251,7 @@ debug = 1
[profile.nightly]
inherits = "release"
strip = true
strip = "debuginfo"
lto = "thin"
debug = false
incremental = false

View File

@@ -106,7 +106,7 @@ strip-android-bin: build-android-bin ## Strip greptime binary for android.
docker run --network=host \
-v ${PWD}:/greptimedb \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-android:latest \
bash -c '$${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip /greptimedb/target/aarch64-linux-android/release/greptime'
bash -c '$${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip --strip-debug /greptimedb/target/aarch64-linux-android/release/greptime'
.PHONY: clean
clean: ## Clean the project.

View File

@@ -150,7 +150,7 @@ Our official Grafana dashboard is available at [grafana](grafana/README.md) dire
## Project Status
The current version has not yet reached the standards for General Availability.
The current version has not yet reached the standards for General Availability.
According to our Greptime 2024 Roadmap, we aim to achieve a production-level version with the release of v1.0 by the end of 2024. [Join Us](https://github.com/GreptimeTeam/greptimedb/issues/3412)
We welcome you to test and use GreptimeDB. Some users have already adopted it in their production environments. If you're interested in trying it out, please use the latest stable release available.
@@ -172,6 +172,13 @@ In addition, you may:
- Connect us with [Linkedin](https://www.linkedin.com/company/greptime/)
- Follow us on [Twitter](https://twitter.com/greptime)
## Commercial Support
If you are running GreptimeDB OSS in your organization, we offer additional
enterprise add-ons, installation services, training and consulting. [Contact
us](https://greptime.com/contactus) and we will reach out to you with more
details of our commercial license.
## License
GreptimeDB uses the [Apache License 2.0](https://apache.org/licenses/LICENSE-2.0.txt) to strike a balance between

View File

@@ -67,9 +67,10 @@
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start.<br/>**It's only used when the provider is `kafka`**. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.num_topics` | Integer | `64` | Number of topics.<br/>**It's only used when the provider is `kafka`**. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>**It's only used when the provider is `kafka`**. |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.<br/>**It's only used when the provider is `kafka`**. |
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition.<br/>**It's only used when the provider is `kafka`**. |
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled.<br/>**It's only used when the provider is `kafka`**. |
| `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
@@ -116,11 +117,12 @@
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | `512MB` | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `1h` | TTL for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `None` | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
@@ -259,12 +261,13 @@
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
| `store_addr` | String | `127.0.0.1:2379` | Etcd server address. |
| `store_addr` | String | `127.0.0.1:2379` | Store server address default to etcd store. |
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `backend` | String | `EtcdStore` | The datastore for meta server. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -285,9 +288,10 @@
| `wal` | -- | -- | -- |
| `wal.provider` | String | `raft_engine` | -- |
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.num_topics` | Integer | `64` | Number of topics. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
| `wal.backoff_init` | String | `500ms` | The initial backoff for kafka clients. |
@@ -408,11 +412,12 @@
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | `512MB` | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `1h` | TTL for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `None` | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |

View File

@@ -394,9 +394,13 @@ sst_meta_cache_size = "128MB"
vector_cache_size = "512MB"
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
## If not set, it's default to 1/8 of OS memory.
page_cache_size = "512MB"
## Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
selector_result_cache_size = "512MB"
## Whether to enable the experimental write cache.
enable_experimental_write_cache = false
@@ -407,7 +411,8 @@ experimental_write_cache_path = ""
experimental_write_cache_size = "512MB"
## TTL for write cache.
experimental_write_cache_ttl = "1h"
## +toml2docs:none-default
experimental_write_cache_ttl = "8h"
## Buffer size for SST writing.
sst_write_buffer_size = "8MB"

View File

@@ -7,7 +7,7 @@ bind_addr = "127.0.0.1:3002"
## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
server_addr = "127.0.0.1:3002"
## Etcd server address.
## Store server address default to etcd store.
store_addr = "127.0.0.1:2379"
## Datanode selector type.
@@ -32,6 +32,9 @@ store_key_prefix = ""
## - Using shared storage (e.g., s3).
enable_region_failover = false
## The datastore for meta server.
backend = "EtcdStore"
## The runtime options.
[runtime]
## The number of threads to execute the runtime for global read operations.
@@ -96,7 +99,12 @@ provider = "raft_engine"
## The broker endpoints of the Kafka cluster.
broker_endpoints = ["127.0.0.1:9092"]
## Number of topics to be created upon start.
## Automatically create topics for WAL.
## Set to `true` to automatically create topics for WAL.
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
auto_create_topics = true
## Number of topics.
num_topics = 64
## Topic selector type.
@@ -105,6 +113,7 @@ num_topics = 64
selector_type = "round_robin"
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
## e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.
topic_name_prefix = "greptimedb_wal_topic"
## Expected number of replicas of each partition.

View File

@@ -171,7 +171,12 @@ sync_period = "10s"
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]
## Number of topics to be created upon start.
## Automatically create topics for WAL.
## Set to `true` to automatically create topics for WAL.
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
auto_create_topics = true
## Number of topics.
## **It's only used when the provider is `kafka`**.
num_topics = 64
@@ -182,6 +187,7 @@ num_topics = 64
selector_type = "round_robin"
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
## e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.
## **It's only used when the provider is `kafka`**.
topic_name_prefix = "greptimedb_wal_topic"
@@ -431,9 +437,13 @@ sst_meta_cache_size = "128MB"
vector_cache_size = "512MB"
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
## If not set, it's default to 1/8 of OS memory.
page_cache_size = "512MB"
## Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
selector_result_cache_size = "512MB"
## Whether to enable the experimental write cache.
enable_experimental_write_cache = false
@@ -444,7 +454,8 @@ experimental_write_cache_path = ""
experimental_write_cache_size = "512MB"
## TTL for write cache.
experimental_write_cache_ttl = "1h"
## +toml2docs:none-default
experimental_write_cache_ttl = "8h"
## Buffer size for SST writing.
sst_write_buffer_size = "8MB"

View File

@@ -0,0 +1,51 @@
# Log benchmark configuration
This repo holds the configurations we used to benchmark GreptimeDB, Clickhouse and Elasticsearch.
Here are the versions of the databases used in the benchmark:
| name | version |
| :------------ | :--------- |
| GreptimeDB | v0.9.2 |
| Clickhouse | 24.9.1.219 |
| Elasticsearch | 8.15.0 |
## Structured model vs Unstructured model
We divide the test into two parts, using the structured model and the unstructured model respectively. You can also see the difference in the create table clauses.
__Structured model__
The log data is pre-processed into columns by vector. For example, an insert request looks like the following:
```SQL
INSERT INTO test_table (bytes, http_version, ip, method, path, status, user, timestamp) VALUES ()
```
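For clarity, here is a sketch of the same statement with one fully populated row; the values are purely illustrative:
```SQL
INSERT INTO test_table (bytes, http_version, ip, method, path, status, user, timestamp)
VALUES (1024, 'HTTP/1.1', '192.168.1.10', 'GET', '/user/booperbot124', 200, 'CrucifiX', '2024-08-16 04:30:44.000');
```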
The goal is to test string/text support in each database. In real scenarios this means the data source (or log producers) has separate fields defined, or has already processed the raw input.
__Unstructured model__
The log data is inserted as one long string, and then we build a fulltext index on these strings. For example, an insert request looks like the following:
```SQL
INSERT INTO test_table (message, timestamp) VALUES ()
```
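Again as a sketch with one illustrative row, where the message is a sample apache_common line of the kind the vector config parses:
```SQL
INSERT INTO test_table (message, timestamp)
VALUES ('192.168.1.10 - CrucifiX [16/Aug/2024:04:30:44 +0000] "GET /user/booperbot124 HTTP/1.1" 200 1024', '2024-08-16 04:30:44.000');
```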
The goal is to test fuzzy search performance for each database. In real scenarios it means the log is produced by some kind of middleware and inserted directly into the database.
## Creating tables
See [here](./create_table.sql) for the GreptimeDB and Clickhouse create table clauses.
The Elasticsearch mapping is created automatically.
## Vector Configuration
We use vector to generate random log data and send inserts to databases.
Please refer to [structured config](./structured_vector.toml) and [unstructured config](./unstructured_vector.toml) for detailed configuration.
## SQLs and payloads
Please refer to [SQL query](./query.sql) for GreptimeDB and Clickhouse, and [query payload](./query.md) for Elasticsearch.
## Steps to reproduce
0. Decide whether to run the structured model test or the unstructured model test.
1. Build the vector binary (see vector's config files for the specific branch) and the database binaries accordingly.
2. Create the tables in GreptimeDB and Clickhouse in advance.
3. Run vector to insert data.
4. When data insertion is finished, run the queries against each database. Note: you'll need to update the time range values after data insertion.
## Additional notes
- You can tune GreptimeDB's configuration to get better performance.
- You can set up GreptimeDB to use S3 as storage; see [here](https://docs.greptime.com/user-guide/operations/configuration/#storage-options) for the available options and the sketch below.
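A minimal sketch of the storage section for such a setup, assuming the keys described in the linked configuration docs; the bucket name, prefix and credentials are placeholders:
```TOML
[storage]
# Switch the data storage from the local file system to S3.
type = "S3"
# Placeholder bucket and data prefix; replace with your own.
bucket = "my-greptimedb-bucket"
root = "greptimedb-data"
# Placeholder credentials; inject them via your own secret management.
access_key_id = "<access-key-id>"
secret_access_key = "<secret-access-key>"
region = "us-west-2"
```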

View File

@@ -0,0 +1,56 @@
-- GreptimeDB create table clause
-- structured test, use vector to pre-process log data into fields
CREATE TABLE IF NOT EXISTS `test_table` (
`bytes` Int64 NULL,
`http_version` STRING NULL,
`ip` STRING NULL,
`method` STRING NULL,
`path` STRING NULL,
`status` SMALLINT UNSIGNED NULL,
`user` STRING NULL,
`timestamp` TIMESTAMP(3) NOT NULL,
PRIMARY KEY (`user`, `path`, `status`),
TIME INDEX (`timestamp`)
)
ENGINE=mito
WITH(
append_mode = 'true'
);
-- unstructured test, build fulltext index on message column
CREATE TABLE IF NOT EXISTS `test_table` (
`message` STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'),
`timestamp` TIMESTAMP(3) NOT NULL,
TIME INDEX (`timestamp`)
)
ENGINE=mito
WITH(
append_mode = 'true'
);
-- Clickhouse create table clause
-- structured test
CREATE TABLE IF NOT EXISTS test_table
(
bytes UInt64 NOT NULL,
http_version String NOT NULL,
ip String NOT NULL,
method String NOT NULL,
path String NOT NULL,
status UInt8 NOT NULL,
user String NOT NULL,
timestamp String NOT NULL,
)
ENGINE = MergeTree()
ORDER BY (user, path, status);
-- unstructured test
SET allow_experimental_full_text_index = true;
CREATE TABLE IF NOT EXISTS test_table
(
message String,
timestamp String,
INDEX inv_idx(message) TYPE full_text(0) GRANULARITY 1
)
ENGINE = MergeTree()
ORDER BY tuple();

View File

@@ -0,0 +1,199 @@
# Query URL and payload for Elasticsearch
## Count
URL: `http://127.0.0.1:9200/_count`
## Query by timerange
URL: `http://127.0.0.1:9200/_search`
You can use the following payload to get the full timerange first.
```JSON
{"size":0,"aggs":{"max_timestamp":{"max":{"field":"timestamp"}},"min_timestamp":{"min":{"field":"timestamp"}}}}
```
And then use this payload to query by timerange.
```JSON
{
"from": 0,
"size": 1000,
"query": {
"range": {
"timestamp": {
"gte": "2024-08-16T04:30:44.000Z",
"lte": "2024-08-16T04:51:52.000Z"
}
}
}
}
```
## Query by condition
URL: `http://127.0.0.1:9200/_search`
### Structured payload
```JSON
{
"from": 0,
"size": 10000,
"query": {
"bool": {
"must": [
{
"term": {
"user.keyword": "CrucifiX"
}
},
{
"term": {
"method.keyword": "OPTION"
}
},
{
"term": {
"path.keyword": "/user/booperbot124"
}
},
{
"term": {
"http_version.keyword": "HTTP/1.1"
}
},
{
"term": {
"status": "401"
}
}
]
}
}
}
```
### Unstructured payload
```JSON
{
"from": 0,
"size": 10000,
"query": {
"bool": {
"must": [
{
"match_phrase": {
"message": "CrucifiX"
}
},
{
"match_phrase": {
"message": "OPTION"
}
},
{
"match_phrase": {
"message": "/user/booperbot124"
}
},
{
"match_phrase": {
"message": "HTTP/1.1"
}
},
{
"match_phrase": {
"message": "401"
}
}
]
}
}
}
```
## Query by condition and timerange
URL: `http://127.0.0.1:9200/_search`
### Structured payload
```JSON
{
"size": 10000,
"query": {
"bool": {
"must": [
{
"term": {
"user.keyword": "CrucifiX"
}
},
{
"term": {
"method.keyword": "OPTION"
}
},
{
"term": {
"path.keyword": "/user/booperbot124"
}
},
{
"term": {
"http_version.keyword": "HTTP/1.1"
}
},
{
"term": {
"status": "401"
}
},
{
"range": {
"timestamp": {
"gte": "2024-08-19T07:03:37.383Z",
"lte": "2024-08-19T07:24:58.883Z"
}
}
}
]
}
}
}
```
### Unstructured payload
```JSON
{
"size": 10000,
"query": {
"bool": {
"must": [
{
"match_phrase": {
"message": "CrucifiX"
}
},
{
"match_phrase": {
"message": "OPTION"
}
},
{
"match_phrase": {
"message": "/user/booperbot124"
}
},
{
"match_phrase": {
"message": "HTTP/1.1"
}
},
{
"match_phrase": {
"message": "401"
}
},
{
"range": {
"timestamp": {
"gte": "2024-08-19T05:16:17.099Z",
"lte": "2024-08-19T05:46:02.722Z"
}
}
}
]
}
}
}
```

View File

@@ -0,0 +1,50 @@
-- Structured query for GreptimeDB and Clickhouse
-- query count
select count(*) from test_table;
-- query by timerange. Note: place the timestamp range in the where clause
-- GreptimeDB
-- you can use `select max(timestamp)::bigint from test_table;` and `select min(timestamp)::bigint from test_table;`
-- to get the full timestamp range
select * from test_table where timestamp between 1723710843619 and 1723711367588;
-- Clickhouse
-- you can use `select max(timestamp) from test_table;` and `select min(timestamp) from test_table;`
-- to get the full timestamp range
select * from test_table where timestamp between '2024-08-16T03:58:46Z' and '2024-08-16T04:03:50Z';
-- query by condition
SELECT * FROM test_table WHERE user = 'CrucifiX' and method = 'OPTION' and path = '/user/booperbot124' and http_version = 'HTTP/1.1' and status = 401;
-- query by condition and timerange
-- GreptimeDB
SELECT * FROM test_table WHERE user = "CrucifiX" and method = "OPTION" and path = "/user/booperbot124" and http_version = "HTTP/1.1" and status = 401
and timestamp between 1723774396760 and 1723774788760;
-- Clickhouse
SELECT * FROM test_table WHERE user = 'CrucifiX' and method = 'OPTION' and path = '/user/booperbot124' and http_version = 'HTTP/1.1' and status = 401
and timestamp between '2024-08-16T03:58:46Z' and '2024-08-16T04:03:50Z';
-- Unstructured query for GreptimeDB and Clickhouse
-- query by condition
-- GreptimeDB
SELECT * FROM test_table WHERE MATCHES(message, "+CrucifiX +OPTION +/user/booperbot124 +HTTP/1.1 +401");
-- Clickhouse
SELECT * FROM test_table WHERE (message LIKE '%CrucifiX%')
AND (message LIKE '%OPTION%')
AND (message LIKE '%/user/booperbot124%')
AND (message LIKE '%HTTP/1.1%')
AND (message LIKE '%401%');
-- query by condition and timerange
-- GreptimeDB
SELECT * FROM test_table WHERE MATCHES(message, "+CrucifiX +OPTION +/user/booperbot124 +HTTP/1.1 +401")
and timestamp between 1723710843619 and 1723711367588;
-- Clickhouse
SELECT * FROM test_table WHERE (message LIKE '%CrucifiX%')
AND (message LIKE '%OPTION%')
AND (message LIKE '%/user/booperbot124%')
AND (message LIKE '%HTTP/1.1%')
AND (message LIKE '%401%')
AND timestamp between '2024-08-15T10:25:26.524000000Z' AND '2024-08-15T10:31:31.746000000Z';

View File

@@ -0,0 +1,57 @@
# Please note we use patched branch to build vector
# https://github.com/shuiyisong/vector/tree/chore/greptime_log_ingester_logitem
[sources.demo_logs]
type = "demo_logs"
format = "apache_common"
# interval value = 1 / rps
# say you want to insert at 20k/s, that is 1 / 20000 = 0.00005
# set to 0 to run as fast as possible
interval = 0
# total rows to insert
count = 100000000
lines = [ "line1" ]
[transforms.parse_logs]
type = "remap"
inputs = ["demo_logs"]
source = '''
. = parse_regex!(.message, r'^(?P<ip>\S+) - (?P<user>\S+) \[(?P<timestamp>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) (?P<http_version>\S+)" (?P<status>\d+) (?P<bytes>\d+)$')
# Convert timestamp to a standard format
.timestamp = parse_timestamp!(.timestamp, format: "%d/%b/%Y:%H:%M:%S %z")
# Convert status and bytes to integers
.status = to_int!(.status)
.bytes = to_int!(.bytes)
'''
[sinks.sink_greptime_logs]
type = "greptimedb_logs"
# The table to insert into
table = "test_table"
pipeline_name = "demo_pipeline"
compression = "none"
inputs = [ "parse_logs" ]
endpoint = "http://127.0.0.1:4000"
# Batch size for each insertion
batch.max_events = 4000
[sinks.clickhouse]
type = "clickhouse"
inputs = [ "parse_logs" ]
database = "default"
endpoint = "http://127.0.0.1:8123"
format = "json_each_row"
# The table to insert into
table = "test_table"
[sinks.sink_elasticsearch]
type = "elasticsearch"
inputs = [ "parse_logs" ]
api_version = "auto"
compression = "none"
doc_type = "_doc"
endpoints = [ "http://127.0.0.1:9200" ]
id_key = "id"
mode = "bulk"

View File

@@ -0,0 +1,43 @@
# Please note we use patched branch to build vector
# https://github.com/shuiyisong/vector/tree/chore/greptime_log_ingester_ft
[sources.demo_logs]
type = "demo_logs"
format = "apache_common"
# interval value = 1 / rps
# say you want to insert at 20k/s, that is 1 / 20000 = 0.00005
# set to 0 to run as fast as possible
interval = 0
# total rows to insert
count = 100000000
lines = [ "line1" ]
[sinks.sink_greptime_logs]
type = "greptimedb_logs"
# The table to insert into
table = "test_table"
pipeline_name = "demo_pipeline"
compression = "none"
inputs = [ "demo_logs" ]
endpoint = "http://127.0.0.1:4000"
# Batch size for each insertion
batch.max_events = 500
[sinks.clickhouse]
type = "clickhouse"
inputs = [ "demo_logs" ]
database = "default"
endpoint = "http://127.0.0.1:8123"
format = "json_each_row"
# The table to insert into
table = "test_table"
[sinks.sink_elasticsearch]
type = "elasticsearch"
inputs = [ "demo_logs" ]
api_version = "auto"
compression = "none"
doc_type = "_doc"
endpoints = [ "http://127.0.0.1:9200" ]
id_key = "id"
mode = "bulk"

View File

@@ -1,2 +1,3 @@
[toolchain]
channel = "nightly-2024-04-20"
channel = "nightly-2024-06-06"

View File

@@ -91,7 +91,7 @@ impl Database {
///
/// - the name of database when using GreptimeDB standalone or cluster
/// - the name provided by GreptimeCloud or other multi-tenant GreptimeDB
/// environment
/// environment
pub fn new_with_dbname(dbname: impl Into<String>, client: Client) -> Self {
Self {
catalog: String::default(),

View File

@@ -51,7 +51,7 @@ file-engine.workspace = true
flow.workspace = true
frontend = { workspace = true, default-features = false }
futures.workspace = true
human-panic = "1.2.2"
human-panic = "2.0"
lazy_static.workspace = true
meta-client.workspace = true
meta-srv.workspace = true
@@ -80,7 +80,7 @@ tonic.workspace = true
tracing-appender = "0.2"
[target.'cfg(not(windows))'.dependencies]
tikv-jemallocator = "0.5"
tikv-jemallocator = "0.6"
[dev-dependencies]
client = { workspace = true, features = ["testing"] }

View File

@@ -139,13 +139,10 @@ async fn start(cli: Command) -> Result<()> {
}
fn setup_human_panic() {
let metadata = human_panic::Metadata {
version: env!("CARGO_PKG_VERSION").into(),
name: "GreptimeDB".into(),
authors: Default::default(),
homepage: "https://github.com/GreptimeTeam/greptimedb/discussions".into(),
};
human_panic::setup_panic!(metadata);
human_panic::setup_panic!(
human_panic::Metadata::new("GreptimeDB", env!("CARGO_PKG_VERSION"))
.homepage("https://github.com/GreptimeTeam/greptimedb/discussions")
);
common_telemetry::set_panic_hook();
}

View File

@@ -82,6 +82,7 @@ fn test_load_datanode_example_config() {
vector_cache_size: ReadableSize::mb(512),
page_cache_size: ReadableSize::mb(512),
max_background_jobs: 4,
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
..Default::default()
}),
RegionEngineConfig::File(EngineConfig {}),
@@ -218,6 +219,7 @@ fn test_load_standalone_example_config() {
vector_cache_size: ReadableSize::mb(512),
page_cache_size: ReadableSize::mb(512),
max_background_jobs: 4,
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
..Default::default()
}),
RegionEngineConfig::File(EngineConfig {}),

View File

@@ -48,19 +48,19 @@ pub fn build_db_string(catalog: &str, schema: &str) -> String {
/// The database name may come from different sources:
///
/// - MySQL `schema` name in MySQL protocol login request: it's optional and user
/// and switch database using `USE` command
/// and switch database using `USE` command
/// - Postgres `database` parameter in Postgres wire protocol, required
/// - HTTP RESTful API: the database parameter, optional
/// - gRPC: the dbname field in header, optional but has a higher priority than
/// original catalog/schema
/// original catalog/schema
///
/// When database name is provided, we attempt to parse catalog and schema from
/// it. We assume the format `[<catalog>-]<schema>`:
///
/// - If `[<catalog>-]` part is not provided, we use whole database name as
/// schema name
/// schema name
/// - if `[<catalog>-]` is provided, we split database name with `-` and use
/// `<catalog>` and `<schema>`.
/// `<catalog>` and `<schema>`.
pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (String, String) {
match parse_optional_catalog_and_schema_from_db_string(db) {
(Some(catalog), schema) => (catalog, schema),

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use object_store::services::Fs;
use object_store::util::DefaultLoggingInterceptor;
use object_store::ObjectStore;
use snafu::ResultExt;
@@ -22,13 +23,9 @@ pub fn build_fs_backend(root: &str) -> Result<ObjectStore> {
let builder = Fs::default();
let object_store = ObjectStore::new(builder.root(root))
.context(BuildBackendSnafu)?
.layer(
object_store::layers::LoggingLayer::default()
// Print the expected error only in DEBUG level.
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
.with_error_level(Some("debug"))
.expect("input error level must be valid"),
)
.layer(object_store::layers::LoggingLayer::new(
DefaultLoggingInterceptor,
))
.layer(object_store::layers::TracingLayer)
.layer(object_store::layers::PrometheusMetricsLayer::new(true))
.finish();

View File

@@ -15,6 +15,7 @@
use std::collections::HashMap;
use object_store::services::S3;
use object_store::util::DefaultLoggingInterceptor;
use object_store::ObjectStore;
use snafu::ResultExt;
@@ -84,13 +85,9 @@ pub fn build_s3_backend(
// TODO(weny): Consider finding a better way to eliminate duplicate code.
Ok(ObjectStore::new(builder)
.context(error::BuildBackendSnafu)?
.layer(
object_store::layers::LoggingLayer::default()
// Print the expected error only in DEBUG level.
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
.with_error_level(Some("debug"))
.expect("input error level must be valid"),
)
.layer(object_store::layers::LoggingLayer::new(
DefaultLoggingInterceptor,
))
.layer(object_store::layers::TracingLayer)
.layer(object_store::layers::PrometheusMetricsLayer::new(true))
.finish())

View File

@@ -39,3 +39,4 @@ table.workspace = true
[dev-dependencies]
ron = "0.7"
serde = { version = "1.0", features = ["derive"] }
tokio.workspace = true

View File

@@ -110,7 +110,7 @@ mod test {
use session::context::QueryContext;
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
#[test]
fn test_flush_flow_metadata() {
@@ -130,8 +130,8 @@ mod test {
);
}
#[test]
fn test_missing_flow_service() {
#[tokio::test]
async fn test_missing_flow_service() {
let f = FlushFlowFunction;
let args = vec!["flow_name"];
@@ -140,7 +140,7 @@ mod test {
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing FlowServiceHandler, not expected",
result.to_string()

View File

@@ -32,7 +32,7 @@ pub struct FunctionContext {
impl FunctionContext {
/// Create a mock [`FunctionContext`] for test.
#[cfg(any(test, feature = "testing"))]
#[cfg(test)]
pub fn mock() -> Self {
Self {
query_ctx: QueryContextBuilder::default().build().into(),
@@ -56,8 +56,10 @@ pub trait Function: fmt::Display + Sync + Send {
/// Returns the name of the function, should be unique.
fn name(&self) -> &str;
/// The returned data type of function execution.
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType>;
/// The signature of function.
fn signature(&self) -> Signature;
/// Evaluate the function, e.g. run/execute the function.
@@ -65,3 +67,22 @@ pub trait Function: fmt::Display + Sync + Send {
}
pub type FunctionRef = Arc<dyn Function>;
/// Async Scalar function trait
#[async_trait::async_trait]
pub trait AsyncFunction: fmt::Display + Sync + Send {
/// Returns the name of the function, should be unique.
fn name(&self) -> &str;
/// The returned data type of function execution.
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType>;
/// The signature of function.
fn signature(&self) -> Signature;
/// Evaluate the function, e.g. run/execute the function.
/// TODO(dennis): simplify the signature and refactor all the admin functions.
async fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef>;
}
pub type AsyncFunctionRef = Arc<dyn AsyncFunction>;
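
To make the new trait concrete, here is a hedged sketch of a hypothetical async admin function and its registration; the struct name, behaviour, and import paths are assumptions pieced together from the surrounding hunks, not code from this PR:

use std::fmt;
use std::sync::Arc;

use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::{StringVector, VectorRef};

use crate::function::{AsyncFunction, FunctionContext};
use crate::function_registry::FunctionRegistry;

// Hypothetical admin function; it only exists to show the trait shape.
#[derive(Clone, Debug, Default)]
struct PingFunction;

impl fmt::Display for PingFunction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "PING")
    }
}

#[async_trait::async_trait]
impl AsyncFunction for PingFunction {
    fn name(&self) -> &str {
        "ping"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::string_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::uniform(0, vec![], Volatility::Immutable)
    }

    async fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
        // An async admin function may await I/O (e.g. a meta or procedure call) here.
        Ok(Arc::new(StringVector::from_slice(&["pong"])) as _)
    }
}

fn register(registry: &FunctionRegistry) {
    // Async functions go through `register_async`, matching the registry change below.
    registry.register_async(Arc::new(PingFunction));
}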

View File

@@ -18,7 +18,7 @@ use std::sync::{Arc, RwLock};
use once_cell::sync::Lazy;
use crate::function::FunctionRef;
use crate::function::{AsyncFunctionRef, FunctionRef};
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::date::DateFunction;
use crate::scalars::expression::ExpressionFunction;
@@ -32,6 +32,7 @@ use crate::table::TableFunction;
#[derive(Default)]
pub struct FunctionRegistry {
functions: RwLock<HashMap<String, FunctionRef>>,
async_functions: RwLock<HashMap<String, AsyncFunctionRef>>,
aggregate_functions: RwLock<HashMap<String, AggregateFunctionMetaRef>>,
}
@@ -44,6 +45,27 @@ impl FunctionRegistry {
.insert(func.name().to_string(), func);
}
pub fn register_async(&self, func: AsyncFunctionRef) {
let _ = self
.async_functions
.write()
.unwrap()
.insert(func.name().to_string(), func);
}
pub fn get_async_function(&self, name: &str) -> Option<AsyncFunctionRef> {
self.async_functions.read().unwrap().get(name).cloned()
}
pub fn async_functions(&self) -> Vec<AsyncFunctionRef> {
self.async_functions
.read()
.unwrap()
.values()
.cloned()
.collect()
}
pub fn register_aggregate_function(&self, func: AggregateFunctionMetaRef) {
let _ = self
.aggregate_functions

View File

@@ -75,7 +75,7 @@ where
// to keep the not_greater length == floor+1
// so to ensure the peek of the not_greater is array[floor]
// and the peek of the greater is array[floor+1]
let p = if let Some(p) = self.p { p } else { 0.0_f64 };
let p = self.p.unwrap_or(0.0_f64);
let floor = (((self.n - 1) as f64) * p / (100_f64)).floor();
if value <= *self.not_greater.peek().unwrap() {
self.not_greater.push(value);
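
A quick worked example of the index computation above, assuming n = 5 values and p = 50 (the median): floor = ((5 - 1) * 50 / 100).floor() = 2, so `not_greater` is kept at floor + 1 = 3 elements, its peek is array[2] (the median), and the peek of `greater` is array[3].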

View File

@@ -28,7 +28,7 @@ pub struct FunctionState {
impl FunctionState {
/// Create a mock [`FunctionState`] for test.
#[cfg(any(test, feature = "testing"))]
#[cfg(test)]
pub fn mock() -> Self {
use std::sync::Arc;

View File

@@ -22,7 +22,7 @@ mod version;
use std::sync::Arc;
use build::BuildFunction;
use database::DatabaseFunction;
use database::{CurrentSchemaFunction, DatabaseFunction};
use pg_catalog::PGCatalogFunction;
use procedure_state::ProcedureStateFunction;
use timezone::TimezoneFunction;
@@ -37,8 +37,9 @@ impl SystemFunction {
registry.register(Arc::new(BuildFunction));
registry.register(Arc::new(VersionFunction));
registry.register(Arc::new(DatabaseFunction));
registry.register(Arc::new(CurrentSchemaFunction));
registry.register(Arc::new(TimezoneFunction));
registry.register(Arc::new(ProcedureStateFunction));
registry.register_async(Arc::new(ProcedureStateFunction));
PGCatalogFunction::register(registry);
}
}

View File

@@ -26,11 +26,35 @@ use crate::function::{Function, FunctionContext};
#[derive(Clone, Debug, Default)]
pub struct DatabaseFunction;
const NAME: &str = "database";
#[derive(Clone, Debug, Default)]
pub struct CurrentSchemaFunction;
const DATABASE_FUNCTION_NAME: &str = "database";
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
impl Function for DatabaseFunction {
fn name(&self) -> &str {
NAME
DATABASE_FUNCTION_NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
Signature::uniform(0, vec![], Volatility::Immutable)
}
fn eval(&self, func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
let db = func_ctx.query_ctx.current_schema();
Ok(Arc::new(StringVector::from_slice(&[&db])) as _)
}
}
impl Function for CurrentSchemaFunction {
fn name(&self) -> &str {
CURRENT_SCHEMA_FUNCTION_NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
@@ -54,6 +78,12 @@ impl fmt::Display for DatabaseFunction {
}
}
impl fmt::Display for CurrentSchemaFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "CURRENT_SCHEMA")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;

View File

@@ -14,11 +14,13 @@
mod pg_get_userbyid;
mod table_is_visible;
mod version;
use std::sync::Arc;
use pg_get_userbyid::PGGetUserByIdFunction;
use table_is_visible::PGTableIsVisibleFunction;
use version::PGVersionFunction;
use crate::function_registry::FunctionRegistry;
@@ -35,5 +37,6 @@ impl PGCatalogFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(PGTableIsVisibleFunction));
registry.register(Arc::new(PGGetUserByIdFunction));
registry.register(Arc::new(PGVersionFunction));
}
}

View File

@@ -0,0 +1,54 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use std::{env, fmt};
use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::{StringVector, VectorRef};
use crate::function::{Function, FunctionContext};
#[derive(Clone, Debug, Default)]
pub(crate) struct PGVersionFunction;
impl fmt::Display for PGVersionFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, crate::pg_catalog_func_fullname!("VERSION"))
}
}
impl Function for PGVersionFunction {
fn name(&self) -> &str {
crate::pg_catalog_func_fullname!("version")
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(vec![], Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
let result = StringVector::from(vec![format!(
"PostgreSQL 16.3 GreptimeDB {}",
env!("CARGO_PKG_VERSION")
)]);
Ok(Arc::new(result))
}
}

View File

@@ -96,7 +96,7 @@ mod tests {
use datatypes::vectors::StringVector;
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
#[test]
fn test_procedure_state_misc() {
@@ -114,8 +114,8 @@ mod tests {
));
}
#[test]
fn test_missing_procedure_service() {
#[tokio::test]
async fn test_missing_procedure_service() {
let f = ProcedureStateFunction;
let args = vec!["pid"];
@@ -125,15 +125,15 @@ mod tests {
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing ProcedureServiceHandler, not expected",
result.to_string()
);
}
#[test]
fn test_procedure_state() {
#[tokio::test]
async fn test_procedure_state() {
let f = ProcedureStateFunction;
let args = vec!["pid"];
@@ -143,7 +143,7 @@ mod tests {
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).unwrap();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(StringVector::from(vec![
"{\"status\":\"Done\",\"error\":\"OK\"}",

View File

@@ -31,11 +31,11 @@ pub(crate) struct TableFunction;
impl TableFunction {
/// Register all table functions to [`FunctionRegistry`].
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(MigrateRegionFunction));
registry.register(Arc::new(FlushRegionFunction));
registry.register(Arc::new(CompactRegionFunction));
registry.register(Arc::new(FlushTableFunction));
registry.register(Arc::new(CompactTableFunction));
registry.register(Arc::new(FlushFlowFunction));
registry.register_async(Arc::new(MigrateRegionFunction));
registry.register_async(Arc::new(FlushRegionFunction));
registry.register_async(Arc::new(CompactRegionFunction));
registry.register_async(Arc::new(FlushTableFunction));
registry.register_async(Arc::new(CompactTableFunction));
registry.register_async(Arc::new(FlushFlowFunction));
}
}

View File

@@ -77,7 +77,7 @@ mod tests {
use datatypes::vectors::UInt64Vector;
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
macro_rules! define_region_function_test {
($name: ident, $func: ident) => {
@@ -97,8 +97,8 @@ mod tests {
} if valid_types == ConcreteDataType::numerics()));
}
#[test]
fn [<test_ $name _missing_table_mutation>]() {
#[tokio::test]
async fn [<test_ $name _missing_table_mutation>]() {
let f = $func;
let args = vec![99];
@@ -108,15 +108,15 @@ mod tests {
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing TableMutationHandler, not expected",
result.to_string()
);
}
#[test]
fn [<test_ $name>]() {
#[tokio::test]
async fn [<test_ $name>]() {
let f = $func;
@@ -127,7 +127,7 @@ mod tests {
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).unwrap();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([42]));
assert_eq!(expect, result);

View File

@@ -210,7 +210,7 @@ mod tests {
use session::context::QueryContext;
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
macro_rules! define_table_function_test {
($name: ident, $func: ident) => {
@@ -230,8 +230,8 @@ mod tests {
} if valid_types == vec![ConcreteDataType::string_datatype()]));
}
#[test]
fn [<test_ $name _missing_table_mutation>]() {
#[tokio::test]
async fn [<test_ $name _missing_table_mutation>]() {
let f = $func;
let args = vec!["test"];
@@ -241,15 +241,15 @@ mod tests {
.map(|arg| Arc::new(StringVector::from(vec![arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing TableMutationHandler, not expected",
result.to_string()
);
}
#[test]
fn [<test_ $name>]() {
#[tokio::test]
async fn [<test_ $name>]() {
let f = $func;
@@ -260,7 +260,7 @@ mod tests {
.map(|arg| Arc::new(StringVector::from(vec![arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).unwrap();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([42]));
assert_eq!(expect, result);

View File

@@ -123,7 +123,7 @@ mod tests {
use datatypes::vectors::{StringVector, UInt64Vector, VectorRef};
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
#[test]
fn test_migrate_region_misc() {
@@ -140,8 +140,8 @@ mod tests {
} if sigs.len() == 2));
}
#[test]
fn test_missing_procedure_service() {
#[tokio::test]
async fn test_missing_procedure_service() {
let f = MigrateRegionFunction;
let args = vec![1, 1, 1];
@@ -151,15 +151,15 @@ mod tests {
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing ProcedureServiceHandler, not expected",
result.to_string()
);
}
#[test]
fn test_migrate_region() {
#[tokio::test]
async fn test_migrate_region() {
let f = MigrateRegionFunction;
let args = vec![1, 1, 1];
@@ -169,7 +169,7 @@ mod tests {
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).unwrap();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
assert_eq!(expect, result);

View File

@@ -187,7 +187,8 @@ fn build_struct(
}
impl crate::function::Function for #name {
#[async_trait::async_trait]
impl crate::function::AsyncFunction for #name {
fn name(&self) -> &'static str {
#display_name
}
@@ -200,7 +201,7 @@ fn build_struct(
#sig_fn()
}
fn eval(&self, func_ctx: crate::function::FunctionContext, columns: &[datatypes::vectors::VectorRef]) -> common_query::error::Result<datatypes::vectors::VectorRef> {
async fn eval(&self, func_ctx: crate::function::FunctionContext, columns: &[datatypes::vectors::VectorRef]) -> common_query::error::Result<datatypes::vectors::VectorRef> {
// Ensure under the `greptime` catalog for security
crate::ensure_greptime!(func_ctx);
@@ -212,51 +213,36 @@ fn build_struct(
};
let columns = Vec::from(columns);
// TODO(dennis): DataFusion doesn't support async UDF currently
std::thread::spawn(move || {
use snafu::OptionExt;
use datatypes::data_type::DataType;
use snafu::OptionExt;
use datatypes::data_type::DataType;
let query_ctx = &func_ctx.query_ctx;
let handler = func_ctx
.state
.#handler
.as_ref()
.context(#snafu_type)?;
let query_ctx = &func_ctx.query_ctx;
let handler = func_ctx
.state
.#handler
.as_ref()
.context(#snafu_type)?;
let mut builder = store_api::storage::ConcreteDataType::#ret()
.create_mutable_vector(rows_num);
let mut builder = store_api::storage::ConcreteDataType::#ret()
.create_mutable_vector(rows_num);
if columns_num == 0 {
let result = common_runtime::block_on_global(async move {
#fn_name(handler, query_ctx, &[]).await
})?;
if columns_num == 0 {
let result = #fn_name(handler, query_ctx, &[]).await?;
builder.push_value_ref(result.as_value_ref());
} else {
for i in 0..rows_num {
let args: Vec<_> = columns.iter()
.map(|vector| vector.get_ref(i))
.collect();
let result = #fn_name(handler, query_ctx, &args).await?;
builder.push_value_ref(result.as_value_ref());
} else {
for i in 0..rows_num {
let args: Vec<_> = columns.iter()
.map(|vector| vector.get_ref(i))
.collect();
let result = common_runtime::block_on_global(async move {
#fn_name(handler, query_ctx, &args).await
})?;
builder.push_value_ref(result.as_value_ref());
}
}
}
Ok(builder.to_vector())
})
.join()
.map_err(|e| {
common_telemetry::error!(e; "Join thread error");
common_query::error::Error::ThreadJoin {
location: snafu::Location::default(),
}
})?
Ok(builder.to_vector())
}
}

View File

@@ -76,6 +76,7 @@ pub fn range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
/// - `&ProcedureServiceHandlerRef` or `&TableMutationHandlerRef` or `FlowServiceHandlerRef` as the first argument,
/// - `&QueryContextRef` as the second argument, and
/// - `&[ValueRef<'_>]` as the third argument which is SQL function input values in each row.
///
/// Return type must be `common_query::error::Result<Value>`.
///
/// # Example see `common/function/src/system/procedure_state.rs`.

View File

@@ -15,8 +15,8 @@ tempfile = "3.4"
tokio.workspace = true
[target.'cfg(not(windows))'.dependencies]
tikv-jemalloc-ctl = { version = "0.5", features = ["use_std"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["use_std", "stats"] }
[target.'cfg(not(windows))'.dependencies.tikv-jemalloc-sys]
features = ["stats", "profiling", "unprefixed_malloc_on_supported_platforms"]
version = "0.5"
version = "0.6"

View File

@@ -172,8 +172,8 @@ impl From<TableLock> for StringKey {
///
/// Note:
/// - Allows modifying the corresponding region's [TableRouteValue](crate::key::table_route::TableRouteValue),
/// [TableDatanodeValue](crate::key::datanode_table::DatanodeTableValue) even if
/// it acquires the [RegionLock::Write] only without acquiring the [TableLock::Write].
/// [TableDatanodeValue](crate::key::datanode_table::DatanodeTableValue) even if
/// it acquires the [RegionLock::Write] only without acquiring the [TableLock::Write].
///
/// - Should acquire [TableLock] of the table at same procedure.
///

View File

@@ -76,6 +76,10 @@ impl TopicManager {
/// The initializer first tries to restore persisted topics from the kv backend.
/// If not enough topics are retrieved, the initializer will try to contact the Kafka cluster and request the creation of more topics.
pub async fn start(&self) -> Result<()> {
// Skip creating topics.
if !self.config.auto_create_topics {
return Ok(());
}
let num_topics = self.config.kafka_topic.num_topics;
ensure!(num_topics > 0, InvalidNumTopicsSnafu { num_topics });

View File

@@ -51,7 +51,7 @@ const META_TTL: Duration = Duration::from_secs(60 * 10);
/// [Notify] is not a condition variable, we can't guarantee the waiters are notified
/// if they didn't call `notified()` before we signal the notify. So we
/// 1. use dedicated notify for each condition, such as waiting for a lock, waiting
/// for children;
/// for children;
/// 2. always use `notify_one` and ensure there is only one waiter.
#[derive(Debug)]
pub(crate) struct ProcedureMeta {

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(lazy_cell)]
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::LazyLock;

View File

@@ -19,7 +19,11 @@ use build_data::{format_timestamp, get_source_time};
use shadow_rs::{CARGO_METADATA, CARGO_TREE};
fn main() -> shadow_rs::SdResult<()> {
println!("cargo:rerun-if-changed=.git/refs/heads");
println!(
"cargo:rerun-if-changed={}/.git/refs/heads",
env!("CARGO_RUSTC_CURRENT_DIR")
);
println!(
"cargo:rustc-env=SOURCE_TIMESTAMP={}",
if let Ok(t) = get_source_time() {

View File

@@ -53,6 +53,7 @@ impl From<DatanodeWalConfig> for MetasrvWalConfig {
connection: config.connection,
backoff: config.backoff,
kafka_topic: config.kafka_topic,
auto_create_topics: config.auto_create_topics,
}),
}
}
@@ -188,6 +189,7 @@ mod tests {
replication_factor: 1,
create_topic_timeout: Duration::from_secs(30),
},
auto_create_topics: true,
};
assert_eq!(metasrv_wal_config, MetasrvWalConfig::Kafka(expected));

View File

@@ -187,7 +187,7 @@ impl Default for KafkaConnectionConfig {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct KafkaTopicConfig {
/// Number of topics to be created upon start.
/// Number of topics.
pub num_topics: usize,
/// Number of partitions per topic.
pub num_partitions: i32,

View File

@@ -40,6 +40,9 @@ pub struct DatanodeKafkaConfig {
/// The kafka topic config.
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
// Automatically create topics for WAL.
pub auto_create_topics: bool,
// Create index for WAL.
pub create_index: bool,
#[serde(with = "humantime_serde")]
pub dump_index_interval: Duration,
@@ -54,6 +57,7 @@ impl Default for DatanodeKafkaConfig {
consumer_wait_timeout: Duration::from_millis(100),
backoff: BackoffConfig::default(),
kafka_topic: KafkaTopicConfig::default(),
auto_create_topics: true,
create_index: true,
dump_index_interval: Duration::from_secs(60),
}

View File

@@ -18,7 +18,7 @@ use super::common::KafkaConnectionConfig;
use crate::config::kafka::common::{backoff_prefix, BackoffConfig, KafkaTopicConfig};
/// Kafka wal configurations for metasrv.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct MetasrvKafkaConfig {
/// The kafka connection config.
@@ -30,4 +30,17 @@ pub struct MetasrvKafkaConfig {
/// The kafka config.
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
// Automatically create topics for WAL.
pub auto_create_topics: bool,
}
impl Default for MetasrvKafkaConfig {
fn default() -> Self {
Self {
connection: Default::default(),
backoff: Default::default(),
kafka_topic: Default::default(),
auto_create_topics: true,
}
}
}
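
With `Default` now hand-written, topic auto-creation can be switched off in code while keeping every other default; a minimal, assumed usage sketch (field visibility as shown in the hunk above):

// Assumed usage: keep the defaults but disable WAL topic auto-creation,
// e.g. when topics are provisioned outside of GreptimeDB.
fn external_topics_config() -> MetasrvKafkaConfig {
    MetasrvKafkaConfig {
        auto_create_topics: false,
        ..Default::default()
    }
}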

View File

@@ -27,7 +27,7 @@ use common_runtime::Runtime;
use query::dataframe::DataFrame;
use query::plan::LogicalPlan;
use query::planner::LogicalPlanner;
use query::query_engine::DescribeResult;
use query::query_engine::{DescribeResult, QueryEngineState};
use query::{QueryEngine, QueryEngineContext};
use session::context::QueryContextRef;
use store_api::metadata::RegionMetadataRef;
@@ -86,6 +86,9 @@ impl QueryEngine for MockQueryEngine {
fn engine_context(&self, _query_ctx: QueryContextRef) -> QueryEngineContext {
unimplemented!()
}
fn engine_state(&self) -> &QueryEngineState {
unimplemented!()
}
}
/// Create a region server without any engine

View File

@@ -17,7 +17,6 @@
//! It also contains definition of expression, adapter and plan, and internal state management.
#![feature(let_chains)]
#![feature(duration_abs_diff)]
#![allow(dead_code)]
#![warn(clippy::missing_docs_in_private_items)]
#![warn(clippy::too_many_lines)]

View File

@@ -441,6 +441,9 @@ pub fn check_permission(
}
match stmt {
// Will be checked in execution.
// TODO(dennis): add a hook for admin commands.
Statement::Admin(_) => {}
// These are executed by query engine, and will be checked there.
Statement::Query(_) | Statement::Explain(_) | Statement::Tql(_) | Statement::Delete(_) => {}
// database ops won't be checked

View File

@@ -40,8 +40,9 @@ pub(crate) const ESTIMATED_META_SIZE: usize = 256;
/// - If the entry is able to fit into a Kafka record, it's converted into a Full record.
///
/// - If the entry is too large to fit into a Kafka record, it's converted into a collection of records.
///
/// Those records must contain exactly one First record and one Last record, and potentially several
/// Middle records. There may be no Middle record.
/// Middle records. There may be no Middle record.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
pub enum RecordType {
/// The record is self-contained, i.e. an entry's data is fully stored into this record.
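
A simplified sketch of the invariant described above (exactly one First, exactly one Last, zero or more Middle); chunking by a fixed payload size is an assumption for illustration and ignores the reserved metadata size:

// Illustrative only: split an entry of `entry_len` bytes into record parts.
#[derive(Debug, Clone, PartialEq)]
enum Part {
    Full,
    First,
    Middle,
    Last,
}

fn split(entry_len: usize, max_payload: usize) -> Vec<Part> {
    if entry_len <= max_payload {
        return vec![Part::Full];
    }
    let chunks = entry_len.div_ceil(max_payload); // >= 2 here
    let mut parts = vec![Part::First];
    parts.extend(std::iter::repeat(Part::Middle).take(chunks - 2));
    parts.push(Part::Last);
    parts
}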

View File

@@ -23,6 +23,7 @@ use store_api::logstore::entry::{Entry, NaiveEntry};
use store_api::logstore::provider::Provider;
use store_api::storage::RegionId;
#[allow(renamed_and_removed_lints)]
pub mod protos {
include!(concat!(env!("OUT_DIR"), concat!("/", "protos/", "mod.rs")));
}

View File

@@ -15,8 +15,8 @@
#![feature(async_closure)]
#![feature(result_flattening)]
#![feature(assert_matches)]
#![feature(option_take_if)]
#![feature(extract_if)]
#![feature(option_take_if)]
pub mod bootstrap;
mod cache_invalidator;

View File

@@ -33,8 +33,8 @@ impl UpdateMetadata {
/// About the failure of updating the [TableRouteValue](common_meta::key::table_region::TableRegionValue):
///
/// - There may be another [RegionMigrationProcedure](crate::procedure::region_migration::RegionMigrationProcedure)
/// that is executed concurrently for **other region**.
/// It will only update **other region** info. Therefore, It's safe to retry after failure.
/// that is executed concurrently for **other region**.
/// It will only update **other region** info. Therefore, It's safe to retry after failure.
///
/// - There is no other DDL procedure executed concurrently for the current table.
pub async fn downgrade_leader_region(&self, ctx: &mut Context) -> Result<()> {

View File

@@ -122,7 +122,7 @@ impl UpdateMetadata {
///
/// Abort(non-retry):
/// - TableRoute or RegionRoute is not found.
/// Typically, it's impossible, there is no other DDL procedure executed concurrently for the current table.
/// Typically, it's impossible, there is no other DDL procedure executed concurrently for the current table.
///
/// Retry:
/// - Failed to update [TableRouteValue](common_meta::key::table_region::TableRegionValue).

View File

@@ -27,6 +27,7 @@ pub trait WeightedChoose<Item>: Send + Sync {
/// Note:
/// 1. make sure weight_array is not empty.
/// 2. the total weight is greater than 0.
///
/// Otherwise an error will be returned.
fn set_weight_array(&mut self, weight_array: Vec<WeightedItem<Item>>) -> Result<()>;

View File

@@ -61,9 +61,9 @@ impl CheckLeader for RwLock<State> {
/// To use this cache, the following constraints must be followed:
/// 1. The leader node can create this metadata.
/// 2. The follower node can create this metadata. The leader node can lazily retrieve
/// the corresponding data through the caching loading mechanism.
/// the corresponding data through the caching loading mechanism.
/// 3. Only the leader node can update this metadata, as the cache cannot detect
/// modifications made to the data on the follower node.
/// modifications made to the data on the follower node.
/// 4. Only the leader node can delete this metadata for the same reason mentioned above.
pub struct LeaderCachedKvBackend {
check_leader: CheckLeaderRef,

View File

@@ -223,7 +223,7 @@ pub enum Error {
#[snafu(display("Unsupported region request: {}", request))]
UnsupportedRegionRequest {
request: RegionRequest,
request: Box<RegionRequest>,
#[snafu(implicit)]
location: Location,
},

View File

@@ -175,14 +175,6 @@ impl AccessLayer {
Ok(sst_info)
}
/// Returns whether the file exists in the object store.
pub(crate) async fn is_exist(&self, file_meta: &FileMeta) -> Result<bool> {
let path = location::sst_file_path(&self.region_dir, file_meta.file_id);
self.object_store
.is_exist(&path)
.await
.context(OpenDalSnafu)
}
}
/// `OperationType` represents the origin of the `SstWriteRequest`.

View File

@@ -48,8 +48,8 @@ use crate::compaction::picker::{new_picker, CompactionTask};
use crate::compaction::task::CompactionTaskImpl;
use crate::config::MitoConfig;
use crate::error::{
CompactRegionSnafu, Error, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result,
TimeRangePredicateOverflowSnafu,
CompactRegionSnafu, Error, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu,
RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu,
};
use crate::metrics::COMPACTION_STAGE_ELAPSED;
use crate::read::projection::ProjectionMapper;
@@ -314,6 +314,16 @@ impl CompactionScheduler {
return Ok(());
}
Err(e) => {
if !current_version.options.compaction.fallback_to_local() {
error!(e; "Failed to schedule remote compaction job for region {}", region_id);
return RemoteCompactionSnafu {
region_id,
job_id: None,
reason: e.reason,
}
.fail();
}
error!(e; "Failed to schedule remote compaction job for region {}, fallback to local compaction", region_id);
// Return the waiters back to the caller for local compaction.
@@ -531,7 +541,10 @@ impl<'a> CompactionSstReaderBuilder<'a> {
scan_input.with_predicate(time_range_to_predicate(time_range, &self.metadata)?);
}
SeqScan::new(scan_input).build_reader().await
SeqScan::new(scan_input)
.with_compaction()
.build_reader()
.await
}
}

View File

@@ -143,7 +143,7 @@ impl Default for MitoConfig {
enable_experimental_write_cache: false,
experimental_write_cache_path: String::new(),
experimental_write_cache_size: ReadableSize::mb(512),
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60)),
experimental_write_cache_ttl: None,
sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
scan_parallelism: divide_num_cpus(4),
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,

View File

@@ -757,14 +757,14 @@ pub enum Error {
},
#[snafu(display(
"Failed to remotely compact region {} by job {} due to {}",
"Failed to remotely compact region {} by job {:?} due to {}",
region_id,
job_id,
reason
))]
RemoteCompaction {
region_id: RegionId,
job_id: JobId,
job_id: Option<JobId>,
reason: String,
#[snafu(implicit)]
location: Location,

View File

@@ -56,6 +56,7 @@ use crate::error::{
use crate::memtable::BoxedBatchIterator;
use crate::metrics::{READ_BATCHES_RETURN, READ_ROWS_RETURN, READ_STAGE_ELAPSED};
use crate::read::prune::PruneReader;
use crate::sst::parquet::reader::{ReaderFilterMetrics, ReaderMetrics};
/// Storage internal representation of a batch of rows for a primary key (time series).
///
@@ -752,11 +753,13 @@ pub(crate) struct ScannerMetrics {
num_batches: usize,
/// Number of rows returned.
num_rows: usize,
/// Filter related metrics for readers.
filter_metrics: ReaderFilterMetrics,
}
impl ScannerMetrics {
/// Sets and observes metrics on initializing parts.
fn observe_init_part(&mut self, build_parts_cost: Duration) {
fn observe_init_part(&mut self, build_parts_cost: Duration, reader_metrics: &ReaderMetrics) {
self.build_parts_cost = build_parts_cost;
// Observes metrics.
@@ -766,6 +769,11 @@ impl ScannerMetrics {
READ_STAGE_ELAPSED
.with_label_values(&["build_parts"])
.observe(self.build_parts_cost.as_secs_f64());
// We only call this once so we overwrite it directly.
self.filter_metrics = reader_metrics.filter_metrics;
// Observes filter metrics.
self.filter_metrics.observe();
}
/// Observes metrics on scanner finish.

View File

@@ -31,7 +31,7 @@ use crate::read::{Batch, BatchReader, BoxedBatchReader, Source};
///
/// The merge reader merges [Batch]es from multiple sources that yield sorted batches.
/// 1. Batch is ordered by primary key, time index, sequence desc, op type desc (we can
/// ignore op type as sequence is already unique).
/// ignore op type as sequence is already unique).
/// 2. Batches from sources **must** not be empty.
///
/// The reader won't concatenate batches. Each batch returned by the reader also doesn't

View File

@@ -97,13 +97,13 @@ impl PruneReader {
let num_rows_before_filter = batch.num_rows();
let Some(batch_filtered) = self.context.precise_filter(batch)? else {
// the entire batch is filtered out
self.metrics.num_rows_precise_filtered += num_rows_before_filter;
self.metrics.filter_metrics.num_rows_precise_filtered += num_rows_before_filter;
return Ok(None);
};
// update metric
let filtered_rows = num_rows_before_filter - batch_filtered.num_rows();
self.metrics.num_rows_precise_filtered += filtered_rows;
self.metrics.filter_metrics.num_rows_precise_filtered += filtered_rows;
if !batch_filtered.is_empty() {
Ok(Some(batch_filtered))

View File

@@ -50,6 +50,7 @@ use crate::sst::index::fulltext_index::applier::FulltextIndexApplierRef;
use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
use crate::sst::index::inverted_index::applier::InvertedIndexApplierRef;
use crate::sst::parquet::file_range::FileRange;
use crate::sst::parquet::reader::ReaderMetrics;
/// A scanner scans a region and returns a [SendableRecordBatchStream].
pub(crate) enum Scanner {
@@ -606,8 +607,9 @@ impl ScanInput {
pub(crate) async fn prune_file_ranges(
&self,
collector: &mut impl FileRangeCollector,
) -> Result<()> {
) -> Result<ReaderMetrics> {
let mut file_prune_cost = Duration::ZERO;
let mut reader_metrics = ReaderMetrics::default();
for file in &self.files {
let prune_start = Instant::now();
let res = self
@@ -620,7 +622,7 @@ impl ScanInput {
.inverted_index_applier(self.inverted_index_applier.clone())
.fulltext_index_applier(self.fulltext_index_applier.clone())
.expected_metadata(Some(self.mapper.metadata().clone()))
.build_reader_input()
.build_reader_input(&mut reader_metrics)
.await;
file_prune_cost += prune_start.elapsed();
let (mut file_range_ctx, row_groups) = match res {
@@ -665,7 +667,7 @@ impl ScanInput {
file_prune_cost
);
Ok(())
Ok(reader_metrics)
}
/// Scans the input source in another task and sends batches to the sender.

View File

@@ -59,6 +59,8 @@ pub struct SeqScan {
/// Semaphore to control scan parallelism of files.
/// Streams created by the scanner share the same semaphore.
semaphore: Arc<Semaphore>,
/// The scanner is used for compaction.
compaction: bool,
}
impl SeqScan {
@@ -75,9 +77,16 @@ impl SeqScan {
properties,
stream_ctx,
semaphore: Arc::new(Semaphore::new(parallelism)),
compaction: false,
}
}
/// Sets the scanner to be used for compaction.
pub(crate) fn with_compaction(mut self) -> Self {
self.compaction = true;
self
}
/// Builds a stream for the query.
///
/// The returned stream is not partitioned and will contain all the data. If want
@@ -97,9 +106,13 @@ impl SeqScan {
prepare_scan_cost: self.stream_ctx.query_start.elapsed(),
..Default::default()
};
let maybe_reader =
Self::build_all_merge_reader(&self.stream_ctx, self.semaphore.clone(), &mut metrics)
.await?;
let maybe_reader = Self::build_all_merge_reader(
&self.stream_ctx,
self.semaphore.clone(),
&mut metrics,
self.compaction,
)
.await?;
// Safety: `build_merge_reader()` always returns a reader if partition is None.
let reader = maybe_reader.unwrap();
Ok(Box::new(reader))
@@ -110,6 +123,7 @@ impl SeqScan {
part: &ScanPart,
sources: &mut Vec<Source>,
row_selector: Option<TimeSeriesRowSelector>,
compaction: bool,
) -> Result<()> {
sources.reserve(part.memtable_ranges.len() + part.file_ranges.len());
// Read memtables.
@@ -117,6 +131,11 @@ impl SeqScan {
let iter = mem.build_iter()?;
sources.push(Source::Iter(iter));
}
let read_type = if compaction {
"compaction"
} else {
"seq_scan_files"
};
// Read files.
for file in &part.file_ranges {
if file.is_empty() {
@@ -148,6 +167,8 @@ impl SeqScan {
"Seq scan region {}, file {}, {} ranges finished, metrics: {:?}",
region_id, file_id, range_num, reader_metrics
);
// Reports metrics.
reader_metrics.observe_rows(read_type);
};
let stream = Box::pin(stream);
sources.push(Source::Stream(stream));
@@ -161,6 +182,7 @@ impl SeqScan {
stream_ctx: &StreamContext,
semaphore: Arc<Semaphore>,
metrics: &mut ScannerMetrics,
compaction: bool,
) -> Result<Option<BoxedBatchReader>> {
// initialize parts list
let mut parts = stream_ctx.parts.lock().await;
@@ -173,7 +195,7 @@ impl SeqScan {
return Ok(None);
};
Self::build_part_sources(part, &mut sources, None)?;
Self::build_part_sources(part, &mut sources, None, compaction)?;
}
Self::build_reader_from_sources(stream_ctx, sources, semaphore).await
@@ -187,6 +209,7 @@ impl SeqScan {
range_id: usize,
semaphore: Arc<Semaphore>,
metrics: &mut ScannerMetrics,
compaction: bool,
) -> Result<Option<BoxedBatchReader>> {
let mut sources = Vec::new();
let build_start = {
@@ -198,7 +221,12 @@ impl SeqScan {
};
let build_start = Instant::now();
Self::build_part_sources(part, &mut sources, stream_ctx.input.series_row_selector)?;
Self::build_part_sources(
part,
&mut sources,
stream_ctx.input.series_row_selector,
compaction,
)?;
build_start
};
@@ -281,12 +309,13 @@ impl SeqScan {
let stream_ctx = self.stream_ctx.clone();
let semaphore = self.semaphore.clone();
let partition_ranges = self.properties.partitions[partition].clone();
let compaction = self.compaction;
let stream = try_stream! {
let first_poll = stream_ctx.query_start.elapsed();
for partition_range in partition_ranges {
let maybe_reader =
Self::build_merge_reader(&stream_ctx, partition_range.identifier, semaphore.clone(), &mut metrics)
Self::build_merge_reader(&stream_ctx, partition_range.identifier, semaphore.clone(), &mut metrics, compaction)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
@@ -359,6 +388,7 @@ impl SeqScan {
};
let stream_ctx = self.stream_ctx.clone();
let semaphore = self.semaphore.clone();
let compaction = self.compaction;
// build stream
let stream = try_stream! {
@@ -379,6 +409,7 @@ impl SeqScan {
id,
semaphore.clone(),
&mut metrics,
compaction,
)
.await
.map_err(BoxedError::new)
@@ -439,7 +470,7 @@ impl SeqScan {
if part_list.0.is_none() {
let now = Instant::now();
let mut distributor = SeqDistributor::default();
input.prune_file_ranges(&mut distributor).await?;
let reader_metrics = input.prune_file_ranges(&mut distributor).await?;
distributor.append_mem_ranges(
&input.memtables,
Some(input.mapper.column_ids()),
@@ -451,7 +482,7 @@ impl SeqScan {
let build_part_cost = now.elapsed();
part_list.1 = build_part_cost;
metrics.observe_init_part(build_part_cost);
metrics.observe_init_part(build_part_cost, &reader_metrics);
} else {
// Updates the cost of building parts.
metrics.build_parts_cost = part_list.1;

View File

@@ -211,6 +211,7 @@ impl RegionScanner for UnorderedScan {
}
}
reader_metrics.observe_rows("unordered_scan_files");
metrics.total_cost = query_start.elapsed();
metrics.observe_metrics_on_finish();
debug!(
@@ -263,7 +264,7 @@ async fn maybe_init_parts(
if part_list.0.is_none() {
let now = Instant::now();
let mut distributor = UnorderedDistributor::default();
input.prune_file_ranges(&mut distributor).await?;
let reader_metrics = input.prune_file_ranges(&mut distributor).await?;
distributor.append_mem_ranges(
&input.memtables,
Some(input.mapper.column_ids()),
@@ -275,7 +276,7 @@ async fn maybe_init_parts(
let build_part_cost = now.elapsed();
part_list.1 = build_part_cost;
metrics.observe_init_part(build_part_cost);
metrics.observe_init_part(build_part_cost, &reader_metrics);
} else {
// Updates the cost of building parts.
metrics.build_parts_cost = part_list.1;

View File

@@ -170,6 +170,12 @@ impl CompactionOptions {
CompactionOptions::Twcs(opts) => opts.remote_compaction,
}
}
pub(crate) fn fallback_to_local(&self) -> bool {
match self {
CompactionOptions::Twcs(opts) => opts.fallback_to_local,
}
}
}
impl Default for CompactionOptions {
@@ -201,6 +207,9 @@ pub struct TwcsOptions {
/// Whether to use remote compaction.
#[serde_as(as = "DisplayFromStr")]
pub remote_compaction: bool,
/// Whether to fall back to local compaction if remote compaction fails.
#[serde_as(as = "DisplayFromStr")]
pub fallback_to_local: bool,
}
with_prefix!(prefix_twcs "compaction.twcs.");
@@ -228,6 +237,7 @@ impl Default for TwcsOptions {
max_inactive_window_files: 1,
time_window: None,
remote_compaction: false,
fallback_to_local: true,
}
}
}
@@ -590,6 +600,7 @@ mod tests {
("compaction.twcs.time_window", "2h"),
("compaction.type", "twcs"),
("compaction.twcs.remote_compaction", "false"),
("compaction.twcs.fallback_to_local", "true"),
("storage", "S3"),
("append_mode", "false"),
("index.inverted_index.ignore_column_ids", "1,2,3"),
@@ -614,6 +625,7 @@ mod tests {
max_inactive_window_files: 3,
time_window: Some(Duration::from_secs(3600 * 2)),
remote_compaction: false,
fallback_to_local: true,
}),
storage: Some("S3".to_string()),
append_mode: false,
@@ -645,6 +657,7 @@ mod tests {
max_inactive_window_files: usize::MAX,
time_window: Some(Duration::from_secs(3600 * 2)),
remote_compaction: false,
fallback_to_local: true,
}),
storage: Some("S3".to_string()),
append_mode: false,
@@ -710,6 +723,7 @@ mod tests {
max_inactive_window_files: 7,
time_window: Some(Duration::from_secs(3600 * 2)),
remote_compaction: false,
fallback_to_local: true,
}),
storage: Some("S3".to_string()),
append_mode: false,

View File

@@ -52,39 +52,27 @@ pub struct IndexOutput {
pub fulltext_index: FulltextIndexOutput,
}
/// Base output of the index creation.
#[derive(Debug, Clone, Default)]
pub struct IndexBaseOutput {
/// Size of the index.
pub index_size: ByteCount,
/// Number of rows in the index.
pub row_count: RowCount,
/// Available columns in the index.
pub columns: Vec<ColumnId>,
}
impl IndexBaseOutput {
pub fn is_available(&self) -> bool {
self.index_size > 0
}
}
/// Output of the inverted index creation.
#[derive(Debug, Clone, Default)]
pub struct InvertedIndexOutput {
/// Size of the index.
pub index_size: ByteCount,
/// Number of rows in the index.
pub row_count: RowCount,
/// Available columns in the index.
pub columns: Vec<ColumnId>,
}
pub type InvertedIndexOutput = IndexBaseOutput;
/// Output of the fulltext index creation.
#[derive(Debug, Clone, Default)]
pub struct FulltextIndexOutput {
/// Size of the index.
pub index_size: ByteCount,
/// Number of rows in the index.
pub row_count: RowCount,
/// Available columns in the index.
pub columns: Vec<ColumnId>,
}
impl InvertedIndexOutput {
pub fn is_available(&self) -> bool {
self.index_size > 0
}
}
impl FulltextIndexOutput {
pub fn is_available(&self) -> bool {
self.index_size > 0
}
}
pub type FulltextIndexOutput = IndexBaseOutput;
/// The index creator that hides the error handling details.
#[derive(Default)]

View File

@@ -174,14 +174,19 @@ impl ParquetReaderBuilder {
///
/// This needs to perform IO operation.
pub async fn build(&self) -> Result<ParquetReader> {
let (context, row_groups) = self.build_reader_input().await?;
let mut metrics = ReaderMetrics::default();
let (context, row_groups) = self.build_reader_input(&mut metrics).await?;
ParquetReader::new(Arc::new(context), row_groups).await
}
/// Builds a [FileRangeContext] and collects row groups to read.
///
/// This needs to perform IO operation.
pub(crate) async fn build_reader_input(&self) -> Result<(FileRangeContext, RowGroupMap)> {
pub(crate) async fn build_reader_input(
&self,
metrics: &mut ReaderMetrics,
) -> Result<(FileRangeContext, RowGroupMap)> {
let start = Instant::now();
let file_path = self.file_handle.file_path(&self.file_dir);
@@ -219,10 +224,8 @@ impl ParquetReaderBuilder {
parquet_to_arrow_field_levels(parquet_schema_desc, projection_mask.clone(), hint)
.context(ReadParquetSnafu { path: &file_path })?;
let mut metrics = ReaderMetrics::default();
let row_groups = self
.row_groups_to_read(&read_format, &parquet_meta, &mut metrics)
.row_groups_to_read(&read_format, &parquet_meta, &mut metrics.filter_metrics)
.await;
let reader_builder = RowGroupReaderBuilder {
@@ -336,7 +339,7 @@ impl ParquetReaderBuilder {
&self,
read_format: &ReadFormat,
parquet_meta: &ParquetMetaData,
metrics: &mut ReaderMetrics,
metrics: &mut ReaderFilterMetrics,
) -> BTreeMap<usize, Option<RowSelection>> {
let num_row_groups = parquet_meta.num_row_groups();
let num_rows = parquet_meta.file_metadata().num_rows();
@@ -382,7 +385,7 @@ impl ParquetReaderBuilder {
row_group_size: usize,
parquet_meta: &ParquetMetaData,
output: &mut BTreeMap<usize, Option<RowSelection>>,
metrics: &mut ReaderMetrics,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(index_applier) = &self.fulltext_index_applier else {
return false;
@@ -462,7 +465,7 @@ impl ParquetReaderBuilder {
row_group_size: usize,
parquet_meta: &ParquetMetaData,
output: &mut BTreeMap<usize, Option<RowSelection>>,
metrics: &mut ReaderMetrics,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(index_applier) = &self.inverted_index_applier else {
return false;
@@ -529,7 +532,7 @@ impl ParquetReaderBuilder {
read_format: &ReadFormat,
parquet_meta: &ParquetMetaData,
output: &mut BTreeMap<usize, Option<RowSelection>>,
metrics: &mut ReaderMetrics,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(predicate) = &self.predicate else {
return false;
@@ -724,9 +727,9 @@ fn time_range_to_predicate(
Ok(predicates)
}
/// Parquet reader metrics.
#[derive(Debug, Default, Clone)]
pub(crate) struct ReaderMetrics {
/// Metrics of filtering rows groups and rows.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct ReaderFilterMetrics {
/// Number of row groups before filtering.
pub(crate) num_row_groups_before_filtering: usize,
/// Number of row groups filtered by fulltext index.
@@ -743,6 +746,57 @@ pub(crate) struct ReaderMetrics {
pub(crate) num_rows_in_row_group_fulltext_index_filtered: usize,
/// Number of rows in row group filtered by inverted index.
pub(crate) num_rows_in_row_group_inverted_index_filtered: usize,
}
impl ReaderFilterMetrics {
/// Adds `other` metrics to this metrics.
pub(crate) fn merge_from(&mut self, other: &ReaderFilterMetrics) {
self.num_row_groups_before_filtering += other.num_row_groups_before_filtering;
self.num_row_groups_fulltext_index_filtered += other.num_row_groups_fulltext_index_filtered;
self.num_row_groups_inverted_index_filtered += other.num_row_groups_inverted_index_filtered;
self.num_row_groups_min_max_filtered += other.num_row_groups_min_max_filtered;
self.num_rows_precise_filtered += other.num_rows_precise_filtered;
self.num_rows_in_row_group_before_filtering += other.num_rows_in_row_group_before_filtering;
self.num_rows_in_row_group_fulltext_index_filtered +=
other.num_rows_in_row_group_fulltext_index_filtered;
self.num_rows_in_row_group_inverted_index_filtered +=
other.num_rows_in_row_group_inverted_index_filtered;
}
/// Reports metrics.
pub(crate) fn observe(&self) {
READ_ROW_GROUPS_TOTAL
.with_label_values(&["before_filtering"])
.inc_by(self.num_row_groups_before_filtering as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["fulltext_index_filtered"])
.inc_by(self.num_row_groups_fulltext_index_filtered as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(self.num_row_groups_inverted_index_filtered as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["minmax_index_filtered"])
.inc_by(self.num_row_groups_min_max_filtered as u64);
PRECISE_FILTER_ROWS_TOTAL
.with_label_values(&["parquet"])
.inc_by(self.num_rows_precise_filtered as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["before_filtering"])
.inc_by(self.num_rows_in_row_group_before_filtering as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["fulltext_index_filtered"])
.inc_by(self.num_rows_in_row_group_fulltext_index_filtered as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(self.num_rows_in_row_group_inverted_index_filtered as u64);
}
}
/// Parquet reader metrics.
#[derive(Debug, Default, Clone)]
pub(crate) struct ReaderMetrics {
/// Filtered row groups and rows metrics.
pub(crate) filter_metrics: ReaderFilterMetrics,
/// Duration to build the parquet reader.
pub(crate) build_cost: Duration,
/// Duration to scan the reader.
@@ -758,22 +812,20 @@ pub(crate) struct ReaderMetrics {
impl ReaderMetrics {
/// Adds `other` metrics to this metrics.
pub(crate) fn merge_from(&mut self, other: &ReaderMetrics) {
self.num_row_groups_before_filtering += other.num_row_groups_before_filtering;
self.num_row_groups_fulltext_index_filtered += other.num_row_groups_fulltext_index_filtered;
self.num_row_groups_inverted_index_filtered += other.num_row_groups_inverted_index_filtered;
self.num_row_groups_min_max_filtered += other.num_row_groups_min_max_filtered;
self.num_rows_precise_filtered += other.num_rows_precise_filtered;
self.num_rows_in_row_group_before_filtering += other.num_rows_in_row_group_before_filtering;
self.num_rows_in_row_group_fulltext_index_filtered +=
other.num_rows_in_row_group_fulltext_index_filtered;
self.num_rows_in_row_group_inverted_index_filtered +=
other.num_rows_in_row_group_inverted_index_filtered;
self.filter_metrics.merge_from(&other.filter_metrics);
self.build_cost += other.build_cost;
self.scan_cost += other.scan_cost;
self.num_record_batches += other.num_record_batches;
self.num_batches += other.num_batches;
self.num_rows += other.num_rows;
}
/// Reports total rows.
pub(crate) fn observe_rows(&self, read_type: &str) {
READ_ROWS_TOTAL
.with_label_values(&[read_type])
.inc_by(self.num_rows as u64);
}
}
/// Builder to build a [ParquetRecordBatchReader] for a row group.
@@ -1006,10 +1058,12 @@ impl Drop for ParquetReader {
self.context.reader_builder().file_handle.region_id(),
self.context.reader_builder().file_handle.file_id(),
self.context.reader_builder().file_handle.time_range(),
metrics.num_row_groups_before_filtering
- metrics.num_row_groups_inverted_index_filtered
- metrics.num_row_groups_min_max_filtered,
metrics.num_row_groups_before_filtering,
metrics.filter_metrics.num_row_groups_before_filtering
- metrics
.filter_metrics
.num_row_groups_inverted_index_filtered
- metrics.filter_metrics.num_row_groups_min_max_filtered,
metrics.filter_metrics.num_row_groups_before_filtering,
metrics
);
@@ -1020,33 +1074,8 @@ impl Drop for ParquetReader {
READ_STAGE_ELAPSED
.with_label_values(&["scan_row_groups"])
.observe(metrics.scan_cost.as_secs_f64());
READ_ROWS_TOTAL
.with_label_values(&["parquet"])
.inc_by(metrics.num_rows as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["before_filtering"])
.inc_by(metrics.num_row_groups_before_filtering as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["fulltext_index_filtered"])
.inc_by(metrics.num_row_groups_fulltext_index_filtered as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(metrics.num_row_groups_inverted_index_filtered as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["minmax_index_filtered"])
.inc_by(metrics.num_row_groups_min_max_filtered as u64);
PRECISE_FILTER_ROWS_TOTAL
.with_label_values(&["parquet"])
.inc_by(metrics.num_rows_precise_filtered as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["before_filtering"])
.inc_by(metrics.num_rows_in_row_group_before_filtering as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["fulltext_index_filtered"])
.inc_by(metrics.num_rows_in_row_group_fulltext_index_filtered as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(metrics.num_rows_in_row_group_inverted_index_filtered as u64);
metrics.observe_rows("parquet_reader");
metrics.filter_metrics.observe();
}
}
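
Under my reading of this split, each pruned file contributes a `ReaderFilterMetrics` that is merged into one total and reported once per scan; a hedged sketch of that pattern (the slice argument is hypothetical):

// Hypothetical aggregation mirroring the new split above.
fn report_filter_metrics(per_file: &[ReaderFilterMetrics]) {
    let mut total = ReaderFilterMetrics::default();
    for m in per_file {
        // Sums the row-group and row counters collected while pruning each file.
        total.merge_from(m);
    }
    // Emits the Prometheus counters once for the whole scan.
    total.observe();
}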

View File

@@ -960,6 +960,7 @@ pub fn build_rows(start: usize, end: usize) -> Vec<Row> {
/// - `key`: A string key that is common across all rows.
/// - `timestamps`: Array of timestamp values.
/// - `fields`: Array of tuples where each tuple contains two optional i64 values, representing two optional float fields.
///
/// Returns a vector of `Row` each containing the key, two optional float fields, and a timestamp.
pub fn build_rows_with_fields(
key: &str,

View File

@@ -15,6 +15,7 @@
use api::v1::WalEntry;
use async_stream::stream;
use futures::StreamExt;
use object_store::Buffer;
use prost::Message;
use snafu::{ensure, ResultExt};
use store_api::logstore::entry::Entry;
@@ -28,13 +29,20 @@ pub(crate) fn decode_raw_entry(raw_entry: Entry) -> Result<(EntryId, WalEntry)>
let entry_id = raw_entry.entry_id();
let region_id = raw_entry.region_id();
ensure!(raw_entry.is_complete(), CorruptedEntrySnafu { region_id });
// TODO(weny): implement the [Buf] for return value, avoid extra memory allocation.
let bytes = raw_entry.into_bytes();
let wal_entry = WalEntry::decode(bytes.as_slice()).context(DecodeWalSnafu { region_id })?;
let buffer = into_buffer(raw_entry);
let wal_entry = WalEntry::decode(buffer).context(DecodeWalSnafu { region_id })?;
Ok((entry_id, wal_entry))
}
fn into_buffer(raw_entry: Entry) -> Buffer {
match raw_entry {
Entry::Naive(entry) => Buffer::from(entry.data),
Entry::MultiplePart(entry) => {
Buffer::from_iter(entry.parts.into_iter().map(bytes::Bytes::from))
}
}
}
/// [WalEntryReader] provides the ability to read and decode entries from the underlying store.
///
/// Notes: It consumes the inner stream and only allows `read` to be invoked once.
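
A hedged sketch of the point of `into_buffer`: the parts of a multi-part entry are wrapped into a `Buffer` and decoded directly, instead of being copied into one contiguous `Vec<u8>` first (the helper below is made up for illustration):

use bytes::Bytes;
use object_store::Buffer;
use prost::Message;

// Hypothetical helper: decode a prost message from several WAL record parts
// without concatenating them into a single allocation first.
fn decode_parts<T: Message + Default>(parts: Vec<Vec<u8>>) -> Result<T, prost::DecodeError> {
    let buffer = Buffer::from_iter(parts.into_iter().map(Bytes::from));
    T::decode(buffer)
}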

View File

@@ -19,10 +19,9 @@
use std::collections::{HashMap, VecDeque};
use common_telemetry::{info, warn};
use snafu::ensure;
use store_api::storage::RegionId;
use crate::error::{InvalidRequestSnafu, RegionBusySnafu, RegionNotFoundSnafu, Result};
use crate::error::{RegionBusySnafu, RegionNotFoundSnafu, Result};
use crate::manifest::action::{
RegionChange, RegionEdit, RegionMetaAction, RegionMetaActionList, RegionTruncate,
};
@@ -289,20 +288,6 @@ impl<S> RegionWorkerLoop<S> {
/// Checks the edit, writes and applies it.
async fn edit_region(region: &MitoRegionRef, edit: RegionEdit) -> Result<()> {
let region_id = region.region_id;
for file_meta in &edit.files_to_add {
let is_exist = region.access_layer.is_exist(file_meta).await?;
ensure!(
is_exist,
InvalidRequestSnafu {
region_id,
reason: format!(
"trying to add a not exist file '{}' when editing region",
file_meta.file_id
)
}
);
}
info!("Applying {edit:?} to region {}", region_id);
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit));

View File

@@ -17,7 +17,7 @@ futures.workspace = true
lazy_static.workspace = true
md5 = "0.7"
moka = { workspace = true, features = ["future"] }
opendal = { version = "0.48", features = [
opendal = { version = "0.49", features = [
"layers-tracing",
"services-azblob",
"services-fs",

View File

@@ -18,15 +18,17 @@ use common_telemetry::debug;
use futures::FutureExt;
use moka::future::Cache;
use moka::notification::ListenerFuture;
use opendal::raw::oio::{List, Read, Reader, Write};
use opendal::raw::{Access, OpDelete, OpList, OpRead, OpStat, OpWrite, RpRead};
use opendal::{Error as OpendalError, ErrorKind, Result};
use opendal::raw::oio::{Read, Reader, Write};
use opendal::raw::{Access, OpDelete, OpRead, OpStat, OpWrite, RpRead};
use opendal::{Error as OpendalError, ErrorKind, Metakey, OperatorBuilder, Result};
use crate::metrics::{
OBJECT_STORE_LRU_CACHE_BYTES, OBJECT_STORE_LRU_CACHE_ENTRIES, OBJECT_STORE_LRU_CACHE_HIT,
OBJECT_STORE_LRU_CACHE_MISS, OBJECT_STORE_READ_ERROR,
};
const RECOVER_CACHE_LIST_CONCURRENT: usize = 8;
/// Cache value for read file
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
enum ReadResult {
@@ -142,19 +144,16 @@ impl<C: Access> ReadCache<C> {
/// Recover existing cache items from `file_cache` to `mem_cache`.
/// Return entry count and total approximate entry size in bytes.
pub(crate) async fn recover_cache(&self) -> Result<(u64, u64)> {
let (_, mut pager) = self.file_cache.list("/", OpList::default()).await?;
let op = OperatorBuilder::new(self.file_cache.clone()).finish();
let mut entries = op
.list_with("/")
.metakey(Metakey::ContentLength | Metakey::ContentType)
.concurrent(RECOVER_CACHE_LIST_CONCURRENT)
.await?;
while let Some(entry) = pager.next().await? {
while let Some(entry) = entries.pop() {
let read_key = entry.path();
// We can't retrieve the metadata from `[opendal::raw::oio::Entry]` directly,
// because it's private field.
let size = {
let stat = self.file_cache.stat(read_key, OpStat::default()).await?;
stat.into_metadata().content_length()
};
let size = entry.metadata().content_length();
OBJECT_STORE_LRU_CACHE_ENTRIES.inc();
OBJECT_STORE_LRU_CACHE_BYTES.add(size as i64);
self.mem_cache

View File

@@ -159,9 +159,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let create_res = self.inner.create_dir(path, args).await;
timer.observe_duration();
create_res.map_err(|e| {
create_res.inspect_err(|e| {
increment_errors_total(Operation::CreateDir, e.kind());
e
})
}
@@ -175,9 +174,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
.with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label])
.start_timer();
let (rp, r) = self.inner.read(path, args).await.map_err(|e| {
let (rp, r) = self.inner.read(path, args).await.inspect_err(|e| {
increment_errors_total(Operation::Read, e.kind());
e
})?;
Ok((
@@ -205,9 +203,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
.with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label])
.start_timer();
let (rp, r) = self.inner.write(path, args).await.map_err(|e| {
let (rp, r) = self.inner.write(path, args).await.inspect_err(|e| {
increment_errors_total(Operation::Write, e.kind());
e
})?;
Ok((
@@ -236,9 +233,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let stat_res = self.inner.stat(path, args).await;
timer.observe_duration();
stat_res.map_err(|e| {
stat_res.inspect_err(|e| {
increment_errors_total(Operation::Stat, e.kind());
e
})
}
@@ -254,9 +250,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let delete_res = self.inner.delete(path, args).await;
timer.observe_duration();
delete_res.map_err(|e| {
delete_res.inspect_err(|e| {
increment_errors_total(Operation::Delete, e.kind());
e
})
}
@@ -273,9 +268,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let list_res = self.inner.list(path, args).await;
timer.observe_duration();
list_res.map_err(|e| {
list_res.inspect_err(|e| {
increment_errors_total(Operation::List, e.kind());
e
})
}
@@ -290,9 +284,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let result = self.inner.batch(args).await;
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::Batch, e.kind());
e
})
}
@@ -308,9 +301,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let result = self.inner.presign(path, args).await;
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::Presign, e.kind());
e
})
}
@@ -335,9 +327,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::BlockingCreateDir, e.kind());
e
})
}
@@ -376,9 +367,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
),
)
})
.map_err(|e| {
.inspect_err(|e| {
increment_errors_total(Operation::BlockingRead, e.kind());
e
})
}
@@ -417,9 +407,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
),
)
})
.map_err(|e| {
.inspect_err(|e| {
increment_errors_total(Operation::BlockingWrite, e.kind());
e
})
}
@@ -442,9 +431,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
.start_timer();
let result = self.inner.blocking_stat(path, args);
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::BlockingStat, e.kind());
e
})
}
@@ -468,9 +456,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let result = self.inner.blocking_delete(path, args);
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::BlockingDelete, e.kind());
e
})
}
@@ -494,9 +481,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let result = self.inner.blocking_list(path, args);
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::BlockingList, e.kind());
e
})
}
}
@@ -535,18 +521,16 @@ impl<R> PrometheusMetricWrapper<R> {
impl<R: oio::Read> oio::Read for PrometheusMetricWrapper<R> {
async fn read(&mut self) -> Result<Buffer> {
self.inner.read().await.map_err(|err| {
self.inner.read().await.inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
}
impl<R: oio::BlockingRead> oio::BlockingRead for PrometheusMetricWrapper<R> {
fn read(&mut self) -> opendal::Result<Buffer> {
self.inner.read().map_err(|err| {
self.inner.read().inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
}
@@ -567,16 +551,14 @@ impl<R: oio::Write> oio::Write for PrometheusMetricWrapper<R> {
}
async fn close(&mut self) -> Result<()> {
self.inner.close().await.map_err(|err| {
self.inner.close().await.inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
async fn abort(&mut self) -> Result<()> {
self.inner.close().await.map_err(|err| {
self.inner.close().await.inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
}
@@ -589,16 +571,14 @@ impl<R: oio::BlockingWrite> oio::BlockingWrite for PrometheusMetricWrapper<R> {
.map(|_| {
self.bytes += bytes as u64;
})
.map_err(|err| {
.inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
fn close(&mut self) -> Result<()> {
self.inner.close().map_err(|err| {
self.inner.close().inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
}

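Every hunk above makes the same mechanical change: `map_err(|e| { ...; e })` becomes `inspect_err`, which runs the side effect on a borrowed error and passes the `Result` through unchanged. A small self-contained illustration of the pattern, where `record_error` stands in for `increment_errors_total` (neither function below is part of this diff):

fn record_error(e: &std::io::Error) {
    eprintln!("error observed: {e}");
}

fn read_config(path: &str) -> std::io::Result<String> {
    // Before: the closure had to hand the error back explicitly.
    //   std::fs::read_to_string(path).map_err(|e| { record_error(&e); e })
    // After: inspect_err only borrows the error and forwards the Result as-is.
    std::fs::read_to_string(path).inspect_err(record_error)
}

fn main() {
    let _ = read_config("/definitely/missing.toml");
}
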
View File

@@ -14,7 +14,7 @@
pub use opendal::raw::{normalize_path as raw_normalize_path, Access, HttpClient};
pub use opendal::{
services, Builder as ObjectStoreBuilder, Entry, EntryMode, Error, ErrorKind,
services, Buffer, Builder as ObjectStoreBuilder, Entry, EntryMode, Error, ErrorKind,
FuturesAsyncReader, FuturesAsyncWriter, Lister, Metakey, Operator as ObjectStore, Reader,
Result, Writer,
};

View File

@@ -12,9 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use common_telemetry::{debug, error, trace};
use futures::TryStreamExt;
use opendal::layers::{LoggingLayer, TracingLayer};
use opendal::{Entry, Lister};
use opendal::layers::{LoggingInterceptor, LoggingLayer, TracingLayer};
use opendal::raw::{AccessorInfo, Operation};
use opendal::{Entry, ErrorKind, Lister};
use crate::layers::PrometheusMetricsLayer;
use crate::ObjectStore;
@@ -140,17 +144,83 @@ pub(crate) fn extract_parent_path(path: &str) -> &str {
/// Attaches instrument layers to the object store.
pub fn with_instrument_layers(object_store: ObjectStore, path_label: bool) -> ObjectStore {
object_store
.layer(
LoggingLayer::default()
// Print the expected error only in DEBUG level.
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
.with_error_level(Some("debug"))
.expect("input error level must be valid"),
)
.layer(LoggingLayer::new(DefaultLoggingInterceptor))
.layer(TracingLayer)
.layer(PrometheusMetricsLayer::new(path_label))
}
static LOGGING_TARGET: &str = "opendal::services";
struct LoggingContext<'a>(&'a [(&'a str, &'a str)]);
impl<'a> Display for LoggingContext<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (i, (k, v)) in self.0.iter().enumerate() {
if i > 0 {
write!(f, " {}={}", k, v)?;
} else {
write!(f, "{}={}", k, v)?;
}
}
Ok(())
}
}
#[derive(Debug, Copy, Clone, Default)]
pub struct DefaultLoggingInterceptor;
impl LoggingInterceptor for DefaultLoggingInterceptor {
#[inline]
fn log(
&self,
info: &AccessorInfo,
operation: Operation,
context: &[(&str, &str)],
message: &str,
err: Option<&opendal::Error>,
) {
if let Some(err) = err {
// Log unexpected errors at error level, and expected ones at debug level.
if err.kind() == ErrorKind::Unexpected {
error!(
target: LOGGING_TARGET,
"service={} name={} {}: {operation} {message} {err:#?}",
info.scheme(),
info.name(),
LoggingContext(context),
);
} else {
debug!(
target: LOGGING_TARGET,
"service={} name={} {}: {operation} {message} {err}",
info.scheme(),
info.name(),
LoggingContext(context),
);
};
}
// Log oneshot operations at debug level, and all others at trace level.
if operation.is_oneshot() {
debug!(
target: LOGGING_TARGET,
"service={} name={} {}: {operation} {message}",
info.scheme(),
info.name(),
LoggingContext(context),
);
} else {
trace!(
target: LOGGING_TARGET,
"service={} name={} {}: {operation} {message}",
info.scheme(),
info.name(),
LoggingContext(context),
);
};
}
}
#[cfg(test)]
mod tests {
use super::*;

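A unit test along the following lines would pin down the format produced by the `Display` impl of `LoggingContext` above; it is illustrative only and not part of this diff (the expected string is derived from the impl: key=value pairs separated by single spaces, no leading space):

#[test]
fn logging_context_formats_space_separated_pairs() {
    let ctx = LoggingContext(&[("path", "data/1.parquet"), ("size", "1024")]);
    assert_eq!(ctx.to_string(), "path=data/1.parquet size=1024");
}
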
View File

@@ -42,6 +42,19 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to execute admin function"))]
ExecuteAdminFunction {
#[snafu(implicit)]
location: Location,
source: common_query::error::Error,
},
#[snafu(display("Failed to build admin function args: {msg}"))]
BuildAdminFunctionArgs { msg: String },
#[snafu(display("Expected {expected} args, but got {actual}"))]
FunctionArityMismatch { expected: usize, actual: usize },
#[snafu(display("Failed to invalidate table cache"))]
InvalidateTableCache {
#[snafu(implicit)]
@@ -209,6 +222,9 @@ pub enum Error {
#[snafu(display("Table not found: {}", table_name))]
TableNotFound { table_name: String },
#[snafu(display("Admin function not found: {}", name))]
AdminFunctionNotFound { name: String },
#[snafu(display("Flow not found: {}", flow_name))]
FlowNotFound { flow_name: String },
@@ -546,6 +562,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to build record batch"))]
BuildRecordBatch {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to read orc schema"))]
ReadOrc {
source: common_datasource::error::Error,
@@ -792,9 +815,12 @@ impl ErrorExt for Error {
| Error::InvalidViewName { .. }
| Error::InvalidView { .. }
| Error::InvalidExpr { .. }
| Error::AdminFunctionNotFound { .. }
| Error::ViewColumnsMismatch { .. }
| Error::InvalidViewStmt { .. }
| Error::ConvertIdentifier { .. }
| Error::BuildAdminFunctionArgs { .. }
| Error::FunctionArityMismatch { .. }
| Error::InvalidPartition { .. }
| Error::PhysicalExpr { .. } => StatusCode::InvalidArguments,
@@ -902,6 +928,9 @@ impl ErrorExt for Error {
| Error::InvalidTimestampRange { .. } => StatusCode::InvalidArguments,
Error::CreateLogicalTables { .. } => StatusCode::Unexpected,
Error::ExecuteAdminFunction { source, .. } => source.status_code(),
Error::BuildRecordBatch { source, .. } => source.status_code(),
}
}

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod admin;
mod copy_database;
mod copy_table_from;
mod copy_table_to;
@@ -277,6 +278,7 @@ impl StatementExecutor {
Statement::ShowIndex(show_index) => self.show_index(show_index, query_ctx).await,
Statement::ShowStatus(_) => self.show_status(query_ctx).await,
Statement::Use(db) => self.use_database(db, query_ctx).await,
Statement::Admin(admin) => self.execute_admin_command(admin, query_ctx).await,
}
}

View File

@@ -0,0 +1,233 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_function::function::FunctionContext;
use common_function::function_registry::FUNCTION_REGISTRY;
use common_query::prelude::TypeSignature;
use common_query::Output;
use common_recordbatch::{RecordBatch, RecordBatches};
use common_telemetry::tracing;
use common_time::Timezone;
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::{Expr, FunctionArg, FunctionArgExpr, Value as SqlValue};
use sql::statements::admin::Admin;
use sql::statements::sql_value_to_value;
use crate::error::{self, Result};
use crate::statement::StatementExecutor;
const DUMMY_COLUMN: &str = "<dummy>";
impl StatementExecutor {
/// Execute the [`Admin`] statement and return the output.
#[tracing::instrument(skip_all)]
pub(super) async fn execute_admin_command(
&self,
stmt: Admin,
query_ctx: QueryContextRef,
) -> Result<Output> {
let Admin::Func(func) = &stmt;
// the function name should be in lower case.
let func_name = func.name.to_string().to_lowercase();
let admin_func = FUNCTION_REGISTRY
.get_async_function(&func_name)
.context(error::AdminFunctionNotFoundSnafu { name: func_name })?;
let signature = admin_func.signature();
let arg_values = func
.args
.iter()
.map(|arg| {
let FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(value))) = arg else {
return error::BuildAdminFunctionArgsSnafu {
msg: format!("unsupported function arg: {arg}"),
}
.fail();
};
Ok(value)
})
.collect::<Result<Vec<_>>>()?;
let args = args_to_vector(&signature.type_signature, &arg_values, &query_ctx)?;
let arg_types = args.iter().map(|arg| arg.data_type()).collect::<Vec<_>>();
let func_ctx = FunctionContext {
query_ctx,
state: self.query_engine.engine_state().function_state(),
};
let result = admin_func
.eval(func_ctx, &args)
.await
.context(error::ExecuteAdminFunctionSnafu)?;
let column_schemas = vec![ColumnSchema::new(
// Use statement as the result column name
stmt.to_string(),
admin_func
.return_type(&arg_types)
.context(error::ExecuteAdminFunctionSnafu)?,
false,
)];
let schema = Arc::new(Schema::new(column_schemas));
let batch =
RecordBatch::new(schema.clone(), vec![result]).context(error::BuildRecordBatchSnafu)?;
let batches =
RecordBatches::try_new(schema, vec![batch]).context(error::BuildRecordBatchSnafu)?;
Ok(Output::new_with_record_batches(batches))
}
}
/// Try to cast the arguments to vectors according to the function's signature.
fn args_to_vector(
type_signature: &TypeSignature,
args: &Vec<&SqlValue>,
query_ctx: &QueryContextRef,
) -> Result<Vec<VectorRef>> {
let tz = query_ctx.timezone();
match type_signature {
TypeSignature::Variadic(valid_types) => {
values_to_vectors_by_valid_types(valid_types, args, Some(&tz))
}
TypeSignature::Uniform(arity, valid_types) => {
ensure!(
*arity == args.len(),
error::FunctionArityMismatchSnafu {
actual: args.len(),
expected: *arity,
}
);
values_to_vectors_by_valid_types(valid_types, args, Some(&tz))
}
TypeSignature::Exact(data_types) => {
values_to_vectors_by_exact_types(data_types, args, Some(&tz))
}
TypeSignature::VariadicAny => {
let data_types = args
.iter()
.map(|value| try_get_data_type_for_sql_value(value))
.collect::<Result<Vec<_>>>()?;
values_to_vectors_by_exact_types(&data_types, args, Some(&tz))
}
TypeSignature::Any(arity) => {
ensure!(
*arity == args.len(),
error::FunctionArityMismatchSnafu {
actual: args.len(),
expected: *arity,
}
);
let data_types = args
.iter()
.map(|value| try_get_data_type_for_sql_value(value))
.collect::<Result<Vec<_>>>()?;
values_to_vectors_by_exact_types(&data_types, args, Some(&tz))
}
TypeSignature::OneOf(type_sigs) => {
for type_sig in type_sigs {
if let Ok(vectors) = args_to_vector(type_sig, args, query_ctx) {
return Ok(vectors);
}
}
error::BuildAdminFunctionArgsSnafu {
msg: "function signature does not match",
}
.fail()
}
}
}
/// Try to cast sql values to vectors by exact data types.
fn values_to_vectors_by_exact_types(
exact_types: &[ConcreteDataType],
args: &[&SqlValue],
tz: Option<&Timezone>,
) -> Result<Vec<VectorRef>> {
args.iter()
.zip(exact_types.iter())
.map(|(value, data_type)| {
let value = sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None)
.context(error::ParseSqlValueSnafu)?;
Ok(value_to_vector(value))
})
.collect()
}
/// Try to cast sql values to vectors by valid data types.
fn values_to_vectors_by_valid_types(
valid_types: &[ConcreteDataType],
args: &[&SqlValue],
tz: Option<&Timezone>,
) -> Result<Vec<VectorRef>> {
args.iter()
.map(|value| {
for data_type in valid_types {
if let Ok(value) = sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None) {
return Ok(value_to_vector(value));
}
}
error::BuildAdminFunctionArgsSnafu {
msg: format!("failed to cast {value}"),
}
.fail()
})
.collect::<Result<Vec<_>>>()
}
/// Build a [`VectorRef`] from [`Value`]
fn value_to_vector(value: Value) -> VectorRef {
let data_type = value.data_type();
let mut mutable_vector = data_type.create_mutable_vector(1);
mutable_vector.push_value_ref(value.as_value_ref());
mutable_vector.to_vector()
}
/// Try to infer the data type from sql value.
fn try_get_data_type_for_sql_value(value: &SqlValue) -> Result<ConcreteDataType> {
match value {
SqlValue::Number(_, _) => Ok(ConcreteDataType::float64_datatype()),
SqlValue::Null => Ok(ConcreteDataType::null_datatype()),
SqlValue::Boolean(_) => Ok(ConcreteDataType::boolean_datatype()),
SqlValue::HexStringLiteral(_)
| SqlValue::DoubleQuotedString(_)
| SqlValue::SingleQuotedString(_) => Ok(ConcreteDataType::string_datatype()),
_ => error::BuildAdminFunctionArgsSnafu {
msg: format!("unsupported sql value: {value}"),
}
.fail(),
}
}

View File

@@ -284,7 +284,7 @@ where
let mut search_from = 0;
// because the keys in the json map are ordered
for (payload_key, payload_value) in map.into_iter() {
if search_from >= self.required_keys.len() - 1 {
if search_from >= self.required_keys.len() {
break;
}
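
The one-character change above matters because `search_from` indexes into `required_keys`: with the old `len() - 1` bound the scan stops as soon as only the last required key remains to be matched, so that key is never filled in. A toy standalone reproduction of the off-by-one (the function and data below are invented, not the pipeline's types):

fn collect(required_keys: &[&str], map: &[(&str, i32)], fixed: bool) -> Vec<i32> {
    let limit = if fixed {
        required_keys.len() // fixed bound: keep scanning until every key is matched
    } else {
        required_keys.len() - 1 // old bound: stops before the last required key
    };
    let mut out = Vec::new();
    let mut search_from = 0;
    for (key, value) in map {
        if search_from >= limit {
            break;
        }
        if required_keys[search_from] == *key {
            out.push(*value);
            search_from += 1;
        }
    }
    out
}

fn main() {
    let keys = ["a", "b"];
    let map = [("a", 1), ("b", 2)];
    assert_eq!(collect(&keys, &map, false), vec![1]); // "b" is silently skipped
    assert_eq!(collect(&keys, &map, true), vec![1, 2]); // both keys collected
}
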
@@ -359,15 +359,16 @@ mod tests {
#[test]
fn test_pipeline_prepare() {
let input_value_str = r#"
{
let input_value_str = r#"
{
"my_field": "1,2",
"foo": "bar"
}
"#;
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let pipeline_yaml = r#"
let pipeline_yaml = r#"
---
description: Pipeline for Apache Tomcat
@@ -381,32 +382,73 @@ transform:
- field: field2
type: uint32
"#;
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(
&["greptime_timestamp", "my_field"].to_vec(),
pipeline.required_keys()
);
assert_eq!(
payload,
vec![
Value::Null,
Value::String("1,2".to_string()),
Value::Null,
Value::Null
]
);
let result = pipeline.exec_mut(&mut payload).unwrap();
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(
&["greptime_timestamp", "my_field"].to_vec(),
pipeline.required_keys()
);
assert_eq!(
payload,
vec![
Value::Null,
Value::String("1,2".to_string()),
Value::Null,
Value::Null
]
);
let result = pipeline.exec_mut(&mut payload).unwrap();
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
match &result.values[2].value_data {
Some(ValueData::TimestampNanosecondValue(v)) => {
assert_ne!(*v, 0);
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
match &result.values[2].value_data {
Some(ValueData::TimestampNanosecondValue(v)) => {
assert_ne!(*v, 0);
}
_ => panic!("expect null value"),
}
_ => panic!("expect null value"),
}
{
let input_value_str = r#"
{
"reqTimeSec": "1573840000.000"
}
"#;
let pipeline_yaml = r#"
---
description: Pipeline for Demo Log
processors:
- gsub:
field: reqTimeSec
pattern: "\\."
replacement: ""
- epoch:
field: reqTimeSec
resolution: millisecond
ignore_missing: true
transform:
- field: reqTimeSec
type: epoch, millisecond
index: timestamp
"#;
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(&["reqTimeSec"].to_vec(), pipeline.required_keys());
assert_eq!(payload, vec![Value::String("1573840000.000".to_string())]);
let result = pipeline.exec_mut(&mut payload).unwrap();
assert_eq!(
result.values[0].value_data,
Some(ValueData::TimestampMillisecondValue(1573840000000))
);
}
}

View File

@@ -47,7 +47,7 @@ use crate::extension_plan::Millisecond;
/// Empty source plan that generate record batch with two columns:
/// - time index column, computed from start, end and interval
/// - value column, generated by the input expr. The expr should not
/// reference any column except the time index column.
/// reference any column except the time index column.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct EmptyMetric {
start: Millisecond,

View File

@@ -205,11 +205,12 @@ impl ExecutionPlan for SeriesDivideExec {
.collect();
Ok(Box::pin(SeriesDivideStream {
tag_indices,
buffer: None,
buffer: vec![],
schema,
input,
metric: baseline_metric,
num_series: 0,
inspect_start: 0,
}))
}
@@ -231,11 +232,13 @@ impl DisplayAs for SeriesDivideExec {
/// Assume the input stream is ordered on the tag columns.
pub struct SeriesDivideStream {
tag_indices: Vec<usize>,
buffer: Option<RecordBatch>,
buffer: Vec<RecordBatch>,
schema: SchemaRef,
input: SendableRecordBatchStream,
metric: BaselineMetrics,
num_series: usize,
/// Index of buffered batches to start inspect next time.
inspect_start: usize,
}
impl RecordBatchStream for SeriesDivideStream {
@@ -248,30 +251,45 @@ impl Stream for SeriesDivideStream {
type Item = DataFusionResult<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let timer = std::time::Instant::now();
loop {
if let Some(batch) = self.buffer.as_ref() {
let same_length = self.find_first_diff_row(batch) + 1;
if same_length >= batch.num_rows() {
if !self.buffer.is_empty() {
let cut_at = self.find_first_diff_row();
if let Some((batch_index, row_index)) = cut_at {
// slice out the first time series and return it.
let half_batch_of_first_series =
self.buffer[batch_index].slice(0, row_index + 1);
let half_batch_of_second_series = self.buffer[batch_index].slice(
row_index + 1,
self.buffer[batch_index].num_rows() - row_index - 1,
);
let result_batches = self
.buffer
.drain(0..batch_index)
.chain([half_batch_of_first_series])
.collect::<Vec<_>>();
self.buffer[0] = half_batch_of_second_series;
let result_batch = compute::concat_batches(&self.schema, &result_batches)?;
self.inspect_start = 0;
self.num_series += 1;
self.metric.elapsed_compute().add_elapsed(timer);
return Poll::Ready(Some(Ok(result_batch)));
} else {
// continue to fetch next batch as the current buffer only contains one time series.
let next_batch = ready!(self.as_mut().fetch_next_batch(cx)).transpose()?;
// SAFETY: if-let guards the buffer is not None;
// and we cannot change the buffer at this point.
let batch = self.buffer.take().expect("this batch must exist");
if let Some(next_batch) = next_batch {
self.buffer = Some(compute::concat_batches(
&batch.schema(),
&[batch, next_batch],
)?);
self.buffer.push(next_batch);
continue;
} else {
// input stream is ended
let result = compute::concat_batches(&self.schema, &self.buffer)?;
self.buffer.clear();
self.inspect_start = 0;
self.num_series += 1;
return Poll::Ready(Some(Ok(batch)));
self.metric.elapsed_compute().add_elapsed(timer);
return Poll::Ready(Some(Ok(result)));
}
} else {
let result_batch = batch.slice(0, same_length);
let remaining_batch = batch.slice(same_length, batch.num_rows() - same_length);
self.buffer = Some(remaining_batch);
self.num_series += 1;
return Poll::Ready(Some(Ok(result_batch)));
}
} else {
let batch = match ready!(self.as_mut().fetch_next_batch(cx)) {
@@ -282,7 +300,7 @@ impl Stream for SeriesDivideStream {
}
error => return Poll::Ready(error),
};
self.buffer = Some(batch);
self.buffer.push(batch);
continue;
}
}
@@ -294,40 +312,72 @@ impl SeriesDivideStream {
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Option<DataFusionResult<RecordBatch>>> {
let poll = match self.input.poll_next_unpin(cx) {
Poll::Ready(batch) => {
let _timer = self.metric.elapsed_compute().timer();
Poll::Ready(batch)
}
Poll::Pending => Poll::Pending,
};
let poll = self.input.poll_next_unpin(cx);
self.metric.record_poll(poll)
}
fn find_first_diff_row(&self, batch: &RecordBatch) -> usize {
/// Return the position at which to cut the buffer.
/// `None` implies the current buffer contains only one time series.
fn find_first_diff_row(&mut self) -> Option<(usize, usize)> {
// fast path: no tag columns means all data belongs to the same series.
if self.tag_indices.is_empty() {
return batch.num_rows();
return None;
}
let num_rows = batch.num_rows();
let mut result = num_rows;
let mut resumed_batch_index = self.inspect_start;
for index in &self.tag_indices {
let array = batch.column(*index);
let string_array = array.as_any().downcast_ref::<StringArray>().unwrap();
// the first row number that not equal to the next row.
let mut same_until = 0;
while same_until < num_rows - 1 {
if string_array.value(same_until) != string_array.value(same_until + 1) {
break;
for batch in &self.buffer[resumed_batch_index..] {
let num_rows = batch.num_rows();
let mut result_index = num_rows;
// check if the first row is the same as the last batch's last row
if resumed_batch_index > self.inspect_start {
let last_batch = &self.buffer[resumed_batch_index - 1];
let last_row = last_batch.num_rows() - 1;
for index in &self.tag_indices {
let current_array = batch.column(*index);
let last_array = last_batch.column(*index);
let current_value = current_array
.as_any()
.downcast_ref::<StringArray>()
.unwrap()
.value(0);
let last_value = last_array
.as_any()
.downcast_ref::<StringArray>()
.unwrap()
.value(last_row);
if current_value != last_value {
return Some((resumed_batch_index, 0));
}
}
same_until += 1;
}
result = result.min(same_until);
// check column by column
for index in &self.tag_indices {
let array = batch.column(*index);
let string_array = array.as_any().downcast_ref::<StringArray>().unwrap();
// the first row number that is not equal to the next row.
let mut same_until = 0;
while same_until < num_rows - 1 {
if string_array.value(same_until) != string_array.value(same_until + 1) {
break;
}
same_until += 1;
}
result_index = result_index.min(same_until);
}
if result_index + 1 >= num_rows {
// all rows are the same, inspect next batch
resumed_batch_index += 1;
} else {
return Some((resumed_batch_index, result_index));
}
}
result
self.inspect_start = resumed_batch_index;
None
}
}

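The reworked stream buffers a `Vec<RecordBatch>` so that one time series may span several input batches: it looks for the first row whose tag values change, slices that batch in two, emits the earlier batches plus the prefix as one output, and keeps the suffix buffered for the next series. A standalone sketch of that slice-and-concat bookkeeping using arrow directly (schema, tag values, and numbers are invented for illustration):

use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array, StringArray};
use arrow::compute::concat_batches;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

fn main() -> Result<(), ArrowError> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("tag", DataType::Utf8, false),
        Field::new("val", DataType::Int64, false),
    ]));
    // Two buffered batches; series "a" spans the first batch and the first
    // row of the second one.
    let b1 = RecordBatch::try_new(
        schema.clone(),
        vec![
            Arc::new(StringArray::from(vec!["a", "a"])) as ArrayRef,
            Arc::new(Int64Array::from(vec![1i64, 2])) as ArrayRef,
        ],
    )?;
    let b2 = RecordBatch::try_new(
        schema.clone(),
        vec![
            Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef,
            Arc::new(Int64Array::from(vec![3i64, 4])) as ArrayRef,
        ],
    )?;

    // Cut point: batch index 1, row index 0 (the last row of series "a").
    let head = b2.slice(0, 1);
    let rest = b2.slice(1, b2.num_rows() - 1);

    // Emit everything up to and including the cut as one output batch ...
    let series_a = concat_batches(&schema, &[b1, head])?;
    assert_eq!(series_a.num_rows(), 3);
    // ... and keep the remainder buffered for the next series.
    assert_eq!(rest.num_rows(), 1);
    Ok(())
}
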
View File

@@ -32,11 +32,12 @@ use crate::range_array::RangeArray;
/// There are 3 variants of smoothing functions:
/// 1) "Simple exponential smoothing": only the `level` component (the weighted average of the observations) is used to make forecasts.
/// This method is applied for time-series data that does not exhibit trend or seasonality.
/// This method is applied for time-series data that does not exhibit trend or seasonality.
/// 2) "Holt's linear method" (a.k.a. "double exponential smoothing"): `level` and `trend` components are used to make forecasts.
/// This method is applied for time-series data that exhibits trend but not seasonality.
/// This method is applied for time-series data that exhibits trend but not seasonality.
/// 3) "Holt-Winter's method" (a.k.a. "triple exponential smoothing"): `level`, `trend`, and `seasonality` are used to make forecasts.
/// This method is applied for time-series data that exhibits both trend and seasonality.
///
/// This method is applied for time-series data that exhibits both trend and seasonality.
///
/// In order to keep the parity with the Prometheus functions we had to follow the same naming ("HoltWinters"), however
/// the "Holt's linear"("double exponential smoothing") suits better and reflects implementation.

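For reference, the standard textbook recurrences behind the three variants described above, with smoothing parameters alpha, beta, gamma and seasonal period m (notation and the additive seasonal form are assumptions for illustration, not taken from the implementation):

% Simple exponential smoothing (level only):
\ell_t = \alpha y_t + (1 - \alpha)\,\ell_{t-1}, \qquad \hat{y}_{t+h} = \ell_t

% Holt's linear method (level and trend):
\ell_t = \alpha y_t + (1 - \alpha)(\ell_{t-1} + b_{t-1}), \quad
b_t = \beta(\ell_t - \ell_{t-1}) + (1 - \beta)\, b_{t-1}, \qquad
\hat{y}_{t+h} = \ell_t + h\, b_t

% Holt-Winters, additive seasonality (level, trend, and season):
\ell_t = \alpha (y_t - s_{t-m}) + (1 - \alpha)(\ell_{t-1} + b_{t-1}), \quad
s_t = \gamma (y_t - \ell_{t-1} - b_{t-1}) + (1 - \gamma)\, s_{t-m}, \qquad
\hat{y}_{t+h} = \ell_t + h\, b_t + s_{t+h-m(k+1)}, \; k = \lfloor (h-1)/m \rfloor
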
View File

@@ -34,7 +34,7 @@
//! - bit 0 (lowest bit): whether `FooterPayload` is compressed
//! - all other bits are reserved for future use and should be set to 0 on write
//! * all other bytes are reserved for future use and should be set to 0 on write
//! A 4 byte integer is always signed, in a twos complement representation, stored little-endian.
//! A 4 byte integer is always signed, in a twos complement representation, stored little-endian.
//!
//! ## Footer Payload
//!

View File

@@ -447,6 +447,10 @@ impl QueryEngine for DatafusionQueryEngine {
state.config_mut().set_extension(query_ctx.clone());
QueryEngineContext::new(state, query_ctx)
}
fn engine_state(&self) -> &QueryEngineState {
&self.state
}
}
impl QueryExecutor for DatafusionQueryEngine {

View File

@@ -17,7 +17,9 @@
use std::any::Any;
use std::sync::{Arc, Mutex};
use api::v1::SemanticType;
use async_trait::async_trait;
use common_recordbatch::filter::SimpleFilterEvaluator;
use common_recordbatch::OrderOption;
use datafusion::catalog::schema::SchemaProvider;
use datafusion::catalog::{CatalogProvider, CatalogProviderList};
@@ -177,7 +179,27 @@ impl TableProvider for DummyTableProvider {
&self,
filters: &[&Expr],
) -> datafusion::error::Result<Vec<TableProviderFilterPushDown>> {
Ok(vec![TableProviderFilterPushDown::Inexact; filters.len()])
let supported = filters
.iter()
.map(|e| {
// Simple filters on primary key columns are evaluated precisely.
if let Some(simple_filter) = SimpleFilterEvaluator::try_new(e) {
if self
.metadata
.column_by_name(simple_filter.column_name())
.and_then(|c| (c.semantic_type == SemanticType::Tag).then_some(()))
.is_some()
{
TableProviderFilterPushDown::Exact
} else {
TableProviderFilterPushDown::Inexact
}
} else {
TableProviderFilterPushDown::Inexact
}
})
.collect();
Ok(supported)
}
}

Some files were not shown because too many files have changed in this diff.