Compare commits


39 Commits

Author  SHA1  Message  Date
evenyag  d5760a7348  chore: remove unused codes  2025-03-17 15:20:42 +08:00
discord9  bc9614e22c  feat: file parallel  2025-03-10 21:00:40 +08:00
discord9  7dd9e98ff6  docs: chore  2025-03-10 16:12:28 +08:00
evenyag  fb6b7f7801  fix: use label value to add map  2025-03-10 15:17:59 +08:00
evenyag  87d7c316df  fix: use label value as table name  2025-03-10 14:42:19 +08:00
evenyag  c80a73bc20  feat: use pb in parquet  2025-03-10 14:40:29 +08:00
discord9  dd9d13e7df  fix: cli arg  2025-03-10 14:18:35 +08:00
evenyag  79d249f5fa  feat: fix panic in TimeSeriesParquetReader  2025-03-10 14:13:37 +08:00
evenyag  63bc544514  refactor: use constant  2025-03-10 14:02:27 +08:00
evenyag  30c29539a3  feat: special handle metric engine path  2025-03-10 13:58:46 +08:00
evenyag  359da62d9e  feat: use parquet  2025-03-10 13:36:49 +08:00
evenyag  c9f4b36360  fix: use flushed_sequence as we can't set sequence in ingester  2025-03-10 13:36:49 +08:00
discord9  85c346b16a  chore: progress bar  2025-03-10 11:53:33 +08:00
discord9  738c23beb0  feat: time unit  2025-03-10 11:25:23 +08:00
evenyag  8aadd1e59a  feat: parquet remote write reader  2025-03-09 23:42:08 +08:00
discord9  cbd58291da  chore: more logs  2025-03-09 23:29:58 +08:00
evenyag  e522e8959b  chore: add more logs  2025-03-09 21:19:55 +08:00
evenyag  7183a93e5a  feat: sanitize mito config  2025-03-09 21:05:21 +08:00
evenyag  8c538622e2  feat: add logs  2025-03-09 20:52:02 +08:00
evenyag  142dacb2c8  chore: update fs object build  2025-03-09 20:52:02 +08:00
discord9  371afc458f  chore: init logging  2025-03-09 20:44:53 +08:00
discord9  0751cd74c0  feat: all in one cfg  2025-03-09 20:36:10 +08:00
discord9  ec34e8739a  fix: is file  2025-03-09 19:55:12 +08:00
evenyag  b650743785  feat: implement converter convert  2025-03-09 19:53:36 +08:00
discord9  80a8b2e1bd  feat: debug output file option  2025-03-09 17:23:14 +08:00
discord9  ec8a15cadd  feat: ingester(WIP)  2025-03-09 16:57:26 +08:00
evenyag  f929d751a5  feat: update api  2025-03-09 16:39:35 +08:00
evenyag  fad3621a7a  feat: define converter api  2025-03-09 16:05:52 +08:00
evenyag  87723effc7  feat: declare converter  2025-03-09 15:33:49 +08:00
evenyag  62a333ad09  feat: import datanode  2025-03-09 15:32:02 +08:00
evenyag  6ad186a13e  feat: series to batch  2025-03-09 15:09:13 +08:00
discord9  77dee84a75  fix: parquet also sort by pk  2025-03-09 14:47:34 +08:00
evenyag  a57e263e5a  feat: sort time series  2025-03-08 22:20:13 +08:00
discord9  8796ddaf31  chore: remove unwrap  2025-03-08 20:32:11 +08:00
discord9  7fa3fbdfef  feat: parquet reader  2025-03-08 20:27:44 +08:00
jeremyhi  457d2a620c  feat: add get table api  2025-03-08 19:53:15 +08:00
evenyag  9f14edbb28  feat: implement sst writer  2025-03-08 17:22:03 +08:00
evenyag  cb3fad0c2d  chore: add deps  2025-03-08 16:17:49 +08:00
evenyag  2d1e7c2441  feat: init the converter crate  2025-03-08 14:15:35 +08:00
234 changed files with 8112 additions and 10897 deletions


@@ -52,7 +52,7 @@ runs:
 uses: ./.github/actions/build-greptime-binary
 with:
 base-image: ubuntu
-features: servers/dashboard,pg_kvbackend,mysql_kvbackend
+features: servers/dashboard,pg_kvbackend
 cargo-profile: ${{ inputs.cargo-profile }}
 artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
 version: ${{ inputs.version }}
@@ -70,7 +70,7 @@ runs:
 if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds greptime for centos if the host machine is amd64.
 with:
 base-image: centos
-features: servers/dashboard,pg_kvbackend,mysql_kvbackend
+features: servers/dashboard,pg_kvbackend
 cargo-profile: ${{ inputs.cargo-profile }}
 artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
 version: ${{ inputs.version }}


@@ -238,13 +238,6 @@ jobs:
 version: ${{ needs.allocate-runners.outputs.version }}
 push-latest-tag: false # Don't push the latest tag to registry.
 dev-mode: true # Only build the standard images.
-- name: Echo Docker image tag to step summary
-run: |
-echo "## Docker Image Tag" >> $GITHUB_STEP_SUMMARY
-echo "Image Tag: \`${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
-echo "Full Image Name: \`docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
-echo "Pull Command: \`docker pull docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
 - name: Set build result
 id: set-build-result

@@ -111,7 +111,7 @@ jobs:
 - name: Build greptime binaries
 shell: bash
 # `cargo gc` will invoke `cargo build` with specified args
-run: cargo gc -- --bin greptime --bin sqlness-runner --features "pg_kvbackend,mysql_kvbackend"
+run: cargo gc -- --bin greptime --bin sqlness-runner --features pg_kvbackend
 - name: Pack greptime binaries
 shell: bash
 run: |
@@ -270,7 +270,7 @@ jobs:
 - name: Build greptime bianry
 shell: bash
 # `cargo gc` will invoke `cargo build` with specified args
-run: cargo gc --profile ci -- --bin greptime --features "pg_kvbackend,mysql_kvbackend"
+run: cargo gc --profile ci -- --bin greptime --features pg_kvbackend
 - name: Pack greptime binary
 shell: bash
 run: |
@@ -687,7 +687,7 @@ jobs:
 working-directory: tests-integration/fixtures
 run: docker compose up -d --wait
 - name: Run nextest cases
-run: cargo nextest run --workspace -F dashboard -F pg_kvbackend -F mysql_kvbackend
+run: cargo nextest run --workspace -F dashboard -F pg_kvbackend
 env:
 CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
 RUST_BACKTRACE: 1
@@ -704,7 +704,6 @@ jobs:
 GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
 GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
 GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
-GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
 GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
 GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
 UNITTEST_LOG_DIR: "__unittest_logs"
@@ -740,7 +739,7 @@ jobs:
 working-directory: tests-integration/fixtures
 run: docker compose up -d --wait
 - name: Run nextest cases
-run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend -F mysql_kvbackend
+run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend
 env:
 CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
 RUST_BACKTRACE: 1
@@ -756,7 +755,6 @@ jobs:
 GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
 GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
 GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
-GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
 GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
 GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
 UNITTEST_LOG_DIR: "__unittest_logs"


@@ -1,52 +0,0 @@
name: Check Grafana Panels
on:
pull_request:
branches:
- main
paths:
- 'grafana/**' # Trigger only when files under the grafana/ directory change
jobs:
check-panels:
runs-on: ubuntu-latest
steps:
# Check out the repository
- name: Checkout repository
uses: actions/checkout@v4
# Install jq (required for the script)
- name: Install jq
run: sudo apt-get install -y jq
# Make the check.sh script executable
- name: Make check.sh executable
run: chmod +x grafana/check.sh
# Run the check.sh script
- name: Run check.sh
run: ./grafana/check.sh
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
- name: Check if this is a pull request
id: check-pr
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "is_pull_request=true" >> $GITHUB_OUTPUT
else
echo "is_pull_request=false" >> $GITHUB_OUTPUT
fi
# Make the summary.sh script executable
- name: Make summary.sh executable
if: steps.check-pr.outputs.is_pull_request == 'true'
run: chmod +x grafana/summary.sh
# Run the summary.sh script and add its output to the GitHub Job Summary
- name: Run summary.sh and add to Job Summary
if: steps.check-pr.outputs.is_pull_request == 'true'
run: |
SUMMARY=$(./grafana/summary.sh)
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY

Cargo.lock (generated)

@@ -1594,7 +1594,7 @@ dependencies = [
 "bitflags 1.3.2",
 "strsim 0.8.0",
 "textwrap 0.11.0",
-"unicode-width",
+"unicode-width 0.1.14",
 "vec_map",
 ]
@@ -1693,7 +1693,6 @@ dependencies = [
 "humantime",
 "meta-client",
 "nu-ansi-term",
-"opendal",
 "query",
 "rand",
 "reqwest",
@@ -1877,7 +1876,7 @@ checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
 dependencies = [
 "strum 0.26.3",
 "strum_macros 0.26.4",
-"unicode-width",
+"unicode-width 0.1.14",
 ]
 [[package]]
@@ -2016,7 +2015,6 @@ dependencies = [
 "arc-swap",
 "async-trait",
 "bincode",
-"chrono",
 "common-base",
 "common-catalog",
 "common-error",
@@ -2198,7 +2196,6 @@ dependencies = [
 "serde_with",
 "session",
 "snafu 0.8.5",
-"sqlx",
 "store-api",
 "strum 0.25.0",
 "table",
@@ -2472,6 +2469,7 @@ dependencies = [
 "encode_unicode",
 "lazy_static",
 "libc",
+"unicode-width 0.1.14",
 "windows-sys 0.52.0",
 ]
@@ -4170,7 +4168,6 @@ dependencies = [
 "bytes",
 "cache",
 "catalog",
-"chrono",
 "client",
 "common-base",
 "common-catalog",
@@ -4649,7 +4646,7 @@ version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
 dependencies = [
-"unicode-width",
+"unicode-width 0.1.14",
 ]
 [[package]]
@@ -4705,7 +4702,7 @@ dependencies = [
 [[package]]
 name = "greptime-proto"
 version = "0.1.0"
-source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=c5419bbd20cb42e568ec325a4d71a3c94cc327e1#c5419bbd20cb42e568ec325a4d71a3c94cc327e1"
+source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486#d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486"
 dependencies = [
 "prost 0.13.3",
 "serde",
@@ -5570,7 +5567,6 @@ dependencies = [
 "rand",
 "regex",
 "regex-automata 0.4.8",
-"roaring",
 "serde",
 "serde_json",
 "snafu 0.8.5",
@@ -5604,6 +5600,19 @@ dependencies = [
 "serde",
 ]
+[[package]]
+name = "indicatif"
+version = "0.17.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
+dependencies = [
+"console",
+"number_prefix",
+"portable-atomic",
+"unicode-width 0.2.0",
+"web-time 1.1.0",
+]
 [[package]]
 name = "inferno"
 version = "0.11.21"
@@ -5633,6 +5642,25 @@ dependencies = [
 "snafu 0.7.5",
 ]
+[[package]]
+name = "ingester"
+version = "0.13.0"
+dependencies = [
+"clap 4.5.19",
+"common-telemetry",
+"common-time",
+"datanode",
+"meta-client",
+"mito2",
+"object-store",
+"reqwest",
+"serde",
+"serde_json",
+"sst-convert",
+"tokio",
+"toml 0.8.19",
+]
 [[package]]
 name = "inotify"
 version = "0.9.6"
@@ -5902,15 +5930,15 @@ dependencies = [
 [[package]]
 name = "jsonpath-rust"
-version = "0.7.5"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b"
+checksum = "69a61b87f6a55cc6c28fed5739dd36b9642321ce63e4a5e4a4715d69106f4a10"
 dependencies = [
 "pest",
 "pest_derive",
 "regex",
 "serde_json",
-"thiserror 2.0.12",
+"thiserror 1.0.64",
 ]
 [[package]]
@@ -6721,7 +6749,6 @@ dependencies = [
 "session",
 "snafu 0.8.5",
 "store-api",
-"strum 0.25.0",
 "table",
 "tokio",
 "tokio-postgres",
@@ -7523,6 +7550,12 @@ dependencies = [
 "libc",
 ]
+[[package]]
+name = "number_prefix"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
 [[package]]
 name = "objc"
 version = "0.2.7"
@@ -7979,7 +8012,7 @@ version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2ad9b889f1b12e0b9ee24db044b5129150d5eada288edc800f789928dc8c0e3"
 dependencies = [
-"unicode-width",
+"unicode-width 0.1.14",
 ]
 [[package]]
@@ -8075,6 +8108,19 @@ dependencies = [
 "zstd-sys",
 ]
+[[package]]
+name = "parquet_opendal"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4140ae96f37c170f8d684a544711fabdac1d94adcbd97e8b033329bd37f40446"
+dependencies = [
+"async-trait",
+"bytes",
+"futures",
+"opendal",
+"parquet",
+]
 [[package]]
 name = "parse-zoneinfo"
 version = "0.3.1"
@@ -8276,7 +8322,7 @@ dependencies = [
 "rand",
 "ring",
 "rust_decimal",
-"thiserror 2.0.12",
+"thiserror 2.0.6",
 "tokio",
 "tokio-rustls 0.26.0",
 "tokio-util",
@@ -8388,7 +8434,7 @@ dependencies = [
 "greptime-proto",
 "itertools 0.10.5",
 "jsonb",
-"jsonpath-rust 0.7.5",
+"jsonpath-rust 0.7.3",
 "lazy_static",
 "moka",
 "once_cell",
@@ -8766,7 +8812,6 @@ dependencies = [
 "common-recordbatch",
 "common-telemetry",
 "datafusion",
-"datafusion-common",
 "datafusion-expr",
 "datatypes",
 "futures",
@@ -8780,9 +8825,8 @@ dependencies = [
 [[package]]
 name = "promql-parser"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c6b1429bdd199d53bd58b745075c1652efedbe2746e5d4f0d56d3184dda48ec"
+version = "0.4.3"
+source = "git+https://github.com/GreptimeTeam/promql-parser.git?rev=27abb8e16003a50c720f00d6c85f41f5fa2a2a8e#27abb8e16003a50c720f00d6c85f41f5fa2a2a8e"
 dependencies = [
 "cfgrammar",
 "chrono",
@@ -9640,16 +9684,6 @@ dependencies = [
 "syn 1.0.109",
 ]
-[[package]]
-name = "roaring"
-version = "0.10.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661"
-dependencies = [
-"bytemuck",
-"byteorder",
-]
 [[package]]
 name = "robust"
 version = "1.1.0"
@@ -10074,7 +10108,7 @@ dependencies = [
 "radix_trie",
 "scopeguard",
 "unicode-segmentation",
-"unicode-width",
+"unicode-width 0.1.14",
 "utf8parse",
 "winapi",
 ]
@@ -11069,7 +11103,7 @@ dependencies = [
 "serde_json",
 "sha2",
 "smallvec",
-"thiserror 2.0.12",
+"thiserror 2.0.6",
 "tokio",
 "tokio-stream",
 "tracing",
@@ -11154,7 +11188,7 @@ dependencies = [
 "smallvec",
 "sqlx-core",
 "stringprep",
-"thiserror 2.0.12",
+"thiserror 2.0.6",
 "tracing",
 "whoami",
 ]
@@ -11192,7 +11226,7 @@ dependencies = [
 "smallvec",
 "sqlx-core",
 "stringprep",
-"thiserror 2.0.12",
+"thiserror 2.0.6",
 "tracing",
 "whoami",
 ]
@@ -11221,6 +11255,36 @@ dependencies = [
 "url",
 ]
+[[package]]
+name = "sst-convert"
+version = "0.13.0"
+dependencies = [
+"api",
+"arrow-array",
+"async-trait",
+"catalog",
+"common-error",
+"common-macro",
+"common-meta",
+"common-recordbatch",
+"common-telemetry",
+"datanode",
+"datatypes",
+"futures",
+"futures-util",
+"indicatif",
+"meta-client",
+"metric-engine",
+"mito2",
+"object-store",
+"parquet",
+"parquet_opendal",
+"prost 0.13.3",
+"snafu 0.8.5",
+"store-api",
+"table",
+]
 [[package]]
 name = "stable_deref_trait"
 version = "1.2.0"
@@ -11953,7 +12017,7 @@ version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
 dependencies = [
-"unicode-width",
+"unicode-width 0.1.14",
 ]
 [[package]]
@@ -11973,11 +12037,11 @@ dependencies = [
 [[package]]
 name = "thiserror"
-version = "2.0.12"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
+checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47"
 dependencies = [
-"thiserror-impl 2.0.12",
+"thiserror-impl 2.0.6",
 ]
 [[package]]
@@ -11993,9 +12057,9 @@ dependencies = [
 [[package]]
 name = "thiserror-impl"
-version = "2.0.12"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
+checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -13056,6 +13120,12 @@ version = "0.1.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
+[[package]]
+name = "unicode-width"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
 [[package]]
 name = "unicode-xid"
 version = "0.2.6"


@@ -41,6 +41,7 @@ members = [
 "src/flow",
 "src/frontend",
 "src/index",
+"src/ingester",
 "src/log-query",
 "src/log-store",
 "src/meta-client",
@@ -58,6 +59,7 @@ members = [
 "src/servers",
 "src/session",
 "src/sql",
+"src/sst-convert",
 "src/store-api",
 "src/table",
 "tests-fuzz",
@@ -129,7 +131,7 @@ etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c5419bbd20cb42e568ec325a4d71a3c94cc327e1" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -160,7 +162,9 @@ parquet = { version = "53.0.0", default-features = false, features = ["arrow", "
 paste = "1.0"
 pin-project = "1.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-promql-parser = { version = "0.5", features = ["ser"] }
+promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", features = [
+"ser",
+], rev = "27abb8e16003a50c720f00d6c85f41f5fa2a2a8e" }
 prost = "0.13"
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.8"
@@ -188,10 +192,6 @@ shadow-rs = "0.38"
 similar-asserts = "1.6.0"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.8"
-sqlx = { version = "0.8", features = [
-"runtime-tokio-rustls",
-"mysql",
-] }
 sysinfo = "0.30"
 # on branch v0.52.x
 sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
@@ -273,6 +273,7 @@ query = { path = "src/query" }
 servers = { path = "src/servers" }
 session = { path = "src/session" }
 sql = { path = "src/sql" }
+sst-convert = { path = "src/sst-convert" }
 store-api = { path = "src/store-api" }
 substrait = { path = "src/common/substrait" }
 table = { path = "src/table" }

chore.md (new file)

@@ -0,0 +1,76 @@
# log
## first create table
```bash
mysql --host=127.0.0.1 --port=19195 --database=public;
```
```sql
CREATE DATABASE IF NOT EXISTS `cluster1`;
USE `cluster1`;
CREATE TABLE IF NOT EXISTS `app1` (
`greptime_timestamp` TimestampNanosecond NOT NULL TIME INDEX,
`app` STRING NULL INVERTED INDEX,
`cluster` STRING NULL INVERTED INDEX,
`message` STRING NULL,
`region` STRING NULL,
`cloud-provider` STRING NULL,
`environment` STRING NULL,
`product` STRING NULL,
`sub-product` STRING NULL,
`service` STRING NULL
) WITH (
append_mode = 'true',
'compaction.type' = 'twcs',
'compaction.twcs.max_output_file_size' = '500MB',
'compaction.twcs.max_active_window_files' = '16',
'compaction.twcs.max_active_window_runs' = '4',
'compaction.twcs.max_inactive_window_files' = '4',
'compaction.twcs.max_inactive_window_runs' = '2',
);
select count(*) from app1;
SELECT * FROM app1 ORDER BY greptime_timestamp DESC LIMIT 10\G
```
## then ingest
```bash
RUST_LOG="debug" cargo run --bin=ingester -- --input-dir="/home/discord9/greptimedb/parquet_store_bk/" --parquet-dir="parquet_store/" --cfg="ingester.toml" --db-http-addr="http://127.0.0.1:4000/v1/sst/ingest_json"
```
# metrics
```bash
mysql --host=127.0.0.1 --port=19195 --database=public < public.greptime_physical_table-create-tables.sql
```
## then ingest
```bash
RUST_LOG="debug" cargo run --bin=ingester -- --input-dir="/home/discord9/greptimedb/parquet_store_bk/" --remote-write-dir="metrics_parquet/" --cfg="ingester.toml" --db-http-addr="http://127.0.0.1:4000/v1/sst/ingest_json"
# perf it
cargo build --release --bin=ingester
samply record target/release/ingester --input-dir="/home/discord9/greptimedb/parquet_store_bk/" --remote-write-dir="metrics_parquet/" --cfg="ingester.toml" --db-http-addr="http://127.0.0.1:4000/v1/sst/ingest_json"
```
## check data
```sql
select count(*) from greptime_physical_table;
+----------+
| count(*) |
+----------+
| 36200 |
+----------+
1 row in set (0.06 sec)
select count(*) from storage_operation_errors_total;
+----------+
| count(*) |
+----------+
| 10 |
+----------+
1 row in set (0.03 sec)
```
# with oss
The steps are the same; the only difference is the storage config in `ingester.toml` (see the sketch below).
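For reference, a minimal `[storage]` sketch for the OSS case. This is an assumption based on GreptimeDB's documented object-storage options, not a config taken from this branch; every value below is a placeholder to be replaced.
```toml
# Hypothetical ingester.toml storage section for Aliyun OSS (placeholder values).
[storage]
type = "Oss"
bucket = "my-bucket"
root = "greptimedb-data"
access_key_id = "<access key id>"
access_key_secret = "<access key secret>"
endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
```
With the storage section switched, the ingest commands above are run unchanged.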


@@ -231,7 +231,6 @@ overwrite_entry_start_id = false
 # secret_access_key = "123456"
 # endpoint = "https://s3.amazonaws.com"
 # region = "us-west-2"
-# enable_virtual_host_style = false
 # Example of using Oss as the storage.
 # [storage]


@@ -318,7 +318,6 @@ retry_delay = "500ms"
# secret_access_key = "123456" # secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com" # endpoint = "https://s3.amazonaws.com"
# region = "us-west-2" # region = "us-west-2"
# enable_virtual_host_style = false
# Example of using Oss as the storage. # Example of using Oss as the storage.
# [storage] # [storage]


@@ -1,19 +0,0 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
exit 0
fi

File diff suppressed because it is too large.


@@ -1,11 +0,0 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
echo '| Title | Description | Expressions |
|---|---|---|'
cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels |
map(select(.type == "stat" or .type == "timeseries")) |
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
'

ingester.toml (new file)

@@ -0,0 +1,35 @@
## The metasrv client options.
[meta_client]
## The addresses of the metasrv.
metasrv_addrs = ["127.0.0.1:3002", "127.0.0.1:3003"]
## Operation timeout.
timeout = "3s"
## Heartbeat timeout.
heartbeat_timeout = "500ms"
## DDL timeout.
ddl_timeout = "10s"
## Connect server timeout.
connect_timeout = "1s"
## `TCP_NODELAY` option for accepted connections.
tcp_nodelay = true
## The configuration about the cache of the metadata.
metadata_cache_max_capacity = 100000
## TTL of the metadata cache.
metadata_cache_ttl = "10m"
# TTI of the metadata cache.
metadata_cache_tti = "5m"
## The data storage options.
[storage]
## The working home directory.
data_home = "/tmp/greptimedb-cluster/datanode0"
type = "File"
[mito]


@@ -19,7 +19,9 @@ use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECIS
 use common_decimal::Decimal128;
 use common_time::time::Time;
 use common_time::timestamp::TimeUnit;
-use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp};
+use common_time::{
+Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
+};
 use datatypes::prelude::{ConcreteDataType, ValueRef};
 use datatypes::scalars::ScalarVector;
 use datatypes::types::{
@@ -27,8 +29,8 @@ use datatypes::types::{
 };
 use datatypes::value::{OrderedF32, OrderedF64, Value};
 use datatypes::vectors::{
-BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
-Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
+BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
+Float64Vector, Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
 IntervalYearMonthVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
 TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
 TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector,
@@ -116,7 +118,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
 ColumnDataType::Json => ConcreteDataType::json_datatype(),
 ColumnDataType::String => ConcreteDataType::string_datatype(),
 ColumnDataType::Date => ConcreteDataType::date_datatype(),
-ColumnDataType::Datetime => ConcreteDataType::timestamp_microsecond_datatype(),
+ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
 ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
 ColumnDataType::TimestampMillisecond => {
 ConcreteDataType::timestamp_millisecond_datatype()
@@ -269,6 +271,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
 ConcreteDataType::Binary(_) => ColumnDataType::Binary,
 ConcreteDataType::String(_) => ColumnDataType::String,
 ConcreteDataType::Date(_) => ColumnDataType::Date,
+ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
 ConcreteDataType::Timestamp(t) => match t {
 TimestampType::Second(_) => ColumnDataType::TimestampSecond,
 TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
@@ -473,6 +476,7 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
 Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
 Value::Binary(val) => values.binary_values.push(val.to_vec()),
 Value::Date(val) => values.date_values.push(val.val()),
+Value::DateTime(val) => values.datetime_values.push(val.val()),
 Value::Timestamp(val) => match val.unit() {
 TimeUnit::Second => values.timestamp_second_values.push(val.value()),
 TimeUnit::Millisecond => values.timestamp_millisecond_values.push(val.value()),
@@ -573,11 +577,12 @@ pub fn pb_value_to_value_ref<'a>(
 ValueData::BinaryValue(bytes) => ValueRef::Binary(bytes.as_slice()),
 ValueData::StringValue(string) => ValueRef::String(string.as_str()),
 ValueData::DateValue(d) => ValueRef::Date(Date::from(*d)),
+ValueData::DatetimeValue(d) => ValueRef::DateTime(DateTime::new(*d)),
 ValueData::TimestampSecondValue(t) => ValueRef::Timestamp(Timestamp::new_second(*t)),
 ValueData::TimestampMillisecondValue(t) => {
 ValueRef::Timestamp(Timestamp::new_millisecond(*t))
 }
-ValueData::DatetimeValue(t) | ValueData::TimestampMicrosecondValue(t) => {
+ValueData::TimestampMicrosecondValue(t) => {
 ValueRef::Timestamp(Timestamp::new_microsecond(*t))
 }
 ValueData::TimestampNanosecondValue(t) => {
@@ -646,6 +651,7 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
 ConcreteDataType::Binary(_) => Arc::new(BinaryVector::from(values.binary_values)),
 ConcreteDataType::String(_) => Arc::new(StringVector::from_vec(values.string_values)),
 ConcreteDataType::Date(_) => Arc::new(DateVector::from_vec(values.date_values)),
+ConcreteDataType::DateTime(_) => Arc::new(DateTimeVector::from_vec(values.datetime_values)),
 ConcreteDataType::Timestamp(unit) => match unit {
 TimestampType::Second(_) => Arc::new(TimestampSecondVector::from_vec(
 values.timestamp_second_values,
@@ -781,6 +787,11 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
 .into_iter()
 .map(|val| val.into())
 .collect(),
+ConcreteDataType::DateTime(_) => values
+.datetime_values
+.into_iter()
+.map(|v| Value::DateTime(v.into()))
+.collect(),
 ConcreteDataType::Date(_) => values
 .date_values
 .into_iter()
@@ -936,6 +947,9 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
 Value::Date(v) => v1::Value {
 value_data: Some(ValueData::DateValue(v.val())),
 },
+Value::DateTime(v) => v1::Value {
+value_data: Some(ValueData::DatetimeValue(v.val())),
+},
 Value::Timestamp(v) => match v.unit() {
 TimeUnit::Second => v1::Value {
 value_data: Some(ValueData::TimestampSecondValue(v.value())),
@@ -1052,6 +1066,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
 Value::String(v) => Some(ValueData::StringValue(v.as_utf8().to_string())),
 Value::Binary(v) => Some(ValueData::BinaryValue(v.to_vec())),
 Value::Date(v) => Some(ValueData::DateValue(v.val())),
+Value::DateTime(v) => Some(ValueData::DatetimeValue(v.val())),
 Value::Timestamp(v) => Some(match v.unit() {
 TimeUnit::Second => ValueData::TimestampSecondValue(v.value()),
 TimeUnit::Millisecond => ValueData::TimestampMillisecondValue(v.value()),
@@ -1233,7 +1248,7 @@ mod tests {
 ColumnDataTypeWrapper::date_datatype().into()
 );
 assert_eq!(
-ConcreteDataType::timestamp_microsecond_datatype(),
+ConcreteDataType::datetime_datatype(),
 ColumnDataTypeWrapper::datetime_datatype().into()
 );
 assert_eq!(
@@ -1324,6 +1339,10 @@ mod tests {
 ColumnDataTypeWrapper::date_datatype(),
 ConcreteDataType::date_datatype().try_into().unwrap()
 );
+assert_eq!(
+ColumnDataTypeWrapper::datetime_datatype(),
+ConcreteDataType::datetime_datatype().try_into().unwrap()
+);
 assert_eq!(
 ColumnDataTypeWrapper::timestamp_millisecond_datatype(),
 ConcreteDataType::timestamp_millisecond_datatype()
@@ -1811,6 +1830,17 @@ mod tests {
 ]
 );
+test_convert_values!(
+datetime,
+vec![1.into(), 2.into(), 3.into()],
+datetime,
+vec![
+Value::DateTime(1.into()),
+Value::DateTime(2.into()),
+Value::DateTime(3.into())
+]
+);
 #[test]
 fn test_vectors_to_rows_for_different_types() {
 let boolean_vec = BooleanVector::from_vec(vec![true, false, true]);


@@ -77,7 +77,7 @@ trait SystemSchemaProviderInner {
 fn system_table(&self, name: &str) -> Option<SystemTableRef>;
 fn table_info(catalog_name: String, table: &SystemTableRef) -> TableInfoRef {
-let table_meta = TableMetaBuilder::empty()
+let table_meta = TableMetaBuilder::default()
 .schema(table.schema())
 .primary_key_indices(vec![])
 .next_column_id(0)


@@ -365,6 +365,10 @@ impl InformationSchemaColumnsBuilder {
 self.numeric_scales.push(None);
 match &column_schema.data_type {
+ConcreteDataType::DateTime(datetime_type) => {
+self.datetime_precisions
+.push(Some(datetime_type.precision() as i64));
+}
 ConcreteDataType::Timestamp(ts_type) => {
 self.datetime_precisions
 .push(Some(ts_type.precision() as i64));


@@ -28,19 +28,16 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datatypes::prelude::ConcreteDataType as CDT;
 use datatypes::scalars::ScalarVectorBuilder;
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
-use datatypes::timestamp::TimestampMillisecond;
 use datatypes::value::Value;
 use datatypes::vectors::{
-Int64VectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder,
-UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
+Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
 };
 use futures::TryStreamExt;
 use snafu::{OptionExt, ResultExt};
 use store_api::storage::{ScanRequest, TableId};
 use crate::error::{
-CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu,
-Result, UpgradeWeakCatalogManagerRefSnafu,
+CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu, Result,
 };
 use crate::information_schema::{Predicates, FLOWS};
 use crate::system_schema::information_schema::InformationTable;
@@ -62,10 +59,6 @@ pub const SOURCE_TABLE_IDS: &str = "source_table_ids";
 pub const SINK_TABLE_NAME: &str = "sink_table_name";
 pub const FLOWNODE_IDS: &str = "flownode_ids";
 pub const OPTIONS: &str = "options";
-pub const CREATED_TIME: &str = "created_time";
-pub const UPDATED_TIME: &str = "updated_time";
-pub const LAST_EXECUTION_TIME: &str = "last_execution_time";
-pub const SOURCE_TABLE_NAMES: &str = "source_table_names";
 /// The `information_schema.flows` to provides information about flows in databases.
 #[derive(Debug)]
@@ -106,14 +99,6 @@ impl InformationSchemaFlows {
 (SINK_TABLE_NAME, CDT::string_datatype(), false),
 (FLOWNODE_IDS, CDT::string_datatype(), true),
 (OPTIONS, CDT::string_datatype(), true),
-(CREATED_TIME, CDT::timestamp_millisecond_datatype(), false),
-(UPDATED_TIME, CDT::timestamp_millisecond_datatype(), false),
-(
-LAST_EXECUTION_TIME,
-CDT::timestamp_millisecond_datatype(),
-true,
-),
-(SOURCE_TABLE_NAMES, CDT::string_datatype(), true),
 ]
 .into_iter()
 .map(|(name, ty, nullable)| ColumnSchema::new(name, ty, nullable))
@@ -185,10 +170,6 @@ struct InformationSchemaFlowsBuilder {
 sink_table_names: StringVectorBuilder,
 flownode_id_groups: StringVectorBuilder,
 option_groups: StringVectorBuilder,
-created_time: TimestampMillisecondVectorBuilder,
-updated_time: TimestampMillisecondVectorBuilder,
-last_execution_time: TimestampMillisecondVectorBuilder,
-source_table_names: StringVectorBuilder,
 }
 impl InformationSchemaFlowsBuilder {
@@ -215,10 +196,6 @@ impl InformationSchemaFlowsBuilder {
 sink_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
 flownode_id_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
 option_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
-created_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
-updated_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
-last_execution_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
-source_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
 }
 }
@@ -258,14 +235,13 @@ impl InformationSchemaFlowsBuilder {
 catalog_name: catalog_name.to_string(),
 flow_name: flow_name.to_string(),
 })?;
-self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)
-.await?;
+self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)?;
 }
 self.finish()
 }
-async fn add_flow(
+fn add_flow(
 &mut self,
 predicates: &Predicates,
 flow_id: FlowId,
@@ -314,36 +290,6 @@ impl InformationSchemaFlowsBuilder {
 input: format!("{:?}", flow_info.options()),
 },
 )?));
-self.created_time
-.push(Some(flow_info.created_time().timestamp_millis().into()));
-self.updated_time
-.push(Some(flow_info.updated_time().timestamp_millis().into()));
-self.last_execution_time
-.push(flow_stat.as_ref().and_then(|state| {
-state
-.last_exec_time_map
-.get(&flow_id)
-.map(|v| TimestampMillisecond::new(*v))
-}));
-let mut source_table_names = vec![];
-let catalog_name = self.catalog_name.clone();
-let catalog_manager = self
-.catalog_manager
-.upgrade()
-.context(UpgradeWeakCatalogManagerRefSnafu)?;
-for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
-source_table_names.extend(
-catalog_manager
-.tables_by_ids(&catalog_name, &schema_name, flow_info.source_table_ids())
-.await?
-.into_iter()
-.map(|table| table.table_info().full_table_name()),
-);
-}
-let source_table_names = source_table_names.join(",");
-self.source_table_names.push(Some(&source_table_names));
 Ok(())
 }
@@ -361,10 +307,6 @@ impl InformationSchemaFlowsBuilder {
 Arc::new(self.sink_table_names.finish()),
 Arc::new(self.flownode_id_groups.finish()),
 Arc::new(self.option_groups.finish()),
-Arc::new(self.created_time.finish()),
-Arc::new(self.updated_time.finish()),
-Arc::new(self.last_execution_time.finish()),
-Arc::new(self.source_table_names.finish()),
 ];
 RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
 }


@@ -20,7 +20,7 @@ use datatypes::vectors::{Int64Vector, StringVector, VectorRef};
 use super::table_names::*;
 use crate::system_schema::utils::tables::{
-bigint_column, string_column, string_columns, timestamp_micro_column,
+bigint_column, datetime_column, string_column, string_columns,
 };
 const NO_VALUE: &str = "NO";
@@ -163,17 +163,17 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
 string_column("EVENT_BODY"),
 string_column("EVENT_DEFINITION"),
 string_column("EVENT_TYPE"),
-timestamp_micro_column("EXECUTE_AT"),
+datetime_column("EXECUTE_AT"),
 bigint_column("INTERVAL_VALUE"),
 string_column("INTERVAL_FIELD"),
 string_column("SQL_MODE"),
-timestamp_micro_column("STARTS"),
-timestamp_micro_column("ENDS"),
+datetime_column("STARTS"),
+datetime_column("ENDS"),
 string_column("STATUS"),
 string_column("ON_COMPLETION"),
-timestamp_micro_column("CREATED"),
-timestamp_micro_column("LAST_ALTERED"),
-timestamp_micro_column("LAST_EXECUTED"),
+datetime_column("CREATED"),
+datetime_column("LAST_ALTERED"),
+datetime_column("LAST_EXECUTED"),
 string_column("EVENT_COMMENT"),
 bigint_column("ORIGINATOR"),
 string_column("CHARACTER_SET_CLIENT"),
@@ -204,10 +204,10 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
 bigint_column("INITIAL_SIZE"),
 bigint_column("MAXIMUM_SIZE"),
 bigint_column("AUTOEXTEND_SIZE"),
-timestamp_micro_column("CREATION_TIME"),
-timestamp_micro_column("LAST_UPDATE_TIME"),
-timestamp_micro_column("LAST_ACCESS_TIME"),
-timestamp_micro_column("RECOVER_TIME"),
+datetime_column("CREATION_TIME"),
+datetime_column("LAST_UPDATE_TIME"),
+datetime_column("LAST_ACCESS_TIME"),
+datetime_column("RECOVER_TIME"),
 bigint_column("TRANSACTION_COUNTER"),
 string_column("VERSION"),
 string_column("ROW_FORMAT"),
@@ -217,9 +217,9 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
 bigint_column("MAX_DATA_LENGTH"),
 bigint_column("INDEX_LENGTH"),
 bigint_column("DATA_FREE"),
-timestamp_micro_column("CREATE_TIME"),
-timestamp_micro_column("UPDATE_TIME"),
-timestamp_micro_column("CHECK_TIME"),
+datetime_column("CREATE_TIME"),
+datetime_column("UPDATE_TIME"),
+datetime_column("CHECK_TIME"),
 string_column("CHECKSUM"),
 string_column("STATUS"),
 string_column("EXTRA"),
@@ -330,8 +330,8 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
 string_column("SQL_DATA_ACCESS"),
 string_column("SQL_PATH"),
 string_column("SECURITY_TYPE"),
-timestamp_micro_column("CREATED"),
-timestamp_micro_column("LAST_ALTERED"),
+datetime_column("CREATED"),
+datetime_column("LAST_ALTERED"),
 string_column("SQL_MODE"),
 string_column("ROUTINE_COMMENT"),
 string_column("DEFINER"),
@@ -383,7 +383,7 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
 string_column("ACTION_REFERENCE_NEW_TABLE"),
 string_column("ACTION_REFERENCE_OLD_ROW"),
 string_column("ACTION_REFERENCE_NEW_ROW"),
-timestamp_micro_column("CREATED"),
+datetime_column("CREATED"),
 string_column("SQL_MODE"),
 string_column("DEFINER"),
 string_column("CHARACTER_SET_CLIENT"),


@@ -20,18 +20,17 @@ use common_catalog::consts::INFORMATION_SCHEMA_PARTITIONS_TABLE_ID;
 use common_error::ext::BoxedError;
 use common_recordbatch::adapter::RecordBatchStreamAdapter;
 use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
+use common_time::datetime::DateTime;
 use datafusion::execution::TaskContext;
 use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
 use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
 use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
-use datatypes::timestamp::TimestampMicrosecond;
 use datatypes::value::Value;
 use datatypes::vectors::{
-ConstantVector, Int64Vector, Int64VectorBuilder, MutableVector, StringVector,
-StringVectorBuilder, TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder,
-UInt64VectorBuilder,
+ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder,
+MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder,
 };
 use futures::{StreamExt, TryStreamExt};
 use partition::manager::PartitionInfo;
@@ -128,21 +127,9 @@ impl InformationSchemaPartitions {
 ColumnSchema::new("max_data_length", ConcreteDataType::int64_datatype(), true),
 ColumnSchema::new("index_length", ConcreteDataType::int64_datatype(), true),
 ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
-ColumnSchema::new(
-"create_time",
-ConcreteDataType::timestamp_microsecond_datatype(),
-true,
-),
-ColumnSchema::new(
-"update_time",
-ConcreteDataType::timestamp_microsecond_datatype(),
-true,
-),
-ColumnSchema::new(
-"check_time",
-ConcreteDataType::timestamp_microsecond_datatype(),
-true,
-),
+ColumnSchema::new("create_time", ConcreteDataType::datetime_datatype(), true),
+ColumnSchema::new("update_time", ConcreteDataType::datetime_datatype(), true),
+ColumnSchema::new("check_time", ConcreteDataType::datetime_datatype(), true),
 ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
 ColumnSchema::new(
 "partition_comment",
@@ -213,7 +200,7 @@ struct InformationSchemaPartitionsBuilder {
 partition_names: StringVectorBuilder,
 partition_ordinal_positions: Int64VectorBuilder,
 partition_expressions: StringVectorBuilder,
-create_times: TimestampMicrosecondVectorBuilder,
+create_times: DateTimeVectorBuilder,
 partition_ids: UInt64VectorBuilder,
 }
@@ -233,7 +220,7 @@ impl InformationSchemaPartitionsBuilder {
 partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
 partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
 partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
-create_times: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
+create_times: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
 partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
 }
 }
@@ -337,7 +324,7 @@ impl InformationSchemaPartitionsBuilder {
 };
 self.partition_expressions.push(expressions.as_deref());
-self.create_times.push(Some(TimestampMicrosecond::from(
+self.create_times.push(Some(DateTime::from(
 table_info.meta.created_on.timestamp_millis(),
 )));
 self.partition_ids.push(Some(partition.id.as_u64()));
@@ -355,8 +342,8 @@ impl InformationSchemaPartitionsBuilder {
 Arc::new(Int64Vector::from(vec![None])),
 rows_num,
 ));
-let null_timestampmicrosecond_vector = Arc::new(ConstantVector::new(
-Arc::new(TimestampMicrosecondVector::from(vec![None])),
+let null_datetime_vector = Arc::new(ConstantVector::new(
+Arc::new(DateTimeVector::from(vec![None])),
 rows_num,
 ));
 let partition_methods = Arc::new(ConstantVector::new(
@@ -386,8 +373,8 @@ impl InformationSchemaPartitionsBuilder {
 null_i64_vector.clone(),
 Arc::new(self.create_times.finish()),
 // TODO(dennis): supports update_time
-null_timestampmicrosecond_vector.clone(),
-null_timestampmicrosecond_vector,
+null_datetime_vector.clone(),
+null_datetime_vector,
 null_i64_vector,
 null_string_vector.clone(),
 null_string_vector.clone(),


@@ -30,8 +30,7 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
 use datatypes::value::Value;
 use datatypes::vectors::{
-    StringVectorBuilder, TimestampMicrosecondVectorBuilder, UInt32VectorBuilder,
-    UInt64VectorBuilder,
+    DateTimeVectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
 };
 use futures::TryStreamExt;
 use snafu::{OptionExt, ResultExt};
@@ -106,21 +105,9 @@ impl InformationSchemaTables {
            ColumnSchema::new(TABLE_ROWS, ConcreteDataType::uint64_datatype(), true),
            ColumnSchema::new(DATA_FREE, ConcreteDataType::uint64_datatype(), true),
            ColumnSchema::new(AUTO_INCREMENT, ConcreteDataType::uint64_datatype(), true),
-            ColumnSchema::new(
-                CREATE_TIME,
-                ConcreteDataType::timestamp_microsecond_datatype(),
-                true,
-            ),
-            ColumnSchema::new(
-                UPDATE_TIME,
-                ConcreteDataType::timestamp_microsecond_datatype(),
-                true,
-            ),
-            ColumnSchema::new(
-                CHECK_TIME,
-                ConcreteDataType::timestamp_microsecond_datatype(),
-                true,
-            ),
+            ColumnSchema::new(CREATE_TIME, ConcreteDataType::datetime_datatype(), true),
+            ColumnSchema::new(UPDATE_TIME, ConcreteDataType::datetime_datatype(), true),
+            ColumnSchema::new(CHECK_TIME, ConcreteDataType::datetime_datatype(), true),
            ColumnSchema::new(TABLE_COLLATION, ConcreteDataType::string_datatype(), true),
            ColumnSchema::new(CHECKSUM, ConcreteDataType::uint64_datatype(), true),
            ColumnSchema::new(CREATE_OPTIONS, ConcreteDataType::string_datatype(), true),
@@ -195,9 +182,9 @@ struct InformationSchemaTablesBuilder {
    max_index_length: UInt64VectorBuilder,
    data_free: UInt64VectorBuilder,
    auto_increment: UInt64VectorBuilder,
-    create_time: TimestampMicrosecondVectorBuilder,
-    update_time: TimestampMicrosecondVectorBuilder,
-    check_time: TimestampMicrosecondVectorBuilder,
+    create_time: DateTimeVectorBuilder,
+    update_time: DateTimeVectorBuilder,
+    check_time: DateTimeVectorBuilder,
    table_collation: StringVectorBuilder,
    checksum: UInt64VectorBuilder,
    create_options: StringVectorBuilder,
@@ -232,9 +219,9 @@ impl InformationSchemaTablesBuilder {
            max_index_length: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
            data_free: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
            auto_increment: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
-            create_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
-            update_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
-            check_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            create_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
+            update_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
+            check_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
            table_collation: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            checksum: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
            create_options: StringVectorBuilder::with_capacity(INIT_CAPACITY),

View File

@@ -51,10 +51,10 @@ pub fn bigint_column(name: &str) -> ColumnSchema {
    )
 }

-pub fn timestamp_micro_column(name: &str) -> ColumnSchema {
+pub fn datetime_column(name: &str) -> ColumnSchema {
    ColumnSchema::new(
        str::to_lowercase(name),
-        ConcreteDataType::timestamp_microsecond_datatype(),
+        ConcreteDataType::datetime_datatype(),
        false,
    )
 }
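
As a usage note, these helpers compose into test schemas; a hedged sketch (the lowercasing behaviour and ColumnSchema come from this file, Schema::new and the column names are assumptions):

    use datatypes::schema::Schema;

    // Column names are lowercased by the helper, so "Create_time" becomes "create_time".
    let schema = Schema::new(vec![
        datetime_column("Create_time"),
        bigint_column("Checksum"),
    ]);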

View File

@@ -6,7 +6,6 @@ license.workspace = true
 [features]
 pg_kvbackend = ["common-meta/pg_kvbackend"]
-mysql_kvbackend = ["common-meta/mysql_kvbackend"]

 [lints]
 workspace = true
@@ -44,10 +43,6 @@ futures.workspace = true
 humantime.workspace = true
 meta-client.workspace = true
 nu-ansi-term = "0.46"
-opendal = { version = "0.51.1", features = [
-    "services-fs",
-    "services-s3",
-] }
 query.workspace = true
 rand.workspace = true
 reqwest.workspace = true

View File

@@ -23,8 +23,6 @@ use common_error::ext::BoxedError;
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::etcd::EtcdStore;
 use common_meta::kv_backend::memory::MemoryKvBackend;
-#[cfg(feature = "mysql_kvbackend")]
-use common_meta::kv_backend::rds::MySqlStore;
 #[cfg(feature = "pg_kvbackend")]
 use common_meta::kv_backend::rds::PgStore;
 use common_meta::peer::Peer;
@@ -65,9 +63,6 @@ pub struct BenchTableMetadataCommand {
    #[cfg(feature = "pg_kvbackend")]
    #[clap(long)]
    postgres_addr: Option<String>,
-    #[cfg(feature = "mysql_kvbackend")]
-    #[clap(long)]
-    mysql_addr: Option<String>,
    #[clap(long)]
    count: u32,
 }
@@ -91,16 +86,6 @@ impl BenchTableMetadataCommand {
            kv_backend
        };

-        #[cfg(feature = "mysql_kvbackend")]
-        let kv_backend = if let Some(mysql_addr) = &self.mysql_addr {
-            info!("Using mysql as kv backend");
-            MySqlStore::with_url(mysql_addr, "greptime_metakv", 128)
-                .await
-                .unwrap()
-        } else {
-            kv_backend
-        };
-
        let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));

        let tool = BenchTableMetadata {

View File

@@ -276,24 +276,6 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
-
-    #[snafu(display("OpenDAL operator failed"))]
-    OpenDal {
-        #[snafu(implicit)]
-        location: Location,
-        #[snafu(source)]
-        error: opendal::Error,
-    },
-
-    #[snafu(display("S3 config need be set"))]
-    S3ConfigNotSet {
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Output directory not set"))]
-    OutputDirNotSet {
-        #[snafu(implicit)]
-        location: Location,
-    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -337,9 +319,6 @@ impl ErrorExt for Error {
            | Error::BuildClient { .. } => StatusCode::Unexpected,
            Error::Other { source, .. } => source.status_code(),
-            Error::OpenDal { .. } => StatusCode::Internal,
-            Error::S3ConfigNotSet { .. } => StatusCode::InvalidArguments,
-            Error::OutputDirNotSet { .. } => StatusCode::InvalidArguments,
            Error::BuildRuntime { source, .. } => source.status_code(),

View File

@@ -21,18 +21,15 @@ use async_trait::async_trait;
 use clap::{Parser, ValueEnum};
 use common_error::ext::BoxedError;
 use common_telemetry::{debug, error, info};
-use opendal::layers::LoggingLayer;
-use opendal::{services, Operator};
 use serde_json::Value;
 use snafu::{OptionExt, ResultExt};
+use tokio::fs::File;
+use tokio::io::{AsyncWriteExt, BufWriter};
 use tokio::sync::Semaphore;
 use tokio::time::Instant;

 use crate::database::{parse_proxy_opts, DatabaseClient};
-use crate::error::{
-    EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, S3ConfigNotSetSnafu,
-    SchemaNotFoundSnafu,
-};
+use crate::error::{EmptyResultSnafu, Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
 use crate::{database, Tool};

 type TableReference = (String, String, String);
@@ -55,9 +52,8 @@ pub struct ExportCommand {
    addr: String,

    /// Directory to put the exported data. E.g.: /tmp/greptimedb-export
-    /// for local export.
    #[clap(long)]
-    output_dir: Option<String>,
+    output_dir: String,

    /// The name of the catalog to export.
    #[clap(long, default_value = "greptime-*")]
@@ -105,51 +101,10 @@ pub struct ExportCommand {
    /// Disable proxy server, if set, will not use any proxy.
    #[clap(long)]
    no_proxy: bool,
-
-    /// if export data to s3
-    #[clap(long)]
-    s3: bool,
-
-    /// The s3 bucket name
-    /// if s3 is set, this is required
-    #[clap(long)]
-    s3_bucket: Option<String>,
-
-    /// The s3 endpoint
-    /// if s3 is set, this is required
-    #[clap(long)]
-    s3_endpoint: Option<String>,
-
-    /// The s3 access key
-    /// if s3 is set, this is required
-    #[clap(long)]
-    s3_access_key: Option<String>,
-
-    /// The s3 secret key
-    /// if s3 is set, this is required
-    #[clap(long)]
-    s3_secret_key: Option<String>,
-
-    /// The s3 region
-    /// if s3 is set, this is required
-    #[clap(long)]
-    s3_region: Option<String>,
 }

 impl ExportCommand {
    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
-        if self.s3
-            && (self.s3_bucket.is_none()
-                || self.s3_endpoint.is_none()
-                || self.s3_access_key.is_none()
-                || self.s3_secret_key.is_none()
-                || self.s3_region.is_none())
-        {
-            return Err(BoxedError::new(S3ConfigNotSetSnafu {}.build()));
-        }
-        if !self.s3 && self.output_dir.is_none() {
-            return Err(BoxedError::new(OutputDirNotSetSnafu {}.build()));
-        }
        let (catalog, schema) =
            database::split_database(&self.database).map_err(BoxedError::new)?;
        let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
@@ -171,43 +126,24 @@ impl ExportCommand {
            target: self.target.clone(),
            start_time: self.start_time.clone(),
            end_time: self.end_time.clone(),
-            s3: self.s3,
-            s3_bucket: self.s3_bucket.clone(),
-            s3_endpoint: self.s3_endpoint.clone(),
-            s3_access_key: self.s3_access_key.clone(),
-            s3_secret_key: self.s3_secret_key.clone(),
-            s3_region: self.s3_region.clone(),
        }))
    }
 }
-#[derive(Clone)]
 pub struct Export {
    catalog: String,
    schema: Option<String>,
    database_client: DatabaseClient,
-    output_dir: Option<String>,
+    output_dir: String,
    parallelism: usize,
    target: ExportTarget,
    start_time: Option<String>,
    end_time: Option<String>,
-    s3: bool,
-    s3_bucket: Option<String>,
-    s3_endpoint: Option<String>,
-    s3_access_key: Option<String>,
-    s3_secret_key: Option<String>,
-    s3_region: Option<String>,
 }

 impl Export {
    fn catalog_path(&self) -> PathBuf {
-        if self.s3 {
-            PathBuf::from(&self.catalog)
-        } else if let Some(dir) = &self.output_dir {
-            PathBuf::from(dir).join(&self.catalog)
-        } else {
-            unreachable!("catalog_path: output_dir must be set when not using s3")
-        }
+        PathBuf::from(&self.output_dir).join(&self.catalog)
    }

    async fn get_db_names(&self) -> Result<Vec<String>> {
@@ -364,23 +300,19 @@ impl Export {
        let timer = Instant::now();
        let db_names = self.get_db_names().await?;
        let db_count = db_names.len();
-        let operator = self.build_operator().await?;
        for schema in db_names {
+            let db_dir = self.catalog_path().join(format!("{schema}/"));
+            tokio::fs::create_dir_all(&db_dir)
+                .await
+                .context(FileIoSnafu)?;
+            let file = db_dir.join("create_database.sql");
+            let mut file = File::create(file).await.context(FileIoSnafu)?;
            let create_database = self
                .show_create("DATABASE", &self.catalog, &schema, None)
                .await?;
-            let file_path = self.get_file_path(&schema, "create_database.sql");
-            self.write_to_storage(&operator, &file_path, create_database.into_bytes())
-                .await?;
-            info!(
-                "Exported {}.{} database creation SQL to {}",
-                self.catalog,
-                schema,
-                self.format_output_path(&file_path)
-            );
+            file.write_all(create_database.as_bytes())
+                .await
+                .context(FileIoSnafu)?;
        }

        let elapsed = timer.elapsed();
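
Note: to make the branch-side layout concrete, a small standalone sketch of where the per-schema files land. The join logic mirrors catalog_path() and the file names used in these hunks; the directory values are examples.

    use std::path::PathBuf;

    fn schema_dir(output_dir: &str, catalog: &str, schema: &str) -> PathBuf {
        PathBuf::from(output_dir).join(catalog).join(format!("{schema}/"))
    }

    fn main() {
        let dir = schema_dir("/tmp/greptimedb-export", "greptime", "public");
        // Written per schema: create_database.sql, create_tables.sql and copy_from.sql,
        // next to the parquet data produced by COPY DATABASE ... TO.
        println!("{}", dir.join("create_database.sql").display());
    }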
@@ -394,267 +326,149 @@ impl Export {
let semaphore = Arc::new(Semaphore::new(self.parallelism)); let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.get_db_names().await?; let db_names = self.get_db_names().await?;
let db_count = db_names.len(); let db_count = db_names.len();
let operator = Arc::new(self.build_operator().await?);
let mut tasks = Vec::with_capacity(db_names.len()); let mut tasks = Vec::with_capacity(db_names.len());
for schema in db_names { for schema in db_names {
let semaphore_moved = semaphore.clone(); let semaphore_moved = semaphore.clone();
let export_self = self.clone();
let operator = operator.clone();
tasks.push(async move { tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap(); let _permit = semaphore_moved.acquire().await.unwrap();
let (metric_physical_tables, remaining_tables, views) = export_self let (metric_physical_tables, remaining_tables, views) =
.get_table_list(&export_self.catalog, &schema) self.get_table_list(&self.catalog, &schema).await?;
.await?; let table_count =
metric_physical_tables.len() + remaining_tables.len() + views.len();
// Create directory if needed for file system storage let db_dir = self.catalog_path().join(format!("{schema}/"));
if !export_self.s3 { tokio::fs::create_dir_all(&db_dir)
let db_dir = format!("{}/{}/", export_self.catalog, schema); .await
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?; .context(FileIoSnafu)?;
let file = db_dir.join("create_tables.sql");
let mut file = File::create(file).await.context(FileIoSnafu)?;
for (c, s, t) in metric_physical_tables.into_iter().chain(remaining_tables) {
let create_table = self.show_create("TABLE", &c, &s, Some(&t)).await?;
file.write_all(create_table.as_bytes())
.await
.context(FileIoSnafu)?;
} }
for (c, s, v) in views {
let file_path = export_self.get_file_path(&schema, "create_tables.sql"); let create_view = self.show_create("VIEW", &c, &s, Some(&v)).await?;
let mut content = Vec::new(); file.write_all(create_view.as_bytes())
.await
// Add table creation SQL .context(FileIoSnafu)?;
for (c, s, t) in metric_physical_tables.iter().chain(&remaining_tables) {
let create_table = export_self.show_create("TABLE", c, s, Some(t)).await?;
content.extend_from_slice(create_table.as_bytes());
} }
// Add view creation SQL
for (c, s, v) in &views {
let create_view = export_self.show_create("VIEW", c, s, Some(v)).await?;
content.extend_from_slice(create_view.as_bytes());
}
// Write to storage
export_self
.write_to_storage(&operator, &file_path, content)
.await?;
info!( info!(
"Finished exporting {}.{schema} with {} table schemas to path: {}", "Finished exporting {}.{schema} with {table_count} table schemas to path: {}",
export_self.catalog, self.catalog,
metric_physical_tables.len() + remaining_tables.len() + views.len(), db_dir.to_string_lossy()
export_self.format_output_path(&file_path)
); );
Ok::<(), Error>(()) Ok::<(), Error>(())
}); });
} }
let success = self.execute_tasks(tasks).await; let success = futures::future::join_all(tasks)
.await
.into_iter()
.filter(|r| match r {
Ok(_) => true,
Err(e) => {
error!(e; "export schema job failed");
false
}
})
.count();
let elapsed = timer.elapsed(); let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, cost: {elapsed:?}"); info!("Success {success}/{db_count} jobs, cost: {elapsed:?}");
Ok(()) Ok(())
} }
async fn build_operator(&self) -> Result<Operator> {
if self.s3 {
self.build_s3_operator().await
} else {
self.build_fs_operator().await
}
}
async fn build_s3_operator(&self) -> Result<Operator> {
let mut builder = services::S3::default().root("").bucket(
self.s3_bucket
.as_ref()
.expect("s3_bucket must be provided when s3 is enabled"),
);
if let Some(endpoint) = self.s3_endpoint.as_ref() {
builder = builder.endpoint(endpoint);
}
if let Some(region) = self.s3_region.as_ref() {
builder = builder.region(region);
}
if let Some(key_id) = self.s3_access_key.as_ref() {
builder = builder.access_key_id(key_id);
}
if let Some(secret_key) = self.s3_secret_key.as_ref() {
builder = builder.secret_access_key(secret_key);
}
let op = Operator::new(builder)
.context(OpenDalSnafu)?
.layer(LoggingLayer::default())
.finish();
Ok(op)
}
async fn build_fs_operator(&self) -> Result<Operator> {
let root = self
.output_dir
.as_ref()
.context(OutputDirNotSetSnafu)?
.clone();
let op = Operator::new(services::Fs::default().root(&root))
.context(OpenDalSnafu)?
.layer(LoggingLayer::default())
.finish();
Ok(op)
}
async fn export_database_data(&self) -> Result<()> { async fn export_database_data(&self) -> Result<()> {
let timer = Instant::now(); let timer = Instant::now();
let semaphore = Arc::new(Semaphore::new(self.parallelism)); let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.get_db_names().await?; let db_names = self.get_db_names().await?;
let db_count = db_names.len(); let db_count = db_names.len();
let mut tasks = Vec::with_capacity(db_count); let mut tasks = Vec::with_capacity(db_count);
let operator = Arc::new(self.build_operator().await?);
let with_options = build_with_options(&self.start_time, &self.end_time);
for schema in db_names { for schema in db_names {
let semaphore_moved = semaphore.clone(); let semaphore_moved = semaphore.clone();
let export_self = self.clone();
let with_options_clone = with_options.clone();
let operator = operator.clone();
tasks.push(async move { tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap(); let _permit = semaphore_moved.acquire().await.unwrap();
let db_dir = self.catalog_path().join(format!("{schema}/"));
tokio::fs::create_dir_all(&db_dir)
.await
.context(FileIoSnafu)?;
// Create directory if not using S3 let with_options = match (&self.start_time, &self.end_time) {
if !export_self.s3 { (Some(start_time), Some(end_time)) => {
let db_dir = format!("{}/{}/", export_self.catalog, schema); format!(
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?; "WITH (FORMAT='parquet', start_time='{}', end_time='{}')",
} start_time, end_time
)
}
(Some(start_time), None) => {
format!("WITH (FORMAT='parquet', start_time='{}')", start_time)
}
(None, Some(end_time)) => {
format!("WITH (FORMAT='parquet', end_time='{}')", end_time)
}
(None, None) => "WITH (FORMAT='parquet')".to_string(),
};
let (path, connection_part) = export_self.get_storage_params(&schema);
// Execute COPY DATABASE TO command
let sql = format!( let sql = format!(
r#"COPY DATABASE "{}"."{}" TO '{}' WITH ({}){};"#, r#"COPY DATABASE "{}"."{}" TO '{}' {};"#,
export_self.catalog, schema, path, with_options_clone, connection_part self.catalog,
);
info!("Executing sql: {sql}");
export_self.database_client.sql_in_public(&sql).await?;
info!(
"Finished exporting {}.{} data to {}",
export_self.catalog, schema, path
);
// Create copy_from.sql file
let copy_database_from_sql = format!(
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({}){};"#,
export_self.catalog, schema, path, with_options_clone, connection_part
);
let copy_from_path = export_self.get_file_path(&schema, "copy_from.sql");
export_self
.write_to_storage(
&operator,
&copy_from_path,
copy_database_from_sql.into_bytes(),
)
.await?;
info!(
"Finished exporting {}.{} copy_from.sql to {}",
export_self.catalog,
schema, schema,
export_self.format_output_path(&copy_from_path) db_dir.to_str().unwrap(),
with_options
); );
info!("Executing sql: {sql}");
self.database_client.sql_in_public(&sql).await?;
info!(
"Finished exporting {}.{schema} data into path: {}",
self.catalog,
db_dir.to_string_lossy()
);
// The export copy from sql
let copy_from_file = db_dir.join("copy_from.sql");
let mut writer =
BufWriter::new(File::create(copy_from_file).await.context(FileIoSnafu)?);
let copy_database_from_sql = format!(
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH (FORMAT='parquet');"#,
self.catalog,
schema,
db_dir.to_str().unwrap()
);
writer
.write(copy_database_from_sql.as_bytes())
.await
.context(FileIoSnafu)?;
writer.flush().await.context(FileIoSnafu)?;
info!("Finished exporting {}.{schema} copy_from.sql", self.catalog);
Ok::<(), Error>(()) Ok::<(), Error>(())
}); })
} }
let success = self.execute_tasks(tasks).await; let success = futures::future::join_all(tasks)
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
Ok(())
}
fn get_file_path(&self, schema: &str, file_name: &str) -> String {
format!("{}/{}/{}", self.catalog, schema, file_name)
}
fn format_output_path(&self, file_path: &str) -> String {
if self.s3 {
format!(
"s3://{}/{}",
self.s3_bucket.as_ref().unwrap_or(&String::new()),
file_path
)
} else {
format!(
"{}/{}",
self.output_dir.as_ref().unwrap_or(&String::new()),
file_path
)
}
}
async fn write_to_storage(
&self,
op: &Operator,
file_path: &str,
content: Vec<u8>,
) -> Result<()> {
op.write(file_path, content).await.context(OpenDalSnafu)
}
fn get_storage_params(&self, schema: &str) -> (String, String) {
if self.s3 {
let s3_path = format!(
"s3://{}/{}/{}/",
// Safety: s3_bucket is required when s3 is enabled
self.s3_bucket.as_ref().unwrap(),
self.catalog,
schema
);
// endpoint is optional
let endpoint_option = if let Some(endpoint) = self.s3_endpoint.as_ref() {
format!(", ENDPOINT='{}'", endpoint)
} else {
String::new()
};
// Safety: All s3 options are required
let connection_options = format!(
"ACCESS_KEY_ID='{}', SECRET_ACCESS_KEY='{}', REGION='{}'{}",
self.s3_access_key.as_ref().unwrap(),
self.s3_secret_key.as_ref().unwrap(),
self.s3_region.as_ref().unwrap(),
endpoint_option
);
(s3_path, format!(" CONNECTION ({})", connection_options))
} else {
(
self.catalog_path()
.join(format!("{schema}/"))
.to_string_lossy()
.to_string(),
String::new(),
)
}
}
async fn execute_tasks(
&self,
tasks: Vec<impl std::future::Future<Output = Result<()>>>,
) -> usize {
futures::future::join_all(tasks)
.await .await
.into_iter() .into_iter()
.filter(|r| match r { .filter(|r| match r {
Ok(_) => true, Ok(_) => true,
Err(e) => { Err(e) => {
error!(e; "export job failed"); error!(e; "export database job failed");
false false
} }
}) })
.count() .count();
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
Ok(())
} }
} }
@@ -679,15 +493,3 @@ impl Tool for Export {
} }
} }
} }
/// Builds the WITH options string for SQL commands, assuming consistent syntax across S3 and local exports.
fn build_with_options(start_time: &Option<String>, end_time: &Option<String>) -> String {
let mut options = vec!["format = 'parquet'".to_string()];
if let Some(start) = start_time {
options.push(format!("start_time = '{}'", start));
}
if let Some(end) = end_time {
options.push(format!("end_time = '{}'", end));
}
options.join(", ")
}
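
For reference, the base-side helper above produces a comma-joined option list that is spliced into the COPY DATABASE statement; a runnable sketch with illustrative inputs:

    fn build_with_options(start_time: &Option<String>, end_time: &Option<String>) -> String {
        let mut options = vec!["format = 'parquet'".to_string()];
        if let Some(start) = start_time {
            options.push(format!("start_time = '{}'", start));
        }
        if let Some(end) = end_time {
            options.push(format!("end_time = '{}'", end));
        }
        options.join(", ")
    }

    fn main() {
        // Prints: format = 'parquet', start_time = '2024-01-01 00:00:00'
        let with = build_with_options(&Some("2024-01-01 00:00:00".to_string()), &None);
        println!("COPY DATABASE \"greptime\".\"public\" TO '/tmp/export/' WITH ({});", with);
    }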

View File

@@ -17,7 +17,6 @@ api.workspace = true
arc-swap = "1.0" arc-swap = "1.0"
async-trait.workspace = true async-trait.workspace = true
bincode = "1.3" bincode = "1.3"
chrono.workspace = true
common-base.workspace = true common-base.workspace = true
common-catalog.workspace = true common-catalog.workspace = true
common-error.workspace = true common-error.workspace = true

View File

@@ -43,6 +43,7 @@ impl Function for DateFormatFunction {
        helper::one_of_sigs2(
            vec![
                ConcreteDataType::date_datatype(),
+                ConcreteDataType::datetime_datatype(),
                ConcreteDataType::timestamp_second_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype(),
                ConcreteDataType::timestamp_microsecond_datatype(),
@@ -104,6 +105,22 @@ impl Function for DateFormatFunction {
                    results.push(result.as_deref());
                }
            }
ConcreteDataType::DateTime(_) => {
for i in 0..size {
let datetime = left.get(i).as_datetime();
let format = formats.get(i).as_string();
let result = match (datetime, format) {
(Some(datetime), Some(fmt)) => datetime
.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?,
_ => None,
};
results.push(result.as_deref());
}
}
            _ => {
                return UnsupportedInputDataTypeSnafu {
                    function: NAME,
@@ -130,7 +147,7 @@ mod tests {
    use common_query::prelude::{TypeSignature, Volatility};
    use datatypes::prelude::{ConcreteDataType, ScalarVector};
    use datatypes::value::Value;
-    use datatypes::vectors::{DateVector, StringVector, TimestampSecondVector};
+    use datatypes::vectors::{DateTimeVector, DateVector, StringVector, TimestampSecondVector};

    use super::{DateFormatFunction, *};
@@ -152,11 +169,16 @@ mod tests {
            ConcreteDataType::string_datatype(),
            f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
        );
+        assert_eq!(
+            ConcreteDataType::string_datatype(),
+            f.return_type(&[ConcreteDataType::datetime_datatype()])
+                .unwrap()
+        );
        assert!(matches!(f.signature(),
                         Signature {
                             type_signature: TypeSignature::OneOf(sigs),
                             volatility: Volatility::Immutable
-                         } if sigs.len() == 5));
+                         } if sigs.len() == 6));
    }
    #[test]
@@ -240,4 +262,45 @@ mod tests {
            }
        }
    }
#[test]
fn test_datetime_date_format() {
let f = DateFormatFunction;
let dates = vec![Some(123), None, Some(42), None];
let formats = vec![
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
];
let results = [
Some("1970-01-01 00:00:00.123"),
None,
Some("1970-01-01 00:00:00.042"),
None,
];
let date_vector = DateTimeVector::from(dates.clone());
let interval_vector = StringVector::from_vec(formats);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::String(s) => {
assert_eq!(s.as_utf8(), result.unwrap());
}
_ => unreachable!(),
}
}
}
 }
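
A condensed sketch of invoking the function the way the new test does (assumes DateFormatFunction and FunctionContext are in scope as in this test module; values mirror the test above):

    use std::sync::Arc;

    use datatypes::vectors::{DateTimeVector, StringVector, VectorRef};

    let f = DateFormatFunction;
    // Millisecond datetimes since the epoch, formatted with a strftime-style pattern.
    let datetimes = DateTimeVector::from(vec![Some(123), Some(42)]);
    let formats = StringVector::from_vec(vec!["%Y-%m-%d %T.%3f", "%Y-%m-%d %T.%3f"]);
    let args: Vec<VectorRef> = vec![Arc::new(datetimes), Arc::new(formats)];
    // Yields "1970-01-01 00:00:00.123" and "1970-01-01 00:00:00.042".
    let formatted = f.eval(&FunctionContext::default(), &args).unwrap();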

View File

@@ -118,6 +118,11 @@ mod tests {
ConcreteDataType::date_datatype(), ConcreteDataType::date_datatype(),
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap() f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
); );
assert_eq!(
ConcreteDataType::datetime_datatype(),
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!( assert!(
matches!(f.signature(), matches!(f.signature(),
Signature { Signature {

View File

@@ -23,7 +23,7 @@ use datatypes::arrow::array::AsArray;
 use datatypes::arrow::compute::cast;
 use datatypes::arrow::compute::kernels::zip;
 use datatypes::arrow::datatypes::{
-    DataType as ArrowDataType, Date32Type, TimeUnit, TimestampMicrosecondType,
+    DataType as ArrowDataType, Date32Type, Date64Type, TimestampMicrosecondType,
    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
 };
 use datatypes::prelude::ConcreteDataType;
@@ -69,8 +69,9 @@ impl Function for GreatestFunction {
        );
        match &input_types[0] {
-            ConcreteDataType::String(_) => Ok(ConcreteDataType::timestamp_millisecond_datatype()),
+            ConcreteDataType::String(_) => Ok(ConcreteDataType::datetime_datatype()),
            ConcreteDataType::Date(_) => Ok(ConcreteDataType::date_datatype()),
+            ConcreteDataType::DateTime(_) => Ok(ConcreteDataType::datetime_datatype()),
            ConcreteDataType::Timestamp(ts_type) => Ok(ConcreteDataType::Timestamp(*ts_type)),
            _ => UnsupportedInputDataTypeSnafu {
                function: NAME,
@@ -86,6 +87,7 @@ impl Function for GreatestFunction {
            vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::date_datatype(),
+                ConcreteDataType::datetime_datatype(),
                ConcreteDataType::timestamp_nanosecond_datatype(),
                ConcreteDataType::timestamp_microsecond_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype(),
@@ -107,24 +109,20 @@ impl Function for GreatestFunction {
        );
        match columns[0].data_type() {
            ConcreteDataType::String(_) => {
-                let column1 = cast(
-                    &columns[0].to_arrow_array(),
-                    &ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
-                )
-                .context(ArrowComputeSnafu)?;
-                let column1 = column1.as_primitive::<TimestampMillisecondType>();
-                let column2 = cast(
-                    &columns[1].to_arrow_array(),
-                    &ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
-                )
-                .context(ArrowComputeSnafu)?;
-                let column2 = column2.as_primitive::<TimestampMillisecondType>();
+                // Treats string as `DateTime` type.
+                let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date64)
+                    .context(ArrowComputeSnafu)?;
+                let column1 = column1.as_primitive::<Date64Type>();
+                let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date64)
+                    .context(ArrowComputeSnafu)?;
+                let column2 = column2.as_primitive::<Date64Type>();
                let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
                let result =
                    zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
                Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
            }
            ConcreteDataType::Date(_) => gt_time_types!(Date32Type, columns),
+            ConcreteDataType::DateTime(_) => gt_time_types!(Date64Type, columns),
            ConcreteDataType::Timestamp(ts_type) => match ts_type {
                TimestampType::Second(_) => gt_time_types!(TimestampSecondType, columns),
                TimestampType::Millisecond(_) => {
@@ -157,15 +155,15 @@ mod tests {
use std::sync::Arc; use std::sync::Arc;
use common_time::timestamp::TimeUnit; use common_time::timestamp::TimeUnit;
use common_time::{Date, Timestamp}; use common_time::{Date, DateTime, Timestamp};
use datatypes::types::{ use datatypes::types::{
DateType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, DateTimeType, DateType, TimestampMicrosecondType, TimestampMillisecondType,
TimestampSecondType, TimestampNanosecondType, TimestampSecondType,
}; };
use datatypes::value::Value; use datatypes::value::Value;
use datatypes::vectors::{ use datatypes::vectors::{
DateVector, StringVector, TimestampMicrosecondVector, TimestampMillisecondVector, DateTimeVector, DateVector, StringVector, TimestampMicrosecondVector,
TimestampNanosecondVector, TimestampSecondVector, Vector, TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
}; };
use paste::paste; use paste::paste;
@@ -180,7 +178,7 @@ mod tests {
ConcreteDataType::string_datatype() ConcreteDataType::string_datatype()
]) ])
.unwrap(), .unwrap(),
ConcreteDataType::timestamp_millisecond_datatype() ConcreteDataType::DateTime(DateTimeType)
); );
let columns = vec![ let columns = vec![
Arc::new(StringVector::from(vec![ Arc::new(StringVector::from(vec![
@@ -196,18 +194,15 @@ mod tests {
let result = function let result = function
.eval(&FunctionContext::default(), &columns) .eval(&FunctionContext::default(), &columns)
.unwrap(); .unwrap();
let result = result let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
.as_any()
.downcast_ref::<TimestampMillisecondVector>()
.unwrap();
assert_eq!(result.len(), 2); assert_eq!(result.len(), 2);
assert_eq!( assert_eq!(
result.get(0), result.get(0),
Value::Timestamp(Timestamp::from_str("2001-02-01 00:00:00", None).unwrap()) Value::DateTime(DateTime::from_str("2001-02-01 00:00:00", None).unwrap())
); );
assert_eq!( assert_eq!(
result.get(1), result.get(1),
Value::Timestamp(Timestamp::from_str("2012-12-23 00:00:00", None).unwrap()) Value::DateTime(DateTime::from_str("2012-12-23 00:00:00", None).unwrap())
); );
} }
@@ -250,33 +245,30 @@ mod tests {
assert_eq!( assert_eq!(
function function
.return_type(&[ .return_type(&[
ConcreteDataType::timestamp_millisecond_datatype(), ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_millisecond_datatype() ConcreteDataType::datetime_datatype()
]) ])
.unwrap(), .unwrap(),
ConcreteDataType::timestamp_millisecond_datatype() ConcreteDataType::DateTime(DateTimeType)
); );
let columns = vec![ let columns = vec![
Arc::new(TimestampMillisecondVector::from_slice(vec![-1, 2])) as _, Arc::new(DateTimeVector::from_slice(vec![-1, 2])) as _,
Arc::new(TimestampMillisecondVector::from_slice(vec![0, 1])) as _, Arc::new(DateTimeVector::from_slice(vec![0, 1])) as _,
]; ];
let result = function let result = function
.eval(&FunctionContext::default(), &columns) .eval(&FunctionContext::default(), &columns)
.unwrap(); .unwrap();
let result = result let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
.as_any()
.downcast_ref::<TimestampMillisecondVector>()
.unwrap();
assert_eq!(result.len(), 2); assert_eq!(result.len(), 2);
assert_eq!( assert_eq!(
result.get(0), result.get(0),
Value::Timestamp(Timestamp::from_str("1970-01-01 00:00:00", None).unwrap()) Value::DateTime(DateTime::from_str("1970-01-01 00:00:00", None).unwrap())
); );
assert_eq!( assert_eq!(
result.get(1), result.get(1),
Value::Timestamp(Timestamp::from_str("1970-01-01 00:00:00.002", None).unwrap()) Value::DateTime(DateTime::from_str("1970-01-01 00:00:00.002", None).unwrap())
); );
} }

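A sketch of the row-wise comparison the tests above exercise, assuming GreatestFunction and FunctionContext are in scope as in this test module and that GreatestFunction is the unit struct constructed there:

    use std::sync::Arc;

    use datatypes::vectors::{DateTimeVector, VectorRef};

    let columns: Vec<VectorRef> = vec![
        Arc::new(DateTimeVector::from_slice(vec![-1, 2])),
        Arc::new(DateTimeVector::from_slice(vec![0, 1])),
    ];
    // Row-wise maximum in millisecond datetimes: [max(-1, 0), max(2, 1)],
    // i.e. 1970-01-01 00:00:00 and 1970-01-01 00:00:00.002.
    let result = GreatestFunction.eval(&FunctionContext::default(), &columns).unwrap();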
View File

@@ -17,7 +17,7 @@ use std::sync::Arc;
 use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
 use common_query::prelude::{Signature, Volatility};
-use common_time::{Date, Timestamp};
+use common_time::{Date, DateTime, Timestamp};
 use datatypes::prelude::ConcreteDataType;
 use datatypes::vectors::{Int64Vector, VectorRef};
 use snafu::ensure;
@@ -32,6 +32,10 @@ const NAME: &str = "to_unixtime";
 fn convert_to_seconds(arg: &str, func_ctx: &FunctionContext) -> Option<i64> {
    let timezone = &func_ctx.query_ctx.timezone();
+    if let Ok(dt) = DateTime::from_str(arg, Some(timezone)) {
+        return Some(dt.val() / 1000);
+    }
    if let Ok(ts) = Timestamp::from_str(arg, Some(timezone)) {
        return Some(ts.split().0);
    }
@@ -55,6 +59,12 @@ fn convert_dates_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
.collect::<Vec<Option<i64>>>() .collect::<Vec<Option<i64>>>()
} }
fn convert_datetimes_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
(0..vector.len())
.map(|i| vector.get(i).as_datetime().map(|dt| dt.val() / 1000))
.collect::<Vec<Option<i64>>>()
}
impl Function for ToUnixtimeFunction { impl Function for ToUnixtimeFunction {
fn name(&self) -> &str { fn name(&self) -> &str {
NAME NAME
@@ -72,6 +82,7 @@ impl Function for ToUnixtimeFunction {
ConcreteDataType::int32_datatype(), ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(), ConcreteDataType::int64_datatype(),
ConcreteDataType::date_datatype(), ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(), ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(), ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(), ConcreteDataType::timestamp_microsecond_datatype(),
@@ -108,6 +119,10 @@ impl Function for ToUnixtimeFunction {
let seconds = convert_dates_to_seconds(vector); let seconds = convert_dates_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds))) Ok(Arc::new(Int64Vector::from(seconds)))
} }
ConcreteDataType::DateTime(_) => {
let seconds = convert_datetimes_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
}
ConcreteDataType::Timestamp(_) => { ConcreteDataType::Timestamp(_) => {
let seconds = convert_timestamps_to_seconds(vector); let seconds = convert_timestamps_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds))) Ok(Arc::new(Int64Vector::from(seconds)))
@@ -133,7 +148,7 @@ mod tests {
use datatypes::prelude::ConcreteDataType; use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value; use datatypes::value::Value;
use datatypes::vectors::{ use datatypes::vectors::{
DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector, DateTimeVector, DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
}; };
use super::{ToUnixtimeFunction, *}; use super::{ToUnixtimeFunction, *};
@@ -156,6 +171,7 @@ mod tests {
ConcreteDataType::int32_datatype(), ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(), ConcreteDataType::int64_datatype(),
ConcreteDataType::date_datatype(), ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(), ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(), ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(), ConcreteDataType::timestamp_microsecond_datatype(),
@@ -237,6 +253,31 @@ mod tests {
} }
} }
#[test]
fn test_datetime_to_unixtime() {
let f = ToUnixtimeFunction;
let times = vec![Some(123000), None, Some(42000), None];
let results = [Some(123), None, Some(42), None];
let date_vector = DateTimeVector::from(times.clone());
let args: Vec<VectorRef> = vec![Arc::new(date_vector)];
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
let v = vector.get(i);
if i == 1 || i == 3 {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::Int64(ts) => {
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
}
_ => unreachable!(),
}
}
}
#[test] #[test]
fn test_timestamp_to_unixtime() { fn test_timestamp_to_unixtime() {
let f = ToUnixtimeFunction; let f = ToUnixtimeFunction;

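A condensed usage sketch mirroring test_datetime_to_unixtime above (assumes ToUnixtimeFunction and FunctionContext are in scope as in this test module):

    use std::sync::Arc;

    use datatypes::vectors::{DateTimeVector, VectorRef};

    let f = ToUnixtimeFunction;
    // Millisecond datetimes are truncated to whole seconds: 123000 ms -> 123 s.
    let args: Vec<VectorRef> = vec![Arc::new(DateTimeVector::from(vec![Some(123000), None]))];
    let out = f.eval(&FunctionContext::default(), &args).unwrap();
    // out.get(0) == Value::Int64(123), out.get(1) == Value::Null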
View File

@@ -17,8 +17,8 @@ use api::v1::column::Values;
 use common_base::BitVec;
 use datatypes::types::{IntervalType, TimeType, TimestampType, WrapperType};
 use datatypes::vectors::{
-    BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
-    Int16Vector, Int32Vector, Int64Vector, Int8Vector, IntervalDayTimeVector,
+    BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
+    Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, IntervalDayTimeVector,
    IntervalMonthDayNanoVector, IntervalYearMonthVector, StringVector, TimeMicrosecondVector,
    TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
    TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
@@ -141,6 +141,12 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
        (ConcreteDataType::Date(_), DateVector, date_values, |x| {
            x.val()
        }),
+        (
+            ConcreteDataType::DateTime(_),
+            DateTimeVector,
+            datetime_values,
+            |x| { x.val() }
+        ),
        (
            ConcreteDataType::Timestamp(TimestampType::Second(_)),
            TimestampSecondVector,

View File

@@ -18,13 +18,11 @@ mod print_caller;
 mod range_fn;
 mod stack_trace_debug;
 mod utils;

 use aggr_func::{impl_aggr_func_type_store, impl_as_aggr_func_creator};
 use print_caller::process_print_caller;
 use proc_macro::TokenStream;
-use quote::quote;
 use range_fn::process_range_fn;
-use syn::{parse_macro_input, Data, DeriveInput, Fields};
+use syn::{parse_macro_input, DeriveInput};

 use crate::admin_fn::process_admin_fn;
@@ -138,51 +136,3 @@ pub fn print_caller(args: TokenStream, input: TokenStream) -> TokenStream {
 pub fn stack_trace_debug(args: TokenStream, input: TokenStream) -> TokenStream {
    stack_trace_debug::stack_trace_style_impl(args.into(), input.into()).into()
 }
/// Generates implementation for `From<&TableMeta> for TableMetaBuilder`
#[proc_macro_derive(ToMetaBuilder)]
pub fn derive_meta_builder(input: TokenStream) -> TokenStream {
let input = parse_macro_input!(input as DeriveInput);
let Data::Struct(data_struct) = input.data else {
panic!("ToMetaBuilder can only be derived for structs");
};
let Fields::Named(fields) = data_struct.fields else {
panic!("ToMetaBuilder can only be derived for structs with named fields");
};
// Check that this is being applied to TableMeta struct
if input.ident != "TableMeta" {
panic!("ToMetaBuilder can only be derived for TableMeta struct");
}
let field_init = fields.named.iter().map(|field| {
let field_name = field.ident.as_ref().unwrap();
quote! {
#field_name: Default::default(),
}
});
let field_assignments = fields.named.iter().map(|field| {
let field_name = field.ident.as_ref().unwrap();
quote! {
builder.#field_name(meta.#field_name.clone());
}
});
let gen = quote! {
impl From<&TableMeta> for TableMetaBuilder {
fn from(meta: &TableMeta) -> Self {
let mut builder = Self {
#(#field_init)*
};
#(#field_assignments)*
builder
}
}
};
gen.into()
}
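
For context on the removed derive: it generated From<&TableMeta> for TableMetaBuilder, so call sites could seed a builder from an existing meta and override single fields. A hedged usage sketch (the engine setter is the one used elsewhere in this diff; existing_meta is hypothetical):

    // Generated by #[derive(ToMetaBuilder)] on TableMeta:
    // impl From<&TableMeta> for TableMetaBuilder { ... }
    let mut builder = TableMetaBuilder::from(&existing_meta);
    builder.engine("engine");
    // ...then finish with the builder's build() as usual.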

View File

@@ -7,7 +7,6 @@ license.workspace = true
 [features]
 testing = []
 pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]
-mysql_kvbackend = ["dep:sqlx", "dep:backon"]

 [lints]
 workspace = true
@@ -58,10 +57,9 @@ serde_json.workspace = true
 serde_with.workspace = true
 session.workspace = true
 snafu.workspace = true
-sqlx = { workspace = true, optional = true }
 store-api.workspace = true
 strum.workspace = true
-table = { workspace = true, features = ["testing"] }
+table.workspace = true
 tokio.workspace = true
 tokio-postgres = { workspace = true, optional = true }
 tonic.workspace = true

View File

@@ -192,8 +192,6 @@ mod tests {
expire_after: Some(300), expire_after: Some(300),
comment: "comment".to_string(), comment: "comment".to_string(),
options: Default::default(), options: Default::default(),
created_time: chrono::Utc::now(),
updated_time: chrono::Utc::now(),
}, },
(1..=3) (1..=3)
.map(|i| { .map(|i| {

View File

@@ -425,14 +425,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
        let flow_type = value.flow_type.unwrap_or_default().to_string();
        options.insert("flow_type".to_string(), flow_type);

-        let mut create_time = chrono::Utc::now();
-        if let Some(prev_flow_value) = value.prev_flow_info_value.as_ref()
-            && value.task.or_replace
-        {
-            create_time = prev_flow_value.get_inner_ref().created_time;
-        }
-        let flow_info: FlowInfoValue = FlowInfoValue {
+        let flow_info = FlowInfoValue {
            source_table_ids: value.source_table_ids.clone(),
            sink_table_name,
            flownode_ids,
@@ -442,8 +435,6 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
            expire_after,
            comment,
            options,
-            created_time: create_time,
-            updated_time: chrono::Utc::now(),
        };

        (flow_info, flow_routes)
View File

@@ -685,36 +685,7 @@ pub enum Error {
        operation: String,
    },

-    #[cfg(feature = "mysql_kvbackend")]
-    #[snafu(display("Failed to execute via MySql, sql: {}", sql))]
-    MySqlExecution {
-        sql: String,
-        #[snafu(source)]
-        error: sqlx::Error,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[cfg(feature = "mysql_kvbackend")]
-    #[snafu(display("Failed to create connection pool for MySql"))]
-    CreateMySqlPool {
-        #[snafu(source)]
-        error: sqlx::Error,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[cfg(feature = "mysql_kvbackend")]
-    #[snafu(display("Failed to {} MySql transaction", operation))]
-    MySqlTransaction {
-        #[snafu(source)]
-        error: sqlx::Error,
-        #[snafu(implicit)]
-        location: Location,
-        operation: String,
-    },
-
-    #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
+    #[cfg(feature = "pg_kvbackend")]
    #[snafu(display("Rds transaction retry failed"))]
    RdsTransactionRetryFailed {
        #[snafu(implicit)]
@@ -852,13 +823,8 @@ impl ErrorExt for Error {
            PostgresExecution { .. }
            | CreatePostgresPool { .. }
            | GetPostgresConnection { .. }
-            | PostgresTransaction { .. } => StatusCode::Internal,
-            #[cfg(feature = "mysql_kvbackend")]
-            MySqlExecution { .. } | CreateMySqlPool { .. } | MySqlTransaction { .. } => {
-                StatusCode::Internal
-            }
-            #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
-            RdsTransactionRetryFailed { .. } => StatusCode::Internal,
+            | PostgresTransaction { .. }
+            | RdsTransactionRetryFailed { .. } => StatusCode::Internal,
            Error::DatanodeTableInfoNotFound { .. } => StatusCode::Internal,
        }
    }
@@ -869,29 +835,16 @@ impl ErrorExt for Error {
 }

 impl Error {
-    #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
+    #[cfg(feature = "pg_kvbackend")]
    /// Check if the error is a serialization error.
    pub fn is_serialization_error(&self) -> bool {
        match self {
-            #[cfg(feature = "pg_kvbackend")]
            Error::PostgresTransaction { error, .. } => {
                error.code() == Some(&tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE)
            }
-            #[cfg(feature = "pg_kvbackend")]
            Error::PostgresExecution { error, .. } => {
                error.code() == Some(&tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE)
            }
-            #[cfg(feature = "mysql_kvbackend")]
-            Error::MySqlExecution {
-                error: sqlx::Error::Database(database_error),
-                ..
-            } => {
-                matches!(
-                    database_error.message(),
-                    "Deadlock found when trying to get lock; try restarting transaction"
-                        | "can't serialize access for this transaction"
-                )
-            }
            _ => false,
        }
    }
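
The helper above exists so RDS-backed transactions can be retried only on serialization conflicts; a hedged sketch of that retry loop (do_txn is a hypothetical async closure, RDS_STORE_TXN_RETRY_COUNT is the constant defined in the rds module shown later in this diff):

    let mut last_err = None;
    for _ in 0..RDS_STORE_TXN_RETRY_COUNT {
        match do_txn().await {
            Ok(resp) => return Ok(resp),
            Err(e) if e.is_serialization_error() => {
                // Conflict with a concurrent transaction: safe to retry.
                last_err = Some(e);
            }
            Err(e) => return Err(e),
        }
    }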

View File

@@ -461,8 +461,6 @@ mod tests {
expire_after: Some(300), expire_after: Some(300),
comment: "hi".to_string(), comment: "hi".to_string(),
options: Default::default(), options: Default::default(),
created_time: chrono::Utc::now(),
updated_time: chrono::Utc::now(),
} }
} }
@@ -634,8 +632,6 @@ mod tests {
expire_after: Some(300), expire_after: Some(300),
comment: "hi".to_string(), comment: "hi".to_string(),
options: Default::default(), options: Default::default(),
created_time: chrono::Utc::now(),
updated_time: chrono::Utc::now(),
}; };
let err = flow_metadata_manager let err = flow_metadata_manager
.create_flow_metadata(flow_id, flow_value, flow_routes.clone()) .create_flow_metadata(flow_id, flow_value, flow_routes.clone())
@@ -873,8 +869,6 @@ mod tests {
expire_after: Some(300), expire_after: Some(300),
comment: "hi".to_string(), comment: "hi".to_string(),
options: Default::default(), options: Default::default(),
created_time: chrono::Utc::now(),
updated_time: chrono::Utc::now(),
}; };
let err = flow_metadata_manager let err = flow_metadata_manager
.update_flow_metadata( .update_flow_metadata(

View File

@@ -15,7 +15,6 @@
use std::collections::{BTreeMap, HashMap}; use std::collections::{BTreeMap, HashMap};
use std::sync::Arc; use std::sync::Arc;
use chrono::{DateTime, Utc};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use regex::Regex; use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -132,12 +131,6 @@ pub struct FlowInfoValue {
pub(crate) comment: String, pub(crate) comment: String,
/// The options. /// The options.
pub(crate) options: HashMap<String, String>, pub(crate) options: HashMap<String, String>,
/// The created time
#[serde(default)]
pub(crate) created_time: DateTime<Utc>,
/// The updated time.
#[serde(default)]
pub(crate) updated_time: DateTime<Utc>,
} }
impl FlowInfoValue { impl FlowInfoValue {
@@ -178,14 +171,6 @@ impl FlowInfoValue {
pub fn options(&self) -> &HashMap<String, String> { pub fn options(&self) -> &HashMap<String, String> {
&self.options &self.options
} }
pub fn created_time(&self) -> &DateTime<Utc> {
&self.created_time
}
pub fn updated_time(&self) -> &DateTime<Utc> {
&self.updated_time
}
} }
pub type FlowInfoManagerRef = Arc<FlowInfoManager>; pub type FlowInfoManagerRef = Arc<FlowInfoManager>;

View File

@@ -97,19 +97,11 @@ impl<'a> MetadataKey<'a, FlowStateKey> for FlowStateKey {
pub struct FlowStateValue { pub struct FlowStateValue {
/// For each key, the bytes of the state in memory /// For each key, the bytes of the state in memory
pub state_size: BTreeMap<FlowId, usize>, pub state_size: BTreeMap<FlowId, usize>,
/// For each key, the last execution time of flow in unix timestamp milliseconds.
pub last_exec_time_map: BTreeMap<FlowId, i64>,
} }
impl FlowStateValue { impl FlowStateValue {
pub fn new( pub fn new(state_size: BTreeMap<FlowId, usize>) -> Self {
state_size: BTreeMap<FlowId, usize>, Self { state_size }
last_exec_time_map: BTreeMap<FlowId, i64>,
) -> Self {
Self {
state_size,
last_exec_time_map,
}
} }
} }
@@ -151,15 +143,12 @@ impl FlowStateManager {
pub struct FlowStat { pub struct FlowStat {
/// For each key, the bytes of the state in memory /// For each key, the bytes of the state in memory
pub state_size: BTreeMap<u32, usize>, pub state_size: BTreeMap<u32, usize>,
/// For each key, the last execution time of flow in unix timestamp milliseconds.
pub last_exec_time_map: BTreeMap<FlowId, i64>,
} }
impl From<FlowStateValue> for FlowStat { impl From<FlowStateValue> for FlowStat {
fn from(value: FlowStateValue) -> Self { fn from(value: FlowStateValue) -> Self {
Self { Self {
state_size: value.state_size, state_size: value.state_size,
last_exec_time_map: value.last_exec_time_map,
} }
} }
} }
@@ -168,7 +157,6 @@ impl From<FlowStat> for FlowStateValue {
fn from(value: FlowStat) -> Self { fn from(value: FlowStat) -> Self {
Self { Self {
state_size: value.state_size, state_size: value.state_size,
last_exec_time_map: value.last_exec_time_map,
} }
} }
} }

View File

@@ -40,7 +40,7 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
        .build()
        .unwrap();
-    let meta = TableMetaBuilder::empty()
+    let meta = TableMetaBuilder::default()
        .schema(Arc::new(schema))
        .primary_key_indices(vec![0])
        .engine("engine")

View File

@@ -31,7 +31,7 @@ use crate::rpc::KeyValue;
 pub mod chroot;
 pub mod etcd;
 pub mod memory;
-#[cfg(any(feature = "mysql_kvbackend", feature = "pg_kvbackend"))]
+#[cfg(feature = "pg_kvbackend")]
 pub mod rds;
 pub mod test;
 pub mod txn;

View File

@@ -14,11 +14,13 @@
 use std::any::Any;
 use std::collections::BTreeMap;
+use std::fmt::{Display, Formatter};
 use std::marker::PhantomData;
 use std::sync::{Arc, RwLock};

 use async_trait::async_trait;
 use common_error::ext::ErrorExt;
+use serde::Serializer;

 use super::{KvBackendRef, ResettableKvBackend};
 use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse};
@@ -36,6 +38,19 @@ pub struct MemoryKvBackend<T> {
    _phantom: PhantomData<T>,
 }

+impl<T> Display for MemoryKvBackend<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let kvs = self.kvs.read().unwrap();
+        for (k, v) in kvs.iter() {
+            f.serialize_str(&String::from_utf8_lossy(k))?;
+            f.serialize_str(" -> ")?;
+            f.serialize_str(&String::from_utf8_lossy(v))?;
+            f.serialize_str("\n")?;
+        }
+        Ok(())
+    }
+}
+
 impl<T> Default for MemoryKvBackend<T> {
    fn default() -> Self {
        Self {
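
The Display impl added here turns the in-memory store into a line-per-entry dump, which is handy in tests; a small sketch of the intended use (no bound on T is required by the impl above):

    fn dump_backend<T>(backend: &MemoryKvBackend<T>) {
        // Prints one "key -> value" line per entry, in key order.
        println!("{backend}");
    }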

View File

@@ -33,15 +33,9 @@ use crate::rpc::store::{
 };
 use crate::rpc::KeyValue;

-#[cfg(feature = "pg_kvbackend")]
 mod postgres;
-#[cfg(feature = "pg_kvbackend")]
-pub use postgres::PgStore;
-
-#[cfg(feature = "mysql_kvbackend")]
-mod mysql;
-#[cfg(feature = "mysql_kvbackend")]
-pub use mysql::MySqlStore;
+pub use postgres::PgStore;

 const RDS_STORE_TXN_RETRY_COUNT: usize = 3;
@@ -112,14 +106,6 @@ impl<T: Executor> ExecutorImpl<'_, T> {
        }
    }

-    #[warn(dead_code)] // Used in #[cfg(feature = "mysql_kvbackend")]
-    async fn execute(&mut self, query: &str, params: &Vec<&Vec<u8>>) -> Result<()> {
-        match self {
-            Self::Default(executor) => executor.execute(query, params).await,
-            Self::Txn(executor) => executor.execute(query, params).await,
-        }
-    }
-
    async fn commit(self) -> Result<()> {
        match self {
            Self::Txn(executor) => executor.commit().await,


@@ -1,650 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_telemetry::debug;
use snafu::ResultExt;
use sqlx::mysql::MySqlRow;
use sqlx::pool::Pool;
use sqlx::{MySql, MySqlPool, Row, Transaction as MySqlTransaction};
use crate::error::{CreateMySqlPoolSnafu, MySqlExecutionSnafu, MySqlTransactionSnafu, Result};
use crate::kv_backend::rds::{
Executor, ExecutorFactory, ExecutorImpl, KvQueryExecutor, RdsStore, Transaction,
RDS_STORE_TXN_RETRY_COUNT,
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;
type MySqlClient = Arc<Pool<MySql>>;
pub struct MySqlTxnClient(MySqlTransaction<'static, MySql>);
fn key_value_from_row(row: MySqlRow) -> KeyValue {
// Safety: key and value are the first two columns in the row
KeyValue {
key: row.get_unchecked(0),
value: row.get_unchecked(1),
}
}
const EMPTY: &[u8] = &[0];
/// Type of range template.
#[derive(Debug, Clone, Copy)]
enum RangeTemplateType {
Point,
Range,
Full,
LeftBounded,
Prefix,
}
/// Builds params for the given range template type.
impl RangeTemplateType {
fn build_params(&self, mut key: Vec<u8>, range_end: Vec<u8>) -> Vec<Vec<u8>> {
match self {
RangeTemplateType::Point => vec![key],
RangeTemplateType::Range => vec![key, range_end],
RangeTemplateType::Full => vec![],
RangeTemplateType::LeftBounded => vec![key],
RangeTemplateType::Prefix => {
key.push(b'%');
vec![key]
}
}
}
}
/// Templates for range request.
#[derive(Debug, Clone)]
struct RangeTemplate {
point: String,
range: String,
full: String,
left_bounded: String,
prefix: String,
}
impl RangeTemplate {
/// Gets the template for the given type.
fn get(&self, typ: RangeTemplateType) -> &str {
match typ {
RangeTemplateType::Point => &self.point,
RangeTemplateType::Range => &self.range,
RangeTemplateType::Full => &self.full,
RangeTemplateType::LeftBounded => &self.left_bounded,
RangeTemplateType::Prefix => &self.prefix,
}
}
/// Adds limit to the template.
fn with_limit(template: &str, limit: i64) -> String {
if limit == 0 {
return format!("{};", template);
}
format!("{} LIMIT {};", template, limit)
}
}
fn is_prefix_range(start: &[u8], end: &[u8]) -> bool {
if start.len() != end.len() {
return false;
}
let l = start.len();
let same_prefix = start[0..l - 1] == end[0..l - 1];
if let (Some(rhs), Some(lhs)) = (start.last(), end.last()) {
return same_prefix && (*rhs + 1) == *lhs;
}
false
}
/// Determine the template type for range request.
fn range_template(key: &[u8], range_end: &[u8]) -> RangeTemplateType {
match (key, range_end) {
(_, &[]) => RangeTemplateType::Point,
(EMPTY, EMPTY) => RangeTemplateType::Full,
(_, EMPTY) => RangeTemplateType::LeftBounded,
(start, end) => {
if is_prefix_range(start, end) {
RangeTemplateType::Prefix
} else {
RangeTemplateType::Range
}
}
}
}
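To make the selection above concrete, here is a sketch of how `range_template` classifies a few `(key, range_end)` pairs, written as a hypothetical in-module test (the byte values are made up for illustration):

#[test]
fn range_template_classification_sketch() {
    // Point get: range_end is empty.
    assert!(matches!(range_template(b"a/key", &[]), RangeTemplateType::Point));
    // Full scan: both key and range_end are EMPTY (the single zero byte).
    assert!(matches!(range_template(&[0], &[0]), RangeTemplateType::Full));
    // Left-bounded scan: only range_end is EMPTY.
    assert!(matches!(range_template(b"a/", &[0]), RangeTemplateType::LeftBounded));
    // Prefix scan: range_end is the key with its last byte incremented.
    assert!(matches!(range_template(b"a/", b"a0"), RangeTemplateType::Prefix));
    // Everything else falls back to an explicit range.
    assert!(matches!(range_template(b"a", b"z"), RangeTemplateType::Range));
}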
/// Generate in placeholders for MySQL.
fn mysql_generate_in_placeholders(from: usize, to: usize) -> Vec<String> {
(from..=to).map(|_| "?".to_string()).collect()
}
/// Factory for building sql templates.
struct MySqlTemplateFactory<'a> {
table_name: &'a str,
}
impl<'a> MySqlTemplateFactory<'a> {
/// Creates a new [`MySqlTemplateFactory`] with the given table name.
fn new(table_name: &'a str) -> Self {
Self { table_name }
}
/// Builds the template set for the given table name.
fn build(&self) -> MySqlTemplateSet {
let table_name = self.table_name;
// Some queries don't end with `;`, because we need to append a `LIMIT` clause later.
MySqlTemplateSet {
table_name: table_name.to_string(),
create_table_statement: format!(
// Cannot be more than 3072 bytes in PRIMARY KEY
"CREATE TABLE IF NOT EXISTS {table_name}(k VARBINARY(3072) PRIMARY KEY, v BLOB);",
),
range_template: RangeTemplate {
point: format!("SELECT k, v FROM {table_name} WHERE k = ?"),
range: format!("SELECT k, v FROM {table_name} WHERE k >= ? AND k < ? ORDER BY k"),
full: format!("SELECT k, v FROM {table_name} ? ORDER BY k"),
left_bounded: format!("SELECT k, v FROM {table_name} WHERE k >= ? ORDER BY k"),
prefix: format!("SELECT k, v FROM {table_name} WHERE k LIKE ? ORDER BY k"),
},
delete_template: RangeTemplate {
point: format!("DELETE FROM {table_name} WHERE k = ?;"),
range: format!("DELETE FROM {table_name} WHERE k >= ? AND k < ?;"),
full: format!("DELETE FROM {table_name}"),
left_bounded: format!("DELETE FROM {table_name} WHERE k >= ?;"),
prefix: format!("DELETE FROM {table_name} WHERE k LIKE ?;"),
},
}
}
}
/// Templates for the given table name.
#[derive(Debug, Clone)]
pub struct MySqlTemplateSet {
table_name: String,
create_table_statement: String,
range_template: RangeTemplate,
delete_template: RangeTemplate,
}
impl MySqlTemplateSet {
/// Generates the sql for batch get.
fn generate_batch_get_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = mysql_generate_in_placeholders(1, key_len).join(", ");
format!("SELECT k, v FROM {table_name} WHERE k in ({});", in_clause)
}
/// Generates the sql for batch delete.
fn generate_batch_delete_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = mysql_generate_in_placeholders(1, key_len).join(", ");
format!("DELETE FROM {table_name} WHERE k in ({});", in_clause)
}
/// Generates the sql for batch upsert.
/// For MySQL, it also generates a select query to get the previous values.
fn generate_batch_upsert_query(&self, kv_len: usize) -> (String, String) {
let table_name = &self.table_name;
let in_placeholders: Vec<String> = (1..=kv_len).map(|_| "?".to_string()).collect();
let in_clause = in_placeholders.join(", ");
let mut values_placeholders = Vec::new();
for _ in 0..kv_len {
values_placeholders.push("(?, ?)".to_string());
}
let values_clause = values_placeholders.join(", ");
(
format!(r#"SELECT k, v FROM {table_name} WHERE k IN ({in_clause})"#,),
format!(
r#"INSERT INTO {table_name} (k, v) VALUES {values_clause} ON DUPLICATE KEY UPDATE v = VALUES(v);"#,
),
)
}
}
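For a concrete picture of the SQL these builders emit, here is a hypothetical in-module check, assuming the `greptime_metakv` table name used elsewhere in this changeset; the strings follow directly from the format! calls above:

#[test]
fn mysql_template_output_sketch() {
    let templates = MySqlTemplateFactory::new("greptime_metakv").build();
    // Batch get of two keys uses an IN list of placeholders.
    assert_eq!(
        templates.generate_batch_get_query(2),
        "SELECT k, v FROM greptime_metakv WHERE k in (?, ?);"
    );
    // Batch upsert returns a (select, update) pair so previous values can be fetched first.
    let (select, update) = templates.generate_batch_upsert_query(2);
    assert_eq!(select, "SELECT k, v FROM greptime_metakv WHERE k IN (?, ?)");
    assert_eq!(
        update,
        "INSERT INTO greptime_metakv (k, v) VALUES (?, ?), (?, ?) ON DUPLICATE KEY UPDATE v = VALUES(v);"
    );
}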
#[async_trait::async_trait]
impl Executor for MySqlClient {
type Transaction<'a>
= MySqlTxnClient
where
Self: 'a;
fn name() -> &'static str {
"MySql"
}
async fn query(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
let query = sqlx::query(raw_query);
let query = params.iter().fold(query, |query, param| query.bind(param));
let rows = query
.fetch_all(&**self)
.await
.context(MySqlExecutionSnafu { sql: raw_query })?;
Ok(rows.into_iter().map(key_value_from_row).collect())
}
async fn execute(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<()> {
let query = sqlx::query(raw_query);
let query = params.iter().fold(query, |query, param| query.bind(param));
query
.execute(&**self)
.await
.context(MySqlExecutionSnafu { sql: raw_query })?;
Ok(())
}
async fn txn_executor<'a>(&'a mut self) -> Result<Self::Transaction<'a>> {
// sqlx has no isolation level support for now, so we have to set it manually.
// TODO(CookiePie): Waiting for https://github.com/launchbadge/sqlx/pull/3614 and remove this.
sqlx::query("SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE")
.execute(&**self)
.await
.context(MySqlExecutionSnafu {
sql: "SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE",
})?;
let txn = self
.begin()
.await
.context(MySqlExecutionSnafu { sql: "begin" })?;
Ok(MySqlTxnClient(txn))
}
}
#[async_trait::async_trait]
impl Transaction<'_> for MySqlTxnClient {
async fn query(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
let query = sqlx::query(raw_query);
let query = params.iter().fold(query, |query, param| query.bind(param));
// As said in https://docs.rs/sqlx/latest/sqlx/trait.Executor.html, we need a `&mut *transaction`. Weird.
let rows = query
.fetch_all(&mut *(self.0))
.await
.context(MySqlExecutionSnafu { sql: raw_query })?;
Ok(rows.into_iter().map(key_value_from_row).collect())
}
async fn execute(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<()> {
let query = sqlx::query(raw_query);
let query = params.iter().fold(query, |query, param| query.bind(param));
// As said in https://docs.rs/sqlx/latest/sqlx/trait.Executor.html, we need a `&mut *transaction`. Weird.
query
.execute(&mut *(self.0))
.await
.context(MySqlExecutionSnafu { sql: raw_query })?;
Ok(())
}
/// Caution: sqlx can get stuck on the query if two transactions conflict with each other.
/// It's unclear whether that is a sqlx quirk or depends on the database. Be careful.
async fn commit(self) -> Result<()> {
self.0.commit().await.context(MySqlTransactionSnafu {
operation: "commit",
})?;
Ok(())
}
}
pub struct MySqlExecutorFactory {
pool: Arc<Pool<MySql>>,
}
#[async_trait::async_trait]
impl ExecutorFactory<MySqlClient> for MySqlExecutorFactory {
async fn default_executor(&self) -> Result<MySqlClient> {
Ok(self.pool.clone())
}
async fn txn_executor<'a>(
&self,
default_executor: &'a mut MySqlClient,
) -> Result<MySqlTxnClient> {
default_executor.txn_executor().await
}
}
/// A MySQL-backed key-value store.
/// It uses [sqlx::Pool<MySql>] as the connection pool for [RdsStore].
pub type MySqlStore = RdsStore<MySqlClient, MySqlExecutorFactory, MySqlTemplateSet>;
#[async_trait::async_trait]
impl KvQueryExecutor<MySqlClient> for MySqlStore {
async fn range_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: RangeRequest,
) -> Result<RangeResponse> {
let template_type = range_template(&req.key, &req.range_end);
let template = self.sql_template_set.range_template.get(template_type);
let params = template_type.build_params(req.key, req.range_end);
let params_ref = params.iter().collect::<Vec<_>>();
// Always add 1 to limit to check if there is more data
let query =
RangeTemplate::with_limit(template, if req.limit == 0 { 0 } else { req.limit + 1 });
let limit = req.limit as usize;
debug!("query: {:?}, params: {:?}", query, params);
let mut kvs = query_executor.query(&query, &params_ref).await?;
if req.keys_only {
kvs.iter_mut().for_each(|kv| kv.value = vec![]);
}
// If limit is 0, we always return all data
if limit == 0 || kvs.len() <= limit {
return Ok(RangeResponse { kvs, more: false });
}
// If limit is greater than the number of rows, we remove the last row and set more to true
let removed = kvs.pop();
debug_assert!(removed.is_some());
Ok(RangeResponse { kvs, more: true })
}
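The limit handling above is an over-fetch-by-one check: the SQL runs with `LIMIT limit + 1`, and the extra row only signals that more data exists. A minimal standalone sketch of the same decision, with a plain `Vec` standing in for the queried kvs:

fn page<T>(mut rows: Vec<T>, limit: usize) -> (Vec<T>, bool) {
    // limit == 0 means "no limit": return everything and never report more data.
    if limit == 0 || rows.len() <= limit {
        return (rows, false);
    }
    // The query fetched limit + 1 rows, so drop the probe row and report more = true.
    rows.pop();
    (rows, true)
}

fn main() {
    assert_eq!(page(vec![1, 2, 3], 2), (vec![1, 2], true)); // 3 rows back for limit 2
    assert_eq!(page(vec![1, 2], 2), (vec![1, 2], false)); // fits exactly
    assert_eq!(page(vec![1, 2, 3], 0), (vec![1, 2, 3], false)); // unlimited
}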
async fn batch_put_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: BatchPutRequest,
) -> Result<BatchPutResponse> {
let mut in_params = Vec::with_capacity(req.kvs.len() * 3);
let mut values_params = Vec::with_capacity(req.kvs.len() * 2);
for kv in &req.kvs {
let processed_key = &kv.key;
in_params.push(processed_key);
let processed_value = &kv.value;
values_params.push(processed_key);
values_params.push(processed_value);
}
let in_params = in_params.iter().map(|x| x as _).collect::<Vec<_>>();
let values_params = values_params.iter().map(|x| x as _).collect::<Vec<_>>();
let (select, update) = self
.sql_template_set
.generate_batch_upsert_query(req.kvs.len());
// Fast path: if we don't need previous kvs, we can just upsert the keys.
if !req.prev_kv {
query_executor.execute(&update, &values_params).await?;
return Ok(BatchPutResponse::default());
}
// Should use transaction to ensure atomicity.
if let ExecutorImpl::Default(query_executor) = query_executor {
let txn = query_executor.txn_executor().await?;
let mut txn = ExecutorImpl::Txn(txn);
let res = self.batch_put_with_query_executor(&mut txn, req).await;
txn.commit().await?;
return res;
}
let prev_kvs = query_executor.query(&select, &in_params).await?;
query_executor.execute(&update, &values_params).await?;
Ok(BatchPutResponse { prev_kvs })
}
async fn batch_get_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: BatchGetRequest,
) -> Result<BatchGetResponse> {
if req.keys.is_empty() {
return Ok(BatchGetResponse { kvs: vec![] });
}
let query = self
.sql_template_set
.generate_batch_get_query(req.keys.len());
let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
let kvs = query_executor.query(&query, &params).await?;
Ok(BatchGetResponse { kvs })
}
async fn delete_range_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: DeleteRangeRequest,
) -> Result<DeleteRangeResponse> {
// Since we need to know the number of deleted keys, we have no fast path here.
// Should use transaction to ensure atomicity.
if let ExecutorImpl::Default(query_executor) = query_executor {
let txn = query_executor.txn_executor().await?;
let mut txn = ExecutorImpl::Txn(txn);
let res = self.delete_range_with_query_executor(&mut txn, req).await;
txn.commit().await?;
return res;
}
let range_get_req = RangeRequest {
key: req.key.clone(),
range_end: req.range_end.clone(),
limit: 0,
keys_only: false,
};
let prev_kvs = self
.range_with_query_executor(query_executor, range_get_req)
.await?
.kvs;
let template_type = range_template(&req.key, &req.range_end);
let template = self.sql_template_set.delete_template.get(template_type);
let params = template_type.build_params(req.key, req.range_end);
let params_ref = params.iter().map(|x| x as _).collect::<Vec<_>>();
query_executor.execute(template, &params_ref).await?;
let mut resp = DeleteRangeResponse::new(prev_kvs.len() as i64);
if req.prev_kv {
resp.with_prev_kvs(prev_kvs);
}
Ok(resp)
}
async fn batch_delete_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: BatchDeleteRequest,
) -> Result<BatchDeleteResponse> {
if req.keys.is_empty() {
return Ok(BatchDeleteResponse::default());
}
let query = self
.sql_template_set
.generate_batch_delete_query(req.keys.len());
let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
// Fast path: if we don't need previous kvs, we can just delete the keys.
if !req.prev_kv {
query_executor.execute(&query, &params).await?;
return Ok(BatchDeleteResponse::default());
}
// Should use transaction to ensure atomicity.
if let ExecutorImpl::Default(query_executor) = query_executor {
let txn = query_executor.txn_executor().await?;
let mut txn = ExecutorImpl::Txn(txn);
let res = self.batch_delete_with_query_executor(&mut txn, req).await;
txn.commit().await?;
return res;
}
// Should get previous kvs first
let batch_get_req = BatchGetRequest {
keys: req.keys.clone(),
};
let prev_kvs = self
.batch_get_with_query_executor(query_executor, batch_get_req)
.await?
.kvs;
// Pure `DELETE` has no return value, so we need to use `execute` instead of `query`.
query_executor.execute(&query, &params).await?;
if req.prev_kv {
Ok(BatchDeleteResponse { prev_kvs })
} else {
Ok(BatchDeleteResponse::default())
}
}
}
impl MySqlStore {
/// Create [MySqlStore] impl of [KvBackendRef] from url.
pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
let pool = MySqlPool::connect(url)
.await
.context(CreateMySqlPoolSnafu)?;
Self::with_mysql_pool(pool, table_name, max_txn_ops).await
}
/// Create [MySqlStore] impl of [KvBackendRef] from [sqlx::Pool<MySql>].
pub async fn with_mysql_pool(
pool: Pool<MySql>,
table_name: &str,
max_txn_ops: usize,
) -> Result<KvBackendRef> {
// This step ensures the mysql metadata backend is ready to use.
// We check if greptime_metakv table exists, and we will create a new table
// if it does not exist.
let sql_template_set = MySqlTemplateFactory::new(table_name).build();
sqlx::query(&sql_template_set.create_table_statement)
.execute(&pool)
.await
.context(MySqlExecutionSnafu {
sql: sql_template_set.create_table_statement.to_string(),
})?;
Ok(Arc::new(MySqlStore {
max_txn_ops,
sql_template_set,
txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
executor_factory: MySqlExecutorFactory {
pool: Arc::new(pool),
},
_phantom: PhantomData,
}))
}
}
#[cfg(test)]
mod tests {
use common_telemetry::init_default_ut_logging;
use super::*;
use crate::kv_backend::test::{
prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
test_txn_compare_equal, test_txn_compare_greater, test_txn_compare_less,
test_txn_compare_not_equal, test_txn_one_compare_op, text_txn_multi_compare_op,
unprepare_kv,
};
async fn build_mysql_kv_backend(table_name: &str) -> Option<MySqlStore> {
init_default_ut_logging();
let endpoints = std::env::var("GT_MYSQL_ENDPOINTS").unwrap_or_default();
if endpoints.is_empty() {
return None;
}
let pool = MySqlPool::connect(&endpoints).await.unwrap();
let sql_templates = MySqlTemplateFactory::new(table_name).build();
sqlx::query(&sql_templates.create_table_statement)
.execute(&pool)
.await
.unwrap();
Some(MySqlStore {
max_txn_ops: 128,
sql_template_set: sql_templates,
txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
executor_factory: MySqlExecutorFactory {
pool: Arc::new(pool),
},
_phantom: PhantomData,
})
}
#[tokio::test]
async fn test_mysql_put() {
let kv_backend = build_mysql_kv_backend("put_test").await.unwrap();
let prefix = b"put/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_range() {
let kv_backend = build_mysql_kv_backend("range_test").await.unwrap();
let prefix = b"range/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_range_2() {
let kv_backend = build_mysql_kv_backend("range2_test").await.unwrap();
let prefix = b"range2/";
test_kv_range_2_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_batch_get() {
let kv_backend = build_mysql_kv_backend("batch_get_test").await.unwrap();
let prefix = b"batch_get/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_batch_delete() {
let kv_backend = build_mysql_kv_backend("batch_delete_test").await.unwrap();
let prefix = b"batch_delete/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_batch_delete_with_prefix() {
let kv_backend = build_mysql_kv_backend("batch_delete_with_prefix_test")
.await
.unwrap();
let prefix = b"batch_delete/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_batch_delete_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_delete_range() {
let kv_backend = build_mysql_kv_backend("delete_range_test").await.unwrap();
let prefix = b"delete_range/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_compare_and_put() {
let kv_backend = build_mysql_kv_backend("compare_and_put_test")
.await
.unwrap();
let prefix = b"compare_and_put/";
let kv_backend = Arc::new(kv_backend);
test_kv_compare_and_put_with_prefix(kv_backend.clone(), prefix.to_vec()).await;
}
#[tokio::test]
async fn test_mysql_txn() {
let kv_backend = build_mysql_kv_backend("txn_test").await.unwrap();
test_txn_one_compare_op(&kv_backend).await;
text_txn_multi_compare_op(&kv_backend).await;
test_txn_compare_equal(&kv_backend).await;
test_txn_compare_greater(&kv_backend).await;
test_txn_compare_less(&kv_backend).await;
test_txn_compare_not_equal(&kv_backend).await;
}
}


@@ -153,7 +153,6 @@ impl<'a> PgSqlTemplateFactory<'a> {
/// Builds the template set for the given table name. /// Builds the template set for the given table name.
fn build(&self) -> PgSqlTemplateSet { fn build(&self) -> PgSqlTemplateSet {
let table_name = self.table_name; let table_name = self.table_name;
// Some queries don't end with `;`, because we need to append a `LIMIT` clause later.
PgSqlTemplateSet { PgSqlTemplateSet {
table_name: table_name.to_string(), table_name: table_name.to_string(),
create_table_statement: format!( create_table_statement: format!(


@@ -34,24 +34,6 @@ pub struct MigrateRegionRequest {
pub timeout: Duration, pub timeout: Duration,
} }
/// A request to add region follower.
#[derive(Debug, Clone)]
pub struct AddRegionFollowerRequest {
/// The region id to add follower.
pub region_id: u64,
/// The peer id to add follower.
pub peer_id: u64,
}
/// A request to remove region follower.
#[derive(Debug, Clone)]
pub struct RemoveRegionFollowerRequest {
/// The region id to remove follower.
pub region_id: u64,
/// The peer id to remove follower.
pub peer_id: u64,
}
/// Cast the protobuf [`ProcedureId`] to common [`ProcedureId`]. /// Cast the protobuf [`ProcedureId`] to common [`ProcedureId`].
pub fn pb_pid_to_pid(pid: &PbProcedureId) -> Result<ProcedureId> { pub fn pb_pid_to_pid(pid: &PbProcedureId) -> Result<ProcedureId> {
ProcedureId::parse_str(&String::from_utf8_lossy(&pid.key)).with_context(|_| { ProcedureId::parse_str(&String::from_utf8_lossy(&pid.key)).with_context(|_| {


@@ -0,0 +1,407 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{Display, Formatter, Write};
use chrono::{
Days, LocalResult, Months, NaiveDateTime, TimeDelta, TimeZone as ChronoTimeZone, Utc,
};
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::error::{InvalidDateStrSnafu, Result};
use crate::interval::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
use crate::timezone::{get_timezone, Timezone};
use crate::util::{datetime_to_utc, format_utc_datetime};
use crate::Date;
const DATETIME_FORMAT: &str = "%F %H:%M:%S%.f";
const DATETIME_FORMAT_WITH_TZ: &str = "%F %H:%M:%S%.f%z";
/// [DateTime] represents the **milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch)**.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize,
)]
pub struct DateTime(i64);
impl Display for DateTime {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
if let Some(abs_time) = chrono::DateTime::from_timestamp_millis(self.0) {
write!(
f,
"{}",
format_utc_datetime(&abs_time.naive_utc(), DATETIME_FORMAT_WITH_TZ)
)
} else {
write!(f, "DateTime({})", self.0)
}
}
}
impl From<DateTime> for serde_json::Value {
fn from(d: DateTime) -> Self {
serde_json::Value::String(d.to_string())
}
}
impl From<NaiveDateTime> for DateTime {
fn from(value: NaiveDateTime) -> Self {
DateTime::from(value.and_utc().timestamp_millis())
}
}
impl From<i64> for DateTime {
fn from(v: i64) -> Self {
Self(v)
}
}
impl From<Date> for DateTime {
fn from(value: Date) -> Self {
// It's safe: i32 * 86400000 won't overflow an i64
Self(value.to_secs() * 1000)
}
}
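The overflow remark above is easy to verify: the largest possible `Date` is `i32::MAX` days, and converted to milliseconds that still fits comfortably in an `i64` (the value matches `test_from_max_date` further down in this file):

fn main() {
    // i32::MAX days -> seconds -> milliseconds, all in i64 arithmetic.
    let max_millis = i32::MAX as i64 * 86_400 * 1_000;
    assert_eq!(max_millis, 185_542_587_100_800_000);
    assert!(max_millis < i64::MAX);
}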
impl DateTime {
/// Try parsing a string into [`DateTime`] with the system timezone.
/// See `DateTime::from_str`.
pub fn from_str_system(s: &str) -> Result<Self> {
Self::from_str(s, None)
}
/// Try parsing a string into [`DateTime`] with the given timezone.
/// Supported formats:
/// - RFC3339 in the naive UTC timezone.
/// - `%F %T` with the given timezone
/// - `%F %T%z` with the timezone embedded in the string
pub fn from_str(s: &str, timezone: Option<&Timezone>) -> Result<Self> {
let s = s.trim();
let timestamp_millis = if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(s) {
dt.naive_utc().and_utc().timestamp_millis()
} else if let Ok(d) = NaiveDateTime::parse_from_str(s, DATETIME_FORMAT) {
match datetime_to_utc(&d, get_timezone(timezone)) {
LocalResult::None => {
return InvalidDateStrSnafu { raw: s }.fail();
}
LocalResult::Single(t) | LocalResult::Ambiguous(t, _) => {
t.and_utc().timestamp_millis()
}
}
} else if let Ok(v) = chrono::DateTime::parse_from_str(s, DATETIME_FORMAT_WITH_TZ) {
v.timestamp_millis()
} else {
return InvalidDateStrSnafu { raw: s }.fail();
};
Ok(Self(timestamp_millis))
}
/// Create a new [DateTime] from milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch).
pub fn new(millis: i64) -> Self {
Self(millis)
}
/// Get the milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch).
pub fn val(&self) -> i64 {
self.0
}
/// Convert to [NaiveDateTime].
pub fn to_chrono_datetime(&self) -> Option<NaiveDateTime> {
chrono::DateTime::from_timestamp_millis(self.0).map(|x| x.naive_utc())
}
/// Format DateTime for given format and timezone.
/// If `tz==None`, the server default timezone will be used.
pub fn as_formatted_string(
self,
pattern: &str,
timezone: Option<&Timezone>,
) -> Result<Option<String>> {
if let Some(v) = self.to_chrono_datetime() {
let mut formatted = String::new();
match get_timezone(timezone) {
Timezone::Offset(offset) => {
write!(
formatted,
"{}",
offset.from_utc_datetime(&v).format(pattern)
)
.context(crate::error::FormatSnafu { pattern })?;
}
Timezone::Named(tz) => {
write!(formatted, "{}", tz.from_utc_datetime(&v).format(pattern))
.context(crate::error::FormatSnafu { pattern })?;
}
}
return Ok(Some(formatted));
}
Ok(None)
}
pub fn to_chrono_datetime_with_timezone(&self, tz: Option<&Timezone>) -> Option<NaiveDateTime> {
let datetime = self.to_chrono_datetime();
datetime.map(|v| match tz {
Some(Timezone::Offset(offset)) => offset.from_utc_datetime(&v).naive_local(),
Some(Timezone::Named(tz)) => tz.from_utc_datetime(&v).naive_local(),
None => Utc.from_utc_datetime(&v).naive_local(),
})
}
// FIXME(yingwen): remove add/sub intervals later
/// Adds given [IntervalYearMonth] to the current datetime.
pub fn add_year_month(&self, interval: IntervalYearMonth) -> Option<Self> {
let naive_datetime = self.to_chrono_datetime()?;
naive_datetime
.checked_add_months(Months::new(interval.months as u32))
.map(Into::into)
}
/// Adds given [IntervalDayTime] to the current datetime.
pub fn add_day_time(&self, interval: IntervalDayTime) -> Option<Self> {
let naive_datetime = self.to_chrono_datetime()?;
naive_datetime
.checked_add_days(Days::new(interval.days as u64))?
.checked_add_signed(TimeDelta::milliseconds(interval.milliseconds as i64))
.map(Into::into)
}
/// Adds given [IntervalMonthDayNano] to the current datetime.
pub fn add_month_day_nano(&self, interval: IntervalMonthDayNano) -> Option<Self> {
let naive_datetime = self.to_chrono_datetime()?;
naive_datetime
.checked_add_months(Months::new(interval.months as u32))?
.checked_add_days(Days::new(interval.days as u64))?
.checked_add_signed(TimeDelta::nanoseconds(interval.nanoseconds))
.map(Into::into)
}
/// Subtracts the given [IntervalYearMonth] from the current datetime.
pub fn sub_year_month(&self, interval: IntervalYearMonth) -> Option<Self> {
let naive_datetime = self.to_chrono_datetime()?;
naive_datetime
.checked_sub_months(Months::new(interval.months as u32))
.map(Into::into)
}
/// Subtracts the given [IntervalDayTime] from the current datetime.
pub fn sub_day_time(&self, interval: IntervalDayTime) -> Option<Self> {
let naive_datetime = self.to_chrono_datetime()?;
naive_datetime
.checked_sub_days(Days::new(interval.days as u64))?
.checked_sub_signed(TimeDelta::milliseconds(interval.milliseconds as i64))
.map(Into::into)
}
/// Subtracts the given [IntervalMonthDayNano] from the current datetime.
pub fn sub_month_day_nano(&self, interval: IntervalMonthDayNano) -> Option<Self> {
let naive_datetime = self.to_chrono_datetime()?;
naive_datetime
.checked_sub_months(Months::new(interval.months as u32))?
.checked_sub_days(Days::new(interval.days as u64))?
.checked_sub_signed(TimeDelta::nanoseconds(interval.nanoseconds))
.map(Into::into)
}
/// Convert to [common_time::date].
pub fn to_date(&self) -> Option<Date> {
self.to_chrono_datetime().map(|d| Date::from(d.date()))
}
pub fn negative(&self) -> Self {
Self(-self.0)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::timezone::set_default_timezone;
#[test]
pub fn test_new_date_time() {
set_default_timezone(Some("Asia/Shanghai")).unwrap();
assert_eq!("1970-01-01 08:00:00+0800", DateTime::new(0).to_string());
assert_eq!("1970-01-01 08:00:01+0800", DateTime::new(1000).to_string());
assert_eq!("1970-01-01 07:59:59+0800", DateTime::new(-1000).to_string());
}
#[test]
pub fn test_parse_from_string() {
set_default_timezone(Some("Asia/Shanghai")).unwrap();
let time = "1970-01-01 00:00:00+0800";
let dt = DateTime::from_str(time, None).unwrap();
assert_eq!(time, &dt.to_string());
let dt = DateTime::from_str(" 1970-01-01 00:00:00+0800 ", None).unwrap();
assert_eq!(time, &dt.to_string());
}
#[test]
pub fn test_from() {
let d: DateTime = 42.into();
assert_eq!(42, d.val());
}
#[test]
fn test_add_sub_interval() {
let datetime = DateTime::new(1000);
let interval = IntervalDayTime::new(1, 200);
let new_datetime = datetime.add_day_time(interval).unwrap();
assert_eq!(new_datetime.val(), 1000 + 3600 * 24 * 1000 + 200);
assert_eq!(datetime, new_datetime.sub_day_time(interval).unwrap());
}
#[test]
fn test_parse_local_date_time() {
set_default_timezone(Some("Asia/Shanghai")).unwrap();
assert_eq!(
-28800000,
DateTime::from_str("1970-01-01 00:00:00", None)
.unwrap()
.val()
);
assert_eq!(
0,
DateTime::from_str("1970-01-01 08:00:00", None)
.unwrap()
.val()
);
assert_eq!(
42,
DateTime::from_str("1970-01-01 08:00:00.042", None)
.unwrap()
.val()
);
assert_eq!(
42,
DateTime::from_str("1970-01-01 08:00:00.042424", None)
.unwrap()
.val()
);
assert_eq!(
0,
DateTime::from_str(
"1970-01-01 08:00:00",
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
)
.unwrap()
.val()
);
assert_eq!(
-28800000,
DateTime::from_str(
"1970-01-01 00:00:00",
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
)
.unwrap()
.val()
);
assert_eq!(
28800000,
DateTime::from_str(
"1970-01-01 00:00:00",
Some(&Timezone::from_tz_string("-8:00").unwrap())
)
.unwrap()
.val()
);
}
#[test]
fn test_parse_local_date_time_with_tz() {
let ts = DateTime::from_str("1970-01-01 08:00:00+0000", None)
.unwrap()
.val();
assert_eq!(28800000, ts);
let ts = DateTime::from_str("1970-01-01 00:00:00.042+0000", None)
.unwrap()
.val();
assert_eq!(42, ts);
// the string has the time zone info, the argument doesn't change the result
let ts = DateTime::from_str(
"1970-01-01 08:00:00+0000",
Some(&Timezone::from_tz_string("-8:00").unwrap()),
)
.unwrap()
.val();
assert_eq!(28800000, ts);
}
#[test]
fn test_as_formatted_string() {
let d: DateTime = DateTime::new(1000);
assert_eq!(
"1970-01-01",
d.as_formatted_string("%Y-%m-%d", None).unwrap().unwrap()
);
assert_eq!(
"1970-01-01 00:00:01",
d.as_formatted_string("%Y-%m-%d %H:%M:%S", None)
.unwrap()
.unwrap()
);
assert_eq!(
"1970-01-01T00:00:01:000",
d.as_formatted_string("%Y-%m-%dT%H:%M:%S:%3f", None)
.unwrap()
.unwrap()
);
assert_eq!(
"1970-01-01T08:00:01:000",
d.as_formatted_string(
"%Y-%m-%dT%H:%M:%S:%3f",
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
)
.unwrap()
.unwrap()
);
}
#[test]
fn test_from_max_date() {
let date = Date::new(i32::MAX);
let datetime = DateTime::from(date);
assert_eq!(datetime.val(), 185542587100800000);
}
#[test]
fn test_conversion_between_datetime_and_chrono_datetime() {
let cases = [1, 10, 100, 1000, 100000];
for case in cases {
let dt = DateTime::new(case);
let ndt = dt.to_chrono_datetime().unwrap();
let dt2 = DateTime::from(ndt);
assert_eq!(dt, dt2);
}
}
}


@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
pub mod date; pub mod date;
pub mod datetime;
pub mod duration; pub mod duration;
pub mod error; pub mod error;
pub mod interval; pub mod interval;
@@ -25,6 +26,7 @@ pub mod ttl;
pub mod util; pub mod util;
pub use date::Date; pub use date::Date;
pub use datetime::DateTime;
pub use duration::Duration; pub use duration::Duration;
pub use interval::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth}; pub use interval::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
pub use range::RangeMillis; pub use range::RangeMillis;


@@ -171,10 +171,6 @@ pub struct S3Config {
pub secret_access_key: SecretString, pub secret_access_key: SecretString,
pub endpoint: Option<String>, pub endpoint: Option<String>,
pub region: Option<String>, pub region: Option<String>,
/// Enable virtual host style so that opendal will send API requests in virtual host style instead of path style.
/// By default, opendal will send API requests to https://s3.us-east-1.amazonaws.com/bucket_name
/// When enabled, opendal will send API requests to https://bucket_name.s3.us-east-1.amazonaws.com
pub enable_virtual_host_style: bool,
#[serde(flatten)] #[serde(flatten)]
pub cache: ObjectStorageCacheConfig, pub cache: ObjectStorageCacheConfig,
pub http_client: HttpClientConfig, pub http_client: HttpClientConfig,
@@ -189,7 +185,6 @@ impl PartialEq for S3Config {
&& self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret() && self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret()
&& self.endpoint == other.endpoint && self.endpoint == other.endpoint
&& self.region == other.region && self.region == other.region
&& self.enable_virtual_host_style == other.enable_virtual_host_style
&& self.cache == other.cache && self.cache == other.cache
&& self.http_client == other.http_client && self.http_client == other.http_client
} }
@@ -294,7 +289,6 @@ impl Default for S3Config {
root: String::default(), root: String::default(),
access_key_id: SecretString::from(String::default()), access_key_id: SecretString::from(String::default()),
secret_access_key: SecretString::from(String::default()), secret_access_key: SecretString::from(String::default()),
enable_virtual_host_style: false,
endpoint: Option::default(), endpoint: Option::default(),
region: Option::default(), region: Option::default(),
cache: ObjectStorageCacheConfig::default(), cache: ObjectStorageCacheConfig::default(),


@@ -25,6 +25,6 @@ pub mod heartbeat;
pub mod metrics; pub mod metrics;
pub mod region_server; pub mod region_server;
pub mod service; pub mod service;
mod store; pub mod store;
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
pub mod tests; pub mod tests;


@@ -15,7 +15,7 @@
//! object storage utilities //! object storage utilities
mod azblob; mod azblob;
mod fs; pub mod fs;
mod gcs; mod gcs;
mod oss; mod oss;
mod s3; mod s3;


@@ -24,7 +24,8 @@ use crate::config::FileConfig;
use crate::error::{self, Result}; use crate::error::{self, Result};
use crate::store; use crate::store;
pub(crate) async fn new_fs_object_store( /// A helper function to create a file system object store.
pub async fn new_fs_object_store(
data_home: &str, data_home: &str,
_file_config: &FileConfig, _file_config: &FileConfig,
) -> Result<ObjectStore> { ) -> Result<ObjectStore> {


@@ -41,13 +41,10 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
if s3_config.endpoint.is_some() { if s3_config.endpoint.is_some() {
builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap()); builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
} };
if s3_config.region.is_some() { if s3_config.region.is_some() {
builder = builder.region(s3_config.region.as_ref().unwrap()); builder = builder.region(s3_config.region.as_ref().unwrap());
} };
if s3_config.enable_virtual_host_style {
builder = builder.enable_virtual_host_style();
}
Ok(ObjectStore::new(builder) Ok(ObjectStore::new(builder)
.context(error::InitBackendSnafu)? .context(error::InitBackendSnafu)?


@@ -30,13 +30,13 @@ use serde::{Deserialize, Serialize};
use crate::error::{self, Error, Result}; use crate::error::{self, Error, Result};
use crate::type_id::LogicalTypeId; use crate::type_id::LogicalTypeId;
use crate::types::{ use crate::types::{
BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType, BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType,
DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type, DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType,
Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType, DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType,
StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType, TimestampMillisecondType, ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
TimestampNanosecondType, TimestampSecondType, TimestampType, UInt16Type, UInt32Type, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
UInt64Type, UInt8Type, VectorType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, VectorType,
}; };
use crate::value::Value; use crate::value::Value;
use crate::vectors::MutableVector; use crate::vectors::MutableVector;
@@ -68,6 +68,7 @@ pub enum ConcreteDataType {
// Date and time types: // Date and time types:
Date(DateType), Date(DateType),
DateTime(DateTimeType),
Timestamp(TimestampType), Timestamp(TimestampType),
Time(TimeType), Time(TimeType),
@@ -106,6 +107,7 @@ impl fmt::Display for ConcreteDataType {
ConcreteDataType::Binary(v) => write!(f, "{}", v.name()), ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
ConcreteDataType::String(v) => write!(f, "{}", v.name()), ConcreteDataType::String(v) => write!(f, "{}", v.name()),
ConcreteDataType::Date(v) => write!(f, "{}", v.name()), ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
ConcreteDataType::DateTime(v) => write!(f, "{}", v.name()),
ConcreteDataType::Timestamp(t) => match t { ConcreteDataType::Timestamp(t) => match t {
TimestampType::Second(v) => write!(f, "{}", v.name()), TimestampType::Second(v) => write!(f, "{}", v.name()),
TimestampType::Millisecond(v) => write!(f, "{}", v.name()), TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
@@ -161,6 +163,7 @@ impl ConcreteDataType {
self, self,
ConcreteDataType::String(_) ConcreteDataType::String(_)
| ConcreteDataType::Date(_) | ConcreteDataType::Date(_)
| ConcreteDataType::DateTime(_)
| ConcreteDataType::Timestamp(_) | ConcreteDataType::Timestamp(_)
| ConcreteDataType::Time(_) | ConcreteDataType::Time(_)
| ConcreteDataType::Interval(_) | ConcreteDataType::Interval(_)
@@ -180,6 +183,7 @@ impl ConcreteDataType {
| ConcreteDataType::Int32(_) | ConcreteDataType::Int32(_)
| ConcreteDataType::Int64(_) | ConcreteDataType::Int64(_)
| ConcreteDataType::Date(_) | ConcreteDataType::Date(_)
| ConcreteDataType::DateTime(_)
| ConcreteDataType::Timestamp(_) | ConcreteDataType::Timestamp(_)
| ConcreteDataType::Time(_) | ConcreteDataType::Time(_)
| ConcreteDataType::Interval(_) | ConcreteDataType::Interval(_)
@@ -381,7 +385,7 @@ impl ConcreteDataType {
&ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA", &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
&ConcreteDataType::String(_) => "VARCHAR", &ConcreteDataType::String(_) => "VARCHAR",
&ConcreteDataType::Date(_) => "DATE", &ConcreteDataType::Date(_) => "DATE",
&ConcreteDataType::Timestamp(_) => "TIMESTAMP", &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
&ConcreteDataType::Time(_) => "TIME", &ConcreteDataType::Time(_) => "TIME",
&ConcreteDataType::Interval(_) => "INTERVAL", &ConcreteDataType::Interval(_) => "INTERVAL",
&ConcreteDataType::Decimal128(_) => "NUMERIC", &ConcreteDataType::Decimal128(_) => "NUMERIC",
@@ -398,7 +402,7 @@ impl ConcreteDataType {
&ConcreteDataType::Binary(_) => "_BYTEA", &ConcreteDataType::Binary(_) => "_BYTEA",
&ConcreteDataType::String(_) => "_VARCHAR", &ConcreteDataType::String(_) => "_VARCHAR",
&ConcreteDataType::Date(_) => "_DATE", &ConcreteDataType::Date(_) => "_DATE",
&ConcreteDataType::Timestamp(_) => "_TIMESTAMP", &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
&ConcreteDataType::Time(_) => "_TIME", &ConcreteDataType::Time(_) => "_TIME",
&ConcreteDataType::Interval(_) => "_INTERVAL", &ConcreteDataType::Interval(_) => "_INTERVAL",
&ConcreteDataType::Decimal128(_) => "_NUMERIC", &ConcreteDataType::Decimal128(_) => "_NUMERIC",
@@ -437,6 +441,7 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
ArrowDataType::Float32 => Self::float32_datatype(), ArrowDataType::Float32 => Self::float32_datatype(),
ArrowDataType::Float64 => Self::float64_datatype(), ArrowDataType::Float64 => Self::float64_datatype(),
ArrowDataType::Date32 => Self::date_datatype(), ArrowDataType::Date32 => Self::date_datatype(),
ArrowDataType::Date64 => Self::datetime_datatype(),
ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u), ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u), ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(), ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
@@ -485,7 +490,7 @@ macro_rules! impl_new_concrete_type_functions {
impl_new_concrete_type_functions!( impl_new_concrete_type_functions!(
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64, Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
Binary, Date, String, Json Binary, Date, DateTime, String, Json
); );
impl ConcreteDataType { impl ConcreteDataType {
@@ -809,6 +814,7 @@ mod tests {
assert!(ConcreteDataType::string_datatype().is_stringifiable()); assert!(ConcreteDataType::string_datatype().is_stringifiable());
assert!(ConcreteDataType::binary_datatype().is_stringifiable()); assert!(ConcreteDataType::binary_datatype().is_stringifiable());
assert!(ConcreteDataType::date_datatype().is_stringifiable()); assert!(ConcreteDataType::date_datatype().is_stringifiable());
assert!(ConcreteDataType::datetime_datatype().is_stringifiable());
assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable()); assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable()); assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable()); assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
@@ -837,6 +843,7 @@ mod tests {
assert!(ConcreteDataType::int32_datatype().is_signed()); assert!(ConcreteDataType::int32_datatype().is_signed());
assert!(ConcreteDataType::int64_datatype().is_signed()); assert!(ConcreteDataType::int64_datatype().is_signed());
assert!(ConcreteDataType::date_datatype().is_signed()); assert!(ConcreteDataType::date_datatype().is_signed());
assert!(ConcreteDataType::datetime_datatype().is_signed());
assert!(ConcreteDataType::timestamp_second_datatype().is_signed()); assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed()); assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed()); assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
@@ -871,6 +878,7 @@ mod tests {
assert!(!ConcreteDataType::int32_datatype().is_unsigned()); assert!(!ConcreteDataType::int32_datatype().is_unsigned());
assert!(!ConcreteDataType::int64_datatype().is_unsigned()); assert!(!ConcreteDataType::int64_datatype().is_unsigned());
assert!(!ConcreteDataType::date_datatype().is_unsigned()); assert!(!ConcreteDataType::date_datatype().is_unsigned());
assert!(!ConcreteDataType::datetime_datatype().is_unsigned());
assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned()); assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned()); assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned()); assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());


@@ -15,7 +15,7 @@
use std::any::Any; use std::any::Any;
use common_decimal::Decimal128; use common_decimal::Decimal128;
use common_time::Date; use common_time::{Date, DateTime};
use crate::types::{ use crate::types::{
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
@@ -23,8 +23,8 @@ use crate::types::{
}; };
use crate::value::{ListValue, ListValueRef, Value}; use crate::value::{ListValue, ListValueRef, Value};
use crate::vectors::{ use crate::vectors::{
BinaryVector, BooleanVector, DateVector, Decimal128Vector, ListVector, MutableVector, BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, ListVector,
PrimitiveVector, StringVector, Vector, MutableVector, PrimitiveVector, StringVector, Vector,
}; };
fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize { fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
@@ -302,6 +302,27 @@ impl ScalarRef<'_> for Decimal128 {
} }
} }
impl Scalar for DateTime {
type VectorType = DateTimeVector;
type RefType<'a> = DateTime;
fn as_scalar_ref(&self) -> Self::RefType<'_> {
*self
}
fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
long
}
}
impl ScalarRef<'_> for DateTime {
type ScalarType = DateTime;
fn to_owned_scalar(&self) -> Self::ScalarType {
*self
}
}
// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`. // Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.
impl Scalar for ListValue { impl Scalar for ListValue {
@@ -407,6 +428,13 @@ mod tests {
assert_eq!(decimal, decimal.to_owned_scalar()); assert_eq!(decimal, decimal.to_owned_scalar());
} }
#[test]
fn test_datetime_scalar() {
let dt = DateTime::new(123);
assert_eq!(dt, dt.as_scalar_ref());
assert_eq!(dt, dt.to_owned_scalar());
}
#[test] #[test]
fn test_list_value_scalar() { fn test_list_value_scalar() {
let list_value = let list_value =


@@ -40,6 +40,9 @@ pub enum LogicalTypeId {
/// Date representing the elapsed time since UNIX epoch (1970-01-01) /// Date representing the elapsed time since UNIX epoch (1970-01-01)
/// in days (32 bits). /// in days (32 bits).
Date, Date,
/// Datetime representing the elapsed time since UNIX epoch (1970-01-01) in
/// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
DateTime,
TimestampSecond, TimestampSecond,
TimestampMillisecond, TimestampMillisecond,
@@ -97,6 +100,7 @@ impl LogicalTypeId {
LogicalTypeId::String => ConcreteDataType::string_datatype(), LogicalTypeId::String => ConcreteDataType::string_datatype(),
LogicalTypeId::Binary => ConcreteDataType::binary_datatype(), LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
LogicalTypeId::Date => ConcreteDataType::date_datatype(), LogicalTypeId::Date => ConcreteDataType::date_datatype(),
LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(), LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
LogicalTypeId::TimestampMillisecond => { LogicalTypeId::TimestampMillisecond => {
ConcreteDataType::timestamp_millisecond_datatype() ConcreteDataType::timestamp_millisecond_datatype()


@@ -16,6 +16,7 @@ mod binary_type;
mod boolean_type; mod boolean_type;
pub mod cast; pub mod cast;
mod date_type; mod date_type;
mod datetime_type;
mod decimal_type; mod decimal_type;
mod dictionary_type; mod dictionary_type;
mod duration_type; mod duration_type;
@@ -33,6 +34,7 @@ pub use binary_type::BinaryType;
pub use boolean_type::BooleanType; pub use boolean_type::BooleanType;
pub use cast::{cast, cast_with_opt}; pub use cast::{cast, cast_with_opt};
pub use date_type::DateType; pub use date_type::DateType;
pub use datetime_type::DateTimeType;
pub use decimal_type::Decimal128Type; pub use decimal_type::Decimal128Type;
pub use dictionary_type::DictionaryType; pub use dictionary_type::DictionaryType;
pub use duration_type::{ pub use duration_type::{


@@ -119,6 +119,10 @@ pub fn can_cast_type(src_value: &Value, dest_type: &ConcreteDataType) -> bool {
(Date(_), Int32(_) | Timestamp(_) | String(_)) => true, (Date(_), Int32(_) | Timestamp(_) | String(_)) => true,
(Int32(_) | String(_) | Timestamp(_), Date(_)) => true, (Int32(_) | String(_) | Timestamp(_), Date(_)) => true,
(Date(_), Date(_)) => true, (Date(_), Date(_)) => true,
// DateTime type
(DateTime(_), Int64(_) | Timestamp(_) | String(_)) => true,
(Int64(_) | Timestamp(_) | String(_), DateTime(_)) => true,
(DateTime(_), DateTime(_)) => true,
// Timestamp type // Timestamp type
(Timestamp(_), Int64(_) | String(_)) => true, (Timestamp(_), Int64(_) | String(_)) => true,
(Int64(_) | String(_), Timestamp(_)) => true, (Int64(_) | String(_), Timestamp(_)) => true,
@@ -171,7 +175,7 @@ mod tests {
use common_base::bytes::StringBytes; use common_base::bytes::StringBytes;
use common_time::time::Time; use common_time::time::Time;
use common_time::timezone::set_default_timezone; use common_time::timezone::set_default_timezone;
use common_time::{Date, Timestamp}; use common_time::{Date, DateTime, Timestamp};
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use super::*; use super::*;
@@ -270,6 +274,7 @@ mod tests {
null_datatype, null_datatype,
boolean_datatype, boolean_datatype,
date_datatype, date_datatype,
datetime_datatype,
timestamp_second_datatype, timestamp_second_datatype,
binary_datatype binary_datatype
); );
@@ -282,12 +287,23 @@ mod tests {
timestamp_second_datatype, timestamp_second_datatype,
string_datatype string_datatype
); );
// datetime -> other types
test_can_cast!(
Value::DateTime(DateTime::from_str_system("2021-01-01 00:00:00").unwrap()),
null_datatype,
int64_datatype,
timestamp_second_datatype,
string_datatype
);
// timestamp -> other types // timestamp -> other types
test_can_cast!( test_can_cast!(
Value::Timestamp(Timestamp::from_str_utc("2021-01-01 00:00:00").unwrap()), Value::Timestamp(Timestamp::from_str_utc("2021-01-01 00:00:00").unwrap()),
null_datatype, null_datatype,
int64_datatype, int64_datatype,
date_datatype, date_datatype,
datetime_datatype,
string_datatype string_datatype
); );


@@ -55,6 +55,7 @@ impl DataType for DateType {
Value::Int32(v) => Some(Value::Date(Date::from(v))), Value::Int32(v) => Some(Value::Date(Date::from(v))),
Value::String(v) => Date::from_str_utc(v.as_utf8()).map(Value::Date).ok(), Value::String(v) => Date::from_str_utc(v.as_utf8()).map(Value::Date).ok(),
Value::Timestamp(v) => v.to_chrono_date().map(|date| Value::Date(date.into())), Value::Timestamp(v) => v.to_chrono_date().map(|date| Value::Date(date.into())),
Value::DateTime(v) => Some(Value::DateTime(v)),
_ => None, _ => None,
} }
} }


@@ -0,0 +1,140 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use arrow::datatypes::{DataType as ArrowDataType, Date64Type};
use common_time::DateTime;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Result};
use crate::prelude::{LogicalTypeId, MutableVector, ScalarVectorBuilder, Value, ValueRef, Vector};
use crate::types::LogicalPrimitiveType;
use crate::vectors::{DateTimeVector, DateTimeVectorBuilder, PrimitiveVector};
const MILLISECOND_VARIATION: u64 = 3;
/// Data type for [`DateTime`].
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct DateTimeType;
impl DateTimeType {
pub fn precision(&self) -> u64 {
MILLISECOND_VARIATION
}
}
impl DataType for DateTimeType {
fn name(&self) -> String {
"DateTime".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {
LogicalTypeId::DateTime
}
fn default_value(&self) -> Value {
Value::DateTime(DateTime::default())
}
fn as_arrow_type(&self) -> ArrowDataType {
ArrowDataType::Date64
}
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
Box::new(DateTimeVectorBuilder::with_capacity(capacity))
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Int64(v) => Some(Value::DateTime(DateTime::from(v))),
Value::Timestamp(v) => v.to_chrono_datetime().map(|d| Value::DateTime(d.into())),
Value::String(v) => DateTime::from_str_system(v.as_utf8())
.map(Value::DateTime)
.ok(),
_ => None,
}
}
}
impl LogicalPrimitiveType for DateTimeType {
type ArrowPrimitive = Date64Type;
type Native = i64;
type Wrapper = DateTime;
type LargestType = Self;
fn build_data_type() -> ConcreteDataType {
ConcreteDataType::datetime_datatype()
}
fn type_name() -> &'static str {
"DateTime"
}
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<Self>> {
vector
.as_any()
.downcast_ref::<DateTimeVector>()
.with_context(|| error::CastTypeSnafu {
msg: format!(
"Failed to cast {} to DateTimeVector",
vector.vector_type_name()
),
})
}
fn cast_value_ref(value: ValueRef) -> Result<Option<Self::Wrapper>> {
match value {
ValueRef::Null => Ok(None),
ValueRef::DateTime(v) => Ok(Some(v)),
other => error::CastTypeSnafu {
msg: format!("Failed to cast value {other:?} to DateTime"),
}
.fail(),
}
}
}
#[cfg(test)]
mod tests {
use common_time::timezone::set_default_timezone;
use common_time::Timestamp;
use super::*;
#[test]
fn test_datetime_cast() {
// cast from Int64
let val = Value::Int64(1000);
let dt = ConcreteDataType::datetime_datatype().try_cast(val).unwrap();
assert_eq!(dt, Value::DateTime(DateTime::from(1000)));
// cast from String
set_default_timezone(Some("Asia/Shanghai")).unwrap();
let val = Value::String("1970-01-01 00:00:00+0800".into());
let dt = ConcreteDataType::datetime_datatype().try_cast(val).unwrap();
assert_eq!(
dt,
Value::DateTime(DateTime::from_str_system("1970-01-01 00:00:00+0800").unwrap())
);
// cast from Timestamp
let val = Value::Timestamp(Timestamp::from_str_utc("2020-09-08 21:42:29+0800").unwrap());
let dt = ConcreteDataType::datetime_datatype().try_cast(val).unwrap();
assert_eq!(
dt,
Value::DateTime(DateTime::from_str_system("2020-09-08 21:42:29+0800").unwrap())
);
}
}


@@ -16,7 +16,7 @@ use std::cmp::Ordering;
use std::fmt; use std::fmt;
use arrow::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType as ArrowDataType}; use arrow::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType as ArrowDataType};
use common_time::Date; use common_time::{Date, DateTime};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use snafu::OptionExt; use snafu::OptionExt;
@@ -25,7 +25,7 @@ use crate::error::{self, Result};
use crate::scalars::{Scalar, ScalarRef, ScalarVectorBuilder}; use crate::scalars::{Scalar, ScalarRef, ScalarVectorBuilder};
use crate::type_id::LogicalTypeId; use crate::type_id::LogicalTypeId;
use crate::types::boolean_type::bool_to_numeric; use crate::types::boolean_type::bool_to_numeric;
use crate::types::DateType; use crate::types::{DateTimeType, DateType};
use crate::value::{Value, ValueRef}; use crate::value::{Value, ValueRef};
use crate::vectors::{MutableVector, PrimitiveVector, PrimitiveVectorBuilder, Vector}; use crate::vectors::{MutableVector, PrimitiveVector, PrimitiveVectorBuilder, Vector};
@@ -157,6 +157,19 @@ impl WrapperType for Date {
} }
} }
impl WrapperType for DateTime {
type LogicalType = DateTimeType;
type Native = i64;
fn from_native(value: Self::Native) -> Self {
DateTime::new(value)
}
fn into_native(self) -> Self::Native {
self.val()
}
}
macro_rules! define_logical_primitive_type { macro_rules! define_logical_primitive_type {
($Native: ident, $TypeId: ident, $DataType: ident, $Largest: ident) => { ($Native: ident, $TypeId: ident, $DataType: ident, $Largest: ident) => {
// We need to define it as an empty struct `struct DataType {}` instead of a struct-unit // We need to define it as an empty struct `struct DataType {}` instead of a struct-unit
@@ -349,6 +362,7 @@ impl DataType for Int64Type {
Value::Float32(v) => num::cast::cast(v).map(Value::Int64), Value::Float32(v) => num::cast::cast(v).map(Value::Int64),
Value::Float64(v) => num::cast::cast(v).map(Value::Int64), Value::Float64(v) => num::cast::cast(v).map(Value::Int64),
Value::String(v) => v.as_utf8().parse::<i64>().map(Value::Int64).ok(), Value::String(v) => v.as_utf8().parse::<i64>().map(Value::Int64).ok(),
Value::DateTime(v) => Some(Value::Int64(v.val())),
Value::Timestamp(v) => Some(Value::Int64(v.value())), Value::Timestamp(v) => Some(Value::Int64(v.value())),
Value::Time(v) => Some(Value::Int64(v.value())), Value::Time(v) => Some(Value::Int64(v.value())),
// We don't allow casting interval type to int. // We don't allow casting interval type to int.
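A brief illustrative round trip for the WrapperType impl and the new Int64 cast arm above (values are made up):

// DateTime now round-trips through its native i64 representation.
let dt = DateTime::from_native(42); // equivalent to DateTime::new(42)
assert_eq!(42, dt.into_native());
// And Int64Type::try_cast can turn a DateTime back into its raw value.
assert_eq!(
    ConcreteDataType::int64_datatype().try_cast(Value::DateTime(DateTime::new(42))),
    Some(Value::Int64(42))
);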

View File

@@ -75,6 +75,7 @@ impl DataType for StringType {
Value::Float64(v) => Some(Value::String(StringBytes::from(v.to_string()))), Value::Float64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
Value::String(v) => Some(Value::String(v)), Value::String(v) => Some(Value::String(v)),
Value::Date(v) => Some(Value::String(StringBytes::from(v.to_string()))), Value::Date(v) => Some(Value::String(StringBytes::from(v.to_string()))),
Value::DateTime(v) => Some(Value::String(StringBytes::from(v.to_string()))),
Value::Timestamp(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))), Value::Timestamp(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
Value::Time(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))), Value::Time(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
Value::IntervalYearMonth(v) => { Value::IntervalYearMonth(v) => {

View File

@@ -132,6 +132,7 @@ macro_rules! impl_data_type_for_timestamp {
Value::Timestamp(v) => v.convert_to(TimeUnit::$unit).map(Value::Timestamp), Value::Timestamp(v) => v.convert_to(TimeUnit::$unit).map(Value::Timestamp),
Value::String(v) => Timestamp::from_str_utc(v.as_utf8()).map(Value::Timestamp).ok(), Value::String(v) => Timestamp::from_str_utc(v.as_utf8()).map(Value::Timestamp).ok(),
Value::Int64(v) => Some(Value::Timestamp(Timestamp::new(v, TimeUnit::$unit))), Value::Int64(v) => Some(Value::Timestamp(Timestamp::new(v, TimeUnit::$unit))),
Value::DateTime(v) => Timestamp::new_second(v.val()).convert_to(TimeUnit::$unit).map(Value::Timestamp),
Value::Date(v) => Timestamp::new_second(v.to_secs()).convert_to(TimeUnit::$unit).map(Value::Timestamp), Value::Date(v) => Timestamp::new_second(v.to_secs()).convert_to(TimeUnit::$unit).map(Value::Timestamp),
_ => None _ => None
} }
@@ -201,7 +202,7 @@ impl_data_type_for_timestamp!(Microsecond);
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use common_time::timezone::set_default_timezone; use common_time::timezone::set_default_timezone;
use common_time::Date; use common_time::{Date, DateTime};
use super::*; use super::*;
@@ -248,6 +249,13 @@ mod tests {
.unwrap(); .unwrap();
assert_eq!(ts, Value::Timestamp(Timestamp::new_second(1694589525))); assert_eq!(ts, Value::Timestamp(Timestamp::new_second(1694589525)));
// Datetime -> TimestampSecond
let dt = Value::DateTime(DateTime::from(1234567));
let ts = ConcreteDataType::timestamp_second_datatype()
.try_cast(dt)
.unwrap();
assert_eq!(ts, Value::Timestamp(Timestamp::new_second(1234567)));
// Date -> TimestampMillisecond // Date -> TimestampMillisecond
let d = Value::Date(Date::from_str_utc("1970-01-01").unwrap()); let d = Value::Date(Date::from_str_utc("1970-01-01").unwrap());
let ts = ConcreteDataType::timestamp_millisecond_datatype() let ts = ConcreteDataType::timestamp_millisecond_datatype()

View File

@@ -24,6 +24,7 @@ use common_base::bytes::{Bytes, StringBytes};
use common_decimal::Decimal128; use common_decimal::Decimal128;
use common_telemetry::error; use common_telemetry::error;
use common_time::date::Date; use common_time::date::Date;
use common_time::datetime::DateTime;
use common_time::interval::IntervalUnit; use common_time::interval::IntervalUnit;
use common_time::time::Time; use common_time::time::Time;
use common_time::timestamp::{TimeUnit, Timestamp}; use common_time::timestamp::{TimeUnit, Timestamp};
@@ -74,6 +75,7 @@ pub enum Value {
// Date & Time types: // Date & Time types:
Date(Date), Date(Date),
DateTime(DateTime),
Timestamp(Timestamp), Timestamp(Timestamp),
Time(Time), Time(Time),
Duration(Duration), Duration(Duration),
@@ -110,6 +112,7 @@ impl Display for Value {
write!(f, "{hex}") write!(f, "{hex}")
} }
Value::Date(v) => write!(f, "{v}"), Value::Date(v) => write!(f, "{v}"),
Value::DateTime(v) => write!(f, "{v}"),
Value::Timestamp(v) => write!(f, "{}", v.to_iso8601_string()), Value::Timestamp(v) => write!(f, "{}", v.to_iso8601_string()),
Value::Time(t) => write!(f, "{}", t.to_iso8601_string()), Value::Time(t) => write!(f, "{}", t.to_iso8601_string()),
Value::IntervalYearMonth(v) => { Value::IntervalYearMonth(v) => {
@@ -159,6 +162,7 @@ macro_rules! define_data_type_func {
$struct::String(_) => ConcreteDataType::string_datatype(), $struct::String(_) => ConcreteDataType::string_datatype(),
$struct::Binary(_) => ConcreteDataType::binary_datatype(), $struct::Binary(_) => ConcreteDataType::binary_datatype(),
$struct::Date(_) => ConcreteDataType::date_datatype(), $struct::Date(_) => ConcreteDataType::date_datatype(),
$struct::DateTime(_) => ConcreteDataType::datetime_datatype(),
$struct::Time(t) => ConcreteDataType::time_datatype(*t.unit()), $struct::Time(t) => ConcreteDataType::time_datatype(*t.unit()),
$struct::Timestamp(v) => ConcreteDataType::timestamp_datatype(v.unit()), $struct::Timestamp(v) => ConcreteDataType::timestamp_datatype(v.unit()),
$struct::IntervalYearMonth(_) => { $struct::IntervalYearMonth(_) => {
@@ -218,6 +222,7 @@ impl Value {
Value::String(v) => ValueRef::String(v.as_utf8()), Value::String(v) => ValueRef::String(v.as_utf8()),
Value::Binary(v) => ValueRef::Binary(v), Value::Binary(v) => ValueRef::Binary(v),
Value::Date(v) => ValueRef::Date(*v), Value::Date(v) => ValueRef::Date(*v),
Value::DateTime(v) => ValueRef::DateTime(*v),
Value::List(v) => ValueRef::List(ListValueRef::Ref { val: v }), Value::List(v) => ValueRef::List(ListValueRef::Ref { val: v }),
Value::Timestamp(v) => ValueRef::Timestamp(*v), Value::Timestamp(v) => ValueRef::Timestamp(*v),
Value::Time(v) => ValueRef::Time(*v), Value::Time(v) => ValueRef::Time(*v),
@@ -253,6 +258,14 @@ impl Value {
} }
} }
/// Cast Value to DateTime. Return None if value is not a valid datetime data type.
pub fn as_datetime(&self) -> Option<DateTime> {
match self {
Value::DateTime(t) => Some(*t),
_ => None,
}
}
/// Cast Value to [Time]. Return None if value is not a valid time data type. /// Cast Value to [Time]. Return None if value is not a valid time data type.
pub fn as_time(&self) -> Option<Time> { pub fn as_time(&self) -> Option<Time> {
match self { match self {
@@ -332,6 +345,7 @@ impl Value {
Value::Binary(_) => LogicalTypeId::Binary, Value::Binary(_) => LogicalTypeId::Binary,
Value::List(_) => LogicalTypeId::List, Value::List(_) => LogicalTypeId::List,
Value::Date(_) => LogicalTypeId::Date, Value::Date(_) => LogicalTypeId::Date,
Value::DateTime(_) => LogicalTypeId::DateTime,
Value::Timestamp(t) => match t.unit() { Value::Timestamp(t) => match t.unit() {
TimeUnit::Second => LogicalTypeId::TimestampSecond, TimeUnit::Second => LogicalTypeId::TimestampSecond,
TimeUnit::Millisecond => LogicalTypeId::TimestampMillisecond, TimeUnit::Millisecond => LogicalTypeId::TimestampMillisecond,
@@ -387,6 +401,7 @@ impl Value {
Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())), Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())),
Value::Binary(v) => ScalarValue::Binary(Some(v.to_vec())), Value::Binary(v) => ScalarValue::Binary(Some(v.to_vec())),
Value::Date(v) => ScalarValue::Date32(Some(v.val())), Value::Date(v) => ScalarValue::Date32(Some(v.val())),
Value::DateTime(v) => ScalarValue::Date64(Some(v.val())),
Value::Null => to_null_scalar_value(output_type)?, Value::Null => to_null_scalar_value(output_type)?,
Value::List(list) => { Value::List(list) => {
// Safety: The logical type of the value and output_type are the same. // Safety: The logical type of the value and output_type are the same.
@@ -448,6 +463,7 @@ impl Value {
Value::Float64(x) => Some(Value::Float64(-*x)), Value::Float64(x) => Some(Value::Float64(-*x)),
Value::Decimal128(x) => Some(Value::Decimal128(x.negative())), Value::Decimal128(x) => Some(Value::Decimal128(x.negative())),
Value::Date(x) => Some(Value::Date(x.negative())), Value::Date(x) => Some(Value::Date(x.negative())),
Value::DateTime(x) => Some(Value::DateTime(x.negative())),
Value::Timestamp(x) => Some(Value::Timestamp(x.negative())), Value::Timestamp(x) => Some(Value::Timestamp(x.negative())),
Value::Time(x) => Some(Value::Time(x.negative())), Value::Time(x) => Some(Value::Time(x.negative())),
Value::Duration(x) => Some(Value::Duration(x.negative())), Value::Duration(x) => Some(Value::Duration(x.negative())),
@@ -509,6 +525,7 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValu
} }
ConcreteDataType::String(_) => ScalarValue::Utf8(None), ConcreteDataType::String(_) => ScalarValue::Utf8(None),
ConcreteDataType::Date(_) => ScalarValue::Date32(None), ConcreteDataType::Date(_) => ScalarValue::Date32(None),
ConcreteDataType::DateTime(_) => ScalarValue::Date64(None),
ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit(), None), ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit(), None),
ConcreteDataType::Interval(v) => match v { ConcreteDataType::Interval(v) => match v {
IntervalType::YearMonth(_) => ScalarValue::IntervalYearMonth(None), IntervalType::YearMonth(_) => ScalarValue::IntervalYearMonth(None),
@@ -614,6 +631,7 @@ macro_rules! impl_ord_for_value_like {
($Type::String(v1), $Type::String(v2)) => v1.cmp(v2), ($Type::String(v1), $Type::String(v2)) => v1.cmp(v2),
($Type::Binary(v1), $Type::Binary(v2)) => v1.cmp(v2), ($Type::Binary(v1), $Type::Binary(v2)) => v1.cmp(v2),
($Type::Date(v1), $Type::Date(v2)) => v1.cmp(v2), ($Type::Date(v1), $Type::Date(v2)) => v1.cmp(v2),
($Type::DateTime(v1), $Type::DateTime(v2)) => v1.cmp(v2),
($Type::Timestamp(v1), $Type::Timestamp(v2)) => v1.cmp(v2), ($Type::Timestamp(v1), $Type::Timestamp(v2)) => v1.cmp(v2),
($Type::Time(v1), $Type::Time(v2)) => v1.cmp(v2), ($Type::Time(v1), $Type::Time(v2)) => v1.cmp(v2),
($Type::IntervalYearMonth(v1), $Type::IntervalYearMonth(v2)) => v1.cmp(v2), ($Type::IntervalYearMonth(v1), $Type::IntervalYearMonth(v2)) => v1.cmp(v2),
@@ -694,6 +712,7 @@ impl_try_from_value!(String, StringBytes);
impl_try_from_value!(Binary, Bytes); impl_try_from_value!(Binary, Bytes);
impl_try_from_value!(Date, Date); impl_try_from_value!(Date, Date);
impl_try_from_value!(Time, Time); impl_try_from_value!(Time, Time);
impl_try_from_value!(DateTime, DateTime);
impl_try_from_value!(Timestamp, Timestamp); impl_try_from_value!(Timestamp, Timestamp);
impl_try_from_value!(IntervalYearMonth, IntervalYearMonth); impl_try_from_value!(IntervalYearMonth, IntervalYearMonth);
impl_try_from_value!(IntervalDayTime, IntervalDayTime); impl_try_from_value!(IntervalDayTime, IntervalDayTime);
@@ -737,6 +756,7 @@ impl_value_from!(String, StringBytes);
impl_value_from!(Binary, Bytes); impl_value_from!(Binary, Bytes);
impl_value_from!(Date, Date); impl_value_from!(Date, Date);
impl_value_from!(Time, Time); impl_value_from!(Time, Time);
impl_value_from!(DateTime, DateTime);
impl_value_from!(Timestamp, Timestamp); impl_value_from!(Timestamp, Timestamp);
impl_value_from!(IntervalYearMonth, IntervalYearMonth); impl_value_from!(IntervalYearMonth, IntervalYearMonth);
impl_value_from!(IntervalDayTime, IntervalDayTime); impl_value_from!(IntervalDayTime, IntervalDayTime);
@@ -783,6 +803,7 @@ impl TryFrom<Value> for serde_json::Value {
Value::String(bytes) => serde_json::Value::String(bytes.into_string()), Value::String(bytes) => serde_json::Value::String(bytes.into_string()),
Value::Binary(bytes) => serde_json::to_value(bytes)?, Value::Binary(bytes) => serde_json::to_value(bytes)?,
Value::Date(v) => serde_json::Value::Number(v.val().into()), Value::Date(v) => serde_json::Value::Number(v.val().into()),
Value::DateTime(v) => serde_json::Value::Number(v.val().into()),
Value::List(v) => serde_json::to_value(v)?, Value::List(v) => serde_json::to_value(v)?,
Value::Timestamp(v) => serde_json::to_value(v.value())?, Value::Timestamp(v) => serde_json::to_value(v.value())?,
Value::Time(v) => serde_json::to_value(v.value())?, Value::Time(v) => serde_json::to_value(v.value())?,
@@ -912,6 +933,9 @@ impl TryFrom<ScalarValue> for Value {
Value::List(ListValue::new(items, datatype)) Value::List(ListValue::new(items, datatype))
} }
ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null), ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null),
ScalarValue::Date64(d) => d
.map(|x| Value::DateTime(DateTime::new(x)))
.unwrap_or(Value::Null),
ScalarValue::TimestampSecond(t, _) => t ScalarValue::TimestampSecond(t, _) => t
.map(|x| Value::Timestamp(Timestamp::new(x, TimeUnit::Second))) .map(|x| Value::Timestamp(Timestamp::new(x, TimeUnit::Second)))
.unwrap_or(Value::Null), .unwrap_or(Value::Null),
@@ -970,8 +994,7 @@ impl TryFrom<ScalarValue> for Value {
| ScalarValue::Float16(_) | ScalarValue::Float16(_)
| ScalarValue::Utf8View(_) | ScalarValue::Utf8View(_)
| ScalarValue::BinaryView(_) | ScalarValue::BinaryView(_)
| ScalarValue::Map(_) | ScalarValue::Map(_) => {
| ScalarValue::Date64(_) => {
return error::UnsupportedArrowTypeSnafu { return error::UnsupportedArrowTypeSnafu {
arrow_type: v.data_type(), arrow_type: v.data_type(),
} }
@@ -1000,6 +1023,7 @@ impl From<ValueRef<'_>> for Value {
ValueRef::String(v) => Value::String(v.into()), ValueRef::String(v) => Value::String(v.into()),
ValueRef::Binary(v) => Value::Binary(v.into()), ValueRef::Binary(v) => Value::Binary(v.into()),
ValueRef::Date(v) => Value::Date(v), ValueRef::Date(v) => Value::Date(v),
ValueRef::DateTime(v) => Value::DateTime(v),
ValueRef::Timestamp(v) => Value::Timestamp(v), ValueRef::Timestamp(v) => Value::Timestamp(v),
ValueRef::Time(v) => Value::Time(v), ValueRef::Time(v) => Value::Time(v),
ValueRef::IntervalYearMonth(v) => Value::IntervalYearMonth(v), ValueRef::IntervalYearMonth(v) => Value::IntervalYearMonth(v),
@@ -1039,6 +1063,7 @@ pub enum ValueRef<'a> {
// Date & Time types: // Date & Time types:
Date(Date), Date(Date),
DateTime(DateTime),
Timestamp(Timestamp), Timestamp(Timestamp),
Time(Time), Time(Time),
Duration(Duration), Duration(Duration),
@@ -1150,6 +1175,11 @@ impl<'a> ValueRef<'a> {
impl_as_for_value_ref!(self, Date) impl_as_for_value_ref!(self, Date)
} }
/// Cast itself to [DateTime].
pub fn as_datetime(&self) -> Result<Option<DateTime>> {
impl_as_for_value_ref!(self, DateTime)
}
/// Cast itself to [Timestamp]. /// Cast itself to [Timestamp].
pub fn as_timestamp(&self) -> Result<Option<Timestamp>> { pub fn as_timestamp(&self) -> Result<Option<Timestamp>> {
impl_as_for_value_ref!(self, Timestamp) impl_as_for_value_ref!(self, Timestamp)
@@ -1233,6 +1263,7 @@ impl_value_ref_from!(Int64, i64);
impl_value_ref_from!(Float32, f32); impl_value_ref_from!(Float32, f32);
impl_value_ref_from!(Float64, f64); impl_value_ref_from!(Float64, f64);
impl_value_ref_from!(Date, Date); impl_value_ref_from!(Date, Date);
impl_value_ref_from!(DateTime, DateTime);
impl_value_ref_from!(Timestamp, Timestamp); impl_value_ref_from!(Timestamp, Timestamp);
impl_value_ref_from!(Time, Time); impl_value_ref_from!(Time, Time);
impl_value_ref_from!(IntervalYearMonth, IntervalYearMonth); impl_value_ref_from!(IntervalYearMonth, IntervalYearMonth);
@@ -1296,6 +1327,7 @@ pub fn transform_value_ref_to_json_value<'a>(
} }
} }
ValueRef::Date(v) => serde_json::Value::Number(v.val().into()), ValueRef::Date(v) => serde_json::Value::Number(v.val().into()),
ValueRef::DateTime(v) => serde_json::Value::Number(v.val().into()),
ValueRef::List(v) => serde_json::to_value(v)?, ValueRef::List(v) => serde_json::to_value(v)?,
ValueRef::Timestamp(v) => serde_json::to_value(v.value())?, ValueRef::Timestamp(v) => serde_json::to_value(v.value())?,
ValueRef::Time(v) => serde_json::to_value(v.value())?, ValueRef::Time(v) => serde_json::to_value(v.value())?,
@@ -1394,6 +1426,7 @@ impl ValueRef<'_> {
ValueRef::String(v) => std::mem::size_of_val(v), ValueRef::String(v) => std::mem::size_of_val(v),
ValueRef::Binary(v) => std::mem::size_of_val(v), ValueRef::Binary(v) => std::mem::size_of_val(v),
ValueRef::Date(_) => 4, ValueRef::Date(_) => 4,
ValueRef::DateTime(_) => 8,
ValueRef::Timestamp(_) => 16, ValueRef::Timestamp(_) => 16,
ValueRef::Time(_) => 16, ValueRef::Time(_) => 16,
ValueRef::Duration(_) => 16, ValueRef::Duration(_) => 16,
@@ -1429,9 +1462,7 @@ pub fn column_data_to_json(data: ValueData) -> JsonValue {
.unwrap_or(JsonValue::Null), .unwrap_or(JsonValue::Null),
ValueData::StringValue(s) => JsonValue::String(s), ValueData::StringValue(s) => JsonValue::String(s),
ValueData::DateValue(d) => JsonValue::String(Date::from(d).to_string()), ValueData::DateValue(d) => JsonValue::String(Date::from(d).to_string()),
ValueData::DatetimeValue(d) => { ValueData::DatetimeValue(d) => JsonValue::String(DateTime::from(d).to_string()),
JsonValue::String(Timestamp::new_microsecond(d).to_iso8601_string())
}
ValueData::TimeSecondValue(d) => JsonValue::String(Time::new_second(d).to_iso8601_string()), ValueData::TimeSecondValue(d) => JsonValue::String(Time::new_second(d).to_iso8601_string()),
ValueData::TimeMillisecondValue(d) => { ValueData::TimeMillisecondValue(d) => {
JsonValue::String(Time::new_millisecond(d).to_iso8601_string()) JsonValue::String(Time::new_millisecond(d).to_iso8601_string())
@@ -1480,7 +1511,6 @@ mod tests {
#[test] #[test]
fn test_column_data_to_json() { fn test_column_data_to_json() {
set_default_timezone(Some("Asia/Shanghai")).unwrap();
assert_eq!( assert_eq!(
column_data_to_json(ValueData::BinaryValue(b"hello".to_vec())), column_data_to_json(ValueData::BinaryValue(b"hello".to_vec())),
JsonValue::String("aGVsbG8=".to_string()) JsonValue::String("aGVsbG8=".to_string())
@@ -1539,31 +1569,31 @@ mod tests {
); );
assert_eq!( assert_eq!(
column_data_to_json(ValueData::DatetimeValue(456)), column_data_to_json(ValueData::DatetimeValue(456)),
JsonValue::String("1970-01-01 08:00:00.000456+0800".to_string()) JsonValue::String("1970-01-01 00:00:00.456+0000".to_string())
); );
assert_eq!( assert_eq!(
column_data_to_json(ValueData::TimeSecondValue(789)), column_data_to_json(ValueData::TimeSecondValue(789)),
JsonValue::String("08:13:09+0800".to_string()) JsonValue::String("00:13:09+0000".to_string())
); );
assert_eq!( assert_eq!(
column_data_to_json(ValueData::TimeMillisecondValue(789)), column_data_to_json(ValueData::TimeMillisecondValue(789)),
JsonValue::String("08:00:00.789+0800".to_string()) JsonValue::String("00:00:00.789+0000".to_string())
); );
assert_eq!( assert_eq!(
column_data_to_json(ValueData::TimeMicrosecondValue(789)), column_data_to_json(ValueData::TimeMicrosecondValue(789)),
JsonValue::String("08:00:00.000789+0800".to_string()) JsonValue::String("00:00:00.000789+0000".to_string())
); );
assert_eq!( assert_eq!(
column_data_to_json(ValueData::TimestampMillisecondValue(1234567890)), column_data_to_json(ValueData::TimestampMillisecondValue(1234567890)),
JsonValue::String("1970-01-15 14:56:07.890+0800".to_string()) JsonValue::String("1970-01-15 06:56:07.890+0000".to_string())
); );
assert_eq!( assert_eq!(
column_data_to_json(ValueData::TimestampNanosecondValue(1234567890123456789)), column_data_to_json(ValueData::TimestampNanosecondValue(1234567890123456789)),
JsonValue::String("2009-02-14 07:31:30.123456789+0800".to_string()) JsonValue::String("2009-02-13 23:31:30.123456789+0000".to_string())
); );
assert_eq!( assert_eq!(
column_data_to_json(ValueData::TimestampSecondValue(1234567890)), column_data_to_json(ValueData::TimestampSecondValue(1234567890)),
JsonValue::String("2009-02-14 07:31:30+0800".to_string()) JsonValue::String("2009-02-13 23:31:30+0000".to_string())
); );
assert_eq!( assert_eq!(
column_data_to_json(ValueData::IntervalYearMonthValue(12)), column_data_to_json(ValueData::IntervalYearMonthValue(12)),
@@ -1728,6 +1758,12 @@ mod tests {
); );
assert_eq!(Value::Null, ScalarValue::Date32(None).try_into().unwrap()); assert_eq!(Value::Null, ScalarValue::Date32(None).try_into().unwrap());
assert_eq!(
Value::DateTime(DateTime::new(456)),
ScalarValue::Date64(Some(456)).try_into().unwrap()
);
assert_eq!(Value::Null, ScalarValue::Date64(None).try_into().unwrap());
assert_eq!( assert_eq!(
Value::Timestamp(Timestamp::new(1, TimeUnit::Second)), Value::Timestamp(Timestamp::new(1, TimeUnit::Second)),
ScalarValue::TimestampSecond(Some(1), None) ScalarValue::TimestampSecond(Some(1), None)
@@ -1991,6 +2027,10 @@ mod tests {
&ConcreteDataType::date_datatype(), &ConcreteDataType::date_datatype(),
&Value::Date(Date::new(1)), &Value::Date(Date::new(1)),
); );
check_type_and_value(
&ConcreteDataType::datetime_datatype(),
&Value::DateTime(DateTime::new(1)),
);
check_type_and_value( check_type_and_value(
&ConcreteDataType::timestamp_millisecond_datatype(), &ConcreteDataType::timestamp_millisecond_datatype(),
&Value::Timestamp(Timestamp::new_millisecond(1)), &Value::Timestamp(Timestamp::new_millisecond(1)),
@@ -2129,6 +2169,11 @@ mod tests {
serde_json::Value::Number(5000i32.into()), serde_json::Value::Number(5000i32.into()),
to_json(Value::Date(Date::new(5000))) to_json(Value::Date(Date::new(5000)))
); );
assert_eq!(
serde_json::Value::Number(5000i64.into()),
to_json(Value::DateTime(DateTime::new(5000)))
);
assert_eq!( assert_eq!(
serde_json::Value::Number(1.into()), serde_json::Value::Number(1.into()),
to_json(Value::Timestamp(Timestamp::new_millisecond(1))) to_json(Value::Timestamp(Timestamp::new_millisecond(1)))
@@ -2214,6 +2259,7 @@ mod tests {
); );
check_as_value_ref!(Date, Date::new(103)); check_as_value_ref!(Date, Date::new(103));
check_as_value_ref!(DateTime, DateTime::new(1034));
let list = ListValue { let list = ListValue {
items: vec![], items: vec![],
@@ -2245,6 +2291,7 @@ mod tests {
check_as_null!(as_string); check_as_null!(as_string);
check_as_null!(as_boolean); check_as_null!(as_boolean);
check_as_null!(as_date); check_as_null!(as_date);
check_as_null!(as_datetime);
check_as_null!(as_list); check_as_null!(as_list);
macro_rules! check_as_correct { macro_rules! check_as_correct {
@@ -2257,6 +2304,7 @@ mod tests {
check_as_correct!("hello".as_bytes(), Binary, as_binary); check_as_correct!("hello".as_bytes(), Binary, as_binary);
check_as_correct!(true, Boolean, as_boolean); check_as_correct!(true, Boolean, as_boolean);
check_as_correct!(Date::new(123), Date, as_date); check_as_correct!(Date::new(123), Date, as_date);
check_as_correct!(DateTime::new(12), DateTime, as_datetime);
check_as_correct!(Time::new_second(12), Time, as_time); check_as_correct!(Time::new_second(12), Time, as_time);
check_as_correct!(Duration::new_second(12), Duration, as_duration); check_as_correct!(Duration::new_second(12), Duration, as_duration);
let list = ListValue { let list = ListValue {
@@ -2270,6 +2318,7 @@ mod tests {
assert!(wrong_value.as_string().is_err()); assert!(wrong_value.as_string().is_err());
assert!(wrong_value.as_boolean().is_err()); assert!(wrong_value.as_boolean().is_err());
assert!(wrong_value.as_date().is_err()); assert!(wrong_value.as_date().is_err());
assert!(wrong_value.as_datetime().is_err());
assert!(wrong_value.as_list().is_err()); assert!(wrong_value.as_list().is_err());
assert!(wrong_value.as_time().is_err()); assert!(wrong_value.as_time().is_err());
assert!(wrong_value.as_timestamp().is_err()); assert!(wrong_value.as_timestamp().is_err());
@@ -2297,6 +2346,10 @@ mod tests {
"010203" "010203"
); );
assert_eq!(Value::Date(Date::new(0)).to_string(), "1970-01-01"); assert_eq!(Value::Date(Date::new(0)).to_string(), "1970-01-01");
assert_eq!(
Value::DateTime(DateTime::new(0)).to_string(),
"1970-01-01 08:00:00+0800"
);
assert_eq!( assert_eq!(
Value::Timestamp(Timestamp::new(1000, TimeUnit::Millisecond)).to_string(), Value::Timestamp(Timestamp::new(1000, TimeUnit::Millisecond)).to_string(),
"1970-01-01 08:00:01+0800" "1970-01-01 08:00:01+0800"
@@ -2702,6 +2755,7 @@ mod tests {
check_value_ref_size_eq(&ValueRef::String("greptimedb"), 10); check_value_ref_size_eq(&ValueRef::String("greptimedb"), 10);
check_value_ref_size_eq(&ValueRef::Binary(b"greptimedb"), 10); check_value_ref_size_eq(&ValueRef::Binary(b"greptimedb"), 10);
check_value_ref_size_eq(&ValueRef::Date(Date::new(1)), 4); check_value_ref_size_eq(&ValueRef::Date(Date::new(1)), 4);
check_value_ref_size_eq(&ValueRef::DateTime(DateTime::new(1)), 8);
check_value_ref_size_eq(&ValueRef::Timestamp(Timestamp::new_millisecond(1)), 16); check_value_ref_size_eq(&ValueRef::Timestamp(Timestamp::new_millisecond(1)), 16);
check_value_ref_size_eq(&ValueRef::Time(Time::new_millisecond(1)), 16); check_value_ref_size_eq(&ValueRef::Time(Time::new_millisecond(1)), 16);
check_value_ref_size_eq(&ValueRef::IntervalYearMonth(IntervalYearMonth::new(1)), 4); check_value_ref_size_eq(&ValueRef::IntervalYearMonth(IntervalYearMonth::new(1)), 4);
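To make the DateTime additions above concrete, a hedged sketch of the conversions they enable, mirroring the match arms and tests in this file:

// ScalarValue::Date64 now converts into Value::DateTime instead of being rejected.
assert_eq!(
    Value::try_from(ScalarValue::Date64(Some(456))).unwrap(),
    Value::DateTime(DateTime::new(456))
);
// A DateTime value serializes to JSON as its raw millisecond count.
assert_eq!(
    serde_json::Value::try_from(Value::DateTime(DateTime::new(456))).unwrap(),
    serde_json::Value::Number(456i64.into())
);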

View File

@@ -29,6 +29,7 @@ mod binary;
mod boolean; mod boolean;
mod constant; mod constant;
mod date; mod date;
mod datetime;
mod decimal; mod decimal;
mod duration; mod duration;
mod eq; mod eq;
@@ -47,6 +48,7 @@ pub use binary::{BinaryVector, BinaryVectorBuilder};
pub use boolean::{BooleanVector, BooleanVectorBuilder}; pub use boolean::{BooleanVector, BooleanVectorBuilder};
pub use constant::ConstantVector; pub use constant::ConstantVector;
pub use date::{DateVector, DateVectorBuilder}; pub use date::{DateVector, DateVectorBuilder};
pub use datetime::{DateTimeVector, DateTimeVectorBuilder};
pub use decimal::{Decimal128Vector, Decimal128VectorBuilder}; pub use decimal::{Decimal128Vector, Decimal128VectorBuilder};
pub use duration::{ pub use duration::{
DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector, DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector,
@@ -375,7 +377,7 @@ pub mod tests {
// Test Primitive types // Test Primitive types
mutable_primitive_data_type_eq_with_lower!( mutable_primitive_data_type_eq_with_lower!(
Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64, Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64,
Date, Binary, String Date, DateTime, Binary, String
); );
// Test types about time // Test types about time

View File

@@ -0,0 +1,116 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::types::DateTimeType;
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
/// Vector of [`DateTime`](common_time::DateTime)
pub type DateTimeVector = PrimitiveVector<DateTimeType>;
/// Builder for [`DateTimeVector`].
pub type DateTimeVectorBuilder = PrimitiveVectorBuilder<DateTimeType>;
#[cfg(test)]
mod tests {
use std::sync::Arc;
use arrow::array::{Array, PrimitiveArray};
use arrow_array::ArrayRef;
use common_time::timezone::set_default_timezone;
use common_time::DateTime;
use super::*;
use crate::data_type::DataType;
use crate::prelude::{
ConcreteDataType, ScalarVector, ScalarVectorBuilder, Value, ValueRef, Vector, VectorRef,
};
use crate::serialize::Serializable;
#[test]
fn test_datetime_vector() {
set_default_timezone(Some("Asia/Shanghai")).unwrap();
let v = DateTimeVector::new(PrimitiveArray::from(vec![1000, 2000, 3000]));
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
assert_eq!(3, v.len());
assert_eq!("DateTimeVector", v.vector_type_name());
assert_eq!(
&arrow::datatypes::DataType::Date64,
v.to_arrow_array().data_type()
);
assert_eq!(Some(DateTime::new(1000)), v.get_data(0));
assert_eq!(Value::DateTime(DateTime::new(1000)), v.get(0));
assert_eq!(ValueRef::DateTime(DateTime::new(1000)), v.get_ref(0));
let mut iter = v.iter_data();
assert_eq!(Some(DateTime::new(1000)), iter.next().unwrap());
assert_eq!(Some(DateTime::new(2000)), iter.next().unwrap());
assert_eq!(Some(DateTime::new(3000)), iter.next().unwrap());
assert!(!v.is_null(0));
assert_eq!(24, v.memory_size());
if let Value::DateTime(d) = v.get(0) {
assert_eq!(1000, d.val());
} else {
unreachable!()
}
assert_eq!(
"[\"1970-01-01 08:00:01+0800\",\"1970-01-01 08:00:02+0800\",\"1970-01-01 08:00:03+0800\"]",
serde_json::to_string(&v.serialize_to_json().unwrap()).unwrap()
);
}
#[test]
fn test_datetime_vector_builder() {
let mut builder = DateTimeVectorBuilder::with_capacity(3);
builder.push(Some(DateTime::new(1)));
builder.push(None);
builder.push(Some(DateTime::new(-1)));
let v = builder.finish();
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
assert_eq!(Value::DateTime(DateTime::new(1)), v.get(0));
assert_eq!(Value::Null, v.get(1));
assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));
let input = DateTimeVector::from_wrapper_slice([
DateTime::new(1),
DateTime::new(2),
DateTime::new(3),
]);
let mut builder = DateTimeType.create_mutable_vector(3);
builder.push_value_ref(ValueRef::DateTime(DateTime::new(5)));
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
builder.extend_slice_of(&input, 1, 2).unwrap();
assert!(builder
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
.is_err());
let vector = builder.to_vector();
let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice([
DateTime::new(5),
DateTime::new(2),
DateTime::new(3),
]));
assert_eq!(expect, vector);
}
#[test]
fn test_datetime_from_arrow() {
let vector = DateTimeVector::from_wrapper_slice([DateTime::new(1), DateTime::new(2)]);
let arrow: ArrayRef = Arc::new(vector.as_arrow().slice(0, vector.len())) as _;
let vector2 = DateTimeVector::try_from_arrow_array(arrow).unwrap();
assert_eq!(vector, vector2);
}
}

View File

@@ -20,12 +20,12 @@ use crate::data_type::DataType;
use crate::types::{DurationType, TimeType, TimestampType}; use crate::types::{DurationType, TimeType, TimestampType};
use crate::vectors::constant::ConstantVector; use crate::vectors::constant::ConstantVector;
use crate::vectors::{ use crate::vectors::{
BinaryVector, BooleanVector, DateVector, Decimal128Vector, DurationMicrosecondVector, BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector,
DurationMillisecondVector, DurationNanosecondVector, DurationSecondVector, DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector, ListVector, DurationSecondVector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
PrimitiveVector, StringVector, TimeMicrosecondVector, TimeMillisecondVector, IntervalYearMonthVector, ListVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector, TimestampMillisecondVector, TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
TimestampNanosecondVector, TimestampSecondVector, Vector, TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
}; };
use crate::with_match_primitive_type_id; use crate::with_match_primitive_type_id;
@@ -83,6 +83,7 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
Binary(_) | Json(_) | Vector(_) => is_vector_eq!(BinaryVector, lhs, rhs), Binary(_) | Json(_) | Vector(_) => is_vector_eq!(BinaryVector, lhs, rhs),
String(_) => is_vector_eq!(StringVector, lhs, rhs), String(_) => is_vector_eq!(StringVector, lhs, rhs),
Date(_) => is_vector_eq!(DateVector, lhs, rhs), Date(_) => is_vector_eq!(DateVector, lhs, rhs),
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
Timestamp(t) => match t { Timestamp(t) => match t {
TimestampType::Second(_) => { TimestampType::Second(_) => {
is_vector_eq!(TimestampSecondVector, lhs, rhs) is_vector_eq!(TimestampSecondVector, lhs, rhs)
@@ -194,6 +195,7 @@ mod tests {
))); )));
assert_vector_ref_eq(Arc::new(BooleanVector::from(vec![true, false]))); assert_vector_ref_eq(Arc::new(BooleanVector::from(vec![true, false])));
assert_vector_ref_eq(Arc::new(DateVector::from(vec![Some(100), Some(120)]))); assert_vector_ref_eq(Arc::new(DateVector::from(vec![Some(100), Some(120)])));
assert_vector_ref_eq(Arc::new(DateTimeVector::from(vec![Some(100), Some(120)])));
assert_vector_ref_eq(Arc::new(TimestampSecondVector::from_values([100, 120]))); assert_vector_ref_eq(Arc::new(TimestampSecondVector::from_values([100, 120])));
assert_vector_ref_eq(Arc::new(TimestampMillisecondVector::from_values([ assert_vector_ref_eq(Arc::new(TimestampMillisecondVector::from_values([
100, 120, 100, 120,

View File

@@ -31,7 +31,7 @@ use crate::prelude::DataType;
use crate::scalars::{Scalar, ScalarVectorBuilder}; use crate::scalars::{Scalar, ScalarVectorBuilder};
use crate::value::{ListValue, ListValueRef, Value}; use crate::value::{ListValue, ListValueRef, Value};
use crate::vectors::{ use crate::vectors::{
BinaryVector, BooleanVector, ConstantVector, DateVector, Decimal128Vector, BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, Decimal128Vector,
DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector, DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
DurationSecondVector, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector, DurationSecondVector, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector,
Int8Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector, Int8Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector,
@@ -179,6 +179,9 @@ impl Helper {
ScalarValue::Date32(v) => { ScalarValue::Date32(v) => {
ConstantVector::new(Arc::new(DateVector::from(vec![v])), length) ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
} }
ScalarValue::Date64(v) => {
ConstantVector::new(Arc::new(DateTimeVector::from(vec![v])), length)
}
ScalarValue::TimestampSecond(v, _) => { ScalarValue::TimestampSecond(v, _) => {
// Timezone is unimplemented now. // Timezone is unimplemented now.
ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length) ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
@@ -241,8 +244,7 @@ impl Helper {
| ScalarValue::Float16(_) | ScalarValue::Float16(_)
| ScalarValue::Utf8View(_) | ScalarValue::Utf8View(_)
| ScalarValue::BinaryView(_) | ScalarValue::BinaryView(_)
| ScalarValue::Map(_) | ScalarValue::Map(_) => {
| ScalarValue::Date64(_) => {
return error::ConversionSnafu { return error::ConversionSnafu {
from: format!("Unsupported scalar value: {value}"), from: format!("Unsupported scalar value: {value}"),
} }
@@ -284,6 +286,7 @@ impl Helper {
Arc::new(StringVector::try_from_arrow_array(array)?) Arc::new(StringVector::try_from_arrow_array(array)?)
} }
ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?), ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
ArrowDataType::Date64 => Arc::new(DateTimeVector::try_from_arrow_array(array)?),
ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?), ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
ArrowDataType::Timestamp(unit, _) => match unit { ArrowDataType::Timestamp(unit, _) => match unit {
TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?), TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
@@ -359,8 +362,7 @@ impl Helper {
| ArrowDataType::BinaryView | ArrowDataType::BinaryView
| ArrowDataType::Utf8View | ArrowDataType::Utf8View
| ArrowDataType::ListView(_) | ArrowDataType::ListView(_)
| ArrowDataType::LargeListView(_) | ArrowDataType::LargeListView(_) => {
| ArrowDataType::Date64 => {
return error::UnsupportedArrowTypeSnafu { return error::UnsupportedArrowTypeSnafu {
arrow_type: array.as_ref().data_type().clone(), arrow_type: array.as_ref().data_type().clone(),
} }
@@ -409,9 +411,9 @@ impl Helper {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use arrow::array::{ use arrow::array::{
ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, Int32Array, ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray, Time32MillisecondArray, Int32Array, Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray,
Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
}; };
@@ -422,7 +424,7 @@ mod tests {
use common_decimal::Decimal128; use common_decimal::Decimal128;
use common_time::time::Time; use common_time::time::Time;
use common_time::timestamp::TimeUnit; use common_time::timestamp::TimeUnit;
use common_time::{Date, Duration}; use common_time::{Date, DateTime, Duration};
use super::*; use super::*;
use crate::value::Value; use crate::value::Value;
@@ -464,6 +466,16 @@ mod tests {
} }
} }
#[test]
fn test_try_from_scalar_datetime_value() {
let vector = Helper::try_from_scalar_value(ScalarValue::Date64(Some(42)), 3).unwrap();
assert_eq!(ConcreteDataType::datetime_datatype(), vector.data_type());
assert_eq!(3, vector.len());
for i in 0..vector.len() {
assert_eq!(Value::DateTime(DateTime::new(42)), vector.get(i));
}
}
#[test] #[test]
fn test_try_from_scalar_duration_value() { fn test_try_from_scalar_duration_value() {
let vector = let vector =
@@ -594,6 +606,7 @@ mod tests {
check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0])); check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
check_try_into_vector(StringArray::from(vec!["hello", "world"])); check_try_into_vector(StringArray::from(vec!["hello", "world"]));
check_try_into_vector(Date32Array::from(vec![1, 2, 3])); check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
check_try_into_vector(Date64Array::from(vec![1, 2, 3]));
let data = vec![None, Some(vec![Some(6), Some(7)])]; let data = vec![None, Some(vec![Some(6), Some(7)])];
let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data); let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
check_try_into_vector(list_array); check_try_into_vector(list_array);
@@ -721,6 +734,7 @@ mod tests {
check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0])); check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
check_into_and_from(StringArray::from(vec!["hello", "world"])); check_into_and_from(StringArray::from(vec!["hello", "world"]));
check_into_and_from(Date32Array::from(vec![1, 2, 3])); check_into_and_from(Date32Array::from(vec![1, 2, 3]));
check_into_and_from(Date64Array::from(vec![1, 2, 3]));
check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3])); check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3])); check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));
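A short illustrative sketch of what the Helper changes above enable, based on the Date64 arms and tests in this file:

// Illustrative: Arrow Date64 arrays now map to DateTimeVector.
let array: ArrayRef = Arc::new(Date64Array::from(vec![1, 2, 3]));
let vector = Helper::try_into_vector(array).unwrap();
assert_eq!(ConcreteDataType::datetime_datatype(), vector.data_type());
// ScalarValue::Date64 scalars expand into a constant DateTimeVector.
let constant = Helper::try_from_scalar_value(ScalarValue::Date64(Some(42)), 3).unwrap();
assert_eq!(Value::DateTime(DateTime::new(42)), constant.get(0));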

View File

@@ -32,7 +32,7 @@ pub(crate) use filter_non_constant;
mod tests { mod tests {
use std::sync::Arc; use std::sync::Arc;
use common_time::Date; use common_time::{Date, DateTime};
use crate::scalars::ScalarVector; use crate::scalars::ScalarVector;
use crate::timestamp::{ use crate::timestamp::{
@@ -127,6 +127,8 @@ mod tests {
#[test] #[test]
fn test_filter_date_like() { fn test_filter_date_like() {
impl_filter_date_like_test!(DateVector, Date, new); impl_filter_date_like_test!(DateVector, Date, new);
impl_filter_date_like_test!(DateTimeVector, DateTime, new);
impl_filter_date_like_test!(TimestampSecondVector, TimestampSecond, from_native); impl_filter_date_like_test!(TimestampSecondVector, TimestampSecond, from_native);
impl_filter_date_like_test!( impl_filter_date_like_test!(
TimestampMillisecondVector, TimestampMillisecondVector,

View File

@@ -105,7 +105,7 @@ pub(crate) fn find_unique_constant(
mod tests { mod tests {
use std::sync::Arc; use std::sync::Arc;
use common_time::Date; use common_time::{Date, DateTime};
use super::*; use super::*;
use crate::timestamp::*; use crate::timestamp::*;
@@ -358,6 +358,7 @@ mod tests {
#[test] #[test]
fn test_find_unique_date_like() { fn test_find_unique_date_like() {
impl_find_unique_date_like_test!(DateVector, Date, new); impl_find_unique_date_like_test!(DateVector, Date, new);
impl_find_unique_date_like_test!(DateTimeVector, DateTime, new);
impl_find_unique_date_like_test!(TimestampSecondVector, TimestampSecond, from); impl_find_unique_date_like_test!(TimestampSecondVector, TimestampSecond, from);
impl_find_unique_date_like_test!(TimestampMillisecondVector, TimestampMillisecond, from); impl_find_unique_date_like_test!(TimestampMillisecondVector, TimestampMillisecond, from);
impl_find_unique_date_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from); impl_find_unique_date_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from);

View File

@@ -41,7 +41,7 @@ mod tests {
use std::sync::Arc; use std::sync::Arc;
use common_time::timestamp::TimeUnit; use common_time::timestamp::TimeUnit;
use common_time::{Date, Timestamp}; use common_time::{Date, DateTime, Timestamp};
use paste::paste; use paste::paste;
use super::*; use super::*;
@@ -161,6 +161,8 @@ mod tests {
#[test] #[test]
fn test_replicate_date_like() { fn test_replicate_date_like() {
impl_replicate_date_like_test!(DateVector, Date, new); impl_replicate_date_like_test!(DateVector, Date, new);
impl_replicate_date_like_test!(DateTimeVector, DateTime, new);
impl_replicate_timestamp_test!(Second); impl_replicate_timestamp_test!(Second);
impl_replicate_timestamp_test!(Millisecond); impl_replicate_timestamp_test!(Millisecond);
impl_replicate_timestamp_test!(Microsecond); impl_replicate_timestamp_test!(Microsecond);

View File

@@ -33,7 +33,7 @@ mod tests {
use std::sync::Arc; use std::sync::Arc;
use arrow::array::{PrimitiveArray, UInt32Array}; use arrow::array::{PrimitiveArray, UInt32Array};
use common_time::Date; use common_time::{Date, DateTime};
use crate::prelude::VectorRef; use crate::prelude::VectorRef;
use crate::scalars::ScalarVector; use crate::scalars::ScalarVector;
@@ -105,6 +105,7 @@ mod tests {
// test date like type // test date like type
take_time_like_test!(DateVector, Date, new); take_time_like_test!(DateVector, Date, new);
take_time_like_test!(DateTimeVector, DateTime, new);
take_time_like_test!(TimestampSecondVector, TimestampSecond, from_native); take_time_like_test!(TimestampSecondVector, TimestampSecond, from_native);
take_time_like_test!( take_time_like_test!(
TimestampMillisecondVector, TimestampMillisecondVector,

View File

@@ -46,7 +46,6 @@ get-size2 = "0.1.2"
greptime-proto.workspace = true greptime-proto.workspace = true
# This fork of hydroflow is simply for keeping our dependency in our org, and pin the version # This fork of hydroflow is simply for keeping our dependency in our org, and pin the version
# otherwise it is the same with upstream repo # otherwise it is the same with upstream repo
chrono.workspace = true
http.workspace = true http.workspace = true
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" } hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" }
itertools.workspace = true itertools.workspace = true

View File

@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
//! impl `FlowNode` trait for FlowNodeManager so standalone can call them //! impl `FlowNode` trait for FlowNodeManager so standalone can call them
use std::collections::HashMap; use std::collections::HashMap;
use api::v1::flow::{ use api::v1::flow::{

View File

@@ -21,31 +21,19 @@ use crate::FlowWorkerManager;
impl FlowWorkerManager { impl FlowWorkerManager {
pub async fn gen_state_report(&self) -> FlowStat { pub async fn gen_state_report(&self) -> FlowStat {
let mut full_report = BTreeMap::new(); let mut full_report = BTreeMap::new();
let mut last_exec_time_map = BTreeMap::new();
for worker in self.worker_handles.iter() { for worker in self.worker_handles.iter() {
match worker.get_state_size().await { match worker.get_state_size().await {
Ok(state_size) => { Ok(state_size) => {
full_report.extend(state_size.into_iter().map(|(k, v)| (k as u32, v))); full_report.extend(state_size.into_iter().map(|(k, v)| (k as u32, v)))
} }
Err(err) => { Err(err) => {
common_telemetry::error!(err; "Get flow stat size error"); common_telemetry::error!(err; "Get flow stat size error");
} }
} }
match worker.get_last_exec_time_map().await {
Ok(last_exec_time) => {
last_exec_time_map
.extend(last_exec_time.into_iter().map(|(k, v)| (k as u32, v)));
}
Err(err) => {
common_telemetry::error!(err; "Get last exec time error");
}
}
} }
FlowStat { FlowStat {
state_size: full_report, state_size: full_report,
last_exec_time_map,
} }
} }
} }

View File

@@ -41,7 +41,7 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
.build() .build()
.unwrap(); .unwrap();
let meta = TableMetaBuilder::empty() let meta = TableMetaBuilder::default()
.schema(Arc::new(schema)) .schema(Arc::new(schema))
.primary_key_indices(vec![0]) .primary_key_indices(vec![0])
.engine("engine") .engine("engine")

View File

@@ -98,10 +98,6 @@ impl<'subgraph> ActiveDataflowState<'subgraph> {
self.state.set_current_ts(ts); self.state.set_current_ts(ts);
} }
pub fn set_last_exec_time(&mut self, ts: repr::Timestamp) {
self.state.set_last_exec_time(ts);
}
/// Run all available subgraph /// Run all available subgraph
/// ///
/// return true if any subgraph actually executed /// return true if any subgraph actually executed
@@ -216,21 +212,6 @@ impl WorkerHandle {
.build() .build()
}) })
} }
pub async fn get_last_exec_time_map(&self) -> Result<BTreeMap<FlowId, i64>, Error> {
let ret = self
.itc_client
.call_with_resp(Request::QueryLastExecTimeMap)
.await?;
ret.into_query_last_exec_time_map().map_err(|ret| {
InternalSnafu {
reason: format!(
"Flow Node/Worker get_last_exec_time_map failed, expect Response::QueryLastExecTimeMap, found {ret:?}"
),
}
.build()
})
}
} }
impl Drop for WorkerHandle { impl Drop for WorkerHandle {
@@ -354,7 +335,6 @@ impl<'s> Worker<'s> {
pub fn run_tick(&mut self, now: repr::Timestamp) { pub fn run_tick(&mut self, now: repr::Timestamp) {
for (_flow_id, task_state) in self.task_states.iter_mut() { for (_flow_id, task_state) in self.task_states.iter_mut() {
task_state.set_current_ts(now); task_state.set_current_ts(now);
task_state.set_last_exec_time(now);
task_state.run_available(); task_state.run_available();
} }
} }
@@ -415,15 +395,6 @@ impl<'s> Worker<'s> {
} }
Some(Response::QueryStateSize { result: ret }) Some(Response::QueryStateSize { result: ret })
} }
Request::QueryLastExecTimeMap => {
let mut ret = BTreeMap::new();
for (flow_id, task_state) in self.task_states.iter() {
if let Some(last_exec_time) = task_state.state.last_exec_time() {
ret.insert(*flow_id, last_exec_time);
}
}
Some(Response::QueryLastExecTimeMap { result: ret })
}
}; };
Ok(ret) Ok(ret)
} }
@@ -456,7 +427,6 @@ pub enum Request {
}, },
Shutdown, Shutdown,
QueryStateSize, QueryStateSize,
QueryLastExecTimeMap,
} }
#[derive(Debug, EnumAsInner)] #[derive(Debug, EnumAsInner)]
@@ -476,10 +446,6 @@ enum Response {
/// each flow tasks' state size /// each flow tasks' state size
result: BTreeMap<FlowId, usize>, result: BTreeMap<FlowId, usize>,
}, },
QueryLastExecTimeMap {
/// each flow tasks' last execution time
result: BTreeMap<FlowId, i64>,
},
} }
fn create_inter_thread_call() -> (InterThreadCallClient, InterThreadCallServer) { fn create_inter_thread_call() -> (InterThreadCallClient, InterThreadCallServer) {

View File

@@ -290,9 +290,7 @@ mod test {
let mfp = MapFilterProject::new(1) let mfp = MapFilterProject::new(1)
.filter(vec![ .filter(vec![
ScalarExpr::Column(0) ScalarExpr::Column(0)
.call_unary(expr::UnaryFunc::Cast( .call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
ConcreteDataType::timestamp_microsecond_datatype(),
))
.call_binary( .call_binary(
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now), ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
BinaryFunc::Gte, BinaryFunc::Gte,
@@ -302,9 +300,7 @@ mod test {
ScalarExpr::literal(4i64.into(), ConcreteDataType::int64_datatype()), ScalarExpr::literal(4i64.into(), ConcreteDataType::int64_datatype()),
BinaryFunc::SubInt64, BinaryFunc::SubInt64,
) )
.call_unary(expr::UnaryFunc::Cast( .call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
ConcreteDataType::timestamp_microsecond_datatype(),
))
.call_binary( .call_binary(
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now), ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
BinaryFunc::Lt, BinaryFunc::Lt,

View File

@@ -45,8 +45,6 @@ pub struct DataflowState {
arrange_used: Vec<ArrangeHandler>, arrange_used: Vec<ArrangeHandler>,
/// the time arrangement need to be expired after a certain time in milliseconds /// the time arrangement need to be expired after a certain time in milliseconds
expire_after: Option<Timestamp>, expire_after: Option<Timestamp>,
/// the last time each subgraph executed
last_exec_time: Option<Timestamp>,
} }
impl DataflowState { impl DataflowState {
@@ -116,14 +114,6 @@ impl DataflowState {
pub fn get_state_size(&self) -> usize { pub fn get_state_size(&self) -> usize {
self.arrange_used.iter().map(|x| x.read().get_size()).sum() self.arrange_used.iter().map(|x| x.read().get_size()).sum()
} }
pub fn set_last_exec_time(&mut self, time: Timestamp) {
self.last_exec_time = Some(time);
}
pub fn last_exec_time(&self) -> Option<Timestamp> {
self.last_exec_time
}
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]

View File

@@ -479,7 +479,7 @@ impl ScalarUDFImpl for TumbleExpand {
match (arg_types.first(), arg_types.get(1), arg_types.get(2)) { match (arg_types.first(), arg_types.get(1), arg_types.get(2)) {
(Some(ts), Some(window), opt) => { (Some(ts), Some(window), opt) => {
use arrow_schema::DataType::*; use arrow_schema::DataType::*;
if !matches!(ts, Date32 | Timestamp(_, _)) { if !matches!(ts, Date32 | Date64 | Timestamp(_, _)) {
return Err(DataFusionError::Plan( return Err(DataFusionError::Plan(
format!("Expect timestamp column as first arg for tumble_start, found {:?}", ts) format!("Expect timestamp column as first arg for tumble_start, found {:?}", ts)
)); ));
@@ -491,7 +491,7 @@ impl ScalarUDFImpl for TumbleExpand {
} }
if let Some(start_time) = opt{ if let Some(start_time) = opt{
if !matches!(start_time, Utf8 | Date32 | Timestamp(_, _)){ if !matches!(start_time, Utf8 | Date32 | Date64 | Timestamp(_, _)){
return Err(DataFusionError::Plan( return Err(DataFusionError::Plan(
format!("Expect start_time to either be date, timestamp or string, found {:?}", start_time) format!("Expect start_time to either be date, timestamp or string, found {:?}", start_time)
)); ));

View File

@@ -554,6 +554,8 @@ fn get_ts_as_millisecond(arg: Value) -> Result<repr::Timestamp, EvalError> {
ts.convert_to(TimeUnit::Millisecond) ts.convert_to(TimeUnit::Millisecond)
.context(OverflowSnafu)? .context(OverflowSnafu)?
.value() .value()
} else if let Some(ts) = arg.as_datetime() {
ts.val()
} else { } else {
InvalidArgumentSnafu { InvalidArgumentSnafu {
reason: "Expect input to be timestamp or datetime type", reason: "Expect input to be timestamp or datetime type",

View File

@@ -759,7 +759,7 @@ fn ty_eq_without_precision(left: ConcreteDataType, right: ConcreteDataType) -> b
#[allow(clippy::too_many_lines)] #[allow(clippy::too_many_lines)]
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use common_time::Timestamp; use common_time::DateTime;
use super::*; use super::*;
@@ -813,13 +813,13 @@ mod test {
( (
AggregateFunc::MaxDateTime, AggregateFunc::MaxDateTime,
vec![ vec![
(Value::Timestamp(Timestamp::from(0)), 1), (Value::DateTime(DateTime::from(0)), 1),
(Value::Timestamp(Timestamp::from(1)), 1), (Value::DateTime(DateTime::from(1)), 1),
(Value::Null, 1), (Value::Null, 1),
], ],
( (
Value::Timestamp(Timestamp::from(1)), Value::DateTime(DateTime::from(1)),
vec![Value::Timestamp(Timestamp::from(1)), 2i64.into()], vec![Value::DateTime(DateTime::from(1)), 2i64.into()],
), ),
), ),
( (

View File

@@ -267,7 +267,7 @@ impl AggregateFunc {
MaxBool => (boolean_datatype, Max), MaxBool => (boolean_datatype, Max),
MaxString => (string_datatype, Max), MaxString => (string_datatype, Max),
MaxDate => (date_datatype, Max), MaxDate => (date_datatype, Max),
MaxDateTime => (timestamp_microsecond_datatype, Max), MaxDateTime => (datetime_datatype, Max),
MaxTimestamp => (timestamp_second_datatype, Max), MaxTimestamp => (timestamp_second_datatype, Max),
MaxTime => (time_second_datatype, Max), MaxTime => (time_second_datatype, Max),
MaxDuration => (duration_second_datatype, Max), MaxDuration => (duration_second_datatype, Max),
@@ -283,7 +283,7 @@ impl AggregateFunc {
MinBool => (boolean_datatype, Min), MinBool => (boolean_datatype, Min),
MinString => (string_datatype, Min), MinString => (string_datatype, Min),
MinDate => (date_datatype, Min), MinDate => (date_datatype, Min),
MinDateTime => (timestamp_microsecond_datatype, Min), MinDateTime => (datetime_datatype, Min),
MinTimestamp => (timestamp_second_datatype, Min), MinTimestamp => (timestamp_second_datatype, Min),
MinTime => (time_second_datatype, Min), MinTime => (time_second_datatype, Min),
MinDuration => (duration_second_datatype, Min), MinDuration => (duration_second_datatype, Min),

View File

@@ -154,18 +154,14 @@ impl HeartbeatTask {
}; };
let flow_stat = latest_report let flow_stat = latest_report
.as_ref() .as_ref()
.map(|report| api::v1::meta::FlowStat { .map(|report| {
flow_stat_size: report report
.state_size .state_size
.iter() .iter()
.map(|(k, v)| (*k, *v as u64)) .map(|(k, v)| (*k, *v as u64))
.collect(), .collect()
flow_last_exec_time_map: report })
.last_exec_time_map .map(|f| api::v1::meta::FlowStat { flow_stat_size: f });
.iter()
.map(|(k, v)| (*k, *v))
.collect(),
});
Some(HeartbeatRequest { Some(HeartbeatRequest {
mailbox_message, mailbox_message,

View File

@@ -68,10 +68,13 @@ pub fn value_to_internal_ts(value: Value) -> Result<i64, EvalError> {
let ty = arg.data_type(); let ty = arg.data_type();
matches!( matches!(
ty, ty,
ConcreteDataType::Date(..) | ConcreteDataType::Timestamp(..) ConcreteDataType::Date(..)
| ConcreteDataType::DateTime(..)
| ConcreteDataType::Timestamp(..)
) )
}; };
match value { match value {
Value::DateTime(ts) => Ok(ts.val()),
Value::Int64(ts) => Ok(ts), Value::Int64(ts) => Ok(ts),
arg if is_supported_time_type(&arg) => { arg if is_supported_time_type(&arg) => {
let arg_ty = arg.data_type(); let arg_ty = arg.data_type();
@@ -211,7 +214,7 @@ impl From<Row> for ProtoRow {
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use common_time::{Date, Timestamp}; use common_time::{Date, DateTime};
use super::*; use super::*;
@@ -241,7 +244,7 @@ mod test {
{ {
let a = Value::from(1i32); let a = Value::from(1i32);
let b = Value::from(1i64); let b = Value::from(1i64);
let c = Value::Timestamp(Timestamp::new_millisecond(1i64)); let c = Value::DateTime(DateTime::new(1i64));
let d = Value::from(1.0); let d = Value::from(1.0);
assert!(value_to_internal_ts(a).is_err()); assert!(value_to_internal_ts(a).is_err());


@@ -238,13 +238,6 @@ pub enum Error {
source: servers::error::Error, source: servers::error::Error,
}, },
#[snafu(display("Failed to create logical plan for prometheus label values query"))]
PrometheusLabelValuesQueryPlan {
#[snafu(implicit)]
location: Location,
source: query::promql::error::Error,
},
#[snafu(display("Failed to describe schema for given statement"))] #[snafu(display("Failed to describe schema for given statement"))]
DescribeStatement { DescribeStatement {
#[snafu(implicit)] #[snafu(implicit)]
@@ -373,8 +366,6 @@ impl ErrorExt for Error {
| Error::PrometheusMetricNamesQueryPlan { source, .. } | Error::PrometheusMetricNamesQueryPlan { source, .. }
| Error::ExecutePromql { source, .. } => source.status_code(), | Error::ExecutePromql { source, .. } => source.status_code(),
Error::PrometheusLabelValuesQueryPlan { source, .. } => source.status_code(),
Error::CollectRecordbatch { .. } => StatusCode::EngineExecuteQuery, Error::CollectRecordbatch { .. } => StatusCode::EngineExecuteQuery,
Error::SqlExecIntercepted { source, .. } => source.status_code(), Error::SqlExecIntercepted { source, .. } => source.status_code(),


@@ -26,7 +26,6 @@ mod region_query;
pub mod standalone; pub mod standalone;
use std::sync::Arc; use std::sync::Arc;
use std::time::SystemTime;
use async_trait::async_trait; use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq}; use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
@@ -472,21 +471,6 @@ impl PrometheusHandler for Instance {
.context(ExecuteQuerySnafu) .context(ExecuteQuerySnafu)
} }
async fn query_label_values(
&self,
metric: String,
label_name: String,
matchers: Vec<Matcher>,
start: SystemTime,
end: SystemTime,
ctx: &QueryContextRef,
) -> server_error::Result<Vec<String>> {
self.handle_query_label_values(metric, label_name, matchers, start, end, ctx)
.await
.map_err(BoxedError::new)
.context(ExecuteQuerySnafu)
}
fn catalog_manager(&self) -> CatalogManagerRef { fn catalog_manager(&self) -> CatalogManagerRef {
self.catalog_manager.clone() self.catalog_manager.clone()
} }


@@ -133,7 +133,6 @@ impl FrontendBuilder {
.context(error::CacheRequiredSnafu { .context(error::CacheRequiredSnafu {
name: TABLE_FLOWNODE_SET_CACHE_NAME, name: TABLE_FLOWNODE_SET_CACHE_NAME,
})?; })?;
let inserter = Arc::new(Inserter::new( let inserter = Arc::new(Inserter::new(
self.catalog_manager.clone(), self.catalog_manager.clone(),
partition_manager.clone(), partition_manager.clone(),


@@ -12,26 +12,20 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::time::SystemTime;
use catalog::information_schema::TABLES; use catalog::information_schema::TABLES;
use client::OutputData; use client::OutputData;
use common_catalog::consts::INFORMATION_SCHEMA_NAME; use common_catalog::consts::INFORMATION_SCHEMA_NAME;
use common_catalog::format_full_table_name;
use common_recordbatch::util; use common_recordbatch::util;
use common_telemetry::tracing; use common_telemetry::tracing;
use datatypes::prelude::Value; use datatypes::prelude::Value;
use promql_parser::label::{Matcher, Matchers}; use promql_parser::label::Matcher;
use query::promql;
use query::promql::planner::PromPlanner;
use servers::prometheus; use servers::prometheus;
use session::context::QueryContextRef; use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use crate::error::{ use crate::error::{
CatalogSnafu, CollectRecordbatchSnafu, ExecLogicalPlanSnafu, CatalogSnafu, CollectRecordbatchSnafu, ExecLogicalPlanSnafu,
PrometheusLabelValuesQueryPlanSnafu, PrometheusMetricNamesQueryPlanSnafu, ReadTableSnafu, PrometheusMetricNamesQueryPlanSnafu, ReadTableSnafu, Result, TableNotFoundSnafu,
Result, TableNotFoundSnafu,
}; };
use crate::instance::Instance; use crate::instance::Instance;
@@ -102,77 +96,4 @@ impl Instance {
Ok(results) Ok(results)
} }
/// Handles label values query request, returns the values.
#[tracing::instrument(skip_all)]
pub(crate) async fn handle_query_label_values(
&self,
metric: String,
label_name: String,
matchers: Vec<Matcher>,
start: SystemTime,
end: SystemTime,
ctx: &QueryContextRef,
) -> Result<Vec<String>> {
let table_schema = ctx.current_schema();
let table = self
.catalog_manager
.table(ctx.current_catalog(), &table_schema, &metric, Some(ctx))
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: format_full_table_name(ctx.current_catalog(), &table_schema, &metric),
})?;
let dataframe = self
.query_engine
.read_table(table.clone())
.with_context(|_| ReadTableSnafu {
table_name: format_full_table_name(ctx.current_catalog(), &table_schema, &metric),
})?;
let scan_plan = dataframe.into_logical_plan();
let filter_conditions =
PromPlanner::matchers_to_expr(Matchers::new(matchers), scan_plan.schema())
.context(PrometheusLabelValuesQueryPlanSnafu)?;
let logical_plan = promql::label_values::rewrite_label_values_query(
table,
scan_plan,
filter_conditions,
label_name,
start,
end,
)
.context(PrometheusLabelValuesQueryPlanSnafu)?;
let results = self
.query_engine
.execute(logical_plan, ctx.clone())
.await
.context(ExecLogicalPlanSnafu)?;
let batches = match results.data {
OutputData::Stream(stream) => util::collect(stream)
.await
.context(CollectRecordbatchSnafu)?,
OutputData::RecordBatches(rbs) => rbs.take(),
_ => unreachable!("should not happen"),
};
let mut results = Vec::with_capacity(batches.iter().map(|b| b.num_rows()).sum());
for batch in batches {
// Only one column in the results, ensured by `prometheus::label_values_matchers_to_plan`.
let names = batch.column(0);
for i in 0..names.len() {
let Value::String(name) = names.get(i) else {
unreachable!();
};
results.push(name.into_string());
}
}
Ok(results)
}
} }


@@ -29,7 +29,6 @@ prost.workspace = true
puffin.workspace = true puffin.workspace = true
regex.workspace = true regex.workspace = true
regex-automata.workspace = true regex-automata.workspace = true
roaring = "0.10"
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
snafu.workspace = true snafu.workspace = true


@@ -1,868 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::io;
use std::ops::RangeInclusive;
use common_base::BitVec;
/// `BitmapType` enumerates how bitmaps are encoded within the inverted index.
pub use greptime_proto::v1::index::BitmapType;
use roaring::RoaringBitmap;
/// A bitmap representation supporting both BitVec and RoaringBitmap formats.
///
/// This enum provides unified bitmap operations while allowing efficient storage
/// in different formats. The implementation automatically handles type conversions
/// when performing operations between different formats.
///
/// # Examples
///
/// Creating a new Roaring bitmap:
/// ```
/// use bitmap::Bitmap;
/// let bitmap = Bitmap::new_roaring();
/// assert!(bitmap.is_empty());
/// ```
///
/// Creating a full BitVec bitmap:
/// ```
/// use bitmap::Bitmap;
/// let bitmap = Bitmap::full_bitvec(10);
/// assert_eq!(bitmap.count_ones(), 10);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub enum Bitmap {
Roaring(RoaringBitmap),
BitVec(BitVec),
}
impl Bitmap {
/// Creates a new empty BitVec-based bitmap.
pub fn new_bitvec() -> Self {
Bitmap::BitVec(BitVec::EMPTY)
}
/// Creates a new empty RoaringBitmap-based bitmap.
pub fn new_roaring() -> Self {
Bitmap::Roaring(RoaringBitmap::new())
}
/// Creates a full BitVec-based bitmap with all bits set to 1.
///
/// # Arguments
/// * `size` - The number of bits to allocate and set
pub fn full_bitvec(size: usize) -> Self {
Bitmap::BitVec(BitVec::repeat(true, size))
}
/// Creates a full RoaringBitmap-based bitmap with bits 0..size set to 1.
///
/// # Arguments
/// * `size` - The exclusive upper bound for the bit range
pub fn full_roaring(size: usize) -> Self {
let mut roaring = RoaringBitmap::new();
roaring.insert_range(0..size as u32);
Bitmap::Roaring(roaring)
}
/// Returns the number of bits set to 1 in the bitmap.
pub fn count_ones(&self) -> usize {
match self {
Bitmap::BitVec(bitvec) => bitvec.count_ones(),
Bitmap::Roaring(roaring) => roaring.len() as _,
}
}
/// Checks if the bitmap contains no set bits.
pub fn is_empty(&self) -> bool {
match self {
Bitmap::BitVec(bitvec) => bitvec.is_empty(),
Bitmap::Roaring(roaring) => roaring.is_empty(),
}
}
/// Inserts a range of bits into the bitmap.
///
/// # Arguments
/// * `range` - Inclusive range of bits to set
pub fn insert_range(&mut self, range: RangeInclusive<usize>) {
match self {
Bitmap::BitVec(bitvec) => {
if *range.end() >= bitvec.len() {
bitvec.resize(range.end() + 1, false);
}
for i in range {
bitvec.set(i, true);
}
}
Bitmap::Roaring(roaring) => {
let range = *range.start() as u32..=*range.end() as u32;
roaring.insert_range(range);
}
}
}
/// Serializes the bitmap into a byte buffer using the specified format.
///
/// # Arguments
/// * `serialize_type` - Target format for serialization
/// * `writer` - Output writer to write the serialized data
pub fn serialize_into(
&self,
serialize_type: BitmapType,
mut writer: impl io::Write,
) -> io::Result<()> {
match (self, serialize_type) {
(Bitmap::BitVec(bitvec), BitmapType::BitVec) => {
writer.write_all(bitvec.as_raw_slice())?;
}
(Bitmap::Roaring(roaring), BitmapType::Roaring) => {
roaring.serialize_into(writer)?;
}
(Bitmap::BitVec(bitvec), BitmapType::Roaring) => {
let bitmap = Bitmap::bitvec_to_roaring(bitvec.clone());
bitmap.serialize_into(writer)?;
}
(Bitmap::Roaring(roaring), BitmapType::BitVec) => {
let bitvec = Bitmap::roaring_to_bitvec(roaring);
writer.write_all(bitvec.as_raw_slice())?;
}
}
Ok(())
}
/// Computes the size of the serialized bitmap in bytes.
///
/// # Arguments
/// * `bitmap_type` - Format of data to be serialized
pub fn serialized_size(&self, bitmap_type: BitmapType) -> usize {
match (self, bitmap_type) {
(Bitmap::BitVec(bitvec), BitmapType::BitVec) => bitvec.as_raw_slice().len(),
(Bitmap::Roaring(roaring), BitmapType::Roaring) => roaring.serialized_size(),
(Bitmap::BitVec(bitvec), BitmapType::Roaring) => {
let bitmap = Bitmap::bitvec_to_roaring(bitvec.clone());
bitmap.serialized_size()
}
(Bitmap::Roaring(roaring), BitmapType::BitVec) => {
let bitvec = Bitmap::roaring_to_bitvec(roaring);
bitvec.as_raw_slice().len()
}
}
}
/// Deserializes a bitmap from a byte buffer.
///
/// # Arguments
/// * `buf` - Input buffer containing serialized data
/// * `bitmap_type` - Format of the serialized data
pub fn deserialize_from(buf: &[u8], bitmap_type: BitmapType) -> std::io::Result<Self> {
match bitmap_type {
BitmapType::BitVec => {
let bitvec = BitVec::from_slice(buf);
Ok(Bitmap::BitVec(bitvec))
}
BitmapType::Roaring => {
let roaring = RoaringBitmap::deserialize_from(buf)?;
Ok(Bitmap::Roaring(roaring))
}
}
}
/// Computes the union with another bitmap (in-place).
///
/// If the other bitmap is a different type, it will be converted to match
/// the current bitmap's type.
pub fn union(&mut self, other: Self) {
if self.is_empty() {
*self = other;
return;
}
match (self, other) {
(Bitmap::BitVec(bitvec1), bitmap) => {
let bitvec2 = bitmap.into_bitvec();
if bitvec1.len() > bitvec2.len() {
*bitvec1 |= bitvec2
} else {
*bitvec1 = bitvec2 | &*bitvec1;
}
}
(Bitmap::Roaring(roaring1), bitmap) => {
let roaring2 = bitmap.into_roaring();
*roaring1 |= roaring2;
}
}
}
/// Computes the intersection with another bitmap (in-place).
///
/// If the other bitmap is a different type, it will be converted to match
/// the current bitmap's type.
pub fn intersect(&mut self, other: Self) {
match (self, other) {
(Bitmap::BitVec(bitvec1), bitmap) => {
let mut bitvec2 = bitmap.into_bitvec();
let len = (bitvec1.len() - bitvec1.trailing_zeros())
.min(bitvec2.len() - bitvec2.trailing_zeros());
bitvec1.truncate(len);
bitvec2.truncate(len);
*bitvec1 &= bitvec2;
}
(Bitmap::Roaring(roaring1), bitmap) => {
let roaring2 = bitmap.into_roaring();
*roaring1 &= roaring2;
}
}
}
/// Returns an iterator over the indices of set bits.
pub fn iter_ones(&self) -> Box<dyn Iterator<Item = usize> + '_> {
match self {
Bitmap::BitVec(bitvec) => Box::new(bitvec.iter_ones()),
Bitmap::Roaring(roaring) => Box::new(roaring.iter().map(|x| x as usize)),
}
}
/// Creates a bitmap from bytes in LSB0 (least significant bit first) order.
///
/// # Arguments
/// * `bytes` - Input bytes in LSB0 order
/// * `bitmap_type` - Type of bitmap to create
pub fn from_lsb0_bytes(bytes: &[u8], bitmap_type: BitmapType) -> Self {
match bitmap_type {
BitmapType::BitVec => {
let bitvec = BitVec::from_slice(bytes);
Bitmap::BitVec(bitvec)
}
BitmapType::Roaring => {
let roaring = RoaringBitmap::from_lsb0_bytes(0, bytes);
Bitmap::Roaring(roaring)
}
}
}
/// Computes memory usage of the bitmap in bytes.
pub fn memory_usage(&self) -> usize {
match self {
Bitmap::BitVec(bitvec) => bitvec.capacity(),
Bitmap::Roaring(roaring) => {
let stat = roaring.statistics();
(stat.n_bytes_array_containers
+ stat.n_bytes_bitset_containers
+ stat.n_bytes_run_containers) as usize
}
}
}
fn into_bitvec(self) -> BitVec {
match self {
Bitmap::BitVec(bitvec) => bitvec,
Bitmap::Roaring(roaring) => Self::roaring_to_bitvec(&roaring),
}
}
fn into_roaring(self) -> RoaringBitmap {
match self {
Bitmap::Roaring(roaring) => roaring,
Bitmap::BitVec(bitvec) => Self::bitvec_to_roaring(bitvec),
}
}
fn roaring_to_bitvec(roaring: &RoaringBitmap) -> BitVec {
let max_value = roaring.max().unwrap_or(0);
let mut bitvec = BitVec::repeat(false, max_value as usize + 1);
for i in roaring {
bitvec.set(i as usize, true);
}
bitvec
}
fn bitvec_to_roaring(mut bitvec: BitVec) -> RoaringBitmap {
bitvec.resize(bitvec.capacity(), false);
RoaringBitmap::from_lsb0_bytes(0, bitvec.as_raw_slice())
}
}
impl Default for Bitmap {
fn default() -> Self {
Bitmap::new_roaring()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_full_bitmaps() {
let bv = Bitmap::full_bitvec(10);
assert_eq!(bv.count_ones(), 10);
let rb = Bitmap::full_roaring(10);
assert_eq!(rb.count_ones(), 10);
}
#[test]
fn test_serialization_roundtrip() {
let original = Bitmap::full_roaring(100);
let mut buf = Vec::new();
// Serialize as Roaring
original
.serialize_into(BitmapType::Roaring, &mut buf)
.unwrap();
let deserialized = Bitmap::deserialize_from(&buf, BitmapType::Roaring).unwrap();
assert_eq!(original, deserialized);
// Serialize as BitVec
buf.clear();
original
.serialize_into(BitmapType::BitVec, &mut buf)
.unwrap();
let deserialized = Bitmap::deserialize_from(&buf, BitmapType::BitVec).unwrap();
assert_eq!(original.count_ones(), deserialized.count_ones());
}
#[test]
fn test_union_fulls() {
// Test BitVec union
let mut bv1 = Bitmap::full_bitvec(3); // 0-2: 111
let bv2 = Bitmap::full_bitvec(5); // 0-4: 11111
bv1.union(bv2);
assert_eq!(bv1.count_ones(), 5);
let mut bv1 = Bitmap::full_bitvec(5); // 0-4: 11111
let bv2 = Bitmap::full_bitvec(3); // 0-2: 111
bv1.union(bv2);
assert_eq!(bv1.count_ones(), 5);
// Test Roaring union
let mut rb1 = Bitmap::full_roaring(3); // 0-2: 111
let rb2 = Bitmap::full_roaring(5); // 0-4: 11111
rb1.union(rb2);
assert_eq!(rb1.count_ones(), 5);
let mut rb1 = Bitmap::full_roaring(5); // 0-4: 11111
let rb2 = Bitmap::full_roaring(3); // 0-2: 111
rb1.union(rb2);
assert_eq!(rb1.count_ones(), 5);
// Test cross-type union
let mut rb = Bitmap::full_roaring(5); // 0-4: 11111
let bv = Bitmap::full_bitvec(3); // 0-2: 111
rb.union(bv);
assert_eq!(rb.count_ones(), 5);
let mut bv = Bitmap::full_bitvec(5); // 0-4: 11111
let rb = Bitmap::full_roaring(3); // 0-2: 111
bv.union(rb);
assert_eq!(bv.count_ones(), 5);
let mut rb = Bitmap::full_roaring(3); // 0-2: 111
let bv = Bitmap::full_bitvec(5); // 0-4: 11111
rb.union(bv);
assert_eq!(rb.count_ones(), 5);
let mut bv = Bitmap::full_bitvec(3); // 0-2: 111
let rb = Bitmap::full_roaring(5); // 0-4: 11111
bv.union(rb);
assert_eq!(bv.count_ones(), 5);
}
#[test]
fn test_union_bitvec() {
let mut bv1 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
let bv2 = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec);
bv1.union(bv2);
assert_eq!(
bv1,
Bitmap::from_lsb0_bytes(&[0b11111111], BitmapType::BitVec)
);
// Test different lengths
let mut bv1 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
let bv2 = Bitmap::from_lsb0_bytes(&[0b01010101, 0b00000001], BitmapType::BitVec);
bv1.union(bv2);
assert_eq!(
bv1,
Bitmap::from_lsb0_bytes(&[0b11111111, 0b00000001], BitmapType::BitVec)
);
let mut bv1 = Bitmap::from_lsb0_bytes(&[0b10101010, 0b00000001], BitmapType::BitVec);
let bv2 = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec);
bv1.union(bv2);
assert_eq!(
bv1,
Bitmap::from_lsb0_bytes(&[0b11111111, 0b00000001], BitmapType::BitVec)
);
// Test empty bitmaps
let mut bv1 = Bitmap::new_bitvec();
let bv2 = Bitmap::new_bitvec();
bv1.union(bv2);
assert!(bv1.is_empty());
let mut bv1 = Bitmap::new_bitvec();
let bv2 = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec);
bv1.union(bv2);
assert_eq!(
bv1,
Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec)
);
let mut bv1 = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec);
let bv2 = Bitmap::new_bitvec();
bv1.union(bv2);
assert_eq!(
bv1,
Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec)
);
// Test empty and full bitmaps
let mut bv1 = Bitmap::new_bitvec();
let bv2 = Bitmap::full_bitvec(8);
bv1.union(bv2);
assert_eq!(bv1, Bitmap::full_bitvec(8));
let mut bv1 = Bitmap::full_bitvec(8);
let bv2 = Bitmap::new_bitvec();
bv1.union(bv2);
assert_eq!(bv1, Bitmap::full_bitvec(8));
}
#[test]
fn test_union_roaring() {
let mut rb1 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
let rb2 = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring);
rb1.union(rb2);
assert_eq!(
rb1,
Bitmap::from_lsb0_bytes(&[0b11111111], BitmapType::Roaring)
);
// Test different lengths
let mut rb1 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
let rb2 = Bitmap::from_lsb0_bytes(&[0b01010101, 0b00000001], BitmapType::Roaring);
rb1.union(rb2);
assert_eq!(
rb1,
Bitmap::from_lsb0_bytes(&[0b11111111, 0b00000001], BitmapType::Roaring)
);
let mut rb1 = Bitmap::from_lsb0_bytes(&[0b10101010, 0b00000001], BitmapType::Roaring);
let rb2 = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring);
rb1.union(rb2);
assert_eq!(
rb1,
Bitmap::from_lsb0_bytes(&[0b11111111, 0b00000001], BitmapType::Roaring)
);
// Test empty bitmaps
let mut rb1 = Bitmap::new_roaring();
let rb2 = Bitmap::new_roaring();
rb1.union(rb2);
assert!(rb1.is_empty());
let mut rb1 = Bitmap::new_roaring();
let rb2 = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring);
rb1.union(rb2);
assert_eq!(
rb1,
Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring)
);
let mut rb1 = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring);
let rb2 = Bitmap::new_roaring();
rb1.union(rb2);
assert_eq!(
rb1,
Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring)
);
// Test empty and full bitmaps
let mut rb1 = Bitmap::new_roaring();
let rb2 = Bitmap::full_roaring(8);
rb1.union(rb2);
assert_eq!(rb1, Bitmap::full_roaring(8));
let mut rb1 = Bitmap::full_roaring(8);
let rb2 = Bitmap::new_roaring();
rb1.union(rb2);
assert_eq!(rb1, Bitmap::full_roaring(8));
}
#[test]
fn test_union_mixed() {
let mut rb = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
let bv = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec);
rb.union(bv);
assert_eq!(
rb,
Bitmap::from_lsb0_bytes(&[0b11111111], BitmapType::Roaring)
);
let mut bv = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
let rb = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring);
bv.union(rb);
assert_eq!(
bv,
Bitmap::from_lsb0_bytes(&[0b11111111], BitmapType::BitVec)
);
let mut rb = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
let bv = Bitmap::full_bitvec(8);
rb.union(bv);
assert_eq!(rb, Bitmap::full_roaring(8));
let mut bv = Bitmap::full_bitvec(8);
let rb = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
bv.union(rb);
assert_eq!(bv, Bitmap::full_bitvec(8));
let mut rb = Bitmap::new_roaring();
let bv = Bitmap::full_bitvec(8);
rb.union(bv);
assert_eq!(rb, Bitmap::full_bitvec(8));
let mut bv = Bitmap::full_bitvec(8);
let rb = Bitmap::new_roaring();
bv.union(rb);
assert_eq!(bv, Bitmap::full_bitvec(8));
let mut rb = Bitmap::new_roaring();
let bv = Bitmap::new_bitvec();
rb.union(bv);
assert!(rb.is_empty());
let mut bv = Bitmap::new_bitvec();
let rb = Bitmap::new_roaring();
bv.union(rb);
assert!(bv.is_empty());
let mut rb = Bitmap::new_roaring();
let bv = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec);
rb.union(bv);
assert_eq!(
rb,
Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec)
);
let mut bv = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec);
let rb = Bitmap::new_roaring();
bv.union(rb);
assert_eq!(
bv,
Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::BitVec)
);
let mut rb = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring);
let bv = Bitmap::new_bitvec();
rb.union(bv);
assert_eq!(
rb,
Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring)
);
let mut bv = Bitmap::new_bitvec();
let rb = Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring);
bv.union(rb);
assert_eq!(
bv,
Bitmap::from_lsb0_bytes(&[0b01010101], BitmapType::Roaring)
);
}
#[test]
fn test_intersect_fulls() {
// Test BitVec intersect
let mut bv1 = Bitmap::full_bitvec(3); // 0-2: 111
let bv2 = Bitmap::full_bitvec(5); // 0-4: 11111
bv1.intersect(bv2);
assert_eq!(bv1.count_ones(), 3);
let mut bv1 = Bitmap::full_bitvec(5); // 0-4: 11111
let bv2 = Bitmap::full_bitvec(3); // 0-2: 111
bv1.intersect(bv2);
assert_eq!(bv1.count_ones(), 3);
// Test Roaring intersect
let mut rb1 = Bitmap::full_roaring(3); // 0-2: 111
let rb2 = Bitmap::full_roaring(5); // 0-4: 11111
rb1.intersect(rb2);
assert_eq!(rb1.count_ones(), 3);
let mut rb1 = Bitmap::full_roaring(5); // 0-4: 11111
let rb2 = Bitmap::full_roaring(3); // 0-2: 111
rb1.intersect(rb2);
assert_eq!(rb1.count_ones(), 3);
// Test cross-type intersect
let mut rb = Bitmap::full_roaring(5); // 0-4: 11111
let bv = Bitmap::full_bitvec(3); // 0-2: 111
rb.intersect(bv);
assert_eq!(rb.count_ones(), 3);
let mut bv = Bitmap::full_bitvec(5); // 0-4: 11111
let rb = Bitmap::full_roaring(3); // 0-2: 111
bv.intersect(rb);
assert_eq!(bv.count_ones(), 3);
let mut rb = Bitmap::full_roaring(3); // 0-2: 111
let bv = Bitmap::full_bitvec(5); // 0-4: 11111
rb.intersect(bv);
assert_eq!(rb.count_ones(), 3);
let mut bv = Bitmap::full_bitvec(3); // 0-2: 111
let rb = Bitmap::full_roaring(5); // 0-4: 11111
bv.intersect(rb);
assert_eq!(bv.count_ones(), 3);
}
#[test]
fn test_intersect_bitvec() {
let mut bv1 = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::BitVec);
let bv2 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
bv1.intersect(bv2);
assert_eq!(
bv1,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::BitVec)
);
// Test different lengths
let mut bv1 = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::BitVec);
let bv2 = Bitmap::from_lsb0_bytes(&[0b10101010, 0b00000001], BitmapType::BitVec);
bv1.intersect(bv2);
assert_eq!(
bv1,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::BitVec)
);
let mut bv1 = Bitmap::from_lsb0_bytes(&[0b11110000, 0b00000001], BitmapType::BitVec);
let bv2 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
bv1.intersect(bv2);
assert_eq!(
bv1,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::BitVec)
);
// Test empty bitmaps
let mut bv1 = Bitmap::new_bitvec();
let bv2 = Bitmap::new_bitvec();
bv1.intersect(bv2);
assert!(bv1.is_empty());
let mut bv1 = Bitmap::new_bitvec();
let bv2 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
bv1.intersect(bv2);
assert!(bv1.is_empty());
let mut bv1 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
let bv2 = Bitmap::new_bitvec();
bv1.intersect(bv2);
assert!(bv1.is_empty());
// Test empty and full bitmaps
let mut bv1 = Bitmap::new_bitvec();
let bv2 = Bitmap::full_bitvec(8);
bv1.intersect(bv2);
assert!(bv1.is_empty());
let mut bv1 = Bitmap::full_bitvec(8);
let bv2 = Bitmap::new_bitvec();
bv1.intersect(bv2);
assert!(bv1.is_empty());
}
#[test]
fn test_intersect_roaring() {
let mut rb1 = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::Roaring);
let rb2 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
rb1.intersect(rb2);
assert_eq!(
rb1,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::Roaring)
);
// Test different lengths
let mut rb1 = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::Roaring);
let rb2 = Bitmap::from_lsb0_bytes(&[0b10101010, 0b00000001], BitmapType::Roaring);
rb1.intersect(rb2);
assert_eq!(
rb1,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::Roaring)
);
let mut rb1 = Bitmap::from_lsb0_bytes(&[0b11110000, 0b00000001], BitmapType::Roaring);
let rb2 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
rb1.intersect(rb2);
assert_eq!(
rb1,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::Roaring)
);
// Test empty bitmaps
let mut rb1 = Bitmap::new_roaring();
let rb2 = Bitmap::new_roaring();
rb1.intersect(rb2);
assert!(rb1.is_empty());
let mut rb1 = Bitmap::new_roaring();
let rb2 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
rb1.intersect(rb2);
assert!(rb1.is_empty());
let mut rb1 = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
let rb2 = Bitmap::new_roaring();
rb1.intersect(rb2);
assert!(rb1.is_empty());
// Test empty and full bitmaps
let mut rb1 = Bitmap::new_roaring();
let rb2 = Bitmap::full_roaring(8);
rb1.intersect(rb2);
assert!(rb1.is_empty());
let mut rb1 = Bitmap::full_roaring(8);
let rb2 = Bitmap::new_roaring();
rb1.intersect(rb2);
assert!(rb1.is_empty());
}
#[test]
fn test_intersect_mixed() {
let mut rb = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::Roaring);
let bv = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
rb.intersect(bv);
assert_eq!(
rb,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::Roaring)
);
let mut bv = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::BitVec);
let rb = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
bv.intersect(rb);
assert_eq!(
bv,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::BitVec)
);
let mut rb = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::Roaring);
let bv = Bitmap::full_bitvec(8);
rb.intersect(bv);
assert_eq!(
rb,
Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::Roaring)
);
let mut bv = Bitmap::full_bitvec(8);
let rb = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::Roaring);
bv.intersect(rb);
assert_eq!(
bv,
Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::BitVec)
);
let mut rb = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::Roaring);
let bv = Bitmap::from_lsb0_bytes(&[0b10101010, 0b00000001], BitmapType::BitVec);
rb.intersect(bv);
assert_eq!(
rb,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::Roaring)
);
let mut bv = Bitmap::from_lsb0_bytes(&[0b11110000, 0b00000001], BitmapType::BitVec);
let rb = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
bv.intersect(rb);
assert_eq!(
bv,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::BitVec)
);
let mut rb = Bitmap::from_lsb0_bytes(&[0b11110000, 0b00000001], BitmapType::Roaring);
let bv = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
rb.intersect(bv);
assert_eq!(
rb,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::Roaring)
);
let mut bv = Bitmap::from_lsb0_bytes(&[0b11110000], BitmapType::BitVec);
let rb = Bitmap::from_lsb0_bytes(&[0b10101010, 0b00000001], BitmapType::Roaring);
bv.intersect(rb);
assert_eq!(
bv,
Bitmap::from_lsb0_bytes(&[0b10100000], BitmapType::BitVec)
);
let mut rb = Bitmap::new_roaring();
let bv = Bitmap::full_bitvec(8);
rb.intersect(bv);
assert!(rb.is_empty());
let mut bv = Bitmap::full_bitvec(8);
let rb = Bitmap::new_roaring();
bv.intersect(rb);
assert!(bv.is_empty());
let mut bv = Bitmap::new_bitvec();
let rb = Bitmap::full_roaring(8);
bv.intersect(rb);
assert!(bv.is_empty());
let mut rb = Bitmap::full_roaring(8);
let bv = Bitmap::new_bitvec();
rb.intersect(bv);
assert!(rb.is_empty());
let mut rb = Bitmap::new_roaring();
let bv = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
rb.intersect(bv);
assert!(rb.is_empty());
let mut bv = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::BitVec);
let rb = Bitmap::new_roaring();
bv.intersect(rb);
assert!(bv.is_empty());
let mut bv = Bitmap::new_bitvec();
let rb = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
bv.intersect(rb);
assert!(bv.is_empty());
let mut rb = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
let bv = Bitmap::new_bitvec();
rb.intersect(bv);
assert!(rb.is_empty());
}
#[test]
fn test_insert_range() {
let mut bv = Bitmap::new_bitvec();
bv.insert_range(0..=5);
assert_eq!(bv.iter_ones().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 5]);
let mut rb = Bitmap::new_roaring();
rb.insert_range(0..=5);
assert_eq!(rb.iter_ones().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 5]);
let mut bv = Bitmap::new_bitvec();
bv.insert_range(10..=10);
assert_eq!(bv.iter_ones().collect::<Vec<_>>(), vec![10]);
let mut rb = Bitmap::new_roaring();
rb.insert_range(10..=10);
assert_eq!(rb.iter_ones().collect::<Vec<_>>(), vec![10]);
}
}
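Since nearly every test in this file builds its inputs with `from_lsb0_bytes`, the bit order is worth spelling out once. A small illustrative sketch in the style of the file's own doc examples, using only the `Bitmap` API defined above:

```rust
use bitmap::Bitmap;
use greptime_proto::v1::index::BitmapType;

// LSB0: bit index 0 is the least significant bit of the first byte, so
// 0b10101010 sets indices 1, 3, 5, 7 and 0b01010101 sets indices 0, 2, 4, 6;
// OR-ing the two is what makes the union tests above produce 0b11111111.
let bitmap = Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring);
assert_eq!(bitmap.iter_ones().collect::<Vec<_>>(), vec![1, 3, 5, 7]);
```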


@@ -17,7 +17,6 @@ pub mod sort_create;
use async_trait::async_trait; use async_trait::async_trait;
use crate::bitmap::BitmapType;
use crate::inverted_index::error::Result; use crate::inverted_index::error::Result;
use crate::inverted_index::format::writer::InvertedIndexWriter; use crate::inverted_index::format::writer::InvertedIndexWriter;
use crate::BytesRef; use crate::BytesRef;
@@ -54,9 +53,5 @@ pub trait InvertedIndexCreator: Send {
/// Finalizes the index creation process, ensuring all data is properly indexed and stored /// Finalizes the index creation process, ensuring all data is properly indexed and stored
/// in the provided writer /// in the provided writer
async fn finish( async fn finish(&mut self, writer: &mut dyn InvertedIndexWriter) -> Result<()>;
&mut self,
writer: &mut dyn InvertedIndexWriter,
bitmap_type: BitmapType,
) -> Result<()>;
} }


@@ -17,23 +17,22 @@ mod intermediate_rw;
mod merge_stream; mod merge_stream;
use async_trait::async_trait; use async_trait::async_trait;
use common_base::BitVec;
use futures::Stream; use futures::Stream;
use crate::bitmap::Bitmap;
use crate::inverted_index::error::Result; use crate::inverted_index::error::Result;
use crate::inverted_index::format::writer::ValueStream;
use crate::{Bytes, BytesRef}; use crate::{Bytes, BytesRef};
/// A stream of sorted values along with their associated bitmap /// A stream of sorted values along with their associated bitmap
pub type SortedStream = Box<dyn Stream<Item = Result<(Bytes, Bitmap)>> + Send + Unpin>; pub type SortedStream = Box<dyn Stream<Item = Result<(Bytes, BitVec)>> + Send + Unpin>;
/// Output of a sorting operation, encapsulating a bitmap for null values and a stream of sorted items /// Output of a sorting operation, encapsulating a bitmap for null values and a stream of sorted items
pub struct SortOutput { pub struct SortOutput {
/// Bitmap indicating which segments have null values /// Bitmap indicating which segments have null values
pub segment_null_bitmap: Bitmap, pub segment_null_bitmap: BitVec,
/// Stream of sorted items /// Stream of sorted items
pub sorted_stream: ValueStream, pub sorted_stream: SortedStream,
/// Total number of rows in the sorted data /// Total number of rows in the sorted data
pub total_row_count: usize, pub total_row_count: usize,


@@ -20,11 +20,11 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait; use async_trait::async_trait;
use common_base::BitVec;
use common_telemetry::{debug, error}; use common_telemetry::{debug, error};
use futures::stream; use futures::stream;
use snafu::ResultExt; use snafu::ResultExt;
use crate::bitmap::Bitmap;
use crate::external_provider::ExternalTempFileProvider; use crate::external_provider::ExternalTempFileProvider;
use crate::inverted_index::create::sort::intermediate_rw::{ use crate::inverted_index::create::sort::intermediate_rw::{
IntermediateReader, IntermediateWriter, IntermediateReader, IntermediateWriter,
@@ -45,10 +45,18 @@ pub struct ExternalSorter {
temp_file_provider: Arc<dyn ExternalTempFileProvider>, temp_file_provider: Arc<dyn ExternalTempFileProvider>,
/// Bitmap indicating which segments have null values /// Bitmap indicating which segments have null values
segment_null_bitmap: Bitmap, segment_null_bitmap: BitVec,
/// In-memory buffer to hold values and their corresponding bitmaps until memory threshold is exceeded /// In-memory buffer to hold values and their corresponding bitmaps until memory threshold is exceeded
values_buffer: BTreeMap<Bytes, (Bitmap, usize)>, values_buffer: BTreeMap<Bytes, BitVec>,
/// Count of rows in the last dumped buffer, used to streamline memory usage of `values_buffer`.
///
/// After data is dumped to external files, `last_dump_row_count` is updated to reflect the new starting point
/// for `BitVec` indexing. This means each `BitVec` in `values_buffer` thereafter encodes positions relative to
/// this count, not from 0. This mechanism effectively shrinks the memory footprint of each `BitVec`, helping manage
/// memory use more efficiently by focusing only on newly ingested data post-dump.
last_dump_row_count: usize,
/// Count of all rows ingested so far /// Count of all rows ingested so far
total_row_count: usize, total_row_count: usize,
@@ -85,14 +93,14 @@ impl Sorter for ExternalSorter {
return Ok(()); return Ok(());
} }
let segment_index_range = self.segment_index_range(n); let segment_index_range = self.segment_index_range(n, value.is_none());
self.total_row_count += n; self.total_row_count += n;
if let Some(value) = value { if let Some(value) = value {
let memory_diff = self.push_not_null(value, segment_index_range); let memory_diff = self.push_not_null(value, segment_index_range);
self.may_dump_buffer(memory_diff).await self.may_dump_buffer(memory_diff).await
} else { } else {
self.segment_null_bitmap.insert_range(segment_index_range); set_bits(&mut self.segment_null_bitmap, segment_index_range);
Ok(()) Ok(())
} }
} }
@@ -109,10 +117,15 @@ impl Sorter for ExternalSorter {
// TODO(zhongzc): k-way merge instead of 2-way merge // TODO(zhongzc): k-way merge instead of 2-way merge
let mut tree_nodes: VecDeque<SortedStream> = VecDeque::with_capacity(readers.len() + 1); let mut tree_nodes: VecDeque<SortedStream> = VecDeque::with_capacity(readers.len() + 1);
let leading_zeros = self.last_dump_row_count / self.segment_row_count;
tree_nodes.push_back(Box::new(stream::iter( tree_nodes.push_back(Box::new(stream::iter(
mem::take(&mut self.values_buffer) mem::take(&mut self.values_buffer)
.into_iter() .into_iter()
.map(|(value, (bitmap, _))| Ok((value, bitmap))), .map(move |(value, mut bitmap)| {
bitmap.resize(bitmap.len() + leading_zeros, false);
bitmap.shift_right(leading_zeros);
Ok((value, bitmap))
}),
))); )));
for (_, reader) in readers { for (_, reader) in readers {
tree_nodes.push_back(IntermediateReader::new(reader).into_stream().await?); tree_nodes.push_back(IntermediateReader::new(reader).into_stream().await?);
@@ -148,10 +161,11 @@ impl ExternalSorter {
index_name, index_name,
temp_file_provider, temp_file_provider,
segment_null_bitmap: Bitmap::new_bitvec(), // bitvec is more efficient for many null values segment_null_bitmap: BitVec::new(),
values_buffer: BTreeMap::new(), values_buffer: BTreeMap::new(),
total_row_count: 0, total_row_count: 0,
last_dump_row_count: 0,
segment_row_count, segment_row_count,
current_memory_usage: 0, current_memory_usage: 0,
@@ -181,7 +195,7 @@ impl ExternalSorter {
} }
/// Pushes the non-null values to the values buffer and sets the bits within /// Pushes the non-null values to the values buffer and sets the bits within
/// the specified range in the given bitmap to true. /// the specified range in the given BitVec to true.
/// Returns the memory usage difference of the buffer after the operation. /// Returns the memory usage difference of the buffer after the operation.
fn push_not_null( fn push_not_null(
&mut self, &mut self,
@@ -189,23 +203,20 @@ impl ExternalSorter {
segment_index_range: RangeInclusive<usize>, segment_index_range: RangeInclusive<usize>,
) -> usize { ) -> usize {
match self.values_buffer.get_mut(value) { match self.values_buffer.get_mut(value) {
Some((bitmap, mem_usage)) => { Some(bitmap) => {
bitmap.insert_range(segment_index_range); let old_len = bitmap.as_raw_slice().len();
let new_usage = bitmap.memory_usage() + value.len(); set_bits(bitmap, segment_index_range);
let diff = new_usage - *mem_usage;
*mem_usage = new_usage;
diff bitmap.as_raw_slice().len() - old_len
} }
None => { None => {
let mut bitmap = Bitmap::new_roaring(); let mut bitmap = BitVec::default();
bitmap.insert_range(segment_index_range); set_bits(&mut bitmap, segment_index_range);
let mem_usage = bitmap.memory_usage() + value.len(); let mem_diff = bitmap.as_raw_slice().len() + value.len();
self.values_buffer self.values_buffer.insert(value.to_vec(), bitmap);
.insert(value.to_vec(), (bitmap, mem_usage));
mem_usage mem_diff
} }
} }
} }
@@ -246,8 +257,12 @@ impl ExternalSorter {
.fetch_sub(memory_usage, Ordering::Relaxed); .fetch_sub(memory_usage, Ordering::Relaxed);
self.current_memory_usage = 0; self.current_memory_usage = 0;
let bitmap_leading_zeros = self.last_dump_row_count / self.segment_row_count;
self.last_dump_row_count =
self.total_row_count - self.total_row_count % self.segment_row_count; // align to segment
let entries = values.len(); let entries = values.len();
IntermediateWriter::new(writer).write_all(values.into_iter().map(|(k, (b, _))| (k, b))).await.inspect(|_| IntermediateWriter::new(writer).write_all(values, bitmap_leading_zeros as _).await.inspect(|_|
debug!("Dumped {entries} entries ({memory_usage} bytes) to intermediate file {file_id} for index {index_name}") debug!("Dumped {entries} entries ({memory_usage} bytes) to intermediate file {file_id} for index {index_name}")
).inspect_err(|e| ).inspect_err(|e|
error!(e; "Failed to dump {entries} entries to intermediate file {file_id} for index {index_name}") error!(e; "Failed to dump {entries} entries to intermediate file {file_id} for index {index_name}")
@@ -256,8 +271,13 @@ impl ExternalSorter {
/// Determines the segment index range for the row index range /// Determines the segment index range for the row index range
/// `[row_begin, row_begin + n - 1]` /// `[row_begin, row_begin + n - 1]`
fn segment_index_range(&self, n: usize) -> RangeInclusive<usize> { fn segment_index_range(&self, n: usize, is_null: bool) -> RangeInclusive<usize> {
let row_begin = self.total_row_count; let row_begin = if is_null {
self.total_row_count
} else {
self.total_row_count - self.last_dump_row_count
};
let start = self.segment_index(row_begin); let start = self.segment_index(row_begin);
let end = self.segment_index(row_begin + n - 1); let end = self.segment_index(row_begin + n - 1);
start..=end start..=end
@@ -269,6 +289,16 @@ impl ExternalSorter {
} }
} }
/// Sets the bits within the specified range in the given `BitVec` to true
fn set_bits(bitmap: &mut BitVec, index_range: RangeInclusive<usize>) {
if *index_range.end() >= bitmap.len() {
bitmap.resize(index_range.end() + 1, false);
}
for index in index_range {
bitmap.set(index, true);
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::collections::HashMap; use std::collections::HashMap;
@@ -300,7 +330,7 @@ mod tests {
move |index_name, file_id| { move |index_name, file_id| {
assert_eq!(index_name, "test"); assert_eq!(index_name, "test");
let mut files = files.lock().unwrap(); let mut files = files.lock().unwrap();
let (writer, reader) = duplex(1024 * 1024); let (writer, reader) = duplex(8 * 1024);
files.insert(file_id.to_string(), Box::new(reader.compat())); files.insert(file_id.to_string(), Box::new(reader.compat()));
Ok(Box::new(writer.compat_write())) Ok(Box::new(writer.compat_write()))
} }
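The relative indexing that `last_dump_row_count` introduces on the right-hand side of this file is easiest to see with concrete numbers. The figures below are hypothetical and only walk through the arithmetic of `segment_index_range` and the `shift_right` padding in the output path:

```rust
fn main() {
    // Hypothetical configuration and ingestion state (not taken from the tests).
    let segment_row_count = 1024;
    let last_dump_row_count = 4096; // rows already dumped to intermediate files
    let total_row_count = 5120;     // rows ingested so far

    // In the in-memory buffer, non-null bitmaps are indexed relative to the last dump.
    let relative_row = total_row_count - last_dump_row_count; // 1024
    let relative_segment = relative_row / segment_row_count;  // segment 1

    // When the buffer is drained into a sorted stream, it is padded and shifted right
    // by the already-dumped segments, which restores absolute segment positions.
    let leading_zero_segments = last_dump_row_count / segment_row_count; // 4
    let absolute_segment = relative_segment + leading_zero_segments;     // segment 5
    assert_eq!(absolute_segment, total_row_count / segment_row_count);
}
```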


@@ -19,24 +19,29 @@
//! The serialization format is as follows: //! The serialization format is as follows:
//! //!
//! ```text //! ```text
//! [magic][item][item]...[item] //! [magic][bitmap leading zeros][item][item]...[item]
//! [4] [?] //! [4] [4] [?]
//! //!
//! Each [item] is structured as: //! Each [item] is structured as:
//! [value len][value][bitmap len][bitmap] //! [value len][value][bitmap len][bitmap]
//! [8] [?] [8] [?] //! [8] [?] [8] [?]
//! ``` //! ```
//! //!
//! Each item represents a value and its associated bitmap, serialized with their lengths for //! The format starts with a 4-byte magic identifier, followed by a 4-byte
//! bitmap leading zeros count, indicating how many leading zeros are in the
//! fixed-size region of the bitmap. Following that, each item represents
//! a value and its associated bitmap, serialized with their lengths for
//! easier deserialization. //! easier deserialization.
mod codec_v1; mod codec_v1;
use std::collections::BTreeMap;
use asynchronous_codec::{FramedRead, FramedWrite}; use asynchronous_codec::{FramedRead, FramedWrite};
use common_base::BitVec;
use futures::{stream, AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, StreamExt}; use futures::{stream, AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, StreamExt};
use snafu::ResultExt; use snafu::ResultExt;
use crate::bitmap::{Bitmap, BitmapType};
use crate::inverted_index::create::sort::SortedStream; use crate::inverted_index::create::sort::SortedStream;
use crate::inverted_index::error::{ use crate::inverted_index::error::{
CloseSnafu, FlushSnafu, ReadSnafu, Result, UnknownIntermediateCodecMagicSnafu, WriteSnafu, CloseSnafu, FlushSnafu, ReadSnafu, Result, UnknownIntermediateCodecMagicSnafu, WriteSnafu,
@@ -57,13 +62,12 @@ impl<W: AsyncWrite + Unpin> IntermediateWriter<W> {
/// Serializes and writes all provided values to the wrapped writer /// Serializes and writes all provided values to the wrapped writer
pub async fn write_all( pub async fn write_all(
mut self, mut self,
values: impl IntoIterator<Item = (Bytes, Bitmap)>, values: BTreeMap<Bytes, BitVec>,
bitmap_leading_zeros: u32,
) -> Result<()> { ) -> Result<()> {
let (codec_magic, encoder) = ( let (codec_magic, encoder) = (
codec_v1::CODEC_V1_MAGIC, codec_v1::CODEC_V1_MAGIC,
codec_v1::IntermediateItemEncoderV1 { codec_v1::IntermediateItemEncoderV1,
bitmap_type: BitmapType::Roaring,
},
); );
self.writer self.writer
@@ -71,6 +75,11 @@ impl<W: AsyncWrite + Unpin> IntermediateWriter<W> {
.await .await
.context(WriteSnafu)?; .context(WriteSnafu)?;
self.writer
.write_all(&bitmap_leading_zeros.to_be_bytes())
.await
.context(WriteSnafu)?;
let value_stream = stream::iter(values.into_iter().map(Ok)); let value_stream = stream::iter(values.into_iter().map(Ok));
let frame_write = FramedWrite::new(&mut self.writer, encoder); let frame_write = FramedWrite::new(&mut self.writer, encoder);
// `forward()` will flush and close the writer when the stream ends // `forward()` will flush and close the writer when the stream ends
@@ -103,9 +112,17 @@ impl<R: AsyncRead + Unpin + Send + 'static> IntermediateReader<R> {
.context(ReadSnafu)?; .context(ReadSnafu)?;
let decoder = match &magic { let decoder = match &magic {
codec_v1::CODEC_V1_MAGIC => codec_v1::IntermediateItemDecoderV1 { codec_v1::CODEC_V1_MAGIC => {
bitmap_type: BitmapType::Roaring, let bitmap_leading_zeros = {
}, let mut buf = [0u8; 4];
self.reader.read_exact(&mut buf).await.context(ReadSnafu)?;
u32::from_be_bytes(buf)
};
codec_v1::IntermediateItemDecoderV1 {
bitmap_leading_zeros,
}
}
_ => return UnknownIntermediateCodecMagicSnafu { magic }.fail(), _ => return UnknownIntermediateCodecMagicSnafu { magic }.fail(),
}; };
@@ -115,7 +132,6 @@ impl<R: AsyncRead + Unpin + Send + 'static> IntermediateReader<R> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::collections::BTreeMap;
use std::io::{Seek, SeekFrom}; use std::io::{Seek, SeekFrom};
use futures::io::{AllowStdIo, Cursor}; use futures::io::{AllowStdIo, Cursor};
@@ -124,10 +140,6 @@ mod tests {
use super::*; use super::*;
use crate::inverted_index::error::Error; use crate::inverted_index::error::Error;
fn bitmap(bytes: &[u8]) -> Bitmap {
Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring)
}
#[tokio::test] #[tokio::test]
async fn test_intermediate_read_write_basic() { async fn test_intermediate_read_write_basic() {
let file_r = tempfile().unwrap(); let file_r = tempfile().unwrap();
@@ -136,12 +148,12 @@ mod tests {
let buf_w = AllowStdIo::new(file_w); let buf_w = AllowStdIo::new(file_w);
let values = BTreeMap::from_iter([ let values = BTreeMap::from_iter([
(Bytes::from("a"), bitmap(&[0b10101010])), (Bytes::from("a"), BitVec::from_slice(&[0b10101010])),
(Bytes::from("b"), bitmap(&[0b01010101])), (Bytes::from("b"), BitVec::from_slice(&[0b01010101])),
]); ]);
let writer = IntermediateWriter::new(buf_w); let writer = IntermediateWriter::new(buf_w);
writer.write_all(values.clone()).await.unwrap(); writer.write_all(values.clone(), 0).await.unwrap();
// reset the handle // reset the handle
buf_r.seek(SeekFrom::Start(0)).unwrap(); buf_r.seek(SeekFrom::Start(0)).unwrap();
@@ -149,9 +161,48 @@ mod tests {
let mut stream = reader.into_stream().await.unwrap(); let mut stream = reader.into_stream().await.unwrap();
let a = stream.next().await.unwrap().unwrap(); let a = stream.next().await.unwrap().unwrap();
assert_eq!(a, (Bytes::from("a"), bitmap(&[0b10101010]))); assert_eq!(a, (Bytes::from("a"), BitVec::from_slice(&[0b10101010])));
let b = stream.next().await.unwrap().unwrap(); let b = stream.next().await.unwrap().unwrap();
assert_eq!(b, (Bytes::from("b"), bitmap(&[0b01010101]))); assert_eq!(b, (Bytes::from("b"), BitVec::from_slice(&[0b01010101])));
assert!(stream.next().await.is_none());
}
#[tokio::test]
async fn test_intermediate_read_write_with_prefix_zeros() {
let file_r = tempfile().unwrap();
let file_w = file_r.try_clone().unwrap();
let mut buf_r = AllowStdIo::new(file_r);
let buf_w = AllowStdIo::new(file_w);
let values = BTreeMap::from_iter([
(Bytes::from("a"), BitVec::from_slice(&[0b10101010])),
(Bytes::from("b"), BitVec::from_slice(&[0b01010101])),
]);
let writer = IntermediateWriter::new(buf_w);
writer.write_all(values.clone(), 8).await.unwrap();
// reset the handle
buf_r.seek(SeekFrom::Start(0)).unwrap();
let reader = IntermediateReader::new(buf_r);
let mut stream = reader.into_stream().await.unwrap();
let a = stream.next().await.unwrap().unwrap();
assert_eq!(
a,
(
Bytes::from("a"),
BitVec::from_slice(&[0b00000000, 0b10101010])
)
);
let b = stream.next().await.unwrap().unwrap();
assert_eq!(
b,
(
Bytes::from("b"),
BitVec::from_slice(&[0b00000000, 0b01010101])
)
);
assert!(stream.next().await.is_none()); assert!(stream.next().await.is_none());
} }
@@ -162,7 +213,7 @@ mod tests {
let values = BTreeMap::new(); let values = BTreeMap::new();
let writer = IntermediateWriter::new(&mut buf); let writer = IntermediateWriter::new(&mut buf);
writer.write_all(values.clone()).await.unwrap(); writer.write_all(values.clone(), 0).await.unwrap();
let reader = IntermediateReader::new(Cursor::new(buf)); let reader = IntermediateReader::new(Cursor::new(buf));
let mut stream = reader.into_stream().await.unwrap(); let mut stream = reader.into_stream().await.unwrap();
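Combining the format comment at the top of this file with the writer above, the leading-zeros variant (right-hand side) writes only eight bytes of header before the first framed item. A byte-level sketch, assuming the `b"im01"` magic from codec_v1 and the big-endian encoding used by `write_all`:

```rust
fn main() {
    // Header for a file whose buffered bitmaps carry 8 leading zero bits.
    let mut header = Vec::new();
    header.extend_from_slice(b"im01");             // CODEC_V1_MAGIC
    header.extend_from_slice(&8u32.to_be_bytes()); // bitmap leading zeros
    assert_eq!(header, [b'i', b'm', b'0', b'1', 0, 0, 0, 8]);
    // The framed [value len][value][bitmap len][bitmap] items follow immediately.
}
```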


@@ -16,10 +16,9 @@ use std::io;
use asynchronous_codec::{BytesMut, Decoder, Encoder}; use asynchronous_codec::{BytesMut, Decoder, Encoder};
use bytes::{Buf, BufMut}; use bytes::{Buf, BufMut};
use greptime_proto::v1::index::BitmapType; use common_base::BitVec;
use snafu::ResultExt; use snafu::ResultExt;
use crate::bitmap::Bitmap;
use crate::inverted_index::error::{CommonIoSnafu, Error, Result}; use crate::inverted_index::error::{CommonIoSnafu, Error, Result};
use crate::Bytes; use crate::Bytes;
@@ -29,42 +28,37 @@ const U64_LENGTH: usize = std::mem::size_of::<u64>();
pub const CODEC_V1_MAGIC: &[u8; 4] = b"im01"; pub const CODEC_V1_MAGIC: &[u8; 4] = b"im01";
/// Serializes items of external sorting intermediate files. /// Serializes items of external sorting intermediate files.
pub struct IntermediateItemEncoderV1 { pub struct IntermediateItemEncoderV1;
pub bitmap_type: BitmapType,
}
/// [`FramedWrite`] requires the [`Encoder`] trait to be implemented. /// [`FramedWrite`] requires the [`Encoder`] trait to be implemented.
impl Encoder for IntermediateItemEncoderV1 { impl Encoder for IntermediateItemEncoderV1 {
type Item<'a> = (Bytes, Bitmap); type Item<'a> = (Bytes, BitVec);
type Error = Error; type Error = Error;
fn encode(&mut self, item: (Bytes, Bitmap), dst: &mut BytesMut) -> Result<()> { fn encode(&mut self, item: (Bytes, BitVec), dst: &mut BytesMut) -> Result<()> {
let value_bytes = item.0; let value_bytes = item.0;
let bitmap_size = item.1.serialized_size(self.bitmap_type); let bitmap_bytes = item.1.into_vec();
dst.reserve(U64_LENGTH * 2 + value_bytes.len() + bitmap_size); dst.reserve(U64_LENGTH * 2 + value_bytes.len() + bitmap_bytes.len());
dst.put_u64_le(value_bytes.len() as u64); dst.put_u64_le(value_bytes.len() as u64);
dst.extend_from_slice(&value_bytes); dst.extend_from_slice(&value_bytes);
dst.put_u64_le(bitmap_size as u64); dst.put_u64_le(bitmap_bytes.len() as u64);
item.1 dst.extend_from_slice(&bitmap_bytes);
.serialize_into(self.bitmap_type, &mut dst.writer())
.context(CommonIoSnafu)?;
Ok(()) Ok(())
} }
} }
/// Deserializes items of external sorting intermediate files. /// Deserializes items of external sorting intermediate files.
pub struct IntermediateItemDecoderV1 { pub struct IntermediateItemDecoderV1 {
pub bitmap_type: BitmapType, pub(crate) bitmap_leading_zeros: u32,
} }
/// [`FramedRead`] requires the [`Decoder`] trait to be implemented. /// [`FramedRead`] requires the [`Decoder`] trait to be implemented.
impl Decoder for IntermediateItemDecoderV1 { impl Decoder for IntermediateItemDecoderV1 {
type Item = (Bytes, Bitmap); type Item = (Bytes, BitVec);
type Error = Error; type Error = Error;
/// Decodes the `src` into `(Bytes, RoaringBitmap)`. Returns `None` if /// Decodes the `src` into `(Bytes, BitVec)`. Returns `None` if
/// the `src` does not contain enough data for a complete item. /// the `src` does not contain enough data for a complete item.
/// ///
/// Only after successful decoding, the `src` is advanced. Otherwise, /// Only after successful decoding, the `src` is advanced. Otherwise,
@@ -98,8 +92,8 @@ impl Decoder for IntermediateItemDecoderV1 {
return Ok(None); return Ok(None);
} }
let bitmap = Bitmap::deserialize_from(&buf[..bitmap_len], self.bitmap_type) let mut bitmap = BitVec::repeat(false, self.bitmap_leading_zeros as _);
.context(CommonIoSnafu)?; bitmap.extend_from_raw_slice(&buf[..bitmap_len]);
let item = (value_bytes.to_vec(), bitmap); let item = (value_bytes.to_vec(), bitmap);
@@ -119,29 +113,25 @@ impl From<io::Error> for Error {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use common_base::bit_vec::prelude::{bitvec, Lsb0};
fn bitmap(bytes: &[u8]) -> Bitmap { use super::*;
Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring)
}
#[test] #[test]
fn test_intermediate_codec_basic() { fn test_intermediate_codec_basic() {
let mut encoder = IntermediateItemEncoderV1 { let mut encoder = IntermediateItemEncoderV1;
bitmap_type: BitmapType::Roaring,
};
let mut buf = BytesMut::new(); let mut buf = BytesMut::new();
let item = (b"hello".to_vec(), bitmap(&[0b10101010])); let item = (b"hello".to_vec(), BitVec::from_slice(&[0b10101010]));
encoder.encode(item.clone(), &mut buf).unwrap(); encoder.encode(item.clone(), &mut buf).unwrap();
let mut decoder = IntermediateItemDecoderV1 { let mut decoder = IntermediateItemDecoderV1 {
bitmap_type: BitmapType::Roaring, bitmap_leading_zeros: 0,
}; };
assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item); assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item);
assert_eq!(decoder.decode(&mut buf).unwrap(), None); assert_eq!(decoder.decode(&mut buf).unwrap(), None);
let item1 = (b"world".to_vec(), bitmap(&[0b01010101])); let item1 = (b"world".to_vec(), BitVec::from_slice(&[0b01010101]));
encoder.encode(item.clone(), &mut buf).unwrap(); encoder.encode(item.clone(), &mut buf).unwrap();
encoder.encode(item1.clone(), &mut buf).unwrap(); encoder.encode(item1.clone(), &mut buf).unwrap();
assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item); assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item);
@@ -152,16 +142,14 @@ mod tests {
#[test] #[test]
fn test_intermediate_codec_empty_item() { fn test_intermediate_codec_empty_item() {
let mut encoder = IntermediateItemEncoderV1 { let mut encoder = IntermediateItemEncoderV1;
bitmap_type: BitmapType::Roaring,
};
let mut buf = BytesMut::new(); let mut buf = BytesMut::new();
let item = (b"".to_vec(), bitmap(&[])); let item = (b"".to_vec(), BitVec::from_slice(&[]));
encoder.encode(item.clone(), &mut buf).unwrap(); encoder.encode(item.clone(), &mut buf).unwrap();
let mut decoder = IntermediateItemDecoderV1 { let mut decoder = IntermediateItemDecoderV1 {
bitmap_type: BitmapType::Roaring, bitmap_leading_zeros: 0,
}; };
assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item); assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item);
assert_eq!(decoder.decode(&mut buf).unwrap(), None); assert_eq!(decoder.decode(&mut buf).unwrap(), None);
@@ -170,19 +158,17 @@ mod tests {
#[test] #[test]
fn test_intermediate_codec_partial() { fn test_intermediate_codec_partial() {
let mut encoder = IntermediateItemEncoderV1 { let mut encoder = IntermediateItemEncoderV1;
bitmap_type: BitmapType::Roaring,
};
let mut buf = BytesMut::new(); let mut buf = BytesMut::new();
let item = (b"hello".to_vec(), bitmap(&[0b10101010])); let item = (b"hello".to_vec(), BitVec::from_slice(&[0b10101010]));
encoder.encode(item.clone(), &mut buf).unwrap(); encoder.encode(item.clone(), &mut buf).unwrap();
let partial_length = U64_LENGTH + 3; let partial_length = U64_LENGTH + 3;
let mut partial_bytes = buf.split_to(partial_length); let mut partial_bytes = buf.split_to(partial_length);
let mut decoder = IntermediateItemDecoderV1 { let mut decoder = IntermediateItemDecoderV1 {
bitmap_type: BitmapType::Roaring, bitmap_leading_zeros: 0,
}; };
assert_eq!(decoder.decode(&mut partial_bytes).unwrap(), None); // not enough data assert_eq!(decoder.decode(&mut partial_bytes).unwrap(), None); // not enough data
partial_bytes.extend_from_slice(&buf[..]); partial_bytes.extend_from_slice(&buf[..]);
@@ -190,4 +176,25 @@ mod tests {
assert_eq!(decoder.decode(&mut partial_bytes).unwrap(), None); assert_eq!(decoder.decode(&mut partial_bytes).unwrap(), None);
assert!(partial_bytes.is_empty()); assert!(partial_bytes.is_empty());
} }
#[test]
fn test_intermediate_codec_prefix_zeros() {
let mut encoder = IntermediateItemEncoderV1;
let mut buf = BytesMut::new();
let item = (b"hello".to_vec(), bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]);
encoder.encode(item.clone(), &mut buf).unwrap();
let mut decoder = IntermediateItemDecoderV1 {
bitmap_leading_zeros: 3,
};
let decoded_item = decoder.decode(&mut buf).unwrap().unwrap();
assert_eq!(decoded_item.0, b"hello");
assert_eq!(
decoded_item.1,
bitvec![u8, Lsb0; 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0]
);
assert_eq!(decoder.decode(&mut buf).unwrap(), None);
assert!(buf.is_empty());
}
} }
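For reference, each item emitted by the encoder above is framed as two little-endian `u64` lengths around the raw payloads. A minimal sketch for the item `(b"a", one bitmap byte)`, assuming the BitVec variant in which the bitmap payload is its raw byte slice:

```rust
fn main() {
    // One framed item: [value len][value][bitmap len][bitmap], lengths as u64 LE.
    let mut frame = Vec::new();
    frame.extend_from_slice(&1u64.to_le_bytes()); // value length
    frame.extend_from_slice(b"a");                // value bytes
    frame.extend_from_slice(&1u64.to_le_bytes()); // bitmap length in bytes
    frame.extend_from_slice(&[0b1010_1010u8]);    // raw bitmap byte
    assert_eq!(frame.len(), 8 + 1 + 8 + 1);
}
```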


@@ -16,10 +16,10 @@ use std::cmp::Ordering;
use std::pin::Pin; use std::pin::Pin;
use std::task::{Context, Poll}; use std::task::{Context, Poll};
use common_base::BitVec;
use futures::{ready, Stream, StreamExt}; use futures::{ready, Stream, StreamExt};
use pin_project::pin_project; use pin_project::pin_project;
use crate::bitmap::Bitmap;
use crate::inverted_index::create::sort::SortedStream; use crate::inverted_index::create::sort::SortedStream;
use crate::inverted_index::error::Result; use crate::inverted_index::error::Result;
use crate::Bytes; use crate::Bytes;
@@ -28,10 +28,10 @@ use crate::Bytes;
#[pin_project] #[pin_project]
pub struct MergeSortedStream { pub struct MergeSortedStream {
stream1: Option<SortedStream>, stream1: Option<SortedStream>,
peek1: Option<(Bytes, Bitmap)>, peek1: Option<(Bytes, BitVec)>,
stream2: Option<SortedStream>, stream2: Option<SortedStream>,
peek2: Option<(Bytes, Bitmap)>, peek2: Option<(Bytes, BitVec)>,
} }
impl MergeSortedStream { impl MergeSortedStream {
@@ -49,7 +49,7 @@ impl MergeSortedStream {
} }
impl Stream for MergeSortedStream { impl Stream for MergeSortedStream {
type Item = Result<(Bytes, Bitmap)>; type Item = Result<(Bytes, BitVec)>;
/// Polls both streams and returns the next item from the stream that has the smaller next item. /// Polls both streams and returns the next item from the stream that has the smaller next item.
/// If both streams have the same next item, the bitmaps are unioned together. /// If both streams have the same next item, the bitmaps are unioned together.
@@ -89,77 +89,77 @@ impl Stream for MergeSortedStream {
} }
/// Merges two bitmaps by bit-wise OR'ing them together, preserving all bits from both /// Merges two bitmaps by bit-wise OR'ing them together, preserving all bits from both
fn merge_bitmaps(mut bitmap1: Bitmap, bitmap2: Bitmap) -> Bitmap { fn merge_bitmaps(bitmap1: BitVec, bitmap2: BitVec) -> BitVec {
bitmap1.union(bitmap2); // make sure longer bitmap is on the left to avoid truncation
bitmap1 #[allow(clippy::if_same_then_else)]
if bitmap1.len() > bitmap2.len() {
bitmap1 | bitmap2
} else {
bitmap2 | bitmap1
}
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use futures::stream; use futures::stream;
use greptime_proto::v1::index::BitmapType;
use super::*; use super::*;
use crate::inverted_index::error::Error; use crate::inverted_index::error::Error;
fn bitmap(bytes: &[u8]) -> Bitmap { fn sorted_stream_from_vec(vec: Vec<(Bytes, BitVec)>) -> SortedStream {
Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring)
}
fn sorted_stream_from_vec(vec: Vec<(Bytes, Bitmap)>) -> SortedStream {
Box::new(stream::iter(vec.into_iter().map(Ok::<_, Error>))) Box::new(stream::iter(vec.into_iter().map(Ok::<_, Error>)))
} }
#[tokio::test] #[tokio::test]
async fn test_merge_sorted_stream_non_overlapping() { async fn test_merge_sorted_stream_non_overlapping() {
let stream1 = sorted_stream_from_vec(vec![ let stream1 = sorted_stream_from_vec(vec![
(Bytes::from("apple"), bitmap(&[0b10101010])), (Bytes::from("apple"), BitVec::from_slice(&[0b10101010])),
(Bytes::from("orange"), bitmap(&[0b01010101])), (Bytes::from("orange"), BitVec::from_slice(&[0b01010101])),
]); ]);
let stream2 = sorted_stream_from_vec(vec![ let stream2 = sorted_stream_from_vec(vec![
(Bytes::from("banana"), bitmap(&[0b10101010])), (Bytes::from("banana"), BitVec::from_slice(&[0b10101010])),
(Bytes::from("peach"), bitmap(&[0b01010101])), (Bytes::from("peach"), BitVec::from_slice(&[0b01010101])),
]); ]);
let mut merged_stream = MergeSortedStream::merge(stream1, stream2); let mut merged_stream = MergeSortedStream::merge(stream1, stream2);
let item = merged_stream.next().await.unwrap().unwrap(); let item = merged_stream.next().await.unwrap().unwrap();
assert_eq!(item.0, Bytes::from("apple")); assert_eq!(item.0, Bytes::from("apple"));
assert_eq!(item.1, bitmap(&[0b10101010])); assert_eq!(item.1, BitVec::from_slice(&[0b10101010]));
let item = merged_stream.next().await.unwrap().unwrap(); let item = merged_stream.next().await.unwrap().unwrap();
assert_eq!(item.0, Bytes::from("banana")); assert_eq!(item.0, Bytes::from("banana"));
assert_eq!(item.1, bitmap(&[0b10101010])); assert_eq!(item.1, BitVec::from_slice(&[0b10101010]));
let item = merged_stream.next().await.unwrap().unwrap(); let item = merged_stream.next().await.unwrap().unwrap();
assert_eq!(item.0, Bytes::from("orange")); assert_eq!(item.0, Bytes::from("orange"));
assert_eq!(item.1, bitmap(&[0b01010101])); assert_eq!(item.1, BitVec::from_slice(&[0b01010101]));
let item = merged_stream.next().await.unwrap().unwrap(); let item = merged_stream.next().await.unwrap().unwrap();
assert_eq!(item.0, Bytes::from("peach")); assert_eq!(item.0, Bytes::from("peach"));
assert_eq!(item.1, bitmap(&[0b01010101])); assert_eq!(item.1, BitVec::from_slice(&[0b01010101]));
assert!(merged_stream.next().await.is_none()); assert!(merged_stream.next().await.is_none());
} }
#[tokio::test] #[tokio::test]
async fn test_merge_sorted_stream_overlapping() { async fn test_merge_sorted_stream_overlapping() {
let stream1 = sorted_stream_from_vec(vec![ let stream1 = sorted_stream_from_vec(vec![
(Bytes::from("apple"), bitmap(&[0b10101010])), (Bytes::from("apple"), BitVec::from_slice(&[0b10101010])),
(Bytes::from("orange"), bitmap(&[0b10101010])), (Bytes::from("orange"), BitVec::from_slice(&[0b10101010])),
]); ]);
let stream2 = sorted_stream_from_vec(vec![ let stream2 = sorted_stream_from_vec(vec![
(Bytes::from("apple"), bitmap(&[0b01010101])), (Bytes::from("apple"), BitVec::from_slice(&[0b01010101])),
(Bytes::from("peach"), bitmap(&[0b01010101])), (Bytes::from("peach"), BitVec::from_slice(&[0b01010101])),
]); ]);
let mut merged_stream = MergeSortedStream::merge(stream1, stream2); let mut merged_stream = MergeSortedStream::merge(stream1, stream2);
let item = merged_stream.next().await.unwrap().unwrap(); let item = merged_stream.next().await.unwrap().unwrap();
assert_eq!(item.0, Bytes::from("apple")); assert_eq!(item.0, Bytes::from("apple"));
assert_eq!(item.1, bitmap(&[0b11111111])); assert_eq!(item.1, BitVec::from_slice(&[0b11111111]));
let item = merged_stream.next().await.unwrap().unwrap(); let item = merged_stream.next().await.unwrap().unwrap();
assert_eq!(item.0, Bytes::from("orange")); assert_eq!(item.0, Bytes::from("orange"));
assert_eq!(item.1, bitmap(&[0b10101010])); assert_eq!(item.1, BitVec::from_slice(&[0b10101010]));
let item = merged_stream.next().await.unwrap().unwrap(); let item = merged_stream.next().await.unwrap().unwrap();
assert_eq!(item.0, Bytes::from("peach")); assert_eq!(item.0, Bytes::from("peach"));
assert_eq!(item.1, bitmap(&[0b01010101])); assert_eq!(item.1, BitVec::from_slice(&[0b01010101]));
assert!(merged_stream.next().await.is_none()); assert!(merged_stream.next().await.is_none());
} }
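MergeSortedStream polls two key-sorted streams, yields whichever side has the smaller next key, and unions the bitmaps when both sides carry the same key; test_merge_sorted_stream_overlapping checks exactly that (0b10101010 | 0b01010101 = 0b11111111 for "apple"). The synchronous sketch below applies the same merge rule to sorted vectors, with a byte-wise OR padded to the longer side standing in for the BitVec/Bitmap union; all names are illustrative.

use std::cmp::Ordering;

// OR two bitmaps represented as raw bytes, padding to the longer side so no
// trailing bits are truncated.
fn or_bitmaps(a: &[u8], b: &[u8]) -> Vec<u8> {
    let len = a.len().max(b.len());
    (0..len)
        .map(|i| a.get(i).copied().unwrap_or(0) | b.get(i).copied().unwrap_or(0))
        .collect()
}

// Merge two key-sorted sequences of (key, bitmap) pairs; equal keys union.
fn merge_sorted(
    left: &[(Vec<u8>, Vec<u8>)],
    right: &[(Vec<u8>, Vec<u8>)],
) -> Vec<(Vec<u8>, Vec<u8>)> {
    let (mut i, mut j, mut out) = (0, 0, Vec::new());
    while i < left.len() && j < right.len() {
        match left[i].0.cmp(&right[j].0) {
            Ordering::Less => {
                out.push(left[i].clone());
                i += 1;
            }
            Ordering::Greater => {
                out.push(right[j].clone());
                j += 1;
            }
            Ordering::Equal => {
                out.push((left[i].0.clone(), or_bitmaps(&left[i].1, &right[j].1)));
                i += 1;
                j += 1;
            }
        }
    }
    out.extend_from_slice(&left[i..]);
    out.extend_from_slice(&right[j..]);
    out
}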

View File

@@ -18,7 +18,6 @@ use std::num::NonZeroUsize;
use async_trait::async_trait; use async_trait::async_trait;
use snafu::ensure; use snafu::ensure;
use crate::bitmap::BitmapType;
use crate::inverted_index::create::sort::{SortOutput, Sorter}; use crate::inverted_index::create::sort::{SortOutput, Sorter};
use crate::inverted_index::create::InvertedIndexCreator; use crate::inverted_index::create::InvertedIndexCreator;
use crate::inverted_index::error::{InconsistentRowCountSnafu, Result}; use crate::inverted_index::error::{InconsistentRowCountSnafu, Result};
@@ -69,11 +68,7 @@ impl InvertedIndexCreator for SortIndexCreator {
} }
/// Finalizes the sorting for all indexes and writes them using the inverted index writer /// Finalizes the sorting for all indexes and writes them using the inverted index writer
async fn finish( async fn finish(&mut self, writer: &mut dyn InvertedIndexWriter) -> Result<()> {
&mut self,
writer: &mut dyn InvertedIndexWriter,
bitmap_type: BitmapType,
) -> Result<()> {
let mut output_row_count = None; let mut output_row_count = None;
for (index_name, mut sorter) in self.sorters.drain() { for (index_name, mut sorter) in self.sorters.drain() {
let SortOutput { let SortOutput {
@@ -93,7 +88,7 @@ impl InvertedIndexCreator for SortIndexCreator {
); );
writer writer
.add_index(index_name, segment_null_bitmap, sorted_stream, bitmap_type) .add_index(index_name, segment_null_bitmap, sorted_stream)
.await?; .await?;
} }
@@ -122,9 +117,9 @@ mod tests {
use futures::{stream, StreamExt}; use futures::{stream, StreamExt};
use super::*; use super::*;
use crate::bitmap::Bitmap; use crate::inverted_index::create::sort::SortedStream;
use crate::inverted_index::error::Error; use crate::inverted_index::error::Error;
use crate::inverted_index::format::writer::{MockInvertedIndexWriter, ValueStream}; use crate::inverted_index::format::writer::MockInvertedIndexWriter;
use crate::Bytes; use crate::Bytes;
#[tokio::test] #[tokio::test]
@@ -148,10 +143,11 @@ mod tests {
} }
let mut mock_writer = MockInvertedIndexWriter::new(); let mut mock_writer = MockInvertedIndexWriter::new();
mock_writer.expect_add_index().times(3).returning( mock_writer
|name, null_bitmap, stream, bitmap_type| { .expect_add_index()
.times(3)
.returning(|name, null_bitmap, stream| {
assert!(null_bitmap.is_empty()); assert!(null_bitmap.is_empty());
assert_eq!(bitmap_type, BitmapType::Roaring);
match name.as_str() { match name.as_str() {
"a" => assert_eq!(stream_to_values(stream), vec![b"1", b"2", b"3"]), "a" => assert_eq!(stream_to_values(stream), vec![b"1", b"2", b"3"]),
"b" => assert_eq!(stream_to_values(stream), vec![b"4", b"5", b"6"]), "b" => assert_eq!(stream_to_values(stream), vec![b"4", b"5", b"6"]),
@@ -159,8 +155,7 @@ mod tests {
_ => panic!("unexpected index name: {}", name), _ => panic!("unexpected index name: {}", name),
} }
Ok(()) Ok(())
}, });
);
mock_writer mock_writer
.expect_finish() .expect_finish()
.times(1) .times(1)
@@ -170,10 +165,7 @@ mod tests {
Ok(()) Ok(())
}); });
creator creator.finish(&mut mock_writer).await.unwrap();
.finish(&mut mock_writer, BitmapType::Roaring)
.await
.unwrap();
} }
#[tokio::test] #[tokio::test]
@@ -199,9 +191,8 @@ mod tests {
let mut mock_writer = MockInvertedIndexWriter::new(); let mut mock_writer = MockInvertedIndexWriter::new();
mock_writer mock_writer
.expect_add_index() .expect_add_index()
.returning(|name, null_bitmap, stream, bitmap_type| { .returning(|name, null_bitmap, stream| {
assert!(null_bitmap.is_empty()); assert!(null_bitmap.is_empty());
assert_eq!(bitmap_type, BitmapType::Roaring);
match name.as_str() { match name.as_str() {
"a" => assert_eq!(stream_to_values(stream), vec![b"1", b"2", b"3"]), "a" => assert_eq!(stream_to_values(stream), vec![b"1", b"2", b"3"]),
"b" => assert_eq!(stream_to_values(stream), vec![b"4", b"5", b"6"]), "b" => assert_eq!(stream_to_values(stream), vec![b"4", b"5", b"6"]),
@@ -212,7 +203,7 @@ mod tests {
}); });
mock_writer.expect_finish().never(); mock_writer.expect_finish().never();
let res = creator.finish(&mut mock_writer, BitmapType::Roaring).await; let res = creator.finish(&mut mock_writer).await;
assert!(matches!(res, Err(Error::InconsistentRowCount { .. }))); assert!(matches!(res, Err(Error::InconsistentRowCount { .. })));
} }
@@ -228,9 +219,8 @@ mod tests {
let mut mock_writer = MockInvertedIndexWriter::new(); let mut mock_writer = MockInvertedIndexWriter::new();
mock_writer mock_writer
.expect_add_index() .expect_add_index()
.returning(|name, null_bitmap, stream, bitmap_type| { .returning(|name, null_bitmap, stream| {
assert!(null_bitmap.is_empty()); assert!(null_bitmap.is_empty());
assert_eq!(bitmap_type, BitmapType::Roaring);
assert!(matches!(name.as_str(), "a" | "b" | "c")); assert!(matches!(name.as_str(), "a" | "b" | "c"));
assert!(stream_to_values(stream).is_empty()); assert!(stream_to_values(stream).is_empty());
Ok(()) Ok(())
@@ -244,10 +234,7 @@ mod tests {
Ok(()) Ok(())
}); });
creator creator.finish(&mut mock_writer).await.unwrap();
.finish(&mut mock_writer, BitmapType::Roaring)
.await
.unwrap();
} }
fn set_bit(bit_vec: &mut BitVec, index: usize) { fn set_bit(bit_vec: &mut BitVec, index: usize) {
@@ -296,21 +283,20 @@ mod tests {
async fn output(&mut self) -> Result<SortOutput> { async fn output(&mut self) -> Result<SortOutput> {
let segment_null_bitmap = self.values.remove(&None).unwrap_or_default(); let segment_null_bitmap = self.values.remove(&None).unwrap_or_default();
let segment_null_bitmap = Bitmap::BitVec(segment_null_bitmap);
Ok(SortOutput { Ok(SortOutput {
segment_null_bitmap, segment_null_bitmap,
sorted_stream: Box::new(stream::iter( sorted_stream: Box::new(stream::iter(
std::mem::take(&mut self.values) std::mem::take(&mut self.values)
.into_iter() .into_iter()
.map(|(v, b)| Ok((v.unwrap(), Bitmap::BitVec(b)))), .map(|(v, b)| Ok((v.unwrap(), b))),
)), )),
total_row_count: self.total_row_count, total_row_count: self.total_row_count,
}) })
} }
} }
fn stream_to_values(stream: ValueStream) -> Vec<Bytes> { fn stream_to_values(stream: SortedStream) -> Vec<Bytes> {
futures::executor::block_on(async { futures::executor::block_on(async {
stream.map(|r| r.unwrap().0).collect::<Vec<Bytes>>().await stream.map(|r| r.unwrap().0).collect::<Vec<Bytes>>().await
}) })
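finish() drains one sorter per index, requires every sorter to report the same total row count, and streams each sorted output into the writer; the InconsistentRowCount test covers the mismatch path. The sketch below reduces that flow to a synchronous loop with stand-in types for the sorter output, the writer, and the error.

use std::collections::HashMap;

#[derive(Debug)]
enum IndexError {
    InconsistentRowCount {
        index_name: String,
        expected: usize,
        actual: usize,
    },
}

struct SortOutput {
    sorted_values: Vec<(Vec<u8>, Vec<u8>)>, // (value, bitmap bytes)
    total_row_count: usize,
}

fn finish_indexes(
    mut sorters: HashMap<String, SortOutput>,
    mut write_index: impl FnMut(&str, &[(Vec<u8>, Vec<u8>)]),
) -> Result<(), IndexError> {
    let mut expected_row_count = None;
    for (index_name, output) in sorters.drain() {
        // The first sorter fixes the expected row count; every later one must match.
        match expected_row_count {
            None => expected_row_count = Some(output.total_row_count),
            Some(expected) if expected != output.total_row_count => {
                return Err(IndexError::InconsistentRowCount {
                    index_name,
                    expected,
                    actual: output.total_row_count,
                });
            }
            _ => {}
        }
        write_index(&index_name, &output.sorted_values);
    }
    Ok(())
}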

View File

@@ -110,14 +110,6 @@ pub enum Error {
location: Location, location: Location,
}, },
#[snafu(display("Failed to decode bitmap"))]
DecodeBitmap {
#[snafu(source)]
error: IoError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to decode protobuf"))] #[snafu(display("Failed to decode protobuf"))]
DecodeProto { DecodeProto {
#[snafu(source)] #[snafu(source)]
@@ -248,7 +240,6 @@ impl ErrorExt for Error {
| CommonIo { .. } | CommonIo { .. }
| UnknownIntermediateCodecMagic { .. } | UnknownIntermediateCodecMagic { .. }
| FstCompile { .. } | FstCompile { .. }
| DecodeBitmap { .. }
| InvalidFooterPayloadSize { .. } | InvalidFooterPayloadSize { .. }
| BlobSizeTooSmall { .. } => StatusCode::Unexpected, | BlobSizeTooSmall { .. } => StatusCode::Unexpected,
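The removed DecodeBitmap variant follows the same snafu pattern as the remaining variants: a display message, a wrapped source error, and an implicit location, raised at call sites through the generated context selector (the .context(DecodeBitmapSnafu) calls visible in the reader diff below). A reduced sketch of that pattern, with the Location field omitted and a made-up parse helper standing in for the real bitmap decoding:

use snafu::{ResultExt, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Failed to decode bitmap"))]
    DecodeBitmap { source: std::io::Error },
}

// Hypothetical low-level decode step that can fail with an io::Error.
fn parse(bytes: &[u8]) -> std::io::Result<Vec<u32>> {
    if bytes.len() % 4 != 0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            "length is not a multiple of 4",
        ));
    }
    Ok(bytes
        .chunks_exact(4)
        .map(|c| u32::from_le_bytes(c.try_into().unwrap()))
        .collect())
}

fn decode_bitmap(bytes: &[u8]) -> Result<Vec<u32>, Error> {
    // `.context(...)` wraps the io::Error into the DecodeBitmap variant.
    parse(bytes).context(DecodeBitmapSnafu)
}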

View File

@@ -18,11 +18,11 @@ use std::sync::Arc;
use async_trait::async_trait; use async_trait::async_trait;
use bytes::Bytes; use bytes::Bytes;
use common_base::BitVec;
use greptime_proto::v1::index::InvertedIndexMetas; use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::ResultExt; use snafu::ResultExt;
use crate::bitmap::{Bitmap, BitmapType}; use crate::inverted_index::error::{DecodeFstSnafu, Result};
use crate::inverted_index::error::{DecodeBitmapSnafu, DecodeFstSnafu, Result};
pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader; pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
use crate::inverted_index::FstMap; use crate::inverted_index::FstMap;
@@ -67,25 +67,17 @@ pub trait InvertedIndexReader: Send + Sync {
} }
/// Retrieves the bitmap from the given offset and size. /// Retrieves the bitmap from the given offset and size.
async fn bitmap(&self, offset: u64, size: u32, bitmap_type: BitmapType) -> Result<Bitmap> { async fn bitmap(&self, offset: u64, size: u32) -> Result<BitVec> {
self.range_read(offset, size).await.and_then(|bytes| { self.range_read(offset, size).await.map(BitVec::from_vec)
Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
})
} }
/// Retrieves the multiple bitmaps from the given ranges. /// Retrieves the multiple bitmaps from the given ranges.
async fn bitmap_deque( async fn bitmap_deque(&mut self, ranges: &[Range<u64>]) -> Result<VecDeque<BitVec>> {
&mut self, Ok(self
ranges: &[(Range<u64>, BitmapType)], .read_vec(ranges)
) -> Result<VecDeque<Bitmap>> { .await?
let (ranges, types): (Vec<_>, Vec<_>) = ranges.iter().cloned().unzip();
let bytes = self.read_vec(&ranges).await?;
bytes
.into_iter() .into_iter()
.zip(types) .map(|bytes| BitVec::from_slice(bytes.as_ref()))
.map(|(bytes, bitmap_type)| { .collect::<VecDeque<_>>())
Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
})
.collect::<Result<VecDeque<_>>>()
} }
} }
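The reader trait only demands raw byte access (range_read and read_vec); bitmap and bitmap_deque are provided methods layered on top that decode whatever bytes come back. The toy sketch below mirrors that layering over an in-memory blob, decoding LSB0 set-bit positions instead of the crate's BitVec/Bitmap types; the trait, names, and synchronous signatures are illustrative (the real trait is async).

use std::ops::Range;

trait RangeReader {
    // Required: read `size` bytes starting at `offset`.
    fn range_read(&self, offset: u64, size: u32) -> std::io::Result<Vec<u8>>;

    /// Provided: read a range and decode the positions of the set bits,
    /// LSB-first within each byte.
    fn bitmap(&self, offset: u64, size: u32) -> std::io::Result<Vec<usize>> {
        let bytes = self.range_read(offset, size)?;
        let mut set_bits = Vec::new();
        for (byte_idx, &byte) in bytes.iter().enumerate() {
            for bit in 0..8 {
                if byte & (1u8 << bit) != 0 {
                    set_bits.push(byte_idx * 8 + bit);
                }
            }
        }
        Ok(set_bits)
    }

    /// Provided: decode several ranges, one bitmap per range.
    fn bitmap_deque(&self, ranges: &[Range<u64>]) -> std::io::Result<Vec<Vec<usize>>> {
        ranges
            .iter()
            .map(|r| self.bitmap(r.start, (r.end - r.start) as u32))
            .collect()
    }
}

struct InMemoryBlob(Vec<u8>);

impl RangeReader for InMemoryBlob {
    fn range_read(&self, offset: u64, size: u32) -> std::io::Result<Vec<u8>> {
        let start = offset as usize;
        let end = start + size as usize;
        self.0
            .get(start..end)
            .map(|s| s.to_vec())
            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "out of range"))
    }
}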

Some files were not shown because too many files have changed in this diff.