mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-22 22:20:02 +00:00
Compare commits
31 Commits
v0.13.0-ni
...
async_deco
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b6e7fb5e08 | ||
|
|
a5df3954f3 | ||
|
|
32fd850c20 | ||
|
|
2bfdae4f8f | ||
|
|
fcb898e9a4 | ||
|
|
8fa2fdfc42 | ||
|
|
4dc1a1d60f | ||
|
|
e375a18011 | ||
|
|
e0ff701e51 | ||
|
|
25645a3303 | ||
|
|
b32ea7d84c | ||
|
|
f164f6eaf3 | ||
|
|
af1920defc | ||
|
|
7c97fae522 | ||
|
|
b8070adc3a | ||
|
|
11bfb17328 | ||
|
|
1d87bd2d43 | ||
|
|
ababeaf538 | ||
|
|
2cbf51d0be | ||
|
|
3059b04b19 | ||
|
|
352b197be4 | ||
|
|
d0254f9705 | ||
|
|
8a86903c73 | ||
|
|
0bd322a078 | ||
|
|
3811e3f632 | ||
|
|
c14aa176b5 | ||
|
|
a922dcd9df | ||
|
|
530ff53422 | ||
|
|
73ca39f37e | ||
|
|
0acc6b0354 | ||
|
|
face361fcb |
@@ -52,7 +52,7 @@ runs:
|
||||
uses: ./.github/actions/build-greptime-binary
|
||||
with:
|
||||
base-image: ubuntu
|
||||
features: servers/dashboard,pg_kvbackend
|
||||
features: servers/dashboard,pg_kvbackend,mysql_kvbackend
|
||||
cargo-profile: ${{ inputs.cargo-profile }}
|
||||
artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
|
||||
version: ${{ inputs.version }}
|
||||
@@ -70,7 +70,7 @@ runs:
|
||||
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds greptime for centos if the host machine is amd64.
|
||||
with:
|
||||
base-image: centos
|
||||
features: servers/dashboard,pg_kvbackend
|
||||
features: servers/dashboard,pg_kvbackend,mysql_kvbackend
|
||||
cargo-profile: ${{ inputs.cargo-profile }}
|
||||
artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
|
||||
version: ${{ inputs.version }}
|
||||
|
||||
7
.github/workflows/dev-build.yml
vendored
7
.github/workflows/dev-build.yml
vendored
@@ -238,6 +238,13 @@ jobs:
|
||||
version: ${{ needs.allocate-runners.outputs.version }}
|
||||
push-latest-tag: false # Don't push the latest tag to registry.
|
||||
dev-mode: true # Only build the standard images.
|
||||
|
||||
- name: Echo Docker image tag to step summary
|
||||
run: |
|
||||
echo "## Docker Image Tag" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Image Tag: \`${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Full Image Name: \`docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Pull Command: \`docker pull docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
- name: Set build result
|
||||
id: set-build-result
|
||||
|
||||
10
.github/workflows/develop.yml
vendored
10
.github/workflows/develop.yml
vendored
@@ -111,7 +111,7 @@ jobs:
|
||||
- name: Build greptime binaries
|
||||
shell: bash
|
||||
# `cargo gc` will invoke `cargo build` with specified args
|
||||
run: cargo gc -- --bin greptime --bin sqlness-runner --features pg_kvbackend
|
||||
run: cargo gc -- --bin greptime --bin sqlness-runner --features "pg_kvbackend,mysql_kvbackend"
|
||||
- name: Pack greptime binaries
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -270,7 +270,7 @@ jobs:
|
||||
- name: Build greptime bianry
|
||||
shell: bash
|
||||
# `cargo gc` will invoke `cargo build` with specified args
|
||||
run: cargo gc --profile ci -- --bin greptime --features pg_kvbackend
|
||||
run: cargo gc --profile ci -- --bin greptime --features "pg_kvbackend,mysql_kvbackend"
|
||||
- name: Pack greptime binary
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -687,7 +687,7 @@ jobs:
|
||||
working-directory: tests-integration/fixtures
|
||||
run: docker compose up -d --wait
|
||||
- name: Run nextest cases
|
||||
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend
|
||||
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend -F mysql_kvbackend
|
||||
env:
|
||||
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
|
||||
RUST_BACKTRACE: 1
|
||||
@@ -704,6 +704,7 @@ jobs:
|
||||
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
|
||||
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
|
||||
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
|
||||
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
|
||||
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
|
||||
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
|
||||
UNITTEST_LOG_DIR: "__unittest_logs"
|
||||
@@ -739,7 +740,7 @@ jobs:
|
||||
working-directory: tests-integration/fixtures
|
||||
run: docker compose up -d --wait
|
||||
- name: Run nextest cases
|
||||
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend
|
||||
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend -F mysql_kvbackend
|
||||
env:
|
||||
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
|
||||
RUST_BACKTRACE: 1
|
||||
@@ -755,6 +756,7 @@ jobs:
|
||||
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
|
||||
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
|
||||
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
|
||||
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
|
||||
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
|
||||
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
|
||||
UNITTEST_LOG_DIR: "__unittest_logs"
|
||||
|
||||
52
.github/workflows/grafana.yml
vendored
Normal file
52
.github/workflows/grafana.yml
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
name: Check Grafana Panels
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'grafana/**' # Trigger only when files under the grafana/ directory change
|
||||
|
||||
jobs:
|
||||
check-panels:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
# Check out the repository
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# Install jq (required for the script)
|
||||
- name: Install jq
|
||||
run: sudo apt-get install -y jq
|
||||
|
||||
# Make the check.sh script executable
|
||||
- name: Make check.sh executable
|
||||
run: chmod +x grafana/check.sh
|
||||
|
||||
# Run the check.sh script
|
||||
- name: Run check.sh
|
||||
run: ./grafana/check.sh
|
||||
|
||||
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
|
||||
- name: Check if this is a pull request
|
||||
id: check-pr
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
echo "is_pull_request=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_pull_request=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
# Make the summary.sh script executable
|
||||
- name: Make summary.sh executable
|
||||
if: steps.check-pr.outputs.is_pull_request == 'true'
|
||||
run: chmod +x grafana/summary.sh
|
||||
|
||||
# Run the summary.sh script and add its output to the GitHub Job Summary
|
||||
- name: Run summary.sh and add to Job Summary
|
||||
if: steps.check-pr.outputs.is_pull_request == 'true'
|
||||
run: |
|
||||
SUMMARY=$(./grafana/summary.sh)
|
||||
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
|
||||
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY
|
||||
74
Cargo.lock
generated
74
Cargo.lock
generated
@@ -1693,6 +1693,7 @@ dependencies = [
|
||||
"humantime",
|
||||
"meta-client",
|
||||
"nu-ansi-term",
|
||||
"opendal",
|
||||
"query",
|
||||
"rand",
|
||||
"reqwest",
|
||||
@@ -2015,6 +2016,7 @@ dependencies = [
|
||||
"arc-swap",
|
||||
"async-trait",
|
||||
"bincode",
|
||||
"chrono",
|
||||
"common-base",
|
||||
"common-catalog",
|
||||
"common-error",
|
||||
@@ -2196,6 +2198,7 @@ dependencies = [
|
||||
"serde_with",
|
||||
"session",
|
||||
"snafu 0.8.5",
|
||||
"sqlx",
|
||||
"store-api",
|
||||
"strum 0.25.0",
|
||||
"table",
|
||||
@@ -4116,11 +4119,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.34"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0"
|
||||
checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"libz-rs-sys",
|
||||
"libz-sys",
|
||||
"miniz_oxide",
|
||||
]
|
||||
@@ -4167,6 +4171,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"cache",
|
||||
"catalog",
|
||||
"chrono",
|
||||
"client",
|
||||
"common-base",
|
||||
"common-catalog",
|
||||
@@ -4701,7 +4706,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486#d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=c5419bbd20cb42e568ec325a4d71a3c94cc327e1#c5419bbd20cb42e568ec325a4d71a3c94cc327e1"
|
||||
dependencies = [
|
||||
"prost 0.13.3",
|
||||
"serde",
|
||||
@@ -5566,6 +5571,7 @@ dependencies = [
|
||||
"rand",
|
||||
"regex",
|
||||
"regex-automata 0.4.8",
|
||||
"roaring",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"snafu 0.8.5",
|
||||
@@ -5897,15 +5903,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "jsonpath-rust"
|
||||
version = "0.7.3"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69a61b87f6a55cc6c28fed5739dd36b9642321ce63e4a5e4a4715d69106f4a10"
|
||||
checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_derive",
|
||||
"regex",
|
||||
"serde_json",
|
||||
"thiserror 1.0.64",
|
||||
"thiserror 2.0.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6273,6 +6279,15 @@ dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-rs-sys"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "902bc563b5d65ad9bba616b490842ef0651066a1a1dc3ce1087113ffcb873c8d"
|
||||
dependencies = [
|
||||
"zlib-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-sys"
|
||||
version = "1.1.20"
|
||||
@@ -6716,6 +6731,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"strum 0.25.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
@@ -6816,9 +6832,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.0"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
|
||||
checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
|
||||
dependencies = [
|
||||
"adler2",
|
||||
]
|
||||
@@ -8270,7 +8286,7 @@ dependencies = [
|
||||
"rand",
|
||||
"ring",
|
||||
"rust_decimal",
|
||||
"thiserror 2.0.6",
|
||||
"thiserror 2.0.12",
|
||||
"tokio",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-util",
|
||||
@@ -8382,7 +8398,7 @@ dependencies = [
|
||||
"greptime-proto",
|
||||
"itertools 0.10.5",
|
||||
"jsonb",
|
||||
"jsonpath-rust 0.7.3",
|
||||
"jsonpath-rust 0.7.5",
|
||||
"lazy_static",
|
||||
"moka",
|
||||
"once_cell",
|
||||
@@ -8760,6 +8776,7 @@ dependencies = [
|
||||
"common-recordbatch",
|
||||
"common-telemetry",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datatypes",
|
||||
"futures",
|
||||
@@ -8773,8 +8790,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "promql-parser"
|
||||
version = "0.4.3"
|
||||
source = "git+https://github.com/GreptimeTeam/promql-parser.git?rev=27abb8e16003a50c720f00d6c85f41f5fa2a2a8e#27abb8e16003a50c720f00d6c85f41f5fa2a2a8e"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c6b1429bdd199d53bd58b745075c1652efedbe2746e5d4f0d56d3184dda48ec"
|
||||
dependencies = [
|
||||
"cfgrammar",
|
||||
"chrono",
|
||||
@@ -9632,6 +9650,16 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "roaring"
|
||||
version = "0.10.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "robust"
|
||||
version = "1.1.0"
|
||||
@@ -11051,7 +11079,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"smallvec",
|
||||
"thiserror 2.0.6",
|
||||
"thiserror 2.0.12",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing",
|
||||
@@ -11136,7 +11164,7 @@ dependencies = [
|
||||
"smallvec",
|
||||
"sqlx-core",
|
||||
"stringprep",
|
||||
"thiserror 2.0.6",
|
||||
"thiserror 2.0.12",
|
||||
"tracing",
|
||||
"whoami",
|
||||
]
|
||||
@@ -11174,7 +11202,7 @@ dependencies = [
|
||||
"smallvec",
|
||||
"sqlx-core",
|
||||
"stringprep",
|
||||
"thiserror 2.0.6",
|
||||
"thiserror 2.0.12",
|
||||
"tracing",
|
||||
"whoami",
|
||||
]
|
||||
@@ -11955,11 +11983,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "2.0.6"
|
||||
version = "2.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47"
|
||||
checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
|
||||
dependencies = [
|
||||
"thiserror-impl 2.0.6",
|
||||
"thiserror-impl 2.0.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -11975,9 +12003,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "2.0.6"
|
||||
version = "2.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312"
|
||||
checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -13936,6 +13964,12 @@ dependencies = [
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zlib-rs"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05"
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.11.2+zstd.1.5.2"
|
||||
|
||||
11
Cargo.toml
11
Cargo.toml
@@ -126,10 +126,11 @@ deadpool-postgres = "0.12"
|
||||
derive_builder = "0.12"
|
||||
dotenv = "0.15"
|
||||
etcd-client = "0.14"
|
||||
flate2 = { version = "1.1.0", default-features = false, features = ["zlib-rs"] }
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c5419bbd20cb42e568ec325a4d71a3c94cc327e1" }
|
||||
hex = "0.4"
|
||||
http = "1"
|
||||
humantime = "2.1"
|
||||
@@ -160,9 +161,7 @@ parquet = { version = "53.0.0", default-features = false, features = ["arrow", "
|
||||
paste = "1.0"
|
||||
pin-project = "1.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", features = [
|
||||
"ser",
|
||||
], rev = "27abb8e16003a50c720f00d6c85f41f5fa2a2a8e" }
|
||||
promql-parser = { version = "0.5", features = ["ser"] }
|
||||
prost = "0.13"
|
||||
raft-engine = { version = "0.4.1", default-features = false }
|
||||
rand = "0.8"
|
||||
@@ -190,6 +189,10 @@ shadow-rs = "0.38"
|
||||
similar-asserts = "1.6.0"
|
||||
smallvec = { version = "1", features = ["serde"] }
|
||||
snafu = "0.8"
|
||||
sqlx = { version = "0.8", features = [
|
||||
"runtime-tokio-rustls",
|
||||
"mysql",
|
||||
] }
|
||||
sysinfo = "0.30"
|
||||
# on branch v0.52.x
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
|
||||
|
||||
@@ -231,6 +231,7 @@ overwrite_entry_start_id = false
|
||||
# secret_access_key = "123456"
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
# enable_virtual_host_style = false
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
|
||||
@@ -318,6 +318,7 @@ retry_delay = "500ms"
|
||||
# secret_access_key = "123456"
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
# enable_virtual_host_style = false
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
|
||||
19
grafana/check.sh
Executable file
19
grafana/check.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
BASEDIR=$(dirname "$0")
|
||||
|
||||
# Use jq to check for panels with empty or missing descriptions
|
||||
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
|
||||
.panels[]
|
||||
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
|
||||
')
|
||||
|
||||
# Check if any invalid panels were found
|
||||
if [[ -n "$invalid_panels" ]]; then
|
||||
echo "Error: The following panels have empty or missing descriptions:"
|
||||
echo "$invalid_panels"
|
||||
exit 1
|
||||
else
|
||||
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
|
||||
exit 0
|
||||
fi
|
||||
File diff suppressed because it is too large
Load Diff
11
grafana/summary.sh
Executable file
11
grafana/summary.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
BASEDIR=$(dirname "$0")
|
||||
echo '| Title | Description | Expressions |
|
||||
|---|---|---|'
|
||||
|
||||
cat $BASEDIR/greptimedb-cluster.json | jq -r '
|
||||
.panels |
|
||||
map(select(.type == "stat" or .type == "timeseries")) |
|
||||
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
|
||||
'
|
||||
@@ -19,9 +19,7 @@ use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECIS
|
||||
use common_decimal::Decimal128;
|
||||
use common_time::time::Time;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{
|
||||
Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
|
||||
};
|
||||
use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp};
|
||||
use datatypes::prelude::{ConcreteDataType, ValueRef};
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::types::{
|
||||
@@ -29,8 +27,8 @@ use datatypes::types::{
|
||||
};
|
||||
use datatypes::value::{OrderedF32, OrderedF64, Value};
|
||||
use datatypes::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
|
||||
Float64Vector, Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
|
||||
BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
|
||||
Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
|
||||
IntervalYearMonthVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
|
||||
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector,
|
||||
@@ -118,7 +116,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
ColumnDataType::Json => ConcreteDataType::json_datatype(),
|
||||
ColumnDataType::String => ConcreteDataType::string_datatype(),
|
||||
ColumnDataType::Date => ConcreteDataType::date_datatype(),
|
||||
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
|
||||
ColumnDataType::Datetime => ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
|
||||
ColumnDataType::TimestampMillisecond => {
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
@@ -271,7 +269,6 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
|
||||
ConcreteDataType::String(_) => ColumnDataType::String,
|
||||
ConcreteDataType::Date(_) => ColumnDataType::Date,
|
||||
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
|
||||
ConcreteDataType::Timestamp(t) => match t {
|
||||
TimestampType::Second(_) => ColumnDataType::TimestampSecond,
|
||||
TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
|
||||
@@ -476,7 +473,6 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
|
||||
Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
|
||||
Value::Binary(val) => values.binary_values.push(val.to_vec()),
|
||||
Value::Date(val) => values.date_values.push(val.val()),
|
||||
Value::DateTime(val) => values.datetime_values.push(val.val()),
|
||||
Value::Timestamp(val) => match val.unit() {
|
||||
TimeUnit::Second => values.timestamp_second_values.push(val.value()),
|
||||
TimeUnit::Millisecond => values.timestamp_millisecond_values.push(val.value()),
|
||||
@@ -577,12 +573,11 @@ pub fn pb_value_to_value_ref<'a>(
|
||||
ValueData::BinaryValue(bytes) => ValueRef::Binary(bytes.as_slice()),
|
||||
ValueData::StringValue(string) => ValueRef::String(string.as_str()),
|
||||
ValueData::DateValue(d) => ValueRef::Date(Date::from(*d)),
|
||||
ValueData::DatetimeValue(d) => ValueRef::DateTime(DateTime::new(*d)),
|
||||
ValueData::TimestampSecondValue(t) => ValueRef::Timestamp(Timestamp::new_second(*t)),
|
||||
ValueData::TimestampMillisecondValue(t) => {
|
||||
ValueRef::Timestamp(Timestamp::new_millisecond(*t))
|
||||
}
|
||||
ValueData::TimestampMicrosecondValue(t) => {
|
||||
ValueData::DatetimeValue(t) | ValueData::TimestampMicrosecondValue(t) => {
|
||||
ValueRef::Timestamp(Timestamp::new_microsecond(*t))
|
||||
}
|
||||
ValueData::TimestampNanosecondValue(t) => {
|
||||
@@ -651,7 +646,6 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
|
||||
ConcreteDataType::Binary(_) => Arc::new(BinaryVector::from(values.binary_values)),
|
||||
ConcreteDataType::String(_) => Arc::new(StringVector::from_vec(values.string_values)),
|
||||
ConcreteDataType::Date(_) => Arc::new(DateVector::from_vec(values.date_values)),
|
||||
ConcreteDataType::DateTime(_) => Arc::new(DateTimeVector::from_vec(values.datetime_values)),
|
||||
ConcreteDataType::Timestamp(unit) => match unit {
|
||||
TimestampType::Second(_) => Arc::new(TimestampSecondVector::from_vec(
|
||||
values.timestamp_second_values,
|
||||
@@ -787,11 +781,6 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
|
||||
.into_iter()
|
||||
.map(|val| val.into())
|
||||
.collect(),
|
||||
ConcreteDataType::DateTime(_) => values
|
||||
.datetime_values
|
||||
.into_iter()
|
||||
.map(|v| Value::DateTime(v.into()))
|
||||
.collect(),
|
||||
ConcreteDataType::Date(_) => values
|
||||
.date_values
|
||||
.into_iter()
|
||||
@@ -947,9 +936,6 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
|
||||
Value::Date(v) => v1::Value {
|
||||
value_data: Some(ValueData::DateValue(v.val())),
|
||||
},
|
||||
Value::DateTime(v) => v1::Value {
|
||||
value_data: Some(ValueData::DatetimeValue(v.val())),
|
||||
},
|
||||
Value::Timestamp(v) => match v.unit() {
|
||||
TimeUnit::Second => v1::Value {
|
||||
value_data: Some(ValueData::TimestampSecondValue(v.value())),
|
||||
@@ -1066,7 +1052,6 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
|
||||
Value::String(v) => Some(ValueData::StringValue(v.as_utf8().to_string())),
|
||||
Value::Binary(v) => Some(ValueData::BinaryValue(v.to_vec())),
|
||||
Value::Date(v) => Some(ValueData::DateValue(v.val())),
|
||||
Value::DateTime(v) => Some(ValueData::DatetimeValue(v.val())),
|
||||
Value::Timestamp(v) => Some(match v.unit() {
|
||||
TimeUnit::Second => ValueData::TimestampSecondValue(v.value()),
|
||||
TimeUnit::Millisecond => ValueData::TimestampMillisecondValue(v.value()),
|
||||
@@ -1248,7 +1233,7 @@ mod tests {
|
||||
ColumnDataTypeWrapper::date_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ColumnDataTypeWrapper::datetime_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
@@ -1339,10 +1324,6 @@ mod tests {
|
||||
ColumnDataTypeWrapper::date_datatype(),
|
||||
ConcreteDataType::date_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper::datetime_datatype(),
|
||||
ConcreteDataType::datetime_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
@@ -1830,17 +1811,6 @@ mod tests {
|
||||
]
|
||||
);
|
||||
|
||||
test_convert_values!(
|
||||
datetime,
|
||||
vec![1.into(), 2.into(), 3.into()],
|
||||
datetime,
|
||||
vec![
|
||||
Value::DateTime(1.into()),
|
||||
Value::DateTime(2.into()),
|
||||
Value::DateTime(3.into())
|
||||
]
|
||||
);
|
||||
|
||||
#[test]
|
||||
fn test_vectors_to_rows_for_different_types() {
|
||||
let boolean_vec = BooleanVector::from_vec(vec![true, false, true]);
|
||||
|
||||
@@ -132,6 +132,15 @@ pub fn options_from_skipping(skipping: &SkippingIndexOptions) -> Result<Option<C
|
||||
Ok((!options.options.is_empty()).then_some(options))
|
||||
}
|
||||
|
||||
/// Tries to construct a `ColumnOptions` for inverted index.
|
||||
pub fn options_from_inverted() -> ColumnOptions {
|
||||
let mut options = ColumnOptions::default();
|
||||
options
|
||||
.options
|
||||
.insert(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string());
|
||||
options
|
||||
}
|
||||
|
||||
/// Tries to construct a `FulltextAnalyzer` from the given analyzer.
|
||||
pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
|
||||
match analyzer {
|
||||
|
||||
@@ -77,7 +77,7 @@ trait SystemSchemaProviderInner {
|
||||
fn system_table(&self, name: &str) -> Option<SystemTableRef>;
|
||||
|
||||
fn table_info(catalog_name: String, table: &SystemTableRef) -> TableInfoRef {
|
||||
let table_meta = TableMetaBuilder::default()
|
||||
let table_meta = TableMetaBuilder::empty()
|
||||
.schema(table.schema())
|
||||
.primary_key_indices(vec![])
|
||||
.next_column_id(0)
|
||||
|
||||
@@ -365,10 +365,6 @@ impl InformationSchemaColumnsBuilder {
|
||||
self.numeric_scales.push(None);
|
||||
|
||||
match &column_schema.data_type {
|
||||
ConcreteDataType::DateTime(datetime_type) => {
|
||||
self.datetime_precisions
|
||||
.push(Some(datetime_type.precision() as i64));
|
||||
}
|
||||
ConcreteDataType::Timestamp(ts_type) => {
|
||||
self.datetime_precisions
|
||||
.push(Some(ts_type.precision() as i64));
|
||||
|
||||
@@ -28,16 +28,19 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datatypes::prelude::ConcreteDataType as CDT;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::timestamp::TimestampMillisecond;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
|
||||
Int64VectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder,
|
||||
UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu, Result,
|
||||
CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu,
|
||||
Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::{Predicates, FLOWS};
|
||||
use crate::system_schema::information_schema::InformationTable;
|
||||
@@ -59,6 +62,10 @@ pub const SOURCE_TABLE_IDS: &str = "source_table_ids";
|
||||
pub const SINK_TABLE_NAME: &str = "sink_table_name";
|
||||
pub const FLOWNODE_IDS: &str = "flownode_ids";
|
||||
pub const OPTIONS: &str = "options";
|
||||
pub const CREATED_TIME: &str = "created_time";
|
||||
pub const UPDATED_TIME: &str = "updated_time";
|
||||
pub const LAST_EXECUTION_TIME: &str = "last_execution_time";
|
||||
pub const SOURCE_TABLE_NAMES: &str = "source_table_names";
|
||||
|
||||
/// The `information_schema.flows` to provides information about flows in databases.
|
||||
#[derive(Debug)]
|
||||
@@ -99,6 +106,14 @@ impl InformationSchemaFlows {
|
||||
(SINK_TABLE_NAME, CDT::string_datatype(), false),
|
||||
(FLOWNODE_IDS, CDT::string_datatype(), true),
|
||||
(OPTIONS, CDT::string_datatype(), true),
|
||||
(CREATED_TIME, CDT::timestamp_millisecond_datatype(), false),
|
||||
(UPDATED_TIME, CDT::timestamp_millisecond_datatype(), false),
|
||||
(
|
||||
LAST_EXECUTION_TIME,
|
||||
CDT::timestamp_millisecond_datatype(),
|
||||
true,
|
||||
),
|
||||
(SOURCE_TABLE_NAMES, CDT::string_datatype(), true),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(name, ty, nullable)| ColumnSchema::new(name, ty, nullable))
|
||||
@@ -170,6 +185,10 @@ struct InformationSchemaFlowsBuilder {
|
||||
sink_table_names: StringVectorBuilder,
|
||||
flownode_id_groups: StringVectorBuilder,
|
||||
option_groups: StringVectorBuilder,
|
||||
created_time: TimestampMillisecondVectorBuilder,
|
||||
updated_time: TimestampMillisecondVectorBuilder,
|
||||
last_execution_time: TimestampMillisecondVectorBuilder,
|
||||
source_table_names: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaFlowsBuilder {
|
||||
@@ -196,6 +215,10 @@ impl InformationSchemaFlowsBuilder {
|
||||
sink_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
flownode_id_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
option_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
created_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
updated_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
last_execution_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
source_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,13 +258,14 @@ impl InformationSchemaFlowsBuilder {
|
||||
catalog_name: catalog_name.to_string(),
|
||||
flow_name: flow_name.to_string(),
|
||||
})?;
|
||||
self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)?;
|
||||
self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)
|
||||
.await?;
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_flow(
|
||||
async fn add_flow(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
flow_id: FlowId,
|
||||
@@ -290,6 +314,36 @@ impl InformationSchemaFlowsBuilder {
|
||||
input: format!("{:?}", flow_info.options()),
|
||||
},
|
||||
)?));
|
||||
self.created_time
|
||||
.push(Some(flow_info.created_time().timestamp_millis().into()));
|
||||
self.updated_time
|
||||
.push(Some(flow_info.updated_time().timestamp_millis().into()));
|
||||
self.last_execution_time
|
||||
.push(flow_stat.as_ref().and_then(|state| {
|
||||
state
|
||||
.last_exec_time_map
|
||||
.get(&flow_id)
|
||||
.map(|v| TimestampMillisecond::new(*v))
|
||||
}));
|
||||
|
||||
let mut source_table_names = vec![];
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
source_table_names.extend(
|
||||
catalog_manager
|
||||
.tables_by_ids(&catalog_name, &schema_name, flow_info.source_table_ids())
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|table| table.table_info().full_table_name()),
|
||||
);
|
||||
}
|
||||
|
||||
let source_table_names = source_table_names.join(",");
|
||||
self.source_table_names.push(Some(&source_table_names));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -307,6 +361,10 @@ impl InformationSchemaFlowsBuilder {
|
||||
Arc::new(self.sink_table_names.finish()),
|
||||
Arc::new(self.flownode_id_groups.finish()),
|
||||
Arc::new(self.option_groups.finish()),
|
||||
Arc::new(self.created_time.finish()),
|
||||
Arc::new(self.updated_time.finish()),
|
||||
Arc::new(self.last_execution_time.finish()),
|
||||
Arc::new(self.source_table_names.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ use datatypes::vectors::{Int64Vector, StringVector, VectorRef};
|
||||
|
||||
use super::table_names::*;
|
||||
use crate::system_schema::utils::tables::{
|
||||
bigint_column, datetime_column, string_column, string_columns,
|
||||
bigint_column, string_column, string_columns, timestamp_micro_column,
|
||||
};
|
||||
|
||||
const NO_VALUE: &str = "NO";
|
||||
@@ -163,17 +163,17 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
|
||||
string_column("EVENT_BODY"),
|
||||
string_column("EVENT_DEFINITION"),
|
||||
string_column("EVENT_TYPE"),
|
||||
datetime_column("EXECUTE_AT"),
|
||||
timestamp_micro_column("EXECUTE_AT"),
|
||||
bigint_column("INTERVAL_VALUE"),
|
||||
string_column("INTERVAL_FIELD"),
|
||||
string_column("SQL_MODE"),
|
||||
datetime_column("STARTS"),
|
||||
datetime_column("ENDS"),
|
||||
timestamp_micro_column("STARTS"),
|
||||
timestamp_micro_column("ENDS"),
|
||||
string_column("STATUS"),
|
||||
string_column("ON_COMPLETION"),
|
||||
datetime_column("CREATED"),
|
||||
datetime_column("LAST_ALTERED"),
|
||||
datetime_column("LAST_EXECUTED"),
|
||||
timestamp_micro_column("CREATED"),
|
||||
timestamp_micro_column("LAST_ALTERED"),
|
||||
timestamp_micro_column("LAST_EXECUTED"),
|
||||
string_column("EVENT_COMMENT"),
|
||||
bigint_column("ORIGINATOR"),
|
||||
string_column("CHARACTER_SET_CLIENT"),
|
||||
@@ -204,10 +204,10 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
|
||||
bigint_column("INITIAL_SIZE"),
|
||||
bigint_column("MAXIMUM_SIZE"),
|
||||
bigint_column("AUTOEXTEND_SIZE"),
|
||||
datetime_column("CREATION_TIME"),
|
||||
datetime_column("LAST_UPDATE_TIME"),
|
||||
datetime_column("LAST_ACCESS_TIME"),
|
||||
datetime_column("RECOVER_TIME"),
|
||||
timestamp_micro_column("CREATION_TIME"),
|
||||
timestamp_micro_column("LAST_UPDATE_TIME"),
|
||||
timestamp_micro_column("LAST_ACCESS_TIME"),
|
||||
timestamp_micro_column("RECOVER_TIME"),
|
||||
bigint_column("TRANSACTION_COUNTER"),
|
||||
string_column("VERSION"),
|
||||
string_column("ROW_FORMAT"),
|
||||
@@ -217,9 +217,9 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
|
||||
bigint_column("MAX_DATA_LENGTH"),
|
||||
bigint_column("INDEX_LENGTH"),
|
||||
bigint_column("DATA_FREE"),
|
||||
datetime_column("CREATE_TIME"),
|
||||
datetime_column("UPDATE_TIME"),
|
||||
datetime_column("CHECK_TIME"),
|
||||
timestamp_micro_column("CREATE_TIME"),
|
||||
timestamp_micro_column("UPDATE_TIME"),
|
||||
timestamp_micro_column("CHECK_TIME"),
|
||||
string_column("CHECKSUM"),
|
||||
string_column("STATUS"),
|
||||
string_column("EXTRA"),
|
||||
@@ -330,8 +330,8 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
|
||||
string_column("SQL_DATA_ACCESS"),
|
||||
string_column("SQL_PATH"),
|
||||
string_column("SECURITY_TYPE"),
|
||||
datetime_column("CREATED"),
|
||||
datetime_column("LAST_ALTERED"),
|
||||
timestamp_micro_column("CREATED"),
|
||||
timestamp_micro_column("LAST_ALTERED"),
|
||||
string_column("SQL_MODE"),
|
||||
string_column("ROUTINE_COMMENT"),
|
||||
string_column("DEFINER"),
|
||||
@@ -383,7 +383,7 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
|
||||
string_column("ACTION_REFERENCE_NEW_TABLE"),
|
||||
string_column("ACTION_REFERENCE_OLD_ROW"),
|
||||
string_column("ACTION_REFERENCE_NEW_ROW"),
|
||||
datetime_column("CREATED"),
|
||||
timestamp_micro_column("CREATED"),
|
||||
string_column("SQL_MODE"),
|
||||
string_column("DEFINER"),
|
||||
string_column("CHARACTER_SET_CLIENT"),
|
||||
|
||||
@@ -20,17 +20,18 @@ use common_catalog::consts::INFORMATION_SCHEMA_PARTITIONS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_time::datetime::DateTime;
|
||||
use datafusion::execution::TaskContext;
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::timestamp::TimestampMicrosecond;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder,
|
||||
MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder,
|
||||
ConstantVector, Int64Vector, Int64VectorBuilder, MutableVector, StringVector,
|
||||
StringVectorBuilder, TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder,
|
||||
UInt64VectorBuilder,
|
||||
};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use partition::manager::PartitionInfo;
|
||||
@@ -127,9 +128,21 @@ impl InformationSchemaPartitions {
|
||||
ColumnSchema::new("max_data_length", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("index_length", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("create_time", ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new("update_time", ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new("check_time", ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
"create_time",
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"update_time",
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"check_time",
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
"partition_comment",
|
||||
@@ -200,7 +213,7 @@ struct InformationSchemaPartitionsBuilder {
|
||||
partition_names: StringVectorBuilder,
|
||||
partition_ordinal_positions: Int64VectorBuilder,
|
||||
partition_expressions: StringVectorBuilder,
|
||||
create_times: DateTimeVectorBuilder,
|
||||
create_times: TimestampMicrosecondVectorBuilder,
|
||||
partition_ids: UInt64VectorBuilder,
|
||||
}
|
||||
|
||||
@@ -220,7 +233,7 @@ impl InformationSchemaPartitionsBuilder {
|
||||
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_times: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_times: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
@@ -324,7 +337,7 @@ impl InformationSchemaPartitionsBuilder {
|
||||
};
|
||||
|
||||
self.partition_expressions.push(expressions.as_deref());
|
||||
self.create_times.push(Some(DateTime::from(
|
||||
self.create_times.push(Some(TimestampMicrosecond::from(
|
||||
table_info.meta.created_on.timestamp_millis(),
|
||||
)));
|
||||
self.partition_ids.push(Some(partition.id.as_u64()));
|
||||
@@ -342,8 +355,8 @@ impl InformationSchemaPartitionsBuilder {
|
||||
Arc::new(Int64Vector::from(vec![None])),
|
||||
rows_num,
|
||||
));
|
||||
let null_datetime_vector = Arc::new(ConstantVector::new(
|
||||
Arc::new(DateTimeVector::from(vec![None])),
|
||||
let null_timestampmicrosecond_vector = Arc::new(ConstantVector::new(
|
||||
Arc::new(TimestampMicrosecondVector::from(vec![None])),
|
||||
rows_num,
|
||||
));
|
||||
let partition_methods = Arc::new(ConstantVector::new(
|
||||
@@ -373,8 +386,8 @@ impl InformationSchemaPartitionsBuilder {
|
||||
null_i64_vector.clone(),
|
||||
Arc::new(self.create_times.finish()),
|
||||
// TODO(dennis): supports update_time
|
||||
null_datetime_vector.clone(),
|
||||
null_datetime_vector,
|
||||
null_timestampmicrosecond_vector.clone(),
|
||||
null_timestampmicrosecond_vector,
|
||||
null_i64_vector,
|
||||
null_string_vector.clone(),
|
||||
null_string_vector.clone(),
|
||||
|
||||
@@ -30,7 +30,8 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
DateTimeVectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
|
||||
StringVectorBuilder, TimestampMicrosecondVectorBuilder, UInt32VectorBuilder,
|
||||
UInt64VectorBuilder,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -105,9 +106,21 @@ impl InformationSchemaTables {
|
||||
ColumnSchema::new(TABLE_ROWS, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(DATA_FREE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(AUTO_INCREMENT, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(CREATE_TIME, ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new(UPDATE_TIME, ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new(CHECK_TIME, ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
CREATE_TIME,
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
UPDATE_TIME,
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
CHECK_TIME,
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(TABLE_COLLATION, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(CHECKSUM, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(CREATE_OPTIONS, ConcreteDataType::string_datatype(), true),
|
||||
@@ -182,9 +195,9 @@ struct InformationSchemaTablesBuilder {
|
||||
max_index_length: UInt64VectorBuilder,
|
||||
data_free: UInt64VectorBuilder,
|
||||
auto_increment: UInt64VectorBuilder,
|
||||
create_time: DateTimeVectorBuilder,
|
||||
update_time: DateTimeVectorBuilder,
|
||||
check_time: DateTimeVectorBuilder,
|
||||
create_time: TimestampMicrosecondVectorBuilder,
|
||||
update_time: TimestampMicrosecondVectorBuilder,
|
||||
check_time: TimestampMicrosecondVectorBuilder,
|
||||
table_collation: StringVectorBuilder,
|
||||
checksum: UInt64VectorBuilder,
|
||||
create_options: StringVectorBuilder,
|
||||
@@ -219,9 +232,9 @@ impl InformationSchemaTablesBuilder {
|
||||
max_index_length: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
data_free: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
auto_increment: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
update_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
check_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
update_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
check_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_collation: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
checksum: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_options: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
|
||||
@@ -51,10 +51,10 @@ pub fn bigint_column(name: &str) -> ColumnSchema {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn datetime_column(name: &str) -> ColumnSchema {
|
||||
pub fn timestamp_micro_column(name: &str) -> ColumnSchema {
|
||||
ColumnSchema::new(
|
||||
str::to_lowercase(name),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ license.workspace = true
|
||||
|
||||
[features]
|
||||
pg_kvbackend = ["common-meta/pg_kvbackend"]
|
||||
mysql_kvbackend = ["common-meta/mysql_kvbackend"]
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
@@ -43,6 +44,10 @@ futures.workspace = true
|
||||
humantime.workspace = true
|
||||
meta-client.workspace = true
|
||||
nu-ansi-term = "0.46"
|
||||
opendal = { version = "0.51.1", features = [
|
||||
"services-fs",
|
||||
"services-s3",
|
||||
] }
|
||||
query.workspace = true
|
||||
rand.workspace = true
|
||||
reqwest.workspace = true
|
||||
|
||||
@@ -23,6 +23,8 @@ use common_error::ext::BoxedError;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::etcd::EtcdStore;
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
use common_meta::kv_backend::rds::MySqlStore;
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
use common_meta::kv_backend::rds::PgStore;
|
||||
use common_meta::peer::Peer;
|
||||
@@ -63,6 +65,9 @@ pub struct BenchTableMetadataCommand {
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[clap(long)]
|
||||
postgres_addr: Option<String>,
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
#[clap(long)]
|
||||
mysql_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
count: u32,
|
||||
}
|
||||
@@ -86,6 +91,16 @@ impl BenchTableMetadataCommand {
|
||||
kv_backend
|
||||
};
|
||||
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
let kv_backend = if let Some(mysql_addr) = &self.mysql_addr {
|
||||
info!("Using mysql as kv backend");
|
||||
MySqlStore::with_url(mysql_addr, "greptime_metakv", 128)
|
||||
.await
|
||||
.unwrap()
|
||||
} else {
|
||||
kv_backend
|
||||
};
|
||||
|
||||
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));
|
||||
|
||||
let tool = BenchTableMetadata {
|
||||
|
||||
@@ -276,6 +276,24 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("OpenDAL operator failed"))]
|
||||
OpenDal {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: opendal::Error,
|
||||
},
|
||||
#[snafu(display("S3 config need be set"))]
|
||||
S3ConfigNotSet {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Output directory not set"))]
|
||||
OutputDirNotSet {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -319,6 +337,9 @@ impl ErrorExt for Error {
|
||||
| Error::BuildClient { .. } => StatusCode::Unexpected,
|
||||
|
||||
Error::Other { source, .. } => source.status_code(),
|
||||
Error::OpenDal { .. } => StatusCode::Internal,
|
||||
Error::S3ConfigNotSet { .. } => StatusCode::InvalidArguments,
|
||||
Error::OutputDirNotSet { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
Error::BuildRuntime { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -21,15 +21,18 @@ use async_trait::async_trait;
|
||||
use clap::{Parser, ValueEnum};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_telemetry::{debug, error, info};
|
||||
use opendal::layers::LoggingLayer;
|
||||
use opendal::{services, Operator};
|
||||
use serde_json::Value;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tokio::fs::File;
|
||||
use tokio::io::{AsyncWriteExt, BufWriter};
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::database::{parse_proxy_opts, DatabaseClient};
|
||||
use crate::error::{EmptyResultSnafu, Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
|
||||
use crate::error::{
|
||||
EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, S3ConfigNotSetSnafu,
|
||||
SchemaNotFoundSnafu,
|
||||
};
|
||||
use crate::{database, Tool};
|
||||
|
||||
type TableReference = (String, String, String);
|
||||
@@ -52,8 +55,9 @@ pub struct ExportCommand {
|
||||
addr: String,
|
||||
|
||||
/// Directory to put the exported data. E.g.: /tmp/greptimedb-export
|
||||
/// for local export.
|
||||
#[clap(long)]
|
||||
output_dir: String,
|
||||
output_dir: Option<String>,
|
||||
|
||||
/// The name of the catalog to export.
|
||||
#[clap(long, default_value = "greptime-*")]
|
||||
@@ -101,10 +105,51 @@ pub struct ExportCommand {
|
||||
/// Disable proxy server, if set, will not use any proxy.
|
||||
#[clap(long)]
|
||||
no_proxy: bool,
|
||||
|
||||
/// if export data to s3
|
||||
#[clap(long)]
|
||||
s3: bool,
|
||||
|
||||
/// The s3 bucket name
|
||||
/// if s3 is set, this is required
|
||||
#[clap(long)]
|
||||
s3_bucket: Option<String>,
|
||||
|
||||
/// The s3 endpoint
|
||||
/// if s3 is set, this is required
|
||||
#[clap(long)]
|
||||
s3_endpoint: Option<String>,
|
||||
|
||||
/// The s3 access key
|
||||
/// if s3 is set, this is required
|
||||
#[clap(long)]
|
||||
s3_access_key: Option<String>,
|
||||
|
||||
/// The s3 secret key
|
||||
/// if s3 is set, this is required
|
||||
#[clap(long)]
|
||||
s3_secret_key: Option<String>,
|
||||
|
||||
/// The s3 region
|
||||
/// if s3 is set, this is required
|
||||
#[clap(long)]
|
||||
s3_region: Option<String>,
|
||||
}
|
||||
|
||||
impl ExportCommand {
|
||||
pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
|
||||
if self.s3
|
||||
&& (self.s3_bucket.is_none()
|
||||
|| self.s3_endpoint.is_none()
|
||||
|| self.s3_access_key.is_none()
|
||||
|| self.s3_secret_key.is_none()
|
||||
|| self.s3_region.is_none())
|
||||
{
|
||||
return Err(BoxedError::new(S3ConfigNotSetSnafu {}.build()));
|
||||
}
|
||||
if !self.s3 && self.output_dir.is_none() {
|
||||
return Err(BoxedError::new(OutputDirNotSetSnafu {}.build()));
|
||||
}
|
||||
let (catalog, schema) =
|
||||
database::split_database(&self.database).map_err(BoxedError::new)?;
|
||||
let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
|
||||
@@ -126,24 +171,43 @@ impl ExportCommand {
|
||||
target: self.target.clone(),
|
||||
start_time: self.start_time.clone(),
|
||||
end_time: self.end_time.clone(),
|
||||
s3: self.s3,
|
||||
s3_bucket: self.s3_bucket.clone(),
|
||||
s3_endpoint: self.s3_endpoint.clone(),
|
||||
s3_access_key: self.s3_access_key.clone(),
|
||||
s3_secret_key: self.s3_secret_key.clone(),
|
||||
s3_region: self.s3_region.clone(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Export {
|
||||
catalog: String,
|
||||
schema: Option<String>,
|
||||
database_client: DatabaseClient,
|
||||
output_dir: String,
|
||||
output_dir: Option<String>,
|
||||
parallelism: usize,
|
||||
target: ExportTarget,
|
||||
start_time: Option<String>,
|
||||
end_time: Option<String>,
|
||||
s3: bool,
|
||||
s3_bucket: Option<String>,
|
||||
s3_endpoint: Option<String>,
|
||||
s3_access_key: Option<String>,
|
||||
s3_secret_key: Option<String>,
|
||||
s3_region: Option<String>,
|
||||
}
|
||||
|
||||
impl Export {
|
||||
fn catalog_path(&self) -> PathBuf {
|
||||
PathBuf::from(&self.output_dir).join(&self.catalog)
|
||||
if self.s3 {
|
||||
PathBuf::from(&self.catalog)
|
||||
} else if let Some(dir) = &self.output_dir {
|
||||
PathBuf::from(dir).join(&self.catalog)
|
||||
} else {
|
||||
unreachable!("catalog_path: output_dir must be set when not using s3")
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_db_names(&self) -> Result<Vec<String>> {
|
||||
@@ -300,19 +364,23 @@ impl Export {
|
||||
let timer = Instant::now();
|
||||
let db_names = self.get_db_names().await?;
|
||||
let db_count = db_names.len();
|
||||
let operator = self.build_operator().await?;
|
||||
|
||||
for schema in db_names {
|
||||
let db_dir = self.catalog_path().join(format!("{schema}/"));
|
||||
tokio::fs::create_dir_all(&db_dir)
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
let file = db_dir.join("create_database.sql");
|
||||
let mut file = File::create(file).await.context(FileIoSnafu)?;
|
||||
let create_database = self
|
||||
.show_create("DATABASE", &self.catalog, &schema, None)
|
||||
.await?;
|
||||
file.write_all(create_database.as_bytes())
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
|
||||
let file_path = self.get_file_path(&schema, "create_database.sql");
|
||||
self.write_to_storage(&operator, &file_path, create_database.into_bytes())
|
||||
.await?;
|
||||
|
||||
info!(
|
||||
"Exported {}.{} database creation SQL to {}",
|
||||
self.catalog,
|
||||
schema,
|
||||
self.format_output_path(&file_path)
|
||||
);
|
||||
}
|
||||
|
||||
let elapsed = timer.elapsed();
|
||||
@@ -326,149 +394,267 @@ impl Export {
|
||||
let semaphore = Arc::new(Semaphore::new(self.parallelism));
|
||||
let db_names = self.get_db_names().await?;
|
||||
let db_count = db_names.len();
|
||||
let operator = Arc::new(self.build_operator().await?);
|
||||
let mut tasks = Vec::with_capacity(db_names.len());
|
||||
|
||||
for schema in db_names {
|
||||
let semaphore_moved = semaphore.clone();
|
||||
let export_self = self.clone();
|
||||
let operator = operator.clone();
|
||||
tasks.push(async move {
|
||||
let _permit = semaphore_moved.acquire().await.unwrap();
|
||||
let (metric_physical_tables, remaining_tables, views) =
|
||||
self.get_table_list(&self.catalog, &schema).await?;
|
||||
let table_count =
|
||||
metric_physical_tables.len() + remaining_tables.len() + views.len();
|
||||
let db_dir = self.catalog_path().join(format!("{schema}/"));
|
||||
tokio::fs::create_dir_all(&db_dir)
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
let file = db_dir.join("create_tables.sql");
|
||||
let mut file = File::create(file).await.context(FileIoSnafu)?;
|
||||
for (c, s, t) in metric_physical_tables.into_iter().chain(remaining_tables) {
|
||||
let create_table = self.show_create("TABLE", &c, &s, Some(&t)).await?;
|
||||
file.write_all(create_table.as_bytes())
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
}
|
||||
for (c, s, v) in views {
|
||||
let create_view = self.show_create("VIEW", &c, &s, Some(&v)).await?;
|
||||
file.write_all(create_view.as_bytes())
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
let (metric_physical_tables, remaining_tables, views) = export_self
|
||||
.get_table_list(&export_self.catalog, &schema)
|
||||
.await?;
|
||||
|
||||
// Create directory if needed for file system storage
|
||||
if !export_self.s3 {
|
||||
let db_dir = format!("{}/{}/", export_self.catalog, schema);
|
||||
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
|
||||
}
|
||||
|
||||
let file_path = export_self.get_file_path(&schema, "create_tables.sql");
|
||||
let mut content = Vec::new();
|
||||
|
||||
// Add table creation SQL
|
||||
for (c, s, t) in metric_physical_tables.iter().chain(&remaining_tables) {
|
||||
let create_table = export_self.show_create("TABLE", c, s, Some(t)).await?;
|
||||
content.extend_from_slice(create_table.as_bytes());
|
||||
}
|
||||
|
||||
// Add view creation SQL
|
||||
for (c, s, v) in &views {
|
||||
let create_view = export_self.show_create("VIEW", c, s, Some(v)).await?;
|
||||
content.extend_from_slice(create_view.as_bytes());
|
||||
}
|
||||
|
||||
// Write to storage
|
||||
export_self
|
||||
.write_to_storage(&operator, &file_path, content)
|
||||
.await?;
|
||||
|
||||
info!(
|
||||
"Finished exporting {}.{schema} with {table_count} table schemas to path: {}",
|
||||
self.catalog,
|
||||
db_dir.to_string_lossy()
|
||||
"Finished exporting {}.{schema} with {} table schemas to path: {}",
|
||||
export_self.catalog,
|
||||
metric_physical_tables.len() + remaining_tables.len() + views.len(),
|
||||
export_self.format_output_path(&file_path)
|
||||
);
|
||||
|
||||
Ok::<(), Error>(())
|
||||
});
|
||||
}
|
||||
|
||||
let success = futures::future::join_all(tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.filter(|r| match r {
|
||||
Ok(_) => true,
|
||||
Err(e) => {
|
||||
error!(e; "export schema job failed");
|
||||
false
|
||||
}
|
||||
})
|
||||
.count();
|
||||
|
||||
let success = self.execute_tasks(tasks).await;
|
||||
let elapsed = timer.elapsed();
|
||||
info!("Success {success}/{db_count} jobs, cost: {elapsed:?}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Builds the storage operator used to write export artifacts.
///
/// Dispatches on the `s3` flag: the filesystem-backed operator is built when
/// the flag is off, the S3-backed operator otherwise.
async fn build_operator(&self) -> Result<Operator> {
    if !self.s3 {
        return self.build_fs_operator().await;
    }
    self.build_s3_operator().await
}
|
||||
|
||||
async fn build_s3_operator(&self) -> Result<Operator> {
|
||||
let mut builder = services::S3::default().root("").bucket(
|
||||
self.s3_bucket
|
||||
.as_ref()
|
||||
.expect("s3_bucket must be provided when s3 is enabled"),
|
||||
);
|
||||
|
||||
if let Some(endpoint) = self.s3_endpoint.as_ref() {
|
||||
builder = builder.endpoint(endpoint);
|
||||
}
|
||||
|
||||
if let Some(region) = self.s3_region.as_ref() {
|
||||
builder = builder.region(region);
|
||||
}
|
||||
|
||||
if let Some(key_id) = self.s3_access_key.as_ref() {
|
||||
builder = builder.access_key_id(key_id);
|
||||
}
|
||||
|
||||
if let Some(secret_key) = self.s3_secret_key.as_ref() {
|
||||
builder = builder.secret_access_key(secret_key);
|
||||
}
|
||||
|
||||
let op = Operator::new(builder)
|
||||
.context(OpenDalSnafu)?
|
||||
.layer(LoggingLayer::default())
|
||||
.finish();
|
||||
Ok(op)
|
||||
}
|
||||
|
||||
async fn build_fs_operator(&self) -> Result<Operator> {
|
||||
let root = self
|
||||
.output_dir
|
||||
.as_ref()
|
||||
.context(OutputDirNotSetSnafu)?
|
||||
.clone();
|
||||
let op = Operator::new(services::Fs::default().root(&root))
|
||||
.context(OpenDalSnafu)?
|
||||
.layer(LoggingLayer::default())
|
||||
.finish();
|
||||
Ok(op)
|
||||
}
|
||||
|
||||
async fn export_database_data(&self) -> Result<()> {
|
||||
let timer = Instant::now();
|
||||
let semaphore = Arc::new(Semaphore::new(self.parallelism));
|
||||
let db_names = self.get_db_names().await?;
|
||||
let db_count = db_names.len();
|
||||
let mut tasks = Vec::with_capacity(db_count);
|
||||
let operator = Arc::new(self.build_operator().await?);
|
||||
let with_options = build_with_options(&self.start_time, &self.end_time);
|
||||
|
||||
for schema in db_names {
|
||||
let semaphore_moved = semaphore.clone();
|
||||
let export_self = self.clone();
|
||||
let with_options_clone = with_options.clone();
|
||||
let operator = operator.clone();
|
||||
|
||||
tasks.push(async move {
|
||||
let _permit = semaphore_moved.acquire().await.unwrap();
|
||||
let db_dir = self.catalog_path().join(format!("{schema}/"));
|
||||
tokio::fs::create_dir_all(&db_dir)
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
|
||||
let with_options = match (&self.start_time, &self.end_time) {
|
||||
(Some(start_time), Some(end_time)) => {
|
||||
format!(
|
||||
"WITH (FORMAT='parquet', start_time='{}', end_time='{}')",
|
||||
start_time, end_time
|
||||
)
|
||||
}
|
||||
(Some(start_time), None) => {
|
||||
format!("WITH (FORMAT='parquet', start_time='{}')", start_time)
|
||||
}
|
||||
(None, Some(end_time)) => {
|
||||
format!("WITH (FORMAT='parquet', end_time='{}')", end_time)
|
||||
}
|
||||
(None, None) => "WITH (FORMAT='parquet')".to_string(),
|
||||
};
|
||||
// Create directory if not using S3
|
||||
if !export_self.s3 {
|
||||
let db_dir = format!("{}/{}/", export_self.catalog, schema);
|
||||
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
|
||||
}
|
||||
|
||||
let (path, connection_part) = export_self.get_storage_params(&schema);
|
||||
|
||||
// Execute COPY DATABASE TO command
|
||||
let sql = format!(
|
||||
r#"COPY DATABASE "{}"."{}" TO '{}' {};"#,
|
||||
self.catalog,
|
||||
schema,
|
||||
db_dir.to_str().unwrap(),
|
||||
with_options
|
||||
r#"COPY DATABASE "{}"."{}" TO '{}' WITH ({}){};"#,
|
||||
export_self.catalog, schema, path, with_options_clone, connection_part
|
||||
);
|
||||
info!("Executing sql: {sql}");
|
||||
export_self.database_client.sql_in_public(&sql).await?;
|
||||
info!(
|
||||
"Finished exporting {}.{} data to {}",
|
||||
export_self.catalog, schema, path
|
||||
);
|
||||
|
||||
info!("Executing sql: {sql}");
|
||||
// Create copy_from.sql file
|
||||
let copy_database_from_sql = format!(
|
||||
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({}){};"#,
|
||||
export_self.catalog, schema, path, with_options_clone, connection_part
|
||||
);
|
||||
|
||||
self.database_client.sql_in_public(&sql).await?;
|
||||
let copy_from_path = export_self.get_file_path(&schema, "copy_from.sql");
|
||||
export_self
|
||||
.write_to_storage(
|
||||
&operator,
|
||||
&copy_from_path,
|
||||
copy_database_from_sql.into_bytes(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
info!(
|
||||
"Finished exporting {}.{schema} data into path: {}",
|
||||
self.catalog,
|
||||
db_dir.to_string_lossy()
|
||||
);
|
||||
|
||||
// The export copy from sql
|
||||
let copy_from_file = db_dir.join("copy_from.sql");
|
||||
let mut writer =
|
||||
BufWriter::new(File::create(copy_from_file).await.context(FileIoSnafu)?);
|
||||
let copy_database_from_sql = format!(
|
||||
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH (FORMAT='parquet');"#,
|
||||
self.catalog,
|
||||
"Finished exporting {}.{} copy_from.sql to {}",
|
||||
export_self.catalog,
|
||||
schema,
|
||||
db_dir.to_str().unwrap()
|
||||
export_self.format_output_path(&copy_from_path)
|
||||
);
|
||||
writer
|
||||
.write(copy_database_from_sql.as_bytes())
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
writer.flush().await.context(FileIoSnafu)?;
|
||||
|
||||
info!("Finished exporting {}.{schema} copy_from.sql", self.catalog);
|
||||
|
||||
Ok::<(), Error>(())
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
let success = futures::future::join_all(tasks)
|
||||
let success = self.execute_tasks(tasks).await;
|
||||
let elapsed = timer.elapsed();
|
||||
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_file_path(&self, schema: &str, file_name: &str) -> String {
|
||||
format!("{}/{}/{}", self.catalog, schema, file_name)
|
||||
}
|
||||
|
||||
fn format_output_path(&self, file_path: &str) -> String {
|
||||
if self.s3 {
|
||||
format!(
|
||||
"s3://{}/{}",
|
||||
self.s3_bucket.as_ref().unwrap_or(&String::new()),
|
||||
file_path
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{}/{}",
|
||||
self.output_dir.as_ref().unwrap_or(&String::new()),
|
||||
file_path
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async fn write_to_storage(
|
||||
&self,
|
||||
op: &Operator,
|
||||
file_path: &str,
|
||||
content: Vec<u8>,
|
||||
) -> Result<()> {
|
||||
op.write(file_path, content).await.context(OpenDalSnafu)
|
||||
}
|
||||
|
||||
fn get_storage_params(&self, schema: &str) -> (String, String) {
|
||||
if self.s3 {
|
||||
let s3_path = format!(
|
||||
"s3://{}/{}/{}/",
|
||||
// Safety: s3_bucket is required when s3 is enabled
|
||||
self.s3_bucket.as_ref().unwrap(),
|
||||
self.catalog,
|
||||
schema
|
||||
);
|
||||
|
||||
// endpoint is optional
|
||||
let endpoint_option = if let Some(endpoint) = self.s3_endpoint.as_ref() {
|
||||
format!(", ENDPOINT='{}'", endpoint)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
// Safety: All s3 options are required
|
||||
let connection_options = format!(
|
||||
"ACCESS_KEY_ID='{}', SECRET_ACCESS_KEY='{}', REGION='{}'{}",
|
||||
self.s3_access_key.as_ref().unwrap(),
|
||||
self.s3_secret_key.as_ref().unwrap(),
|
||||
self.s3_region.as_ref().unwrap(),
|
||||
endpoint_option
|
||||
);
|
||||
|
||||
(s3_path, format!(" CONNECTION ({})", connection_options))
|
||||
} else {
|
||||
(
|
||||
self.catalog_path()
|
||||
.join(format!("{schema}/"))
|
||||
.to_string_lossy()
|
||||
.to_string(),
|
||||
String::new(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async fn execute_tasks(
|
||||
&self,
|
||||
tasks: Vec<impl std::future::Future<Output = Result<()>>>,
|
||||
) -> usize {
|
||||
futures::future::join_all(tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.filter(|r| match r {
|
||||
Ok(_) => true,
|
||||
Err(e) => {
|
||||
error!(e; "export database job failed");
|
||||
error!(e; "export job failed");
|
||||
false
|
||||
}
|
||||
})
|
||||
.count();
|
||||
let elapsed = timer.elapsed();
|
||||
|
||||
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
|
||||
|
||||
Ok(())
|
||||
.count()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -493,3 +679,15 @@ impl Tool for Export {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the WITH options string for SQL commands, assuming consistent syntax across S3 and local exports.
|
||||
fn build_with_options(start_time: &Option<String>, end_time: &Option<String>) -> String {
|
||||
let mut options = vec!["format = 'parquet'".to_string()];
|
||||
if let Some(start) = start_time {
|
||||
options.push(format!("start_time = '{}'", start));
|
||||
}
|
||||
if let Some(end) = end_time {
|
||||
options.push(format!("end_time = '{}'", end));
|
||||
}
|
||||
options.join(", ")
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ api.workspace = true
|
||||
arc-swap = "1.0"
|
||||
async-trait.workspace = true
|
||||
bincode = "1.3"
|
||||
chrono.workspace = true
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-error.workspace = true
|
||||
|
||||
@@ -43,7 +43,6 @@ impl Function for DateFormatFunction {
|
||||
helper::one_of_sigs2(
|
||||
vec![
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
@@ -105,22 +104,6 @@ impl Function for DateFormatFunction {
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
}
|
||||
ConcreteDataType::DateTime(_) => {
|
||||
for i in 0..size {
|
||||
let datetime = left.get(i).as_datetime();
|
||||
let format = formats.get(i).as_string();
|
||||
|
||||
let result = match (datetime, format) {
|
||||
(Some(datetime), Some(fmt)) => datetime
|
||||
.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteSnafu)?,
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
@@ -147,7 +130,7 @@ mod tests {
|
||||
use common_query::prelude::{TypeSignature, Volatility};
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVector};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{DateTimeVector, DateVector, StringVector, TimestampSecondVector};
|
||||
use datatypes::vectors::{DateVector, StringVector, TimestampSecondVector};
|
||||
|
||||
use super::{DateFormatFunction, *};
|
||||
|
||||
@@ -169,16 +152,11 @@ mod tests {
|
||||
ConcreteDataType::string_datatype(),
|
||||
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
f.return_type(&[ConcreteDataType::datetime_datatype()])
|
||||
.unwrap()
|
||||
);
|
||||
assert!(matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::OneOf(sigs),
|
||||
volatility: Volatility::Immutable
|
||||
} if sigs.len() == 6));
|
||||
} if sigs.len() == 5));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -262,45 +240,4 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_datetime_date_format() {
|
||||
let f = DateFormatFunction;
|
||||
|
||||
let dates = vec![Some(123), None, Some(42), None];
|
||||
let formats = vec![
|
||||
"%Y-%m-%d %T.%3f",
|
||||
"%Y-%m-%d %T.%3f",
|
||||
"%Y-%m-%d %T.%3f",
|
||||
"%Y-%m-%d %T.%3f",
|
||||
];
|
||||
let results = [
|
||||
Some("1970-01-01 00:00:00.123"),
|
||||
None,
|
||||
Some("1970-01-01 00:00:00.042"),
|
||||
None,
|
||||
];
|
||||
|
||||
let date_vector = DateTimeVector::from(dates.clone());
|
||||
let interval_vector = StringVector::from_vec(formats);
|
||||
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
|
||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
||||
|
||||
assert_eq!(4, vector.len());
|
||||
for (i, _t) in dates.iter().enumerate() {
|
||||
let v = vector.get(i);
|
||||
let result = results.get(i).unwrap();
|
||||
|
||||
if result.is_none() {
|
||||
assert_eq!(Value::Null, v);
|
||||
continue;
|
||||
}
|
||||
match v {
|
||||
Value::String(s) => {
|
||||
assert_eq!(s.as_utf8(), result.unwrap());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,11 +118,6 @@ mod tests {
|
||||
ConcreteDataType::date_datatype(),
|
||||
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
f.return_type(&[ConcreteDataType::datetime_datatype()])
|
||||
.unwrap()
|
||||
);
|
||||
assert!(
|
||||
matches!(f.signature(),
|
||||
Signature {
|
||||
|
||||
@@ -23,7 +23,7 @@ use datatypes::arrow::array::AsArray;
|
||||
use datatypes::arrow::compute::cast;
|
||||
use datatypes::arrow::compute::kernels::zip;
|
||||
use datatypes::arrow::datatypes::{
|
||||
DataType as ArrowDataType, Date32Type, Date64Type, TimestampMicrosecondType,
|
||||
DataType as ArrowDataType, Date32Type, TimeUnit, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
|
||||
};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
@@ -69,9 +69,8 @@ impl Function for GreatestFunction {
|
||||
);
|
||||
|
||||
match &input_types[0] {
|
||||
ConcreteDataType::String(_) => Ok(ConcreteDataType::datetime_datatype()),
|
||||
ConcreteDataType::String(_) => Ok(ConcreteDataType::timestamp_millisecond_datatype()),
|
||||
ConcreteDataType::Date(_) => Ok(ConcreteDataType::date_datatype()),
|
||||
ConcreteDataType::DateTime(_) => Ok(ConcreteDataType::datetime_datatype()),
|
||||
ConcreteDataType::Timestamp(ts_type) => Ok(ConcreteDataType::Timestamp(*ts_type)),
|
||||
_ => UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
@@ -87,7 +86,6 @@ impl Function for GreatestFunction {
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
@@ -109,20 +107,24 @@ impl Function for GreatestFunction {
|
||||
);
|
||||
match columns[0].data_type() {
|
||||
ConcreteDataType::String(_) => {
|
||||
// Treats string as `DateTime` type.
|
||||
let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date64)
|
||||
.context(ArrowComputeSnafu)?;
|
||||
let column1 = column1.as_primitive::<Date64Type>();
|
||||
let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date64)
|
||||
.context(ArrowComputeSnafu)?;
|
||||
let column2 = column2.as_primitive::<Date64Type>();
|
||||
let column1 = cast(
|
||||
&columns[0].to_arrow_array(),
|
||||
&ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
)
|
||||
.context(ArrowComputeSnafu)?;
|
||||
let column1 = column1.as_primitive::<TimestampMillisecondType>();
|
||||
let column2 = cast(
|
||||
&columns[1].to_arrow_array(),
|
||||
&ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
)
|
||||
.context(ArrowComputeSnafu)?;
|
||||
let column2 = column2.as_primitive::<TimestampMillisecondType>();
|
||||
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
|
||||
let result =
|
||||
zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
|
||||
Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
|
||||
}
|
||||
ConcreteDataType::Date(_) => gt_time_types!(Date32Type, columns),
|
||||
ConcreteDataType::DateTime(_) => gt_time_types!(Date64Type, columns),
|
||||
ConcreteDataType::Timestamp(ts_type) => match ts_type {
|
||||
TimestampType::Second(_) => gt_time_types!(TimestampSecondType, columns),
|
||||
TimestampType::Millisecond(_) => {
|
||||
@@ -155,15 +157,15 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
use common_time::{Date, Timestamp};
|
||||
use datatypes::types::{
|
||||
DateTimeType, DateType, TimestampMicrosecondType, TimestampMillisecondType,
|
||||
TimestampNanosecondType, TimestampSecondType,
|
||||
DateType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
|
||||
TimestampSecondType,
|
||||
};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
DateTimeVector, DateVector, StringVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
DateVector, StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
};
|
||||
use paste::paste;
|
||||
|
||||
@@ -178,7 +180,7 @@ mod tests {
|
||||
ConcreteDataType::string_datatype()
|
||||
])
|
||||
.unwrap(),
|
||||
ConcreteDataType::DateTime(DateTimeType)
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
);
|
||||
let columns = vec![
|
||||
Arc::new(StringVector::from(vec![
|
||||
@@ -194,15 +196,18 @@ mod tests {
|
||||
let result = function
|
||||
.eval(&FunctionContext::default(), &columns)
|
||||
.unwrap();
|
||||
let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
|
||||
let result = result
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondVector>()
|
||||
.unwrap();
|
||||
assert_eq!(result.len(), 2);
|
||||
assert_eq!(
|
||||
result.get(0),
|
||||
Value::DateTime(DateTime::from_str("2001-02-01 00:00:00", None).unwrap())
|
||||
Value::Timestamp(Timestamp::from_str("2001-02-01 00:00:00", None).unwrap())
|
||||
);
|
||||
assert_eq!(
|
||||
result.get(1),
|
||||
Value::DateTime(DateTime::from_str("2012-12-23 00:00:00", None).unwrap())
|
||||
Value::Timestamp(Timestamp::from_str("2012-12-23 00:00:00", None).unwrap())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -245,30 +250,33 @@ mod tests {
|
||||
assert_eq!(
|
||||
function
|
||||
.return_type(&[
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::datetime_datatype()
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
])
|
||||
.unwrap(),
|
||||
ConcreteDataType::DateTime(DateTimeType)
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
);
|
||||
|
||||
let columns = vec![
|
||||
Arc::new(DateTimeVector::from_slice(vec![-1, 2])) as _,
|
||||
Arc::new(DateTimeVector::from_slice(vec![0, 1])) as _,
|
||||
Arc::new(TimestampMillisecondVector::from_slice(vec![-1, 2])) as _,
|
||||
Arc::new(TimestampMillisecondVector::from_slice(vec![0, 1])) as _,
|
||||
];
|
||||
|
||||
let result = function
|
||||
.eval(&FunctionContext::default(), &columns)
|
||||
.unwrap();
|
||||
let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
|
||||
let result = result
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondVector>()
|
||||
.unwrap();
|
||||
assert_eq!(result.len(), 2);
|
||||
assert_eq!(
|
||||
result.get(0),
|
||||
Value::DateTime(DateTime::from_str("1970-01-01 00:00:00", None).unwrap())
|
||||
Value::Timestamp(Timestamp::from_str("1970-01-01 00:00:00", None).unwrap())
|
||||
);
|
||||
assert_eq!(
|
||||
result.get(1),
|
||||
Value::DateTime(DateTime::from_str("1970-01-01 00:00:00.002", None).unwrap())
|
||||
Value::Timestamp(Timestamp::from_str("1970-01-01 00:00:00.002", None).unwrap())
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::sync::Arc;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
use common_time::{Date, Timestamp};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::vectors::{Int64Vector, VectorRef};
|
||||
use snafu::ensure;
|
||||
@@ -32,10 +32,6 @@ const NAME: &str = "to_unixtime";
|
||||
|
||||
fn convert_to_seconds(arg: &str, func_ctx: &FunctionContext) -> Option<i64> {
|
||||
let timezone = &func_ctx.query_ctx.timezone();
|
||||
if let Ok(dt) = DateTime::from_str(arg, Some(timezone)) {
|
||||
return Some(dt.val() / 1000);
|
||||
}
|
||||
|
||||
if let Ok(ts) = Timestamp::from_str(arg, Some(timezone)) {
|
||||
return Some(ts.split().0);
|
||||
}
|
||||
@@ -59,12 +55,6 @@ fn convert_dates_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
|
||||
.collect::<Vec<Option<i64>>>()
|
||||
}
|
||||
|
||||
fn convert_datetimes_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
|
||||
(0..vector.len())
|
||||
.map(|i| vector.get(i).as_datetime().map(|dt| dt.val() / 1000))
|
||||
.collect::<Vec<Option<i64>>>()
|
||||
}
|
||||
|
||||
impl Function for ToUnixtimeFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
@@ -82,7 +72,6 @@ impl Function for ToUnixtimeFunction {
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
@@ -119,10 +108,6 @@ impl Function for ToUnixtimeFunction {
|
||||
let seconds = convert_dates_to_seconds(vector);
|
||||
Ok(Arc::new(Int64Vector::from(seconds)))
|
||||
}
|
||||
ConcreteDataType::DateTime(_) => {
|
||||
let seconds = convert_datetimes_to_seconds(vector);
|
||||
Ok(Arc::new(Int64Vector::from(seconds)))
|
||||
}
|
||||
ConcreteDataType::Timestamp(_) => {
|
||||
let seconds = convert_timestamps_to_seconds(vector);
|
||||
Ok(Arc::new(Int64Vector::from(seconds)))
|
||||
@@ -148,7 +133,7 @@ mod tests {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
DateTimeVector, DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
|
||||
DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
|
||||
};
|
||||
|
||||
use super::{ToUnixtimeFunction, *};
|
||||
@@ -171,7 +156,6 @@ mod tests {
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
@@ -253,31 +237,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_datetime_to_unixtime() {
|
||||
let f = ToUnixtimeFunction;
|
||||
|
||||
let times = vec![Some(123000), None, Some(42000), None];
|
||||
let results = [Some(123), None, Some(42), None];
|
||||
let date_vector = DateTimeVector::from(times.clone());
|
||||
let args: Vec<VectorRef> = vec![Arc::new(date_vector)];
|
||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
||||
assert_eq!(4, vector.len());
|
||||
for (i, _t) in times.iter().enumerate() {
|
||||
let v = vector.get(i);
|
||||
if i == 1 || i == 3 {
|
||||
assert_eq!(Value::Null, v);
|
||||
continue;
|
||||
}
|
||||
match v {
|
||||
Value::Int64(ts) => {
|
||||
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_timestamp_to_unixtime() {
|
||||
let f = ToUnixtimeFunction;
|
||||
|
||||
@@ -17,8 +17,8 @@ use api::v1::column::Values;
|
||||
use common_base::BitVec;
|
||||
use datatypes::types::{IntervalType, TimeType, TimestampType, WrapperType};
|
||||
use datatypes::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
|
||||
Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, IntervalDayTimeVector,
|
||||
BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
|
||||
Int16Vector, Int32Vector, Int64Vector, Int8Vector, IntervalDayTimeVector,
|
||||
IntervalMonthDayNanoVector, IntervalYearMonthVector, StringVector, TimeMicrosecondVector,
|
||||
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
|
||||
@@ -141,12 +141,6 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
|
||||
(ConcreteDataType::Date(_), DateVector, date_values, |x| {
|
||||
x.val()
|
||||
}),
|
||||
(
|
||||
ConcreteDataType::DateTime(_),
|
||||
DateTimeVector,
|
||||
datetime_values,
|
||||
|x| { x.val() }
|
||||
),
|
||||
(
|
||||
ConcreteDataType::Timestamp(TimestampType::Second(_)),
|
||||
TimestampSecondVector,
|
||||
|
||||
@@ -18,11 +18,13 @@ mod print_caller;
|
||||
mod range_fn;
|
||||
mod stack_trace_debug;
|
||||
mod utils;
|
||||
|
||||
use aggr_func::{impl_aggr_func_type_store, impl_as_aggr_func_creator};
|
||||
use print_caller::process_print_caller;
|
||||
use proc_macro::TokenStream;
|
||||
use quote::quote;
|
||||
use range_fn::process_range_fn;
|
||||
use syn::{parse_macro_input, DeriveInput};
|
||||
use syn::{parse_macro_input, Data, DeriveInput, Fields};
|
||||
|
||||
use crate::admin_fn::process_admin_fn;
|
||||
|
||||
@@ -136,3 +138,51 @@ pub fn print_caller(args: TokenStream, input: TokenStream) -> TokenStream {
|
||||
pub fn stack_trace_debug(args: TokenStream, input: TokenStream) -> TokenStream {
|
||||
stack_trace_debug::stack_trace_style_impl(args.into(), input.into()).into()
|
||||
}
|
||||
|
||||
/// Generates implementation for `From<&TableMeta> for TableMetaBuilder`
|
||||
#[proc_macro_derive(ToMetaBuilder)]
|
||||
pub fn derive_meta_builder(input: TokenStream) -> TokenStream {
|
||||
let input = parse_macro_input!(input as DeriveInput);
|
||||
|
||||
let Data::Struct(data_struct) = input.data else {
|
||||
panic!("ToMetaBuilder can only be derived for structs");
|
||||
};
|
||||
|
||||
let Fields::Named(fields) = data_struct.fields else {
|
||||
panic!("ToMetaBuilder can only be derived for structs with named fields");
|
||||
};
|
||||
|
||||
// Check that this is being applied to TableMeta struct
|
||||
if input.ident != "TableMeta" {
|
||||
panic!("ToMetaBuilder can only be derived for TableMeta struct");
|
||||
}
|
||||
|
||||
let field_init = fields.named.iter().map(|field| {
|
||||
let field_name = field.ident.as_ref().unwrap();
|
||||
quote! {
|
||||
#field_name: Default::default(),
|
||||
}
|
||||
});
|
||||
|
||||
let field_assignments = fields.named.iter().map(|field| {
|
||||
let field_name = field.ident.as_ref().unwrap();
|
||||
quote! {
|
||||
builder.#field_name(meta.#field_name.clone());
|
||||
}
|
||||
});
|
||||
|
||||
let gen = quote! {
|
||||
impl From<&TableMeta> for TableMetaBuilder {
|
||||
fn from(meta: &TableMeta) -> Self {
|
||||
let mut builder = Self {
|
||||
#(#field_init)*
|
||||
};
|
||||
|
||||
#(#field_assignments)*
|
||||
builder
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
gen.into()
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ license.workspace = true
|
||||
[features]
|
||||
testing = []
|
||||
pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]
|
||||
mysql_kvbackend = ["dep:sqlx", "dep:backon"]
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
@@ -57,9 +58,10 @@ serde_json.workspace = true
|
||||
serde_with.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
sqlx = { workspace = true, optional = true }
|
||||
store-api.workspace = true
|
||||
strum.workspace = true
|
||||
table.workspace = true
|
||||
table = { workspace = true, features = ["testing"] }
|
||||
tokio.workspace = true
|
||||
tokio-postgres = { workspace = true, optional = true }
|
||||
tonic.workspace = true
|
||||
|
||||
@@ -192,6 +192,8 @@ mod tests {
|
||||
expire_after: Some(300),
|
||||
comment: "comment".to_string(),
|
||||
options: Default::default(),
|
||||
created_time: chrono::Utc::now(),
|
||||
updated_time: chrono::Utc::now(),
|
||||
},
|
||||
(1..=3)
|
||||
.map(|i| {
|
||||
|
||||
@@ -425,7 +425,14 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
|
||||
let flow_type = value.flow_type.unwrap_or_default().to_string();
|
||||
options.insert("flow_type".to_string(), flow_type);
|
||||
|
||||
let flow_info = FlowInfoValue {
|
||||
let mut create_time = chrono::Utc::now();
|
||||
if let Some(prev_flow_value) = value.prev_flow_info_value.as_ref()
|
||||
&& value.task.or_replace
|
||||
{
|
||||
create_time = prev_flow_value.get_inner_ref().created_time;
|
||||
}
|
||||
|
||||
let flow_info: FlowInfoValue = FlowInfoValue {
|
||||
source_table_ids: value.source_table_ids.clone(),
|
||||
sink_table_name,
|
||||
flownode_ids,
|
||||
@@ -435,6 +442,8 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
|
||||
expire_after,
|
||||
comment,
|
||||
options,
|
||||
created_time: create_time,
|
||||
updated_time: chrono::Utc::now(),
|
||||
};
|
||||
|
||||
(flow_info, flow_routes)
|
||||
|
||||
@@ -685,7 +685,36 @@ pub enum Error {
|
||||
operation: String,
|
||||
},
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
#[snafu(display("Failed to execute via MySql, sql: {}", sql))]
|
||||
MySqlExecution {
|
||||
sql: String,
|
||||
#[snafu(source)]
|
||||
error: sqlx::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
#[snafu(display("Failed to create connection pool for MySql"))]
|
||||
CreateMySqlPool {
|
||||
#[snafu(source)]
|
||||
error: sqlx::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
#[snafu(display("Failed to {} MySql transaction", operation))]
|
||||
MySqlTransaction {
|
||||
#[snafu(source)]
|
||||
error: sqlx::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
operation: String,
|
||||
},
|
||||
|
||||
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
|
||||
#[snafu(display("Rds transaction retry failed"))]
|
||||
RdsTransactionRetryFailed {
|
||||
#[snafu(implicit)]
|
||||
@@ -823,8 +852,13 @@ impl ErrorExt for Error {
|
||||
PostgresExecution { .. }
|
||||
| CreatePostgresPool { .. }
|
||||
| GetPostgresConnection { .. }
|
||||
| PostgresTransaction { .. }
|
||||
| RdsTransactionRetryFailed { .. } => StatusCode::Internal,
|
||||
| PostgresTransaction { .. } => StatusCode::Internal,
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
MySqlExecution { .. } | CreateMySqlPool { .. } | MySqlTransaction { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
|
||||
RdsTransactionRetryFailed { .. } => StatusCode::Internal,
|
||||
Error::DatanodeTableInfoNotFound { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
@@ -835,16 +869,29 @@ impl ErrorExt for Error {
|
||||
}
|
||||
|
||||
impl Error {
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
|
||||
/// Check if the error is a serialization error.
|
||||
pub fn is_serialization_error(&self) -> bool {
|
||||
match self {
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
Error::PostgresTransaction { error, .. } => {
|
||||
error.code() == Some(&tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE)
|
||||
}
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
Error::PostgresExecution { error, .. } => {
|
||||
error.code() == Some(&tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE)
|
||||
}
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
Error::MySqlExecution {
|
||||
error: sqlx::Error::Database(database_error),
|
||||
..
|
||||
} => {
|
||||
matches!(
|
||||
database_error.message(),
|
||||
"Deadlock found when trying to get lock; try restarting transaction"
|
||||
| "can't serialize access for this transaction"
|
||||
)
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -461,6 +461,8 @@ mod tests {
|
||||
expire_after: Some(300),
|
||||
comment: "hi".to_string(),
|
||||
options: Default::default(),
|
||||
created_time: chrono::Utc::now(),
|
||||
updated_time: chrono::Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -632,6 +634,8 @@ mod tests {
|
||||
expire_after: Some(300),
|
||||
comment: "hi".to_string(),
|
||||
options: Default::default(),
|
||||
created_time: chrono::Utc::now(),
|
||||
updated_time: chrono::Utc::now(),
|
||||
};
|
||||
let err = flow_metadata_manager
|
||||
.create_flow_metadata(flow_id, flow_value, flow_routes.clone())
|
||||
@@ -869,6 +873,8 @@ mod tests {
|
||||
expire_after: Some(300),
|
||||
comment: "hi".to_string(),
|
||||
options: Default::default(),
|
||||
created_time: chrono::Utc::now(),
|
||||
updated_time: chrono::Utc::now(),
|
||||
};
|
||||
let err = flow_metadata_manager
|
||||
.update_flow_metadata(
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -131,6 +132,12 @@ pub struct FlowInfoValue {
|
||||
pub(crate) comment: String,
|
||||
/// The options.
|
||||
pub(crate) options: HashMap<String, String>,
|
||||
/// The created time
|
||||
#[serde(default)]
|
||||
pub(crate) created_time: DateTime<Utc>,
|
||||
/// The updated time.
|
||||
#[serde(default)]
|
||||
pub(crate) updated_time: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl FlowInfoValue {
|
||||
@@ -171,6 +178,14 @@ impl FlowInfoValue {
|
||||
pub fn options(&self) -> &HashMap<String, String> {
|
||||
&self.options
|
||||
}
|
||||
|
||||
pub fn created_time(&self) -> &DateTime<Utc> {
|
||||
&self.created_time
|
||||
}
|
||||
|
||||
pub fn updated_time(&self) -> &DateTime<Utc> {
|
||||
&self.updated_time
|
||||
}
|
||||
}
|
||||
|
||||
pub type FlowInfoManagerRef = Arc<FlowInfoManager>;
|
||||
|
||||
@@ -97,11 +97,19 @@ impl<'a> MetadataKey<'a, FlowStateKey> for FlowStateKey {
|
||||
pub struct FlowStateValue {
|
||||
/// For each key, the bytes of the state in memory
|
||||
pub state_size: BTreeMap<FlowId, usize>,
|
||||
/// For each key, the last execution time of flow in unix timestamp milliseconds.
|
||||
pub last_exec_time_map: BTreeMap<FlowId, i64>,
|
||||
}
|
||||
|
||||
impl FlowStateValue {
|
||||
pub fn new(state_size: BTreeMap<FlowId, usize>) -> Self {
|
||||
Self { state_size }
|
||||
pub fn new(
|
||||
state_size: BTreeMap<FlowId, usize>,
|
||||
last_exec_time_map: BTreeMap<FlowId, i64>,
|
||||
) -> Self {
|
||||
Self {
|
||||
state_size,
|
||||
last_exec_time_map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,12 +151,15 @@ impl FlowStateManager {
|
||||
pub struct FlowStat {
|
||||
/// For each key, the bytes of the state in memory
|
||||
pub state_size: BTreeMap<u32, usize>,
|
||||
/// For each key, the last execution time of flow in unix timestamp milliseconds.
|
||||
pub last_exec_time_map: BTreeMap<FlowId, i64>,
|
||||
}
|
||||
|
||||
impl From<FlowStateValue> for FlowStat {
|
||||
fn from(value: FlowStateValue) -> Self {
|
||||
Self {
|
||||
state_size: value.state_size,
|
||||
last_exec_time_map: value.last_exec_time_map,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -157,6 +168,7 @@ impl From<FlowStat> for FlowStateValue {
|
||||
fn from(value: FlowStat) -> Self {
|
||||
Self {
|
||||
state_size: value.state_size,
|
||||
last_exec_time_map: value.last_exec_time_map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let meta = TableMetaBuilder::default()
|
||||
let meta = TableMetaBuilder::empty()
|
||||
.schema(Arc::new(schema))
|
||||
.primary_key_indices(vec![0])
|
||||
.engine("engine")
|
||||
|
||||
@@ -31,7 +31,7 @@ use crate::rpc::KeyValue;
|
||||
pub mod chroot;
|
||||
pub mod etcd;
|
||||
pub mod memory;
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[cfg(any(feature = "mysql_kvbackend", feature = "pg_kvbackend"))]
|
||||
pub mod rds;
|
||||
pub mod test;
|
||||
pub mod txn;
|
||||
|
||||
@@ -14,13 +14,11 @@
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::ErrorExt;
|
||||
use serde::Serializer;
|
||||
|
||||
use super::{KvBackendRef, ResettableKvBackend};
|
||||
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse};
|
||||
@@ -38,19 +36,6 @@ pub struct MemoryKvBackend<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T> Display for MemoryKvBackend<T> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
let kvs = self.kvs.read().unwrap();
|
||||
for (k, v) in kvs.iter() {
|
||||
f.serialize_str(&String::from_utf8_lossy(k))?;
|
||||
f.serialize_str(" -> ")?;
|
||||
f.serialize_str(&String::from_utf8_lossy(v))?;
|
||||
f.serialize_str("\n")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Default for MemoryKvBackend<T> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
|
||||
@@ -33,10 +33,16 @@ use crate::rpc::store::{
|
||||
};
|
||||
use crate::rpc::KeyValue;
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
mod postgres;
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
pub use postgres::PgStore;
|
||||
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
mod mysql;
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
pub use mysql::MySqlStore;
|
||||
|
||||
const RDS_STORE_TXN_RETRY_COUNT: usize = 3;
|
||||
|
||||
/// Query executor for rds. It can execute queries or generate a transaction executor.
|
||||
@@ -106,6 +112,14 @@ impl<T: Executor> ExecutorImpl<'_, T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[warn(dead_code)] // Used in #[cfg(feature = "mysql_kvbackend")]
|
||||
async fn execute(&mut self, query: &str, params: &Vec<&Vec<u8>>) -> Result<()> {
|
||||
match self {
|
||||
Self::Default(executor) => executor.execute(query, params).await,
|
||||
Self::Txn(executor) => executor.execute(query, params).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn commit(self) -> Result<()> {
|
||||
match self {
|
||||
Self::Txn(executor) => executor.commit().await,
|
||||
|
||||
650
src/common/meta/src/kv_backend/rds/mysql.rs
Normal file
650
src/common/meta/src/kv_backend/rds/mysql.rs
Normal file
@@ -0,0 +1,650 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::debug;
|
||||
use snafu::ResultExt;
|
||||
use sqlx::mysql::MySqlRow;
|
||||
use sqlx::pool::Pool;
|
||||
use sqlx::{MySql, MySqlPool, Row, Transaction as MySqlTransaction};
|
||||
|
||||
use crate::error::{CreateMySqlPoolSnafu, MySqlExecutionSnafu, MySqlTransactionSnafu, Result};
|
||||
use crate::kv_backend::rds::{
|
||||
Executor, ExecutorFactory, ExecutorImpl, KvQueryExecutor, RdsStore, Transaction,
|
||||
RDS_STORE_TXN_RETRY_COUNT,
|
||||
};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::{
|
||||
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
|
||||
BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, RangeRequest, RangeResponse,
|
||||
};
|
||||
use crate::rpc::KeyValue;
|
||||
|
||||
/// Shared handle to the sqlx MySQL connection pool.
type MySqlClient = Arc<Pool<MySql>>;
|
||||
/// Newtype around an sqlx MySQL transaction with a `'static` lifetime.
pub struct MySqlTxnClient(MySqlTransaction<'static, MySql>);
|
||||
|
||||
fn key_value_from_row(row: MySqlRow) -> KeyValue {
|
||||
// Safety: key and value are the first two columns in the row
|
||||
KeyValue {
|
||||
key: row.get_unchecked(0),
|
||||
value: row.get_unchecked(1),
|
||||
}
|
||||
}
|
||||
|
||||
/// A single zero byte. `range_template` treats a key or range end equal to
/// this value as "unbounded" when it picks the `Full` / `LeftBounded` templates.
const EMPTY: &[u8] = &[0];
|
||||
|
||||
/// Type of range template.
#[derive(Debug, Clone, Copy)]
enum RangeTemplateType {
    /// Exact match on one key (`k = ?`).
    Point,
    /// Half-open interval (`k >= ? AND k < ?`).
    Range,
    /// Every row of the table; takes no parameters.
    Full,
    /// Every key greater than or equal to the given key (`k >= ?`).
    LeftBounded,
    /// Every key starting with the given key (`k LIKE ?`).
    Prefix,
}

impl RangeTemplateType {
    /// Builds the bind parameters for the given range template type.
    ///
    /// The returned vector has exactly as many entries as the matching SQL
    /// template has `?` placeholders: one for `Point`, `LeftBounded` and
    /// `Prefix`, two for `Range`, none for `Full`.
    fn build_params(&self, key: Vec<u8>, range_end: Vec<u8>) -> Vec<Vec<u8>> {
        match self {
            RangeTemplateType::Point => vec![key],
            RangeTemplateType::Range => vec![key, range_end],
            RangeTemplateType::Full => vec![],
            RangeTemplateType::LeftBounded => vec![key],
            RangeTemplateType::Prefix => vec![Self::like_prefix_pattern(&key)],
        }
    }

    /// Turns a raw key prefix into a `LIKE` pattern matching exactly the keys
    /// that start with `prefix`.
    ///
    /// `%` and `_` are `LIKE` wildcards and `\` is MySQL's default `LIKE`
    /// escape character, so each occurrence inside the prefix is escaped
    /// before the trailing `%` wildcard is appended. Without this, a prefix
    /// such as `__table/` would also match unrelated keys.
    // NOTE(review): relies on the default `\` escape character; servers running
    // with the NO_BACKSLASH_ESCAPES SQL mode have no default escape — confirm
    // that mode is not used for the metadata store.
    fn like_prefix_pattern(prefix: &[u8]) -> Vec<u8> {
        let mut pattern = Vec::with_capacity(prefix.len() + 1);
        for &byte in prefix {
            if matches!(byte, b'%' | b'_' | b'\\') {
                pattern.push(b'\\');
            }
            pattern.push(byte);
        }
        pattern.push(b'%');
        pattern
    }
}
|
||||
|
||||
/// Templates for range request.
|
||||
#[derive(Debug, Clone)]
|
||||
struct RangeTemplate {
|
||||
point: String,
|
||||
range: String,
|
||||
full: String,
|
||||
left_bounded: String,
|
||||
prefix: String,
|
||||
}
|
||||
|
||||
impl RangeTemplate {
|
||||
/// Gets the template for the given type.
|
||||
fn get(&self, typ: RangeTemplateType) -> &str {
|
||||
match typ {
|
||||
RangeTemplateType::Point => &self.point,
|
||||
RangeTemplateType::Range => &self.range,
|
||||
RangeTemplateType::Full => &self.full,
|
||||
RangeTemplateType::LeftBounded => &self.left_bounded,
|
||||
RangeTemplateType::Prefix => &self.prefix,
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds limit to the template.
|
||||
fn with_limit(template: &str, limit: i64) -> String {
|
||||
if limit == 0 {
|
||||
return format!("{};", template);
|
||||
}
|
||||
format!("{} LIMIT {};", template, limit)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `[start, end)` is exactly the range of keys sharing the
/// prefix `start`: both slices have the same length, agree on every byte but
/// the last, and the last byte of `end` is the last byte of `start` plus one.
///
/// Empty slices are never a prefix range, and a `start` ending in `0xff` has
/// no successor byte, so both cases return `false` instead of underflowing
/// the slice index or overflowing the `u8` addition.
fn is_prefix_range(start: &[u8], end: &[u8]) -> bool {
    if start.len() != end.len() {
        return false;
    }
    match (start.split_last(), end.split_last()) {
        (Some((start_last, start_head)), Some((end_last, end_head))) => {
            start_head == end_head && start_last.checked_add(1) == Some(*end_last)
        }
        // Both slices are empty.
        _ => false,
    }
}
|
||||
|
||||
/// Determine the template type for range request.
|
||||
fn range_template(key: &[u8], range_end: &[u8]) -> RangeTemplateType {
|
||||
match (key, range_end) {
|
||||
(_, &[]) => RangeTemplateType::Point,
|
||||
(EMPTY, EMPTY) => RangeTemplateType::Full,
|
||||
(_, EMPTY) => RangeTemplateType::LeftBounded,
|
||||
(start, end) => {
|
||||
if is_prefix_range(start, end) {
|
||||
RangeTemplateType::Prefix
|
||||
} else {
|
||||
RangeTemplateType::Range
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Produces one `"?"` placeholder for every index in `from..=to`.
///
/// MySQL placeholders are positional and unnumbered, so only the number of
/// indices matters; an empty range (`from > to`) yields an empty vector.
fn mysql_generate_in_placeholders(from: usize, to: usize) -> Vec<String> {
    let mut placeholders = Vec::new();
    for _ in from..=to {
        placeholders.push(String::from("?"));
    }
    placeholders
}
|
||||
|
||||
/// Factory for building sql templates.
|
||||
struct MySqlTemplateFactory<'a> {
|
||||
table_name: &'a str,
|
||||
}
|
||||
|
||||
impl<'a> MySqlTemplateFactory<'a> {
|
||||
/// Creates a new [`SqlTemplateFactory`] with the given table name.
|
||||
fn new(table_name: &'a str) -> Self {
|
||||
Self { table_name }
|
||||
}
|
||||
|
||||
/// Builds the template set for the given table name.
|
||||
fn build(&self) -> MySqlTemplateSet {
|
||||
let table_name = self.table_name;
|
||||
// Some of queries don't end with `;`, because we need to add `LIMIT` clause.
|
||||
MySqlTemplateSet {
|
||||
table_name: table_name.to_string(),
|
||||
create_table_statement: format!(
|
||||
// Cannot be more than 3072 bytes in PRIMARY KEY
|
||||
"CREATE TABLE IF NOT EXISTS {table_name}(k VARBINARY(3072) PRIMARY KEY, v BLOB);",
|
||||
),
|
||||
range_template: RangeTemplate {
|
||||
point: format!("SELECT k, v FROM {table_name} WHERE k = ?"),
|
||||
range: format!("SELECT k, v FROM {table_name} WHERE k >= ? AND k < ? ORDER BY k"),
|
||||
full: format!("SELECT k, v FROM {table_name} ? ORDER BY k"),
|
||||
left_bounded: format!("SELECT k, v FROM {table_name} WHERE k >= ? ORDER BY k"),
|
||||
prefix: format!("SELECT k, v FROM {table_name} WHERE k LIKE ? ORDER BY k"),
|
||||
},
|
||||
delete_template: RangeTemplate {
|
||||
point: format!("DELETE FROM {table_name} WHERE k = ?;"),
|
||||
range: format!("DELETE FROM {table_name} WHERE k >= ? AND k < ?;"),
|
||||
full: format!("DELETE FROM {table_name}"),
|
||||
left_bounded: format!("DELETE FROM {table_name} WHERE k >= ?;"),
|
||||
prefix: format!("DELETE FROM {table_name} WHERE k LIKE ?;"),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Templates for the given table name.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MySqlTemplateSet {
|
||||
table_name: String,
|
||||
create_table_statement: String,
|
||||
range_template: RangeTemplate,
|
||||
delete_template: RangeTemplate,
|
||||
}
|
||||
|
||||
impl MySqlTemplateSet {
|
||||
/// Generates the sql for batch get.
|
||||
fn generate_batch_get_query(&self, key_len: usize) -> String {
|
||||
let table_name = &self.table_name;
|
||||
let in_clause = mysql_generate_in_placeholders(1, key_len).join(", ");
|
||||
format!("SELECT k, v FROM {table_name} WHERE k in ({});", in_clause)
|
||||
}
|
||||
|
||||
/// Generates the sql for batch delete.
|
||||
fn generate_batch_delete_query(&self, key_len: usize) -> String {
|
||||
let table_name = &self.table_name;
|
||||
let in_clause = mysql_generate_in_placeholders(1, key_len).join(", ");
|
||||
format!("DELETE FROM {table_name} WHERE k in ({});", in_clause)
|
||||
}
|
||||
|
||||
/// Generates the sql for batch upsert.
|
||||
/// For MySQL, it also generates a select query to get the previous values.
|
||||
fn generate_batch_upsert_query(&self, kv_len: usize) -> (String, String) {
|
||||
let table_name = &self.table_name;
|
||||
let in_placeholders: Vec<String> = (1..=kv_len).map(|_| "?".to_string()).collect();
|
||||
let in_clause = in_placeholders.join(", ");
|
||||
let mut values_placeholders = Vec::new();
|
||||
for _ in 0..kv_len {
|
||||
values_placeholders.push("(?, ?)".to_string());
|
||||
}
|
||||
let values_clause = values_placeholders.join(", ");
|
||||
|
||||
(
|
||||
format!(r#"SELECT k, v FROM {table_name} WHERE k IN ({in_clause})"#,),
|
||||
format!(
|
||||
r#"INSERT INTO {table_name} (k, v) VALUES {values_clause} ON DUPLICATE KEY UPDATE v = VALUES(v);"#,
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Executor for MySqlClient {
|
||||
type Transaction<'a>
|
||||
= MySqlTxnClient
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
fn name() -> &'static str {
|
||||
"MySql"
|
||||
}
|
||||
|
||||
async fn query(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
|
||||
let query = sqlx::query(raw_query);
|
||||
let query = params.iter().fold(query, |query, param| query.bind(param));
|
||||
let rows = query
|
||||
.fetch_all(&**self)
|
||||
.await
|
||||
.context(MySqlExecutionSnafu { sql: raw_query })?;
|
||||
Ok(rows.into_iter().map(key_value_from_row).collect())
|
||||
}
|
||||
|
||||
async fn execute(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<()> {
|
||||
let query = sqlx::query(raw_query);
|
||||
let query = params.iter().fold(query, |query, param| query.bind(param));
|
||||
query
|
||||
.execute(&**self)
|
||||
.await
|
||||
.context(MySqlExecutionSnafu { sql: raw_query })?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn txn_executor<'a>(&'a mut self) -> Result<Self::Transaction<'a>> {
|
||||
// sqlx has no isolation level support for now, so we have to set it manually.
|
||||
// TODO(CookiePie): Waiting for https://github.com/launchbadge/sqlx/pull/3614 and remove this.
|
||||
sqlx::query("SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE")
|
||||
.execute(&**self)
|
||||
.await
|
||||
.context(MySqlExecutionSnafu {
|
||||
sql: "SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE",
|
||||
})?;
|
||||
let txn = self
|
||||
.begin()
|
||||
.await
|
||||
.context(MySqlExecutionSnafu { sql: "begin" })?;
|
||||
Ok(MySqlTxnClient(txn))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Transaction<'_> for MySqlTxnClient {
|
||||
async fn query(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
|
||||
let query = sqlx::query(raw_query);
|
||||
let query = params.iter().fold(query, |query, param| query.bind(param));
|
||||
// As said in https://docs.rs/sqlx/latest/sqlx/trait.Executor.html, we need a `&mut *transaction`. Weird.
|
||||
let rows = query
|
||||
.fetch_all(&mut *(self.0))
|
||||
.await
|
||||
.context(MySqlExecutionSnafu { sql: raw_query })?;
|
||||
Ok(rows.into_iter().map(key_value_from_row).collect())
|
||||
}
|
||||
|
||||
async fn execute(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<()> {
|
||||
let query = sqlx::query(raw_query);
|
||||
let query = params.iter().fold(query, |query, param| query.bind(param));
|
||||
// As said in https://docs.rs/sqlx/latest/sqlx/trait.Executor.html, we need a `&mut *transaction`. Weird.
|
||||
query
|
||||
.execute(&mut *(self.0))
|
||||
.await
|
||||
.context(MySqlExecutionSnafu { sql: raw_query })?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Caution: sqlx will stuck on the query if two transactions conflict with each other.
|
||||
/// Don't know if it's a feature or it depends on the database. Be careful.
|
||||
async fn commit(self) -> Result<()> {
|
||||
self.0.commit().await.context(MySqlTransactionSnafu {
|
||||
operation: "commit",
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MySqlExecutorFactory {
|
||||
pool: Arc<Pool<MySql>>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ExecutorFactory<MySqlClient> for MySqlExecutorFactory {
|
||||
async fn default_executor(&self) -> Result<MySqlClient> {
|
||||
Ok(self.pool.clone())
|
||||
}
|
||||
|
||||
async fn txn_executor<'a>(
|
||||
&self,
|
||||
default_executor: &'a mut MySqlClient,
|
||||
) -> Result<MySqlTxnClient> {
|
||||
default_executor.txn_executor().await
|
||||
}
|
||||
}
|
||||
|
||||
/// A MySQL-backed key-value store.
|
||||
/// It uses [sqlx::Pool<MySql>] as the connection pool for [RdsStore].
|
||||
pub type MySqlStore = RdsStore<MySqlClient, MySqlExecutorFactory, MySqlTemplateSet>;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvQueryExecutor<MySqlClient> for MySqlStore {
|
||||
async fn range_with_query_executor(
|
||||
&self,
|
||||
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
|
||||
req: RangeRequest,
|
||||
) -> Result<RangeResponse> {
|
||||
let template_type = range_template(&req.key, &req.range_end);
|
||||
let template = self.sql_template_set.range_template.get(template_type);
|
||||
let params = template_type.build_params(req.key, req.range_end);
|
||||
let params_ref = params.iter().collect::<Vec<_>>();
|
||||
// Always add 1 to limit to check if there is more data
|
||||
let query =
|
||||
RangeTemplate::with_limit(template, if req.limit == 0 { 0 } else { req.limit + 1 });
|
||||
let limit = req.limit as usize;
|
||||
debug!("query: {:?}, params: {:?}", query, params);
|
||||
let mut kvs = query_executor.query(&query, ¶ms_ref).await?;
|
||||
if req.keys_only {
|
||||
kvs.iter_mut().for_each(|kv| kv.value = vec![]);
|
||||
}
|
||||
// If limit is 0, we always return all data
|
||||
if limit == 0 || kvs.len() <= limit {
|
||||
return Ok(RangeResponse { kvs, more: false });
|
||||
}
|
||||
// If limit is greater than the number of rows, we remove the last row and set more to true
|
||||
let removed = kvs.pop();
|
||||
debug_assert!(removed.is_some());
|
||||
Ok(RangeResponse { kvs, more: true })
|
||||
}
|
||||
|
||||
async fn batch_put_with_query_executor(
|
||||
&self,
|
||||
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
|
||||
req: BatchPutRequest,
|
||||
) -> Result<BatchPutResponse> {
|
||||
let mut in_params = Vec::with_capacity(req.kvs.len() * 3);
|
||||
let mut values_params = Vec::with_capacity(req.kvs.len() * 2);
|
||||
|
||||
for kv in &req.kvs {
|
||||
let processed_key = &kv.key;
|
||||
in_params.push(processed_key);
|
||||
|
||||
let processed_value = &kv.value;
|
||||
values_params.push(processed_key);
|
||||
values_params.push(processed_value);
|
||||
}
|
||||
let in_params = in_params.iter().map(|x| x as _).collect::<Vec<_>>();
|
||||
let values_params = values_params.iter().map(|x| x as _).collect::<Vec<_>>();
|
||||
let (select, update) = self
|
||||
.sql_template_set
|
||||
.generate_batch_upsert_query(req.kvs.len());
|
||||
|
||||
// Fast path: if we don't need previous kvs, we can just upsert the keys.
|
||||
if !req.prev_kv {
|
||||
query_executor.execute(&update, &values_params).await?;
|
||||
return Ok(BatchPutResponse::default());
|
||||
}
|
||||
// Should use transaction to ensure atomicity.
|
||||
if let ExecutorImpl::Default(query_executor) = query_executor {
|
||||
let txn = query_executor.txn_executor().await?;
|
||||
let mut txn = ExecutorImpl::Txn(txn);
|
||||
let res = self.batch_put_with_query_executor(&mut txn, req).await;
|
||||
txn.commit().await?;
|
||||
return res;
|
||||
}
|
||||
let prev_kvs = query_executor.query(&select, &in_params).await?;
|
||||
query_executor.execute(&update, &values_params).await?;
|
||||
Ok(BatchPutResponse { prev_kvs })
|
||||
}
|
||||
|
||||
async fn batch_get_with_query_executor(
|
||||
&self,
|
||||
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
|
||||
req: BatchGetRequest,
|
||||
) -> Result<BatchGetResponse> {
|
||||
if req.keys.is_empty() {
|
||||
return Ok(BatchGetResponse { kvs: vec![] });
|
||||
}
|
||||
let query = self
|
||||
.sql_template_set
|
||||
.generate_batch_get_query(req.keys.len());
|
||||
let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
|
||||
let kvs = query_executor.query(&query, ¶ms).await?;
|
||||
Ok(BatchGetResponse { kvs })
|
||||
}
|
||||
|
||||
async fn delete_range_with_query_executor(
|
||||
&self,
|
||||
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
|
||||
req: DeleteRangeRequest,
|
||||
) -> Result<DeleteRangeResponse> {
|
||||
// Since we need to know the number of deleted keys, we have no fast path here.
|
||||
// Should use transaction to ensure atomicity.
|
||||
if let ExecutorImpl::Default(query_executor) = query_executor {
|
||||
let txn = query_executor.txn_executor().await?;
|
||||
let mut txn = ExecutorImpl::Txn(txn);
|
||||
let res = self.delete_range_with_query_executor(&mut txn, req).await;
|
||||
txn.commit().await?;
|
||||
return res;
|
||||
}
|
||||
let range_get_req = RangeRequest {
|
||||
key: req.key.clone(),
|
||||
range_end: req.range_end.clone(),
|
||||
limit: 0,
|
||||
keys_only: false,
|
||||
};
|
||||
let prev_kvs = self
|
||||
.range_with_query_executor(query_executor, range_get_req)
|
||||
.await?
|
||||
.kvs;
|
||||
let template_type = range_template(&req.key, &req.range_end);
|
||||
let template = self.sql_template_set.delete_template.get(template_type);
|
||||
let params = template_type.build_params(req.key, req.range_end);
|
||||
let params_ref = params.iter().map(|x| x as _).collect::<Vec<_>>();
|
||||
query_executor.execute(template, ¶ms_ref).await?;
|
||||
let mut resp = DeleteRangeResponse::new(prev_kvs.len() as i64);
|
||||
if req.prev_kv {
|
||||
resp.with_prev_kvs(prev_kvs);
|
||||
}
|
||||
Ok(resp)
|
||||
}
|
||||
|
||||
async fn batch_delete_with_query_executor(
|
||||
&self,
|
||||
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
|
||||
req: BatchDeleteRequest,
|
||||
) -> Result<BatchDeleteResponse> {
|
||||
if req.keys.is_empty() {
|
||||
return Ok(BatchDeleteResponse::default());
|
||||
}
|
||||
let query = self
|
||||
.sql_template_set
|
||||
.generate_batch_delete_query(req.keys.len());
|
||||
let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
|
||||
// Fast path: if we don't need previous kvs, we can just delete the keys.
|
||||
if !req.prev_kv {
|
||||
query_executor.execute(&query, ¶ms).await?;
|
||||
return Ok(BatchDeleteResponse::default());
|
||||
}
|
||||
// Should use transaction to ensure atomicity.
|
||||
if let ExecutorImpl::Default(query_executor) = query_executor {
|
||||
let txn = query_executor.txn_executor().await?;
|
||||
let mut txn = ExecutorImpl::Txn(txn);
|
||||
let res = self.batch_delete_with_query_executor(&mut txn, req).await;
|
||||
txn.commit().await?;
|
||||
return res;
|
||||
}
|
||||
// Should get previous kvs first
|
||||
let batch_get_req = BatchGetRequest {
|
||||
keys: req.keys.clone(),
|
||||
};
|
||||
let prev_kvs = self
|
||||
.batch_get_with_query_executor(query_executor, batch_get_req)
|
||||
.await?
|
||||
.kvs;
|
||||
// Pure `DELETE` has no return value, so we need to use `execute` instead of `query`.
|
||||
query_executor.execute(&query, ¶ms).await?;
|
||||
if req.prev_kv {
|
||||
Ok(BatchDeleteResponse { prev_kvs })
|
||||
} else {
|
||||
Ok(BatchDeleteResponse::default())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MySqlStore {
|
||||
/// Create [MySqlStore] impl of [KvBackendRef] from url.
|
||||
pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
|
||||
let pool = MySqlPool::connect(url)
|
||||
.await
|
||||
.context(CreateMySqlPoolSnafu)?;
|
||||
Self::with_mysql_pool(pool, table_name, max_txn_ops).await
|
||||
}
|
||||
|
||||
/// Create [MySqlStore] impl of [KvBackendRef] from [sqlx::Pool<MySql>].
|
||||
pub async fn with_mysql_pool(
|
||||
pool: Pool<MySql>,
|
||||
table_name: &str,
|
||||
max_txn_ops: usize,
|
||||
) -> Result<KvBackendRef> {
|
||||
// This step ensures the mysql metadata backend is ready to use.
|
||||
// We check if greptime_metakv table exists, and we will create a new table
|
||||
// if it does not exist.
|
||||
let sql_template_set = MySqlTemplateFactory::new(table_name).build();
|
||||
sqlx::query(&sql_template_set.create_table_statement)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.context(MySqlExecutionSnafu {
|
||||
sql: sql_template_set.create_table_statement.to_string(),
|
||||
})?;
|
||||
Ok(Arc::new(MySqlStore {
|
||||
max_txn_ops,
|
||||
sql_template_set,
|
||||
txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
|
||||
executor_factory: MySqlExecutorFactory {
|
||||
pool: Arc::new(pool),
|
||||
},
|
||||
_phantom: PhantomData,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use common_telemetry::init_default_ut_logging;

    use super::*;
    use crate::kv_backend::test::{
        prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
        test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
        test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
        test_txn_compare_equal, test_txn_compare_greater, test_txn_compare_less,
        test_txn_compare_not_equal, test_txn_one_compare_op, text_txn_multi_compare_op,
        unprepare_kv,
    };

    /// Builds a store on the MySQL instance named by the `GT_MYSQL_ENDPOINTS`
    /// environment variable, creating `table_name` if needed.
    ///
    /// Returns `None` when the variable is unset or empty, so that callers
    /// can skip their test instead of failing without a database.
    async fn build_mysql_kv_backend(table_name: &str) -> Option<MySqlStore> {
        init_default_ut_logging();
        let endpoints = std::env::var("GT_MYSQL_ENDPOINTS").unwrap_or_default();
        if endpoints.is_empty() {
            return None;
        }
        let pool = MySqlPool::connect(&endpoints).await.unwrap();
        let sql_templates = MySqlTemplateFactory::new(table_name).build();
        sqlx::query(&sql_templates.create_table_statement)
            .execute(&pool)
            .await
            .unwrap();
        Some(MySqlStore {
            max_txn_ops: 128,
            sql_template_set: sql_templates,
            txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
            executor_factory: MySqlExecutorFactory {
                pool: Arc::new(pool),
            },
            _phantom: PhantomData,
        })
    }

    #[tokio::test]
    async fn test_mysql_put() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("put_test").await else {
            return;
        };
        let prefix = b"put/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_range() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("range_test").await else {
            return;
        };
        let prefix = b"range/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_range_2() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("range2_test").await else {
            return;
        };
        let prefix = b"range2/";
        test_kv_range_2_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_batch_get() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("batch_get_test").await else {
            return;
        };
        let prefix = b"batch_get/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_batch_delete() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("batch_delete_test").await else {
            return;
        };
        let prefix = b"batch_delete/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        // NOTE(review): this runs the delete-range helper under a batch-delete
        // name; kept as in the original — confirm whether that is intended.
        test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_batch_delete_with_prefix() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("batch_delete_with_prefix_test").await
        else {
            return;
        };
        let prefix = b"batch_delete/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_batch_delete_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_delete_range() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("delete_range_test").await else {
            return;
        };
        let prefix = b"delete_range/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_compare_and_put() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("compare_and_put_test").await else {
            return;
        };
        let prefix = b"compare_and_put/";
        let kv_backend = Arc::new(kv_backend);
        test_kv_compare_and_put_with_prefix(kv_backend.clone(), prefix.to_vec()).await;
    }

    #[tokio::test]
    async fn test_mysql_txn() {
        // Skip when no MySQL endpoint is configured.
        let Some(kv_backend) = build_mysql_kv_backend("txn_test").await else {
            return;
        };
        test_txn_one_compare_op(&kv_backend).await;
        text_txn_multi_compare_op(&kv_backend).await;
        test_txn_compare_equal(&kv_backend).await;
        test_txn_compare_greater(&kv_backend).await;
        test_txn_compare_less(&kv_backend).await;
        test_txn_compare_not_equal(&kv_backend).await;
    }
}
|
||||
@@ -153,6 +153,7 @@ impl<'a> PgSqlTemplateFactory<'a> {
|
||||
/// Builds the template set for the given table name.
|
||||
fn build(&self) -> PgSqlTemplateSet {
|
||||
let table_name = self.table_name;
|
||||
// Some of queries don't end with `;`, because we need to add `LIMIT` clause.
|
||||
PgSqlTemplateSet {
|
||||
table_name: table_name.to_string(),
|
||||
create_table_statement: format!(
|
||||
|
||||
@@ -34,6 +34,24 @@ pub struct MigrateRegionRequest {
|
||||
pub timeout: Duration,
|
||||
}
|
||||
|
||||
/// A request to add a follower to a region.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AddRegionFollowerRequest {
|
||||
/// Id of the region that the follower is added to.
|
||||
pub region_id: u64,
|
||||
/// Id of the peer that is added as a follower.
|
||||
pub peer_id: u64,
|
||||
}
|
||||
|
||||
/// A request to remove a follower from a region.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RemoveRegionFollowerRequest {
|
||||
/// Id of the region that the follower is removed from.
|
||||
pub region_id: u64,
|
||||
/// Id of the peer that is removed as a follower.
|
||||
pub peer_id: u64,
|
||||
}
|
||||
|
||||
/// Cast the protobuf [`ProcedureId`] to common [`ProcedureId`].
|
||||
pub fn pb_pid_to_pid(pid: &PbProcedureId) -> Result<ProcedureId> {
|
||||
ProcedureId::parse_str(&String::from_utf8_lossy(&pid.key)).with_context(|_| {
|
||||
|
||||
@@ -26,6 +26,7 @@ use datafusion_common::cast::{as_boolean_array, as_null_array};
|
||||
use datafusion_common::{internal_err, DataFusionError, ScalarValue};
|
||||
use datatypes::arrow::array::{Array, BooleanArray, RecordBatch};
|
||||
use datatypes::arrow::compute::filter_record_batch;
|
||||
use datatypes::compute::or_kleene;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -47,6 +48,8 @@ pub struct SimpleFilterEvaluator {
|
||||
literal: Scalar<ArrayRef>,
|
||||
/// The operator.
|
||||
op: Operator,
|
||||
/// Only used when the operator is `Or`-chain.
|
||||
literal_list: Vec<Scalar<ArrayRef>>,
|
||||
}
|
||||
|
||||
impl SimpleFilterEvaluator {
|
||||
@@ -69,6 +72,7 @@ impl SimpleFilterEvaluator {
|
||||
column_name,
|
||||
literal: val.to_scalar().ok()?,
|
||||
op,
|
||||
literal_list: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
@@ -83,6 +87,35 @@ impl SimpleFilterEvaluator {
|
||||
| Operator::LtEq
|
||||
| Operator::Gt
|
||||
| Operator::GtEq => {}
|
||||
Operator::Or => {
|
||||
let lhs = Self::try_new(&binary.left)?;
|
||||
let rhs = Self::try_new(&binary.right)?;
|
||||
if lhs.column_name != rhs.column_name
|
||||
|| !matches!(lhs.op, Operator::Eq | Operator::Or)
|
||||
|| !matches!(rhs.op, Operator::Eq | Operator::Or)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let mut list = vec![];
|
||||
let placeholder_literal = lhs.literal.clone();
|
||||
// above check guarantees the op is either `Eq` or `Or`
|
||||
if matches!(lhs.op, Operator::Or) {
|
||||
list.extend(lhs.literal_list);
|
||||
} else {
|
||||
list.push(lhs.literal);
|
||||
}
|
||||
if matches!(rhs.op, Operator::Or) {
|
||||
list.extend(rhs.literal_list);
|
||||
} else {
|
||||
list.push(rhs.literal);
|
||||
}
|
||||
return Some(Self {
|
||||
column_name: lhs.column_name,
|
||||
literal: placeholder_literal,
|
||||
op: Operator::Or,
|
||||
literal_list: list,
|
||||
});
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
|
||||
@@ -103,6 +136,7 @@ impl SimpleFilterEvaluator {
|
||||
column_name: lhs.name.clone(),
|
||||
literal,
|
||||
op,
|
||||
literal_list: vec![],
|
||||
})
|
||||
}
|
||||
_ => None,
|
||||
@@ -118,19 +152,19 @@ impl SimpleFilterEvaluator {
|
||||
let input = input
|
||||
.to_scalar()
|
||||
.with_context(|_| ToArrowScalarSnafu { v: input.clone() })?;
|
||||
let result = self.evaluate_datum(&input)?;
|
||||
let result = self.evaluate_datum(&input, 1)?;
|
||||
Ok(result.value(0))
|
||||
}
|
||||
|
||||
pub fn evaluate_array(&self, input: &ArrayRef) -> Result<BooleanBuffer> {
|
||||
self.evaluate_datum(input)
|
||||
self.evaluate_datum(input, input.len())
|
||||
}
|
||||
|
||||
pub fn evaluate_vector(&self, input: &VectorRef) -> Result<BooleanBuffer> {
|
||||
self.evaluate_datum(&input.to_arrow_array())
|
||||
self.evaluate_datum(&input.to_arrow_array(), input.len())
|
||||
}
|
||||
|
||||
fn evaluate_datum(&self, input: &impl Datum) -> Result<BooleanBuffer> {
|
||||
fn evaluate_datum(&self, input: &impl Datum, input_len: usize) -> Result<BooleanBuffer> {
|
||||
let result = match self.op {
|
||||
Operator::Eq => cmp::eq(input, &self.literal),
|
||||
Operator::NotEq => cmp::neq(input, &self.literal),
|
||||
@@ -138,6 +172,15 @@ impl SimpleFilterEvaluator {
|
||||
Operator::LtEq => cmp::lt_eq(input, &self.literal),
|
||||
Operator::Gt => cmp::gt(input, &self.literal),
|
||||
Operator::GtEq => cmp::gt_eq(input, &self.literal),
|
||||
Operator::Or => {
|
||||
// OR operator stands for OR-chained EQs (or INLIST in other words)
|
||||
let mut result: BooleanArray = vec![false; input_len].into();
|
||||
for literal in &self.literal_list {
|
||||
let rhs = cmp::eq(input, literal).context(ArrowComputeSnafu)?;
|
||||
result = or_kleene(&result, &rhs).context(ArrowComputeSnafu)?;
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedOperationSnafu {
|
||||
reason: format!("{:?}", self.op),
|
||||
@@ -349,4 +392,49 @@ mod test {
|
||||
let expected = datatypes::arrow::array::Int32Array::from(vec![5, 6]);
|
||||
assert_eq!(first_column_values, &expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_complex_filter_expression() {
|
||||
// Create an expression tree for: col = 'B' OR col = 'C' OR col = 'D'
|
||||
let col_eq_b = col("col").eq(lit("B"));
|
||||
let col_eq_c = col("col").eq(lit("C"));
|
||||
let col_eq_d = col("col").eq(lit("D"));
|
||||
|
||||
// Build the OR chain
|
||||
let col_or_expr = col_eq_b.or(col_eq_c).or(col_eq_d);
|
||||
|
||||
// Check that SimpleFilterEvaluator can handle OR chain
|
||||
let or_evaluator = SimpleFilterEvaluator::try_new(&col_or_expr).unwrap();
|
||||
assert_eq!(or_evaluator.column_name, "col");
|
||||
assert_eq!(or_evaluator.op, Operator::Or);
|
||||
assert_eq!(or_evaluator.literal_list.len(), 3);
|
||||
assert_eq!(format!("{:?}", or_evaluator.literal_list), "[Scalar(StringArray\n[\n \"B\",\n]), Scalar(StringArray\n[\n \"C\",\n]), Scalar(StringArray\n[\n \"D\",\n])]");
|
||||
|
||||
// Create a schema and batch for testing
|
||||
let schema = Schema::new(vec![Field::new("col", DataType::Utf8, false)]);
|
||||
let df_schema = DFSchema::try_from(schema.clone()).unwrap();
|
||||
let props = ExecutionProps::new();
|
||||
let physical_expr = create_physical_expr(&col_or_expr, &df_schema, &props).unwrap();
|
||||
|
||||
// Create test data
|
||||
let col_data = Arc::new(datatypes::arrow::array::StringArray::from(vec![
|
||||
"B", "C", "E", "B", "C", "D", "F",
|
||||
]));
|
||||
let batch = RecordBatch::try_new(Arc::new(schema), vec![col_data]).unwrap();
|
||||
let expected = datatypes::arrow::array::StringArray::from(vec!["B", "C", "B", "C", "D"]);
|
||||
|
||||
// Filter the batch
|
||||
let filtered_batch = batch_filter(&batch, &physical_expr).unwrap();
|
||||
|
||||
// Expected: rows with col in ("B", "C", "D")
|
||||
// That would be rows 0, 1, 3, 4, 5
|
||||
assert_eq!(filtered_batch.num_rows(), 5);
|
||||
|
||||
let col_filtered = filtered_batch
|
||||
.column(0)
|
||||
.as_any()
|
||||
.downcast_ref::<datatypes::arrow::array::StringArray>()
|
||||
.unwrap();
|
||||
assert_eq!(col_filtered, &expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,407 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{Display, Formatter, Write};
|
||||
|
||||
use chrono::{
|
||||
Days, LocalResult, Months, NaiveDateTime, TimeDelta, TimeZone as ChronoTimeZone, Utc,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{InvalidDateStrSnafu, Result};
|
||||
use crate::interval::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
|
||||
use crate::timezone::{get_timezone, Timezone};
|
||||
use crate::util::{datetime_to_utc, format_utc_datetime};
|
||||
use crate::Date;
|
||||
|
||||
// Zone-less text layout: `%F` date, `%H:%M:%S` time and `%.f` fractional seconds.
// `DateTime::from_str` uses it to parse strings that carry no UTC offset.
const DATETIME_FORMAT: &str = "%F %H:%M:%S%.f";
|
||||
// Same layout followed by a `%z` UTC offset (e.g. `+0800`).
// Used by `DateTime::from_str` for parsing and by the `Display` impl for rendering.
const DATETIME_FORMAT_WITH_TZ: &str = "%F %H:%M:%S%.f%z";
|
||||
|
||||
/// [DateTime] represents the **milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch)**.
///
/// It is a thin wrapper around a signed 64-bit millisecond count, so instants before the
/// epoch are stored as negative values. The derived `Default` is the epoch itself (`0`).
|
||||
#[derive(
|
||||
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize,
|
||||
)]
|
||||
pub struct DateTime(i64);
|
||||
|
||||
impl Display for DateTime {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(abs_time) = chrono::DateTime::from_timestamp_millis(self.0) {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
format_utc_datetime(&abs_time.naive_utc(), DATETIME_FORMAT_WITH_TZ)
|
||||
)
|
||||
} else {
|
||||
write!(f, "DateTime({})", self.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DateTime> for serde_json::Value {
    /// Produces a JSON string holding the `Display` rendering of the datetime.
    fn from(datetime: DateTime) -> Self {
        let rendered = datetime.to_string();
        Self::String(rendered)
    }
}
|
||||
|
||||
impl From<NaiveDateTime> for DateTime {
|
||||
fn from(value: NaiveDateTime) -> Self {
|
||||
DateTime::from(value.and_utc().timestamp_millis())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i64> for DateTime {
|
||||
fn from(v: i64) -> Self {
|
||||
Self(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Date> for DateTime {
|
||||
fn from(value: Date) -> Self {
|
||||
// It's safe, i32 * 86400000 won't be overflow
|
||||
Self(value.to_secs() * 1000)
|
||||
}
|
||||
}
|
||||
|
||||
impl DateTime {
|
||||
/// Try parsing a string into [`DateTime`] with the system timezone.
|
||||
/// See `DateTime::from_str`.
|
||||
pub fn from_str_system(s: &str) -> Result<Self> {
|
||||
Self::from_str(s, None)
|
||||
}
|
||||
|
||||
/// Try parsing a string into [`DateTime`] with the given timezone.
|
||||
/// Supported format:
|
||||
/// - RFC3339 in the naive UTC timezone.
|
||||
/// - `%F %T` with the given timezone
|
||||
/// - `%F %T%z` with the timezone in string
|
||||
pub fn from_str(s: &str, timezone: Option<&Timezone>) -> Result<Self> {
|
||||
let s = s.trim();
|
||||
let timestamp_millis = if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(s) {
|
||||
dt.naive_utc().and_utc().timestamp_millis()
|
||||
} else if let Ok(d) = NaiveDateTime::parse_from_str(s, DATETIME_FORMAT) {
|
||||
match datetime_to_utc(&d, get_timezone(timezone)) {
|
||||
LocalResult::None => {
|
||||
return InvalidDateStrSnafu { raw: s }.fail();
|
||||
}
|
||||
LocalResult::Single(t) | LocalResult::Ambiguous(t, _) => {
|
||||
t.and_utc().timestamp_millis()
|
||||
}
|
||||
}
|
||||
} else if let Ok(v) = chrono::DateTime::parse_from_str(s, DATETIME_FORMAT_WITH_TZ) {
|
||||
v.timestamp_millis()
|
||||
} else {
|
||||
return InvalidDateStrSnafu { raw: s }.fail();
|
||||
};
|
||||
|
||||
Ok(Self(timestamp_millis))
|
||||
}
|
||||
|
||||
/// Create a new [DateTime] from milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch).
|
||||
pub fn new(millis: i64) -> Self {
|
||||
Self(millis)
|
||||
}
|
||||
|
||||
/// Get the milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch).
|
||||
pub fn val(&self) -> i64 {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Convert to [NaiveDateTime].
|
||||
pub fn to_chrono_datetime(&self) -> Option<NaiveDateTime> {
|
||||
chrono::DateTime::from_timestamp_millis(self.0).map(|x| x.naive_utc())
|
||||
}
|
||||
|
||||
/// Format DateTime for given format and timezone.
|
||||
/// If `tz==None`, the server default timezone will used.
|
||||
pub fn as_formatted_string(
|
||||
self,
|
||||
pattern: &str,
|
||||
timezone: Option<&Timezone>,
|
||||
) -> Result<Option<String>> {
|
||||
if let Some(v) = self.to_chrono_datetime() {
|
||||
let mut formatted = String::new();
|
||||
|
||||
match get_timezone(timezone) {
|
||||
Timezone::Offset(offset) => {
|
||||
write!(
|
||||
formatted,
|
||||
"{}",
|
||||
offset.from_utc_datetime(&v).format(pattern)
|
||||
)
|
||||
.context(crate::error::FormatSnafu { pattern })?;
|
||||
}
|
||||
Timezone::Named(tz) => {
|
||||
write!(formatted, "{}", tz.from_utc_datetime(&v).format(pattern))
|
||||
.context(crate::error::FormatSnafu { pattern })?;
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(Some(formatted));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn to_chrono_datetime_with_timezone(&self, tz: Option<&Timezone>) -> Option<NaiveDateTime> {
|
||||
let datetime = self.to_chrono_datetime();
|
||||
datetime.map(|v| match tz {
|
||||
Some(Timezone::Offset(offset)) => offset.from_utc_datetime(&v).naive_local(),
|
||||
Some(Timezone::Named(tz)) => tz.from_utc_datetime(&v).naive_local(),
|
||||
None => Utc.from_utc_datetime(&v).naive_local(),
|
||||
})
|
||||
}
|
||||
|
||||
// FIXME(yingwen): remove add/sub intervals later
|
||||
/// Adds given [IntervalYearMonth] to the current datetime.
|
||||
pub fn add_year_month(&self, interval: IntervalYearMonth) -> Option<Self> {
|
||||
let naive_datetime = self.to_chrono_datetime()?;
|
||||
|
||||
naive_datetime
|
||||
.checked_add_months(Months::new(interval.months as u32))
|
||||
.map(Into::into)
|
||||
}
|
||||
|
||||
/// Adds given [IntervalDayTime] to the current datetime.
|
||||
pub fn add_day_time(&self, interval: IntervalDayTime) -> Option<Self> {
|
||||
let naive_datetime = self.to_chrono_datetime()?;
|
||||
|
||||
naive_datetime
|
||||
.checked_add_days(Days::new(interval.days as u64))?
|
||||
.checked_add_signed(TimeDelta::milliseconds(interval.milliseconds as i64))
|
||||
.map(Into::into)
|
||||
}
|
||||
|
||||
/// Adds given [IntervalMonthDayNano] to the current datetime.
|
||||
pub fn add_month_day_nano(&self, interval: IntervalMonthDayNano) -> Option<Self> {
|
||||
let naive_datetime = self.to_chrono_datetime()?;
|
||||
|
||||
naive_datetime
|
||||
.checked_add_months(Months::new(interval.months as u32))?
|
||||
.checked_add_days(Days::new(interval.days as u64))?
|
||||
.checked_add_signed(TimeDelta::nanoseconds(interval.nanoseconds))
|
||||
.map(Into::into)
|
||||
}
|
||||
|
||||
/// Subtracts given [IntervalYearMonth] to the current datetime.
|
||||
pub fn sub_year_month(&self, interval: IntervalYearMonth) -> Option<Self> {
|
||||
let naive_datetime = self.to_chrono_datetime()?;
|
||||
|
||||
naive_datetime
|
||||
.checked_sub_months(Months::new(interval.months as u32))
|
||||
.map(Into::into)
|
||||
}
|
||||
|
||||
/// Subtracts given [IntervalDayTime] to the current datetime.
|
||||
pub fn sub_day_time(&self, interval: IntervalDayTime) -> Option<Self> {
|
||||
let naive_datetime = self.to_chrono_datetime()?;
|
||||
|
||||
naive_datetime
|
||||
.checked_sub_days(Days::new(interval.days as u64))?
|
||||
.checked_sub_signed(TimeDelta::milliseconds(interval.milliseconds as i64))
|
||||
.map(Into::into)
|
||||
}
|
||||
|
||||
/// Subtracts given [IntervalMonthDayNano] to the current datetime.
|
||||
pub fn sub_month_day_nano(&self, interval: IntervalMonthDayNano) -> Option<Self> {
|
||||
let naive_datetime = self.to_chrono_datetime()?;
|
||||
|
||||
naive_datetime
|
||||
.checked_sub_months(Months::new(interval.months as u32))?
|
||||
.checked_sub_days(Days::new(interval.days as u64))?
|
||||
.checked_sub_signed(TimeDelta::nanoseconds(interval.nanoseconds))
|
||||
.map(Into::into)
|
||||
}
|
||||
|
||||
/// Convert to [common_time::date].
|
||||
pub fn to_date(&self) -> Option<Date> {
|
||||
self.to_chrono_datetime().map(|d| Date::from(d.date()))
|
||||
}
|
||||
|
||||
pub fn negative(&self) -> Self {
|
||||
Self(-self.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::timezone::set_default_timezone;

    /// Parses a timezone string that the tests know to be valid.
    fn tz(name: &str) -> Timezone {
        Timezone::from_tz_string(name).unwrap()
    }

    #[test]
    pub fn test_new_date_time() {
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        let cases = [
            (0, "1970-01-01 08:00:00+0800"),
            (1000, "1970-01-01 08:00:01+0800"),
            (-1000, "1970-01-01 07:59:59+0800"),
        ];
        for (millis, rendered) in cases {
            assert_eq!(rendered, DateTime::new(millis).to_string());
        }
    }

    #[test]
    pub fn test_parse_from_string() {
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        let time = "1970-01-01 00:00:00+0800";
        // Surrounding whitespace must not change the parsed value.
        for input in [time, " 1970-01-01 00:00:00+0800 "] {
            let dt = DateTime::from_str(input, None).unwrap();
            assert_eq!(time, &dt.to_string());
        }
    }

    #[test]
    pub fn test_from() {
        let d = DateTime::from(42);
        assert_eq!(42, d.val());
    }

    #[test]
    fn test_add_sub_interval() {
        let start = DateTime::new(1000);
        let step = IntervalDayTime::new(1, 200);

        let advanced = start.add_day_time(step).unwrap();
        assert_eq!(advanced.val(), 1000 + 3600 * 24 * 1000 + 200);

        // Subtracting the same interval must restore the starting point.
        assert_eq!(start, advanced.sub_day_time(step).unwrap());
    }

    #[test]
    fn test_parse_local_date_time() {
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        let cases = [
            (-28800000, "1970-01-01 00:00:00", None),
            (0, "1970-01-01 08:00:00", None),
            (42, "1970-01-01 08:00:00.042", None),
            (42, "1970-01-01 08:00:00.042424", None),
            (0, "1970-01-01 08:00:00", Some(tz("Asia/Shanghai"))),
            (-28800000, "1970-01-01 00:00:00", Some(tz("Asia/Shanghai"))),
            (28800000, "1970-01-01 00:00:00", Some(tz("-8:00"))),
        ];
        for (expected, input, timezone) in cases {
            let parsed = DateTime::from_str(input, timezone.as_ref()).unwrap();
            assert_eq!(expected, parsed.val());
        }
    }

    #[test]
    fn test_parse_local_date_time_with_tz() {
        // The last case checks that an explicit offset in the string wins over the argument.
        let cases = [
            (28800000, "1970-01-01 08:00:00+0000", None),
            (42, "1970-01-01 00:00:00.042+0000", None),
            (28800000, "1970-01-01 08:00:00+0000", Some(tz("-8:00"))),
        ];
        for (expected, input, timezone) in cases {
            let ts = DateTime::from_str(input, timezone.as_ref())
                .unwrap()
                .val();
            assert_eq!(expected, ts);
        }
    }

    #[test]
    fn test_as_formatted_string() {
        let d: DateTime = DateTime::new(1000);
        let cases = [
            ("1970-01-01", "%Y-%m-%d", None),
            ("1970-01-01 00:00:01", "%Y-%m-%d %H:%M:%S", None),
            ("1970-01-01T00:00:01:000", "%Y-%m-%dT%H:%M:%S:%3f", None),
            (
                "1970-01-01T08:00:01:000",
                "%Y-%m-%dT%H:%M:%S:%3f",
                Some(tz("Asia/Shanghai")),
            ),
        ];
        for (expected, pattern, timezone) in cases {
            let rendered = d
                .as_formatted_string(pattern, timezone.as_ref())
                .unwrap()
                .unwrap();
            assert_eq!(expected, rendered);
        }
    }

    #[test]
    fn test_from_max_date() {
        let datetime = DateTime::from(Date::new(i32::MAX));
        assert_eq!(datetime.val(), 185542587100800000);
    }

    #[test]
    fn test_conversion_between_datetime_and_chrono_datetime() {
        for millis in [1, 10, 100, 1000, 100000] {
            let original = DateTime::new(millis);
            let roundtrip = DateTime::from(original.to_chrono_datetime().unwrap());
            assert_eq!(original, roundtrip);
        }
    }
}
|
||||
@@ -13,7 +13,6 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub mod date;
|
||||
pub mod datetime;
|
||||
pub mod duration;
|
||||
pub mod error;
|
||||
pub mod interval;
|
||||
@@ -26,7 +25,6 @@ pub mod ttl;
|
||||
pub mod util;
|
||||
|
||||
pub use date::Date;
|
||||
pub use datetime::DateTime;
|
||||
pub use duration::Duration;
|
||||
pub use interval::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
|
||||
pub use range::RangeMillis;
|
||||
|
||||
@@ -171,6 +171,10 @@ pub struct S3Config {
|
||||
pub secret_access_key: SecretString,
|
||||
pub endpoint: Option<String>,
|
||||
pub region: Option<String>,
|
||||
/// Enable virtual host style so that opendal will send API requests in virtual host style instead of path style.
|
||||
/// By default, opendal will send API to https://s3.us-east-1.amazonaws.com/bucket_name
|
||||
/// Enabled, opendal will send API to https://bucket_name.s3.us-east-1.amazonaws.com
|
||||
pub enable_virtual_host_style: bool,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
@@ -185,6 +189,7 @@ impl PartialEq for S3Config {
|
||||
&& self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.region == other.region
|
||||
&& self.enable_virtual_host_style == other.enable_virtual_host_style
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
@@ -289,6 +294,7 @@ impl Default for S3Config {
|
||||
root: String::default(),
|
||||
access_key_id: SecretString::from(String::default()),
|
||||
secret_access_key: SecretString::from(String::default()),
|
||||
enable_virtual_host_style: false,
|
||||
endpoint: Option::default(),
|
||||
region: Option::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
|
||||
@@ -41,10 +41,13 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
|
||||
|
||||
if s3_config.endpoint.is_some() {
|
||||
builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
|
||||
};
|
||||
}
|
||||
if s3_config.region.is_some() {
|
||||
builder = builder.region(s3_config.region.as_ref().unwrap());
|
||||
};
|
||||
}
|
||||
if s3_config.enable_virtual_host_style {
|
||||
builder = builder.enable_virtual_host_style();
|
||||
}
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
|
||||
@@ -30,13 +30,13 @@ use serde::{Deserialize, Serialize};
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{
|
||||
BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType,
|
||||
DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType,
|
||||
DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType,
|
||||
ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type, VectorType,
|
||||
BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
|
||||
DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
|
||||
Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
|
||||
IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
|
||||
StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType, TimestampMillisecondType,
|
||||
TimestampNanosecondType, TimestampSecondType, TimestampType, UInt16Type, UInt32Type,
|
||||
UInt64Type, UInt8Type, VectorType,
|
||||
};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::MutableVector;
|
||||
@@ -68,7 +68,6 @@ pub enum ConcreteDataType {
|
||||
|
||||
// Date and time types:
|
||||
Date(DateType),
|
||||
DateTime(DateTimeType),
|
||||
Timestamp(TimestampType),
|
||||
Time(TimeType),
|
||||
|
||||
@@ -107,7 +106,6 @@ impl fmt::Display for ConcreteDataType {
|
||||
ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::String(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::DateTime(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::Timestamp(t) => match t {
|
||||
TimestampType::Second(v) => write!(f, "{}", v.name()),
|
||||
TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
|
||||
@@ -163,7 +161,6 @@ impl ConcreteDataType {
|
||||
self,
|
||||
ConcreteDataType::String(_)
|
||||
| ConcreteDataType::Date(_)
|
||||
| ConcreteDataType::DateTime(_)
|
||||
| ConcreteDataType::Timestamp(_)
|
||||
| ConcreteDataType::Time(_)
|
||||
| ConcreteDataType::Interval(_)
|
||||
@@ -183,7 +180,6 @@ impl ConcreteDataType {
|
||||
| ConcreteDataType::Int32(_)
|
||||
| ConcreteDataType::Int64(_)
|
||||
| ConcreteDataType::Date(_)
|
||||
| ConcreteDataType::DateTime(_)
|
||||
| ConcreteDataType::Timestamp(_)
|
||||
| ConcreteDataType::Time(_)
|
||||
| ConcreteDataType::Interval(_)
|
||||
@@ -385,7 +381,7 @@ impl ConcreteDataType {
|
||||
&ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
|
||||
&ConcreteDataType::String(_) => "VARCHAR",
|
||||
&ConcreteDataType::Date(_) => "DATE",
|
||||
&ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
|
||||
&ConcreteDataType::Timestamp(_) => "TIMESTAMP",
|
||||
&ConcreteDataType::Time(_) => "TIME",
|
||||
&ConcreteDataType::Interval(_) => "INTERVAL",
|
||||
&ConcreteDataType::Decimal128(_) => "NUMERIC",
|
||||
@@ -402,7 +398,7 @@ impl ConcreteDataType {
|
||||
&ConcreteDataType::Binary(_) => "_BYTEA",
|
||||
&ConcreteDataType::String(_) => "_VARCHAR",
|
||||
&ConcreteDataType::Date(_) => "_DATE",
|
||||
&ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
|
||||
&ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
|
||||
&ConcreteDataType::Time(_) => "_TIME",
|
||||
&ConcreteDataType::Interval(_) => "_INTERVAL",
|
||||
&ConcreteDataType::Decimal128(_) => "_NUMERIC",
|
||||
@@ -441,7 +437,6 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
|
||||
ArrowDataType::Float32 => Self::float32_datatype(),
|
||||
ArrowDataType::Float64 => Self::float64_datatype(),
|
||||
ArrowDataType::Date32 => Self::date_datatype(),
|
||||
ArrowDataType::Date64 => Self::datetime_datatype(),
|
||||
ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
|
||||
ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
|
||||
ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
|
||||
@@ -490,7 +485,7 @@ macro_rules! impl_new_concrete_type_functions {
|
||||
|
||||
impl_new_concrete_type_functions!(
|
||||
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
|
||||
Binary, Date, DateTime, String, Json
|
||||
Binary, Date, String, Json
|
||||
);
|
||||
|
||||
impl ConcreteDataType {
|
||||
@@ -814,7 +809,6 @@ mod tests {
|
||||
assert!(ConcreteDataType::string_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::binary_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::date_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::datetime_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
|
||||
@@ -843,7 +837,6 @@ mod tests {
|
||||
assert!(ConcreteDataType::int32_datatype().is_signed());
|
||||
assert!(ConcreteDataType::int64_datatype().is_signed());
|
||||
assert!(ConcreteDataType::date_datatype().is_signed());
|
||||
assert!(ConcreteDataType::datetime_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
|
||||
@@ -878,7 +871,6 @@ mod tests {
|
||||
assert!(!ConcreteDataType::int32_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::int64_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::date_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::datetime_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::any::Any;
|
||||
|
||||
use common_decimal::Decimal128;
|
||||
use common_time::{Date, DateTime};
|
||||
use common_time::Date;
|
||||
|
||||
use crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
|
||||
@@ -23,8 +23,8 @@ use crate::types::{
|
||||
};
|
||||
use crate::value::{ListValue, ListValueRef, Value};
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, ListVector,
|
||||
MutableVector, PrimitiveVector, StringVector, Vector,
|
||||
BinaryVector, BooleanVector, DateVector, Decimal128Vector, ListVector, MutableVector,
|
||||
PrimitiveVector, StringVector, Vector,
|
||||
};
|
||||
|
||||
fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
|
||||
@@ -302,27 +302,6 @@ impl ScalarRef<'_> for Decimal128 {
|
||||
}
|
||||
}
|
||||
|
||||
impl Scalar for DateTime {
|
||||
type VectorType = DateTimeVector;
|
||||
type RefType<'a> = DateTime;
|
||||
|
||||
fn as_scalar_ref(&self) -> Self::RefType<'_> {
|
||||
*self
|
||||
}
|
||||
|
||||
fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarRef<'_> for DateTime {
    type ScalarType = DateTime;

    /// Returns a copy of the value; the owned and reference forms are the same type.
    fn to_owned_scalar(&self) -> Self::ScalarType {
        let owned: DateTime = *self;
        owned
    }
}
|
||||
|
||||
// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.
|
||||
|
||||
impl Scalar for ListValue {
|
||||
@@ -428,13 +407,6 @@ mod tests {
|
||||
assert_eq!(decimal, decimal.to_owned_scalar());
|
||||
}
|
||||
|
||||
#[test]
fn test_datetime_scalar() {
    // Both conversions must round-trip to the same value.
    let original = DateTime::new(123);
    let as_ref = original.as_scalar_ref();
    let owned = original.to_owned_scalar();
    assert_eq!(original, as_ref);
    assert_eq!(original, owned);
}
|
||||
|
||||
#[test]
|
||||
fn test_list_value_scalar() {
|
||||
let list_value =
|
||||
|
||||
@@ -40,9 +40,6 @@ pub enum LogicalTypeId {
|
||||
/// Date representing the elapsed time since UNIX epoch (1970-01-01)
|
||||
/// in days (32 bits).
|
||||
Date,
|
||||
/// Datetime representing the elapsed time since UNIX epoch (1970-01-01) in
|
||||
/// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
|
||||
DateTime,
|
||||
|
||||
TimestampSecond,
|
||||
TimestampMillisecond,
|
||||
@@ -100,7 +97,6 @@ impl LogicalTypeId {
|
||||
LogicalTypeId::String => ConcreteDataType::string_datatype(),
|
||||
LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
|
||||
LogicalTypeId::Date => ConcreteDataType::date_datatype(),
|
||||
LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
|
||||
LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
|
||||
LogicalTypeId::TimestampMillisecond => {
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
|
||||
@@ -16,7 +16,6 @@ mod binary_type;
|
||||
mod boolean_type;
|
||||
pub mod cast;
|
||||
mod date_type;
|
||||
mod datetime_type;
|
||||
mod decimal_type;
|
||||
mod dictionary_type;
|
||||
mod duration_type;
|
||||
@@ -34,7 +33,6 @@ pub use binary_type::BinaryType;
|
||||
pub use boolean_type::BooleanType;
|
||||
pub use cast::{cast, cast_with_opt};
|
||||
pub use date_type::DateType;
|
||||
pub use datetime_type::DateTimeType;
|
||||
pub use decimal_type::Decimal128Type;
|
||||
pub use dictionary_type::DictionaryType;
|
||||
pub use duration_type::{
|
||||
|
||||
@@ -119,10 +119,6 @@ pub fn can_cast_type(src_value: &Value, dest_type: &ConcreteDataType) -> bool {
|
||||
(Date(_), Int32(_) | Timestamp(_) | String(_)) => true,
|
||||
(Int32(_) | String(_) | Timestamp(_), Date(_)) => true,
|
||||
(Date(_), Date(_)) => true,
|
||||
// DateTime type
|
||||
(DateTime(_), Int64(_) | Timestamp(_) | String(_)) => true,
|
||||
(Int64(_) | Timestamp(_) | String(_), DateTime(_)) => true,
|
||||
(DateTime(_), DateTime(_)) => true,
|
||||
// Timestamp type
|
||||
(Timestamp(_), Int64(_) | String(_)) => true,
|
||||
(Int64(_) | String(_), Timestamp(_)) => true,
|
||||
@@ -175,7 +171,7 @@ mod tests {
|
||||
use common_base::bytes::StringBytes;
|
||||
use common_time::time::Time;
|
||||
use common_time::timezone::set_default_timezone;
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
use common_time::{Date, Timestamp};
|
||||
use ordered_float::OrderedFloat;
|
||||
|
||||
use super::*;
|
||||
@@ -274,7 +270,6 @@ mod tests {
|
||||
null_datatype,
|
||||
boolean_datatype,
|
||||
date_datatype,
|
||||
datetime_datatype,
|
||||
timestamp_second_datatype,
|
||||
binary_datatype
|
||||
);
|
||||
@@ -287,23 +282,12 @@ mod tests {
|
||||
timestamp_second_datatype,
|
||||
string_datatype
|
||||
);
|
||||
|
||||
// datetime -> other types
|
||||
test_can_cast!(
|
||||
Value::DateTime(DateTime::from_str_system("2021-01-01 00:00:00").unwrap()),
|
||||
null_datatype,
|
||||
int64_datatype,
|
||||
timestamp_second_datatype,
|
||||
string_datatype
|
||||
);
|
||||
|
||||
// timestamp -> other types
|
||||
test_can_cast!(
|
||||
Value::Timestamp(Timestamp::from_str_utc("2021-01-01 00:00:00").unwrap()),
|
||||
null_datatype,
|
||||
int64_datatype,
|
||||
date_datatype,
|
||||
datetime_datatype,
|
||||
string_datatype
|
||||
);
|
||||
|
||||
|
||||
@@ -55,7 +55,6 @@ impl DataType for DateType {
|
||||
Value::Int32(v) => Some(Value::Date(Date::from(v))),
|
||||
Value::String(v) => Date::from_str_utc(v.as_utf8()).map(Value::Date).ok(),
|
||||
Value::Timestamp(v) => v.to_chrono_date().map(|date| Value::Date(date.into())),
|
||||
Value::DateTime(v) => Some(Value::DateTime(v)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Date64Type};
|
||||
use common_time::DateTime;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::{LogicalTypeId, MutableVector, ScalarVectorBuilder, Value, ValueRef, Vector};
|
||||
use crate::types::LogicalPrimitiveType;
|
||||
use crate::vectors::{DateTimeVector, DateTimeVectorBuilder, PrimitiveVector};
|
||||
|
||||
// Value returned by `DateTimeType::precision`.
// NOTE(review): presumably the number of fractional-second digits (3 = milliseconds) — confirm with callers.
const MILLISECOND_VARIATION: u64 = 3;
|
||||
/// Data type for [`DateTime`].
///
/// A field-less marker type: all instances are identical, so the derived
/// comparison and hashing traits treat every value as equal.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct DateTimeType;
|
||||
|
||||
impl DateTimeType {
|
||||
pub fn precision(&self) -> u64 {
|
||||
MILLISECOND_VARIATION
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for DateTimeType {
|
||||
fn name(&self) -> String {
|
||||
"DateTime".to_string()
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::DateTime
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::DateTime(DateTime::default())
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Date64
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(DateTimeVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn try_cast(&self, from: Value) -> Option<Value> {
|
||||
match from {
|
||||
Value::Int64(v) => Some(Value::DateTime(DateTime::from(v))),
|
||||
Value::Timestamp(v) => v.to_chrono_datetime().map(|d| Value::DateTime(d.into())),
|
||||
Value::String(v) => DateTime::from_str_system(v.as_utf8())
|
||||
.map(Value::DateTime)
|
||||
.ok(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LogicalPrimitiveType for DateTimeType {
|
||||
type ArrowPrimitive = Date64Type;
|
||||
type Native = i64;
|
||||
type Wrapper = DateTime;
|
||||
type LargestType = Self;
|
||||
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::datetime_datatype()
|
||||
}
|
||||
|
||||
fn type_name() -> &'static str {
|
||||
"DateTime"
|
||||
}
|
||||
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<Self>> {
|
||||
vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateTimeVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast {} to DateTimeVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self::Wrapper>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::DateTime(v) => Ok(Some(v)),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!("Failed to cast value {other:?} to DateTime"),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {

    use common_time::timezone::set_default_timezone;
    use common_time::Timestamp;

    use super::*;

    #[test]
    fn test_datetime_cast() {
        let datetime_type = ConcreteDataType::datetime_datatype();

        // Int64 source: the integer is taken as the millisecond count.
        let from_int = datetime_type.try_cast(Value::Int64(1000)).unwrap();
        assert_eq!(from_int, Value::DateTime(DateTime::from(1000)));

        // String source.
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        let from_string = datetime_type
            .try_cast(Value::String("1970-01-01 00:00:00+0800".into()))
            .unwrap();
        let expected = DateTime::from_str_system("1970-01-01 00:00:00+0800").unwrap();
        assert_eq!(from_string, Value::DateTime(expected));

        // Timestamp source.
        let timestamp = Timestamp::from_str_utc("2020-09-08 21:42:29+0800").unwrap();
        let from_timestamp = datetime_type
            .try_cast(Value::Timestamp(timestamp))
            .unwrap();
        let expected = DateTime::from_str_system("2020-09-08 21:42:29+0800").unwrap();
        assert_eq!(from_timestamp, Value::DateTime(expected));
    }
}
|
||||
@@ -16,7 +16,7 @@ use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
|
||||
use arrow::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType as ArrowDataType};
|
||||
use common_time::{Date, DateTime};
|
||||
use common_time::Date;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
@@ -25,7 +25,7 @@ use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarRef, ScalarVectorBuilder};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::boolean_type::bool_to_numeric;
|
||||
use crate::types::{DateTimeType, DateType};
|
||||
use crate::types::DateType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{MutableVector, PrimitiveVector, PrimitiveVectorBuilder, Vector};
|
||||
|
||||
@@ -157,19 +157,6 @@ impl WrapperType for Date {
|
||||
}
|
||||
}
|
||||
|
||||
impl WrapperType for DateTime {
|
||||
type LogicalType = DateTimeType;
|
||||
type Native = i64;
|
||||
|
||||
fn from_native(value: Self::Native) -> Self {
|
||||
DateTime::new(value)
|
||||
}
|
||||
|
||||
fn into_native(self) -> Self::Native {
|
||||
self.val()
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! define_logical_primitive_type {
|
||||
($Native: ident, $TypeId: ident, $DataType: ident, $Largest: ident) => {
|
||||
// We need to define it as an empty struct `struct DataType {}` instead of a struct-unit
|
||||
@@ -362,7 +349,6 @@ impl DataType for Int64Type {
|
||||
Value::Float32(v) => num::cast::cast(v).map(Value::Int64),
|
||||
Value::Float64(v) => num::cast::cast(v).map(Value::Int64),
|
||||
Value::String(v) => v.as_utf8().parse::<i64>().map(Value::Int64).ok(),
|
||||
Value::DateTime(v) => Some(Value::Int64(v.val())),
|
||||
Value::Timestamp(v) => Some(Value::Int64(v.value())),
|
||||
Value::Time(v) => Some(Value::Int64(v.value())),
|
||||
// We don't allow casting interval type to int.
|
||||
|
||||
@@ -75,7 +75,6 @@ impl DataType for StringType {
|
||||
Value::Float64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
|
||||
Value::String(v) => Some(Value::String(v)),
|
||||
Value::Date(v) => Some(Value::String(StringBytes::from(v.to_string()))),
|
||||
Value::DateTime(v) => Some(Value::String(StringBytes::from(v.to_string()))),
|
||||
Value::Timestamp(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
|
||||
Value::Time(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
|
||||
Value::IntervalYearMonth(v) => {
|
||||
|
||||
@@ -132,7 +132,6 @@ macro_rules! impl_data_type_for_timestamp {
|
||||
Value::Timestamp(v) => v.convert_to(TimeUnit::$unit).map(Value::Timestamp),
|
||||
Value::String(v) => Timestamp::from_str_utc(v.as_utf8()).map(Value::Timestamp).ok(),
|
||||
Value::Int64(v) => Some(Value::Timestamp(Timestamp::new(v, TimeUnit::$unit))),
|
||||
Value::DateTime(v) => Timestamp::new_second(v.val()).convert_to(TimeUnit::$unit).map(Value::Timestamp),
|
||||
Value::Date(v) => Timestamp::new_second(v.to_secs()).convert_to(TimeUnit::$unit).map(Value::Timestamp),
|
||||
_ => None
|
||||
}
|
||||
@@ -202,7 +201,7 @@ impl_data_type_for_timestamp!(Microsecond);
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_time::timezone::set_default_timezone;
|
||||
use common_time::{Date, DateTime};
|
||||
use common_time::Date;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -249,13 +248,6 @@ mod tests {
|
||||
.unwrap();
|
||||
assert_eq!(ts, Value::Timestamp(Timestamp::new_second(1694589525)));
|
||||
|
||||
// Datetime -> TimestampSecond
|
||||
let dt = Value::DateTime(DateTime::from(1234567));
|
||||
let ts = ConcreteDataType::timestamp_second_datatype()
|
||||
.try_cast(dt)
|
||||
.unwrap();
|
||||
assert_eq!(ts, Value::Timestamp(Timestamp::new_second(1234567)));
|
||||
|
||||
// Date -> TimestampMillisecond
|
||||
let d = Value::Date(Date::from_str_utc("1970-01-01").unwrap());
|
||||
let ts = ConcreteDataType::timestamp_millisecond_datatype()
|
||||
|
||||
@@ -24,7 +24,6 @@ use common_base::bytes::{Bytes, StringBytes};
|
||||
use common_decimal::Decimal128;
|
||||
use common_telemetry::error;
|
||||
use common_time::date::Date;
|
||||
use common_time::datetime::DateTime;
|
||||
use common_time::interval::IntervalUnit;
|
||||
use common_time::time::Time;
|
||||
use common_time::timestamp::{TimeUnit, Timestamp};
|
||||
@@ -75,7 +74,6 @@ pub enum Value {
|
||||
|
||||
// Date & Time types:
|
||||
Date(Date),
|
||||
DateTime(DateTime),
|
||||
Timestamp(Timestamp),
|
||||
Time(Time),
|
||||
Duration(Duration),
|
||||
@@ -112,7 +110,6 @@ impl Display for Value {
|
||||
write!(f, "{hex}")
|
||||
}
|
||||
Value::Date(v) => write!(f, "{v}"),
|
||||
Value::DateTime(v) => write!(f, "{v}"),
|
||||
Value::Timestamp(v) => write!(f, "{}", v.to_iso8601_string()),
|
||||
Value::Time(t) => write!(f, "{}", t.to_iso8601_string()),
|
||||
Value::IntervalYearMonth(v) => {
|
||||
@@ -162,7 +159,6 @@ macro_rules! define_data_type_func {
|
||||
$struct::String(_) => ConcreteDataType::string_datatype(),
|
||||
$struct::Binary(_) => ConcreteDataType::binary_datatype(),
|
||||
$struct::Date(_) => ConcreteDataType::date_datatype(),
|
||||
$struct::DateTime(_) => ConcreteDataType::datetime_datatype(),
|
||||
$struct::Time(t) => ConcreteDataType::time_datatype(*t.unit()),
|
||||
$struct::Timestamp(v) => ConcreteDataType::timestamp_datatype(v.unit()),
|
||||
$struct::IntervalYearMonth(_) => {
|
||||
@@ -222,7 +218,6 @@ impl Value {
|
||||
Value::String(v) => ValueRef::String(v.as_utf8()),
|
||||
Value::Binary(v) => ValueRef::Binary(v),
|
||||
Value::Date(v) => ValueRef::Date(*v),
|
||||
Value::DateTime(v) => ValueRef::DateTime(*v),
|
||||
Value::List(v) => ValueRef::List(ListValueRef::Ref { val: v }),
|
||||
Value::Timestamp(v) => ValueRef::Timestamp(*v),
|
||||
Value::Time(v) => ValueRef::Time(*v),
|
||||
@@ -258,14 +253,6 @@ impl Value {
|
||||
}
|
||||
}
|
||||
|
||||
/// Cast Value to DateTime. Return None if value is not a valid datetime data type.
|
||||
pub fn as_datetime(&self) -> Option<DateTime> {
|
||||
match self {
|
||||
Value::DateTime(t) => Some(*t),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Cast Value to [Time]. Return None if value is not a valid time data type.
|
||||
pub fn as_time(&self) -> Option<Time> {
|
||||
match self {
|
||||
@@ -345,7 +332,6 @@ impl Value {
|
||||
Value::Binary(_) => LogicalTypeId::Binary,
|
||||
Value::List(_) => LogicalTypeId::List,
|
||||
Value::Date(_) => LogicalTypeId::Date,
|
||||
Value::DateTime(_) => LogicalTypeId::DateTime,
|
||||
Value::Timestamp(t) => match t.unit() {
|
||||
TimeUnit::Second => LogicalTypeId::TimestampSecond,
|
||||
TimeUnit::Millisecond => LogicalTypeId::TimestampMillisecond,
|
||||
@@ -401,7 +387,6 @@ impl Value {
|
||||
Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())),
|
||||
Value::Binary(v) => ScalarValue::Binary(Some(v.to_vec())),
|
||||
Value::Date(v) => ScalarValue::Date32(Some(v.val())),
|
||||
Value::DateTime(v) => ScalarValue::Date64(Some(v.val())),
|
||||
Value::Null => to_null_scalar_value(output_type)?,
|
||||
Value::List(list) => {
|
||||
// Safety: The logical type of the value and output_type are the same.
|
||||
@@ -463,7 +448,6 @@ impl Value {
|
||||
Value::Float64(x) => Some(Value::Float64(-*x)),
|
||||
Value::Decimal128(x) => Some(Value::Decimal128(x.negative())),
|
||||
Value::Date(x) => Some(Value::Date(x.negative())),
|
||||
Value::DateTime(x) => Some(Value::DateTime(x.negative())),
|
||||
Value::Timestamp(x) => Some(Value::Timestamp(x.negative())),
|
||||
Value::Time(x) => Some(Value::Time(x.negative())),
|
||||
Value::Duration(x) => Some(Value::Duration(x.negative())),
|
||||
@@ -525,7 +509,6 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValu
|
||||
}
|
||||
ConcreteDataType::String(_) => ScalarValue::Utf8(None),
|
||||
ConcreteDataType::Date(_) => ScalarValue::Date32(None),
|
||||
ConcreteDataType::DateTime(_) => ScalarValue::Date64(None),
|
||||
ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit(), None),
|
||||
ConcreteDataType::Interval(v) => match v {
|
||||
IntervalType::YearMonth(_) => ScalarValue::IntervalYearMonth(None),
|
||||
@@ -631,7 +614,6 @@ macro_rules! impl_ord_for_value_like {
|
||||
($Type::String(v1), $Type::String(v2)) => v1.cmp(v2),
|
||||
($Type::Binary(v1), $Type::Binary(v2)) => v1.cmp(v2),
|
||||
($Type::Date(v1), $Type::Date(v2)) => v1.cmp(v2),
|
||||
($Type::DateTime(v1), $Type::DateTime(v2)) => v1.cmp(v2),
|
||||
($Type::Timestamp(v1), $Type::Timestamp(v2)) => v1.cmp(v2),
|
||||
($Type::Time(v1), $Type::Time(v2)) => v1.cmp(v2),
|
||||
($Type::IntervalYearMonth(v1), $Type::IntervalYearMonth(v2)) => v1.cmp(v2),
|
||||
@@ -712,7 +694,6 @@ impl_try_from_value!(String, StringBytes);
|
||||
impl_try_from_value!(Binary, Bytes);
|
||||
impl_try_from_value!(Date, Date);
|
||||
impl_try_from_value!(Time, Time);
|
||||
impl_try_from_value!(DateTime, DateTime);
|
||||
impl_try_from_value!(Timestamp, Timestamp);
|
||||
impl_try_from_value!(IntervalYearMonth, IntervalYearMonth);
|
||||
impl_try_from_value!(IntervalDayTime, IntervalDayTime);
|
||||
@@ -756,7 +737,6 @@ impl_value_from!(String, StringBytes);
|
||||
impl_value_from!(Binary, Bytes);
|
||||
impl_value_from!(Date, Date);
|
||||
impl_value_from!(Time, Time);
|
||||
impl_value_from!(DateTime, DateTime);
|
||||
impl_value_from!(Timestamp, Timestamp);
|
||||
impl_value_from!(IntervalYearMonth, IntervalYearMonth);
|
||||
impl_value_from!(IntervalDayTime, IntervalDayTime);
|
||||
@@ -803,7 +783,6 @@ impl TryFrom<Value> for serde_json::Value {
|
||||
Value::String(bytes) => serde_json::Value::String(bytes.into_string()),
|
||||
Value::Binary(bytes) => serde_json::to_value(bytes)?,
|
||||
Value::Date(v) => serde_json::Value::Number(v.val().into()),
|
||||
Value::DateTime(v) => serde_json::Value::Number(v.val().into()),
|
||||
Value::List(v) => serde_json::to_value(v)?,
|
||||
Value::Timestamp(v) => serde_json::to_value(v.value())?,
|
||||
Value::Time(v) => serde_json::to_value(v.value())?,
|
||||
@@ -933,9 +912,6 @@ impl TryFrom<ScalarValue> for Value {
|
||||
Value::List(ListValue::new(items, datatype))
|
||||
}
|
||||
ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null),
|
||||
ScalarValue::Date64(d) => d
|
||||
.map(|x| Value::DateTime(DateTime::new(x)))
|
||||
.unwrap_or(Value::Null),
|
||||
ScalarValue::TimestampSecond(t, _) => t
|
||||
.map(|x| Value::Timestamp(Timestamp::new(x, TimeUnit::Second)))
|
||||
.unwrap_or(Value::Null),
|
||||
@@ -994,7 +970,8 @@ impl TryFrom<ScalarValue> for Value {
|
||||
| ScalarValue::Float16(_)
|
||||
| ScalarValue::Utf8View(_)
|
||||
| ScalarValue::BinaryView(_)
|
||||
| ScalarValue::Map(_) => {
|
||||
| ScalarValue::Map(_)
|
||||
| ScalarValue::Date64(_) => {
|
||||
return error::UnsupportedArrowTypeSnafu {
|
||||
arrow_type: v.data_type(),
|
||||
}
|
||||
@@ -1023,7 +1000,6 @@ impl From<ValueRef<'_>> for Value {
|
||||
ValueRef::String(v) => Value::String(v.into()),
|
||||
ValueRef::Binary(v) => Value::Binary(v.into()),
|
||||
ValueRef::Date(v) => Value::Date(v),
|
||||
ValueRef::DateTime(v) => Value::DateTime(v),
|
||||
ValueRef::Timestamp(v) => Value::Timestamp(v),
|
||||
ValueRef::Time(v) => Value::Time(v),
|
||||
ValueRef::IntervalYearMonth(v) => Value::IntervalYearMonth(v),
|
||||
@@ -1063,7 +1039,6 @@ pub enum ValueRef<'a> {
|
||||
|
||||
// Date & Time types:
|
||||
Date(Date),
|
||||
DateTime(DateTime),
|
||||
Timestamp(Timestamp),
|
||||
Time(Time),
|
||||
Duration(Duration),
|
||||
@@ -1175,11 +1150,6 @@ impl<'a> ValueRef<'a> {
|
||||
impl_as_for_value_ref!(self, Date)
|
||||
}
|
||||
|
||||
/// Cast itself to [DateTime].
|
||||
pub fn as_datetime(&self) -> Result<Option<DateTime>> {
|
||||
impl_as_for_value_ref!(self, DateTime)
|
||||
}
|
||||
|
||||
/// Cast itself to [Timestamp].
|
||||
pub fn as_timestamp(&self) -> Result<Option<Timestamp>> {
|
||||
impl_as_for_value_ref!(self, Timestamp)
|
||||
@@ -1263,7 +1233,6 @@ impl_value_ref_from!(Int64, i64);
|
||||
impl_value_ref_from!(Float32, f32);
|
||||
impl_value_ref_from!(Float64, f64);
|
||||
impl_value_ref_from!(Date, Date);
|
||||
impl_value_ref_from!(DateTime, DateTime);
|
||||
impl_value_ref_from!(Timestamp, Timestamp);
|
||||
impl_value_ref_from!(Time, Time);
|
||||
impl_value_ref_from!(IntervalYearMonth, IntervalYearMonth);
|
||||
@@ -1327,7 +1296,6 @@ pub fn transform_value_ref_to_json_value<'a>(
|
||||
}
|
||||
}
|
||||
ValueRef::Date(v) => serde_json::Value::Number(v.val().into()),
|
||||
ValueRef::DateTime(v) => serde_json::Value::Number(v.val().into()),
|
||||
ValueRef::List(v) => serde_json::to_value(v)?,
|
||||
ValueRef::Timestamp(v) => serde_json::to_value(v.value())?,
|
||||
ValueRef::Time(v) => serde_json::to_value(v.value())?,
|
||||
@@ -1426,7 +1394,6 @@ impl ValueRef<'_> {
|
||||
ValueRef::String(v) => std::mem::size_of_val(v),
|
||||
ValueRef::Binary(v) => std::mem::size_of_val(v),
|
||||
ValueRef::Date(_) => 4,
|
||||
ValueRef::DateTime(_) => 8,
|
||||
ValueRef::Timestamp(_) => 16,
|
||||
ValueRef::Time(_) => 16,
|
||||
ValueRef::Duration(_) => 16,
|
||||
@@ -1462,7 +1429,9 @@ pub fn column_data_to_json(data: ValueData) -> JsonValue {
|
||||
.unwrap_or(JsonValue::Null),
|
||||
ValueData::StringValue(s) => JsonValue::String(s),
|
||||
ValueData::DateValue(d) => JsonValue::String(Date::from(d).to_string()),
|
||||
ValueData::DatetimeValue(d) => JsonValue::String(DateTime::from(d).to_string()),
|
||||
ValueData::DatetimeValue(d) => {
|
||||
JsonValue::String(Timestamp::new_microsecond(d).to_iso8601_string())
|
||||
}
|
||||
ValueData::TimeSecondValue(d) => JsonValue::String(Time::new_second(d).to_iso8601_string()),
|
||||
ValueData::TimeMillisecondValue(d) => {
|
||||
JsonValue::String(Time::new_millisecond(d).to_iso8601_string())
|
||||
@@ -1511,6 +1480,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_column_data_to_json() {
|
||||
set_default_timezone(Some("Asia/Shanghai")).unwrap();
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::BinaryValue(b"hello".to_vec())),
|
||||
JsonValue::String("aGVsbG8=".to_string())
|
||||
@@ -1569,31 +1539,31 @@ mod tests {
|
||||
);
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::DatetimeValue(456)),
|
||||
JsonValue::String("1970-01-01 00:00:00.456+0000".to_string())
|
||||
JsonValue::String("1970-01-01 08:00:00.000456+0800".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::TimeSecondValue(789)),
|
||||
JsonValue::String("00:13:09+0000".to_string())
|
||||
JsonValue::String("08:13:09+0800".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::TimeMillisecondValue(789)),
|
||||
JsonValue::String("00:00:00.789+0000".to_string())
|
||||
JsonValue::String("08:00:00.789+0800".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::TimeMicrosecondValue(789)),
|
||||
JsonValue::String("00:00:00.000789+0000".to_string())
|
||||
JsonValue::String("08:00:00.000789+0800".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::TimestampMillisecondValue(1234567890)),
|
||||
JsonValue::String("1970-01-15 06:56:07.890+0000".to_string())
|
||||
JsonValue::String("1970-01-15 14:56:07.890+0800".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::TimestampNanosecondValue(1234567890123456789)),
|
||||
JsonValue::String("2009-02-13 23:31:30.123456789+0000".to_string())
|
||||
JsonValue::String("2009-02-14 07:31:30.123456789+0800".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::TimestampSecondValue(1234567890)),
|
||||
JsonValue::String("2009-02-13 23:31:30+0000".to_string())
|
||||
JsonValue::String("2009-02-14 07:31:30+0800".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
column_data_to_json(ValueData::IntervalYearMonthValue(12)),
|
||||
@@ -1758,12 +1728,6 @@ mod tests {
|
||||
);
|
||||
assert_eq!(Value::Null, ScalarValue::Date32(None).try_into().unwrap());
|
||||
|
||||
assert_eq!(
|
||||
Value::DateTime(DateTime::new(456)),
|
||||
ScalarValue::Date64(Some(456)).try_into().unwrap()
|
||||
);
|
||||
assert_eq!(Value::Null, ScalarValue::Date64(None).try_into().unwrap());
|
||||
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(1, TimeUnit::Second)),
|
||||
ScalarValue::TimestampSecond(Some(1), None)
|
||||
@@ -2027,10 +1991,6 @@ mod tests {
|
||||
&ConcreteDataType::date_datatype(),
|
||||
&Value::Date(Date::new(1)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::datetime_datatype(),
|
||||
&Value::DateTime(DateTime::new(1)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
&Value::Timestamp(Timestamp::new_millisecond(1)),
|
||||
@@ -2169,11 +2129,6 @@ mod tests {
|
||||
serde_json::Value::Number(5000i32.into()),
|
||||
to_json(Value::Date(Date::new(5000)))
|
||||
);
|
||||
assert_eq!(
|
||||
serde_json::Value::Number(5000i64.into()),
|
||||
to_json(Value::DateTime(DateTime::new(5000)))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
serde_json::Value::Number(1.into()),
|
||||
to_json(Value::Timestamp(Timestamp::new_millisecond(1)))
|
||||
@@ -2259,7 +2214,6 @@ mod tests {
|
||||
);
|
||||
|
||||
check_as_value_ref!(Date, Date::new(103));
|
||||
check_as_value_ref!(DateTime, DateTime::new(1034));
|
||||
|
||||
let list = ListValue {
|
||||
items: vec![],
|
||||
@@ -2291,7 +2245,6 @@ mod tests {
|
||||
check_as_null!(as_string);
|
||||
check_as_null!(as_boolean);
|
||||
check_as_null!(as_date);
|
||||
check_as_null!(as_datetime);
|
||||
check_as_null!(as_list);
|
||||
|
||||
macro_rules! check_as_correct {
|
||||
@@ -2304,7 +2257,6 @@ mod tests {
|
||||
check_as_correct!("hello".as_bytes(), Binary, as_binary);
|
||||
check_as_correct!(true, Boolean, as_boolean);
|
||||
check_as_correct!(Date::new(123), Date, as_date);
|
||||
check_as_correct!(DateTime::new(12), DateTime, as_datetime);
|
||||
check_as_correct!(Time::new_second(12), Time, as_time);
|
||||
check_as_correct!(Duration::new_second(12), Duration, as_duration);
|
||||
let list = ListValue {
|
||||
@@ -2318,7 +2270,6 @@ mod tests {
|
||||
assert!(wrong_value.as_string().is_err());
|
||||
assert!(wrong_value.as_boolean().is_err());
|
||||
assert!(wrong_value.as_date().is_err());
|
||||
assert!(wrong_value.as_datetime().is_err());
|
||||
assert!(wrong_value.as_list().is_err());
|
||||
assert!(wrong_value.as_time().is_err());
|
||||
assert!(wrong_value.as_timestamp().is_err());
|
||||
@@ -2346,10 +2297,6 @@ mod tests {
|
||||
"010203"
|
||||
);
|
||||
assert_eq!(Value::Date(Date::new(0)).to_string(), "1970-01-01");
|
||||
assert_eq!(
|
||||
Value::DateTime(DateTime::new(0)).to_string(),
|
||||
"1970-01-01 08:00:00+0800"
|
||||
);
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(1000, TimeUnit::Millisecond)).to_string(),
|
||||
"1970-01-01 08:00:01+0800"
|
||||
@@ -2755,7 +2702,6 @@ mod tests {
|
||||
check_value_ref_size_eq(&ValueRef::String("greptimedb"), 10);
|
||||
check_value_ref_size_eq(&ValueRef::Binary(b"greptimedb"), 10);
|
||||
check_value_ref_size_eq(&ValueRef::Date(Date::new(1)), 4);
|
||||
check_value_ref_size_eq(&ValueRef::DateTime(DateTime::new(1)), 8);
|
||||
check_value_ref_size_eq(&ValueRef::Timestamp(Timestamp::new_millisecond(1)), 16);
|
||||
check_value_ref_size_eq(&ValueRef::Time(Time::new_millisecond(1)), 16);
|
||||
check_value_ref_size_eq(&ValueRef::IntervalYearMonth(IntervalYearMonth::new(1)), 4);
|
||||
|
||||
@@ -29,7 +29,6 @@ mod binary;
|
||||
mod boolean;
|
||||
mod constant;
|
||||
mod date;
|
||||
mod datetime;
|
||||
mod decimal;
|
||||
mod duration;
|
||||
mod eq;
|
||||
@@ -48,7 +47,6 @@ pub use binary::{BinaryVector, BinaryVectorBuilder};
|
||||
pub use boolean::{BooleanVector, BooleanVectorBuilder};
|
||||
pub use constant::ConstantVector;
|
||||
pub use date::{DateVector, DateVectorBuilder};
|
||||
pub use datetime::{DateTimeVector, DateTimeVectorBuilder};
|
||||
pub use decimal::{Decimal128Vector, Decimal128VectorBuilder};
|
||||
pub use duration::{
|
||||
DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector,
|
||||
@@ -377,7 +375,7 @@ pub mod tests {
|
||||
// Test Primitive types
|
||||
mutable_primitive_data_type_eq_with_lower!(
|
||||
Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64,
|
||||
Date, DateTime, Binary, String
|
||||
Date, Binary, String
|
||||
);
|
||||
|
||||
// Test types about time
|
||||
|
||||
@@ -1,116 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::types::DateTimeType;
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
/// Vector of [`DateTime`](common_time::Date)
|
||||
pub type DateTimeVector = PrimitiveVector<DateTimeType>;
|
||||
/// Builder for [`DateTimeVector`].
|
||||
pub type DateTimeVectorBuilder = PrimitiveVectorBuilder<DateTimeType>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, PrimitiveArray};
|
||||
use arrow_array::ArrayRef;
|
||||
use common_time::timezone::set_default_timezone;
|
||||
use common_time::DateTime;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::prelude::{
|
||||
ConcreteDataType, ScalarVector, ScalarVectorBuilder, Value, ValueRef, Vector, VectorRef,
|
||||
};
|
||||
use crate::serialize::Serializable;
|
||||
|
||||
#[test]
|
||||
fn test_datetime_vector() {
|
||||
set_default_timezone(Some("Asia/Shanghai")).unwrap();
|
||||
let v = DateTimeVector::new(PrimitiveArray::from(vec![1000, 2000, 3000]));
|
||||
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
|
||||
assert_eq!(3, v.len());
|
||||
assert_eq!("DateTimeVector", v.vector_type_name());
|
||||
assert_eq!(
|
||||
&arrow::datatypes::DataType::Date64,
|
||||
v.to_arrow_array().data_type()
|
||||
);
|
||||
|
||||
assert_eq!(Some(DateTime::new(1000)), v.get_data(0));
|
||||
assert_eq!(Value::DateTime(DateTime::new(1000)), v.get(0));
|
||||
assert_eq!(ValueRef::DateTime(DateTime::new(1000)), v.get_ref(0));
|
||||
|
||||
let mut iter = v.iter_data();
|
||||
assert_eq!(Some(DateTime::new(1000)), iter.next().unwrap());
|
||||
assert_eq!(Some(DateTime::new(2000)), iter.next().unwrap());
|
||||
assert_eq!(Some(DateTime::new(3000)), iter.next().unwrap());
|
||||
assert!(!v.is_null(0));
|
||||
assert_eq!(24, v.memory_size());
|
||||
|
||||
if let Value::DateTime(d) = v.get(0) {
|
||||
assert_eq!(1000, d.val());
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
assert_eq!(
|
||||
"[\"1970-01-01 08:00:01+0800\",\"1970-01-01 08:00:02+0800\",\"1970-01-01 08:00:03+0800\"]",
|
||||
serde_json::to_string(&v.serialize_to_json().unwrap()).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_datetime_vector_builder() {
|
||||
let mut builder = DateTimeVectorBuilder::with_capacity(3);
|
||||
builder.push(Some(DateTime::new(1)));
|
||||
builder.push(None);
|
||||
builder.push(Some(DateTime::new(-1)));
|
||||
|
||||
let v = builder.finish();
|
||||
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
|
||||
assert_eq!(Value::DateTime(DateTime::new(1)), v.get(0));
|
||||
assert_eq!(Value::Null, v.get(1));
|
||||
assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));
|
||||
|
||||
let input = DateTimeVector::from_wrapper_slice([
|
||||
DateTime::new(1),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
]);
|
||||
|
||||
let mut builder = DateTimeType.create_mutable_vector(3);
|
||||
builder.push_value_ref(ValueRef::DateTime(DateTime::new(5)));
|
||||
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice([
|
||||
DateTime::new(5),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_datetime_from_arrow() {
|
||||
let vector = DateTimeVector::from_wrapper_slice([DateTime::new(1), DateTime::new(2)]);
|
||||
let arrow: ArrayRef = Arc::new(vector.as_arrow().slice(0, vector.len())) as _;
|
||||
let vector2 = DateTimeVector::try_from_arrow_array(arrow).unwrap();
|
||||
assert_eq!(vector, vector2);
|
||||
}
|
||||
}
|
||||
@@ -20,12 +20,12 @@ use crate::data_type::DataType;
|
||||
use crate::types::{DurationType, TimeType, TimestampType};
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector,
|
||||
DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
|
||||
DurationSecondVector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
|
||||
IntervalYearMonthVector, ListVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
|
||||
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
BinaryVector, BooleanVector, DateVector, Decimal128Vector, DurationMicrosecondVector,
|
||||
DurationMillisecondVector, DurationNanosecondVector, DurationSecondVector,
|
||||
IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector, ListVector,
|
||||
PrimitiveVector, StringVector, TimeMicrosecondVector, TimeMillisecondVector,
|
||||
TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
};
|
||||
use crate::with_match_primitive_type_id;
|
||||
|
||||
@@ -83,7 +83,6 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
Binary(_) | Json(_) | Vector(_) => is_vector_eq!(BinaryVector, lhs, rhs),
|
||||
String(_) => is_vector_eq!(StringVector, lhs, rhs),
|
||||
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
|
||||
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
|
||||
Timestamp(t) => match t {
|
||||
TimestampType::Second(_) => {
|
||||
is_vector_eq!(TimestampSecondVector, lhs, rhs)
|
||||
@@ -195,7 +194,6 @@ mod tests {
|
||||
)));
|
||||
assert_vector_ref_eq(Arc::new(BooleanVector::from(vec![true, false])));
|
||||
assert_vector_ref_eq(Arc::new(DateVector::from(vec![Some(100), Some(120)])));
|
||||
assert_vector_ref_eq(Arc::new(DateTimeVector::from(vec![Some(100), Some(120)])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampSecondVector::from_values([100, 120])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampMillisecondVector::from_values([
|
||||
100, 120,
|
||||
|
||||
@@ -31,7 +31,7 @@ use crate::prelude::DataType;
|
||||
use crate::scalars::{Scalar, ScalarVectorBuilder};
|
||||
use crate::value::{ListValue, ListValueRef, Value};
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, Decimal128Vector,
|
||||
BinaryVector, BooleanVector, ConstantVector, DateVector, Decimal128Vector,
|
||||
DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
|
||||
DurationSecondVector, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector,
|
||||
Int8Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector,
|
||||
@@ -179,9 +179,6 @@ impl Helper {
|
||||
ScalarValue::Date32(v) => {
|
||||
ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Date64(v) => {
|
||||
ConstantVector::new(Arc::new(DateTimeVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampSecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
|
||||
@@ -244,7 +241,8 @@ impl Helper {
|
||||
| ScalarValue::Float16(_)
|
||||
| ScalarValue::Utf8View(_)
|
||||
| ScalarValue::BinaryView(_)
|
||||
| ScalarValue::Map(_) => {
|
||||
| ScalarValue::Map(_)
|
||||
| ScalarValue::Date64(_) => {
|
||||
return error::ConversionSnafu {
|
||||
from: format!("Unsupported scalar value: {value}"),
|
||||
}
|
||||
@@ -286,7 +284,6 @@ impl Helper {
|
||||
Arc::new(StringVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Date64 => Arc::new(DateTimeVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Timestamp(unit, _) => match unit {
|
||||
TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
|
||||
@@ -362,7 +359,8 @@ impl Helper {
|
||||
| ArrowDataType::BinaryView
|
||||
| ArrowDataType::Utf8View
|
||||
| ArrowDataType::ListView(_)
|
||||
| ArrowDataType::LargeListView(_) => {
|
||||
| ArrowDataType::LargeListView(_)
|
||||
| ArrowDataType::Date64 => {
|
||||
return error::UnsupportedArrowTypeSnafu {
|
||||
arrow_type: array.as_ref().data_type().clone(),
|
||||
}
|
||||
@@ -411,9 +409,9 @@ impl Helper {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::{
|
||||
ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
|
||||
Int32Array, Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray,
|
||||
Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
|
||||
ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, Int32Array,
|
||||
Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray, Time32MillisecondArray,
|
||||
Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
|
||||
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
|
||||
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
|
||||
};
|
||||
@@ -424,7 +422,7 @@ mod tests {
|
||||
use common_decimal::Decimal128;
|
||||
use common_time::time::Time;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{Date, DateTime, Duration};
|
||||
use common_time::{Date, Duration};
|
||||
|
||||
use super::*;
|
||||
use crate::value::Value;
|
||||
@@ -466,16 +464,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_from_scalar_datetime_value() {
|
||||
let vector = Helper::try_from_scalar_value(ScalarValue::Date64(Some(42)), 3).unwrap();
|
||||
assert_eq!(ConcreteDataType::datetime_datatype(), vector.data_type());
|
||||
assert_eq!(3, vector.len());
|
||||
for i in 0..vector.len() {
|
||||
assert_eq!(Value::DateTime(DateTime::new(42)), vector.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_from_scalar_duration_value() {
|
||||
let vector =
|
||||
@@ -606,7 +594,6 @@ mod tests {
|
||||
check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
|
||||
check_try_into_vector(StringArray::from(vec!["hello", "world"]));
|
||||
check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(Date64Array::from(vec![1, 2, 3]));
|
||||
let data = vec![None, Some(vec![Some(6), Some(7)])];
|
||||
let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
|
||||
check_try_into_vector(list_array);
|
||||
@@ -734,7 +721,6 @@ mod tests {
|
||||
check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
|
||||
check_into_and_from(StringArray::from(vec!["hello", "world"]));
|
||||
check_into_and_from(Date32Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Date64Array::from(vec![1, 2, 3]));
|
||||
|
||||
check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
|
||||
check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));
|
||||
|
||||
@@ -32,7 +32,7 @@ pub(crate) use filter_non_constant;
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
use common_time::Date;
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::timestamp::{
|
||||
@@ -127,8 +127,6 @@ mod tests {
|
||||
#[test]
|
||||
fn test_filter_date_like() {
|
||||
impl_filter_date_like_test!(DateVector, Date, new);
|
||||
impl_filter_date_like_test!(DateTimeVector, DateTime, new);
|
||||
|
||||
impl_filter_date_like_test!(TimestampSecondVector, TimestampSecond, from_native);
|
||||
impl_filter_date_like_test!(
|
||||
TimestampMillisecondVector,
|
||||
|
||||
@@ -105,7 +105,7 @@ pub(crate) fn find_unique_constant(
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
use common_time::Date;
|
||||
|
||||
use super::*;
|
||||
use crate::timestamp::*;
|
||||
@@ -358,7 +358,6 @@ mod tests {
|
||||
#[test]
|
||||
fn test_find_unique_date_like() {
|
||||
impl_find_unique_date_like_test!(DateVector, Date, new);
|
||||
impl_find_unique_date_like_test!(DateTimeVector, DateTime, new);
|
||||
impl_find_unique_date_like_test!(TimestampSecondVector, TimestampSecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampMillisecondVector, TimestampMillisecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from);
|
||||
|
||||
@@ -41,7 +41,7 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
use common_time::{Date, Timestamp};
|
||||
use paste::paste;
|
||||
|
||||
use super::*;
|
||||
@@ -161,8 +161,6 @@ mod tests {
|
||||
#[test]
|
||||
fn test_replicate_date_like() {
|
||||
impl_replicate_date_like_test!(DateVector, Date, new);
|
||||
impl_replicate_date_like_test!(DateTimeVector, DateTime, new);
|
||||
|
||||
impl_replicate_timestamp_test!(Second);
|
||||
impl_replicate_timestamp_test!(Millisecond);
|
||||
impl_replicate_timestamp_test!(Microsecond);
|
||||
|
||||
@@ -33,7 +33,7 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{PrimitiveArray, UInt32Array};
|
||||
use common_time::{Date, DateTime};
|
||||
use common_time::Date;
|
||||
|
||||
use crate::prelude::VectorRef;
|
||||
use crate::scalars::ScalarVector;
|
||||
@@ -105,7 +105,6 @@ mod tests {
|
||||
|
||||
// test date like type
|
||||
take_time_like_test!(DateVector, Date, new);
|
||||
take_time_like_test!(DateTimeVector, DateTime, new);
|
||||
take_time_like_test!(TimestampSecondVector, TimestampSecond, from_native);
|
||||
take_time_like_test!(
|
||||
TimestampMillisecondVector,
|
||||
|
||||
@@ -46,6 +46,7 @@ get-size2 = "0.1.2"
|
||||
greptime-proto.workspace = true
|
||||
# This fork of hydroflow is simply for keeping our dependency in our org, and pin the version
|
||||
# otherwise it is the same with upstream repo
|
||||
chrono.workspace = true
|
||||
http.workspace = true
|
||||
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" }
|
||||
itertools.workspace = true
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
// limitations under the License.
|
||||
|
||||
//! impl `FlowNode` trait for FlowNodeManager so standalone can call them
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::flow::{
|
||||
|
||||
@@ -21,19 +21,31 @@ use crate::FlowWorkerManager;
|
||||
impl FlowWorkerManager {
|
||||
pub async fn gen_state_report(&self) -> FlowStat {
|
||||
let mut full_report = BTreeMap::new();
|
||||
let mut last_exec_time_map = BTreeMap::new();
|
||||
for worker in self.worker_handles.iter() {
|
||||
match worker.get_state_size().await {
|
||||
Ok(state_size) => {
|
||||
full_report.extend(state_size.into_iter().map(|(k, v)| (k as u32, v)))
|
||||
full_report.extend(state_size.into_iter().map(|(k, v)| (k as u32, v)));
|
||||
}
|
||||
Err(err) => {
|
||||
common_telemetry::error!(err; "Get flow stat size error");
|
||||
}
|
||||
}
|
||||
|
||||
match worker.get_last_exec_time_map().await {
|
||||
Ok(last_exec_time) => {
|
||||
last_exec_time_map
|
||||
.extend(last_exec_time.into_iter().map(|(k, v)| (k as u32, v)));
|
||||
}
|
||||
Err(err) => {
|
||||
common_telemetry::error!(err; "Get last exec time error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FlowStat {
|
||||
state_size: full_report,
|
||||
last_exec_time_map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let meta = TableMetaBuilder::default()
|
||||
let meta = TableMetaBuilder::empty()
|
||||
.schema(Arc::new(schema))
|
||||
.primary_key_indices(vec![0])
|
||||
.engine("engine")
|
||||
|
||||
@@ -98,6 +98,10 @@ impl<'subgraph> ActiveDataflowState<'subgraph> {
|
||||
self.state.set_current_ts(ts);
|
||||
}
|
||||
|
||||
pub fn set_last_exec_time(&mut self, ts: repr::Timestamp) {
|
||||
self.state.set_last_exec_time(ts);
|
||||
}
|
||||
|
||||
/// Run all available subgraph
|
||||
///
|
||||
/// return true if any subgraph actually executed
|
||||
@@ -212,6 +216,21 @@ impl WorkerHandle {
|
||||
.build()
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn get_last_exec_time_map(&self) -> Result<BTreeMap<FlowId, i64>, Error> {
|
||||
let ret = self
|
||||
.itc_client
|
||||
.call_with_resp(Request::QueryLastExecTimeMap)
|
||||
.await?;
|
||||
ret.into_query_last_exec_time_map().map_err(|ret| {
|
||||
InternalSnafu {
|
||||
reason: format!(
|
||||
"Flow Node/Worker get_last_exec_time_map failed, expect Response::QueryLastExecTimeMap, found {ret:?}"
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for WorkerHandle {
|
||||
@@ -335,6 +354,7 @@ impl<'s> Worker<'s> {
|
||||
pub fn run_tick(&mut self, now: repr::Timestamp) {
|
||||
for (_flow_id, task_state) in self.task_states.iter_mut() {
|
||||
task_state.set_current_ts(now);
|
||||
task_state.set_last_exec_time(now);
|
||||
task_state.run_available();
|
||||
}
|
||||
}
|
||||
@@ -395,6 +415,15 @@ impl<'s> Worker<'s> {
|
||||
}
|
||||
Some(Response::QueryStateSize { result: ret })
|
||||
}
|
||||
Request::QueryLastExecTimeMap => {
|
||||
let mut ret = BTreeMap::new();
|
||||
for (flow_id, task_state) in self.task_states.iter() {
|
||||
if let Some(last_exec_time) = task_state.state.last_exec_time() {
|
||||
ret.insert(*flow_id, last_exec_time);
|
||||
}
|
||||
}
|
||||
Some(Response::QueryLastExecTimeMap { result: ret })
|
||||
}
|
||||
};
|
||||
Ok(ret)
|
||||
}
|
||||
@@ -427,6 +456,7 @@ pub enum Request {
|
||||
},
|
||||
Shutdown,
|
||||
QueryStateSize,
|
||||
QueryLastExecTimeMap,
|
||||
}
|
||||
|
||||
#[derive(Debug, EnumAsInner)]
|
||||
@@ -446,6 +476,10 @@ enum Response {
|
||||
/// each flow tasks' state size
|
||||
result: BTreeMap<FlowId, usize>,
|
||||
},
|
||||
QueryLastExecTimeMap {
|
||||
/// each flow tasks' last execution time
|
||||
result: BTreeMap<FlowId, i64>,
|
||||
},
|
||||
}
|
||||
|
||||
fn create_inter_thread_call() -> (InterThreadCallClient, InterThreadCallServer) {
|
||||
|
||||
@@ -290,7 +290,9 @@ mod test {
|
||||
let mfp = MapFilterProject::new(1)
|
||||
.filter(vec![
|
||||
ScalarExpr::Column(0)
|
||||
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
|
||||
.call_unary(expr::UnaryFunc::Cast(
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
))
|
||||
.call_binary(
|
||||
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
|
||||
BinaryFunc::Gte,
|
||||
@@ -300,7 +302,9 @@ mod test {
|
||||
ScalarExpr::literal(4i64.into(), ConcreteDataType::int64_datatype()),
|
||||
BinaryFunc::SubInt64,
|
||||
)
|
||||
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
|
||||
.call_unary(expr::UnaryFunc::Cast(
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
))
|
||||
.call_binary(
|
||||
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
|
||||
BinaryFunc::Lt,
|
||||
|
||||
@@ -45,6 +45,8 @@ pub struct DataflowState {
|
||||
arrange_used: Vec<ArrangeHandler>,
|
||||
/// the time arrangement need to be expired after a certain time in milliseconds
|
||||
expire_after: Option<Timestamp>,
|
||||
/// the last time each subgraph executed
|
||||
last_exec_time: Option<Timestamp>,
|
||||
}
|
||||
|
||||
impl DataflowState {
|
||||
@@ -114,6 +116,14 @@ impl DataflowState {
|
||||
pub fn get_state_size(&self) -> usize {
|
||||
self.arrange_used.iter().map(|x| x.read().get_size()).sum()
|
||||
}
|
||||
|
||||
pub fn set_last_exec_time(&mut self, time: Timestamp) {
|
||||
self.last_exec_time = Some(time);
|
||||
}
|
||||
|
||||
pub fn last_exec_time(&self) -> Option<Timestamp> {
|
||||
self.last_exec_time
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
||||
@@ -479,7 +479,7 @@ impl ScalarUDFImpl for TumbleExpand {
|
||||
match (arg_types.first(), arg_types.get(1), arg_types.get(2)) {
|
||||
(Some(ts), Some(window), opt) => {
|
||||
use arrow_schema::DataType::*;
|
||||
if !matches!(ts, Date32 | Date64 | Timestamp(_, _)) {
|
||||
if !matches!(ts, Date32 | Timestamp(_, _)) {
|
||||
return Err(DataFusionError::Plan(
|
||||
format!("Expect timestamp column as first arg for tumble_start, found {:?}", ts)
|
||||
));
|
||||
@@ -491,7 +491,7 @@ impl ScalarUDFImpl for TumbleExpand {
|
||||
}
|
||||
|
||||
if let Some(start_time) = opt{
|
||||
if !matches!(start_time, Utf8 | Date32 | Date64 | Timestamp(_, _)){
|
||||
if !matches!(start_time, Utf8 | Date32 | Timestamp(_, _)){
|
||||
return Err(DataFusionError::Plan(
|
||||
format!("Expect start_time to either be date, timestamp or string, found {:?}", start_time)
|
||||
));
|
||||
|
||||
@@ -554,8 +554,6 @@ fn get_ts_as_millisecond(arg: Value) -> Result<repr::Timestamp, EvalError> {
|
||||
ts.convert_to(TimeUnit::Millisecond)
|
||||
.context(OverflowSnafu)?
|
||||
.value()
|
||||
} else if let Some(ts) = arg.as_datetime() {
|
||||
ts.val()
|
||||
} else {
|
||||
InvalidArgumentSnafu {
|
||||
reason: "Expect input to be timestamp or datetime type",
|
||||
|
||||
@@ -759,7 +759,7 @@ fn ty_eq_without_precision(left: ConcreteDataType, right: ConcreteDataType) -> b
|
||||
#[allow(clippy::too_many_lines)]
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use common_time::DateTime;
|
||||
use common_time::Timestamp;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -813,13 +813,13 @@ mod test {
|
||||
(
|
||||
AggregateFunc::MaxDateTime,
|
||||
vec![
|
||||
(Value::DateTime(DateTime::from(0)), 1),
|
||||
(Value::DateTime(DateTime::from(1)), 1),
|
||||
(Value::Timestamp(Timestamp::from(0)), 1),
|
||||
(Value::Timestamp(Timestamp::from(1)), 1),
|
||||
(Value::Null, 1),
|
||||
],
|
||||
(
|
||||
Value::DateTime(DateTime::from(1)),
|
||||
vec![Value::DateTime(DateTime::from(1)), 2i64.into()],
|
||||
Value::Timestamp(Timestamp::from(1)),
|
||||
vec![Value::Timestamp(Timestamp::from(1)), 2i64.into()],
|
||||
),
|
||||
),
|
||||
(
|
||||
|
||||
@@ -267,7 +267,7 @@ impl AggregateFunc {
|
||||
MaxBool => (boolean_datatype, Max),
|
||||
MaxString => (string_datatype, Max),
|
||||
MaxDate => (date_datatype, Max),
|
||||
MaxDateTime => (datetime_datatype, Max),
|
||||
MaxDateTime => (timestamp_microsecond_datatype, Max),
|
||||
MaxTimestamp => (timestamp_second_datatype, Max),
|
||||
MaxTime => (time_second_datatype, Max),
|
||||
MaxDuration => (duration_second_datatype, Max),
|
||||
@@ -283,7 +283,7 @@ impl AggregateFunc {
|
||||
MinBool => (boolean_datatype, Min),
|
||||
MinString => (string_datatype, Min),
|
||||
MinDate => (date_datatype, Min),
|
||||
MinDateTime => (datetime_datatype, Min),
|
||||
MinDateTime => (timestamp_microsecond_datatype, Min),
|
||||
MinTimestamp => (timestamp_second_datatype, Min),
|
||||
MinTime => (time_second_datatype, Min),
|
||||
MinDuration => (duration_second_datatype, Min),
|
||||
|
||||
@@ -154,14 +154,18 @@ impl HeartbeatTask {
|
||||
};
|
||||
let flow_stat = latest_report
|
||||
.as_ref()
|
||||
.map(|report| {
|
||||
report
|
||||
.map(|report| api::v1::meta::FlowStat {
|
||||
flow_stat_size: report
|
||||
.state_size
|
||||
.iter()
|
||||
.map(|(k, v)| (*k, *v as u64))
|
||||
.collect()
|
||||
})
|
||||
.map(|f| api::v1::meta::FlowStat { flow_stat_size: f });
|
||||
.collect(),
|
||||
flow_last_exec_time_map: report
|
||||
.last_exec_time_map
|
||||
.iter()
|
||||
.map(|(k, v)| (*k, *v))
|
||||
.collect(),
|
||||
});
|
||||
|
||||
Some(HeartbeatRequest {
|
||||
mailbox_message,
|
||||
|
||||
@@ -68,13 +68,10 @@ pub fn value_to_internal_ts(value: Value) -> Result<i64, EvalError> {
|
||||
let ty = arg.data_type();
|
||||
matches!(
|
||||
ty,
|
||||
ConcreteDataType::Date(..)
|
||||
| ConcreteDataType::DateTime(..)
|
||||
| ConcreteDataType::Timestamp(..)
|
||||
ConcreteDataType::Date(..) | ConcreteDataType::Timestamp(..)
|
||||
)
|
||||
};
|
||||
match value {
|
||||
Value::DateTime(ts) => Ok(ts.val()),
|
||||
Value::Int64(ts) => Ok(ts),
|
||||
arg if is_supported_time_type(&arg) => {
|
||||
let arg_ty = arg.data_type();
|
||||
@@ -214,7 +211,7 @@ impl From<Row> for ProtoRow {
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use common_time::{Date, DateTime};
|
||||
use common_time::{Date, Timestamp};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -244,7 +241,7 @@ mod test {
|
||||
{
|
||||
let a = Value::from(1i32);
|
||||
let b = Value::from(1i64);
|
||||
let c = Value::DateTime(DateTime::new(1i64));
|
||||
let c = Value::Timestamp(Timestamp::new_millisecond(1i64));
|
||||
let d = Value::from(1.0);
|
||||
|
||||
assert!(value_to_internal_ts(a).is_err());
|
||||
|
||||
@@ -238,6 +238,13 @@ pub enum Error {
|
||||
source: servers::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to create logical plan for prometheus label values query"))]
|
||||
PrometheusLabelValuesQueryPlan {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: query::promql::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to describe schema for given statement"))]
|
||||
DescribeStatement {
|
||||
#[snafu(implicit)]
|
||||
@@ -366,6 +373,8 @@ impl ErrorExt for Error {
|
||||
| Error::PrometheusMetricNamesQueryPlan { source, .. }
|
||||
| Error::ExecutePromql { source, .. } => source.status_code(),
|
||||
|
||||
Error::PrometheusLabelValuesQueryPlan { source, .. } => source.status_code(),
|
||||
|
||||
Error::CollectRecordbatch { .. } => StatusCode::EngineExecuteQuery,
|
||||
|
||||
Error::SqlExecIntercepted { source, .. } => source.status_code(),
|
||||
|
||||
@@ -26,6 +26,7 @@ mod region_query;
|
||||
pub mod standalone;
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
@@ -471,6 +472,21 @@ impl PrometheusHandler for Instance {
|
||||
.context(ExecuteQuerySnafu)
|
||||
}
|
||||
|
||||
async fn query_label_values(
|
||||
&self,
|
||||
metric: String,
|
||||
label_name: String,
|
||||
matchers: Vec<Matcher>,
|
||||
start: SystemTime,
|
||||
end: SystemTime,
|
||||
ctx: &QueryContextRef,
|
||||
) -> server_error::Result<Vec<String>> {
|
||||
self.handle_query_label_values(metric, label_name, matchers, start, end, ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteQuerySnafu)
|
||||
}
|
||||
|
||||
fn catalog_manager(&self) -> CatalogManagerRef {
|
||||
self.catalog_manager.clone()
|
||||
}
|
||||
|
||||
@@ -133,6 +133,7 @@ impl FrontendBuilder {
|
||||
.context(error::CacheRequiredSnafu {
|
||||
name: TABLE_FLOWNODE_SET_CACHE_NAME,
|
||||
})?;
|
||||
|
||||
let inserter = Arc::new(Inserter::new(
|
||||
self.catalog_manager.clone(),
|
||||
partition_manager.clone(),
|
||||
|
||||
@@ -12,20 +12,26 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::time::SystemTime;
|
||||
|
||||
use catalog::information_schema::TABLES;
|
||||
use client::OutputData;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_NAME;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_recordbatch::util;
|
||||
use common_telemetry::tracing;
|
||||
use datatypes::prelude::Value;
|
||||
use promql_parser::label::Matcher;
|
||||
use promql_parser::label::{Matcher, Matchers};
|
||||
use query::promql;
|
||||
use query::promql::planner::PromPlanner;
|
||||
use servers::prometheus;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
CatalogSnafu, CollectRecordbatchSnafu, ExecLogicalPlanSnafu,
|
||||
PrometheusMetricNamesQueryPlanSnafu, ReadTableSnafu, Result, TableNotFoundSnafu,
|
||||
PrometheusLabelValuesQueryPlanSnafu, PrometheusMetricNamesQueryPlanSnafu, ReadTableSnafu,
|
||||
Result, TableNotFoundSnafu,
|
||||
};
|
||||
use crate::instance::Instance;
|
||||
|
||||
@@ -96,4 +102,77 @@ impl Instance {
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Handles label values query request, returns the values.
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub(crate) async fn handle_query_label_values(
|
||||
&self,
|
||||
metric: String,
|
||||
label_name: String,
|
||||
matchers: Vec<Matcher>,
|
||||
start: SystemTime,
|
||||
end: SystemTime,
|
||||
ctx: &QueryContextRef,
|
||||
) -> Result<Vec<String>> {
|
||||
let table_schema = ctx.current_schema();
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(ctx.current_catalog(), &table_schema, &metric, Some(ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(ctx.current_catalog(), &table_schema, &metric),
|
||||
})?;
|
||||
|
||||
let dataframe = self
|
||||
.query_engine
|
||||
.read_table(table.clone())
|
||||
.with_context(|_| ReadTableSnafu {
|
||||
table_name: format_full_table_name(ctx.current_catalog(), &table_schema, &metric),
|
||||
})?;
|
||||
|
||||
let scan_plan = dataframe.into_logical_plan();
|
||||
let filter_conditions =
|
||||
PromPlanner::matchers_to_expr(Matchers::new(matchers), scan_plan.schema())
|
||||
.context(PrometheusLabelValuesQueryPlanSnafu)?;
|
||||
let logical_plan = promql::label_values::rewrite_label_values_query(
|
||||
table,
|
||||
scan_plan,
|
||||
filter_conditions,
|
||||
label_name,
|
||||
start,
|
||||
end,
|
||||
)
|
||||
.context(PrometheusLabelValuesQueryPlanSnafu)?;
|
||||
|
||||
let results = self
|
||||
.query_engine
|
||||
.execute(logical_plan, ctx.clone())
|
||||
.await
|
||||
.context(ExecLogicalPlanSnafu)?;
|
||||
|
||||
let batches = match results.data {
|
||||
OutputData::Stream(stream) => util::collect(stream)
|
||||
.await
|
||||
.context(CollectRecordbatchSnafu)?,
|
||||
OutputData::RecordBatches(rbs) => rbs.take(),
|
||||
_ => unreachable!("should not happen"),
|
||||
};
|
||||
|
||||
let mut results = Vec::with_capacity(batches.iter().map(|b| b.num_rows()).sum());
|
||||
for batch in batches {
|
||||
// Only one column the results, ensured by `prometheus::label_values_matchers_to_plan`.
|
||||
let names = batch.column(0);
|
||||
|
||||
for i in 0..names.len() {
|
||||
let Value::String(name) = names.get(i) else {
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
results.push(name.into_string());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ prost.workspace = true
|
||||
puffin.workspace = true
|
||||
regex.workspace = true
|
||||
regex-automata.workspace = true
|
||||
roaring = "0.10"
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
868
src/index/src/bitmap.rs
Normal file
868
src/index/src/bitmap.rs
Normal file
@@ -0,0 +1,868 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::io;
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
use common_base::BitVec;
|
||||
/// `BitmapType` enumerates how bitmaps are encoded within the inverted index.
|
||||
pub use greptime_proto::v1::index::BitmapType;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
/// A bitmap representation supporting both BitVec and RoaringBitmap formats.
|
||||
///
|
||||
/// This enum provides unified bitmap operations while allowing efficient storage
|
||||
/// in different formats. The implementation automatically handles type conversions
|
||||
/// when performing operations between different formats.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// Creating a new Roaring bitmap:
|
||||
/// ```
|
||||
/// use bitmap::Bitmap;
|
||||
/// let bitmap = Bitmap::new_roaring();
|
||||
/// assert!(bitmap.is_empty());
|
||||
/// ```
|
||||
///
|
||||
/// Creating a full BitVec bitmap:
|
||||
/// ```
|
||||
/// use bitmap::Bitmap;
|
||||
/// let bitmap = Bitmap::full_bitvec(10);
|
||||
/// assert_eq!(bitmap.count_ones(), 10);
|
||||
/// ```
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Bitmap {
|
||||
Roaring(RoaringBitmap),
|
||||
BitVec(BitVec),
|
||||
}
|
||||
|
||||
impl Bitmap {
|
||||
/// Creates a new empty BitVec-based bitmap.
|
||||
pub fn new_bitvec() -> Self {
|
||||
Bitmap::BitVec(BitVec::EMPTY)
|
||||
}
|
||||
|
||||
/// Creates a new empty RoaringBitmap-based bitmap.
|
||||
pub fn new_roaring() -> Self {
|
||||
Bitmap::Roaring(RoaringBitmap::new())
|
||||
}
|
||||
|
||||
/// Creates a full BitVec-based bitmap with all bits set to 1.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `size` - The number of bits to allocate and set
|
||||
pub fn full_bitvec(size: usize) -> Self {
|
||||
Bitmap::BitVec(BitVec::repeat(true, size))
|
||||
}
|
||||
|
||||
/// Creates a full RoaringBitmap-based bitmap with bits 0..size set to 1.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `size` - The exclusive upper bound for the bit range
|
||||
pub fn full_roaring(size: usize) -> Self {
|
||||
let mut roaring = RoaringBitmap::new();
|
||||
roaring.insert_range(0..size as u32);
|
||||
Bitmap::Roaring(roaring)
|
||||
}
|
||||
|
||||
/// Returns the number of bits set to 1 in the bitmap.
|
||||
pub fn count_ones(&self) -> usize {
|
||||
match self {
|
||||
Bitmap::BitVec(bitvec) => bitvec.count_ones(),
|
||||
Bitmap::Roaring(roaring) => roaring.len() as _,
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the bitmap contains no set bits.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Bitmap::BitVec(bitvec) => bitvec.is_empty(),
|
||||
Bitmap::Roaring(roaring) => roaring.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Inserts a range of bits into the bitmap.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `range` - Inclusive range of bits to set
|
||||
pub fn insert_range(&mut self, range: RangeInclusive<usize>) {
|
||||
match self {
|
||||
Bitmap::BitVec(bitvec) => {
|
||||
if *range.end() >= bitvec.len() {
|
||||
bitvec.resize(range.end() + 1, false);
|
||||
}
|
||||
for i in range {
|
||||
bitvec.set(i, true);
|
||||
}
|
||||
}
|
||||
Bitmap::Roaring(roaring) => {
|
||||
let range = *range.start() as u32..=*range.end() as u32;
|
||||
roaring.insert_range(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes the bitmap into a byte buffer using the specified format.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `serialize_type` - Target format for serialization
|
||||
/// * `writer` - Output writer to write the serialized data
|
||||
pub fn serialize_into(
|
||||
&self,
|
||||
serialize_type: BitmapType,
|
||||
mut writer: impl io::Write,
|
||||
) -> io::Result<()> {
|
||||
match (self, serialize_type) {
|
||||
(Bitmap::BitVec(bitvec), BitmapType::BitVec) => {
|
||||
writer.write_all(bitvec.as_raw_slice())?;
|
||||
}
|
||||
(Bitmap::Roaring(roaring), BitmapType::Roaring) => {
|
||||
roaring.serialize_into(writer)?;
|
||||
}
|
||||
(Bitmap::BitVec(bitvec), BitmapType::Roaring) => {
|
||||
let bitmap = Bitmap::bitvec_to_roaring(bitvec.clone());
|
||||
bitmap.serialize_into(writer)?;
|
||||
}
|
||||
(Bitmap::Roaring(roaring), BitmapType::BitVec) => {
|
||||
let bitvec = Bitmap::roaring_to_bitvec(roaring);
|
||||
writer.write_all(bitvec.as_raw_slice())?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Computes the size of the serialized bitmap in bytes.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `bitmap_type` - Format of data to be serialized
|
||||
pub fn serialized_size(&self, bitmap_type: BitmapType) -> usize {
|
||||
match (self, bitmap_type) {
|
||||
(Bitmap::BitVec(bitvec), BitmapType::BitVec) => bitvec.as_raw_slice().len(),
|
||||
(Bitmap::Roaring(roaring), BitmapType::Roaring) => roaring.serialized_size(),
|
||||
(Bitmap::BitVec(bitvec), BitmapType::Roaring) => {
|
||||
let bitmap = Bitmap::bitvec_to_roaring(bitvec.clone());
|
||||
bitmap.serialized_size()
|
||||
}
|
||||
(Bitmap::Roaring(roaring), BitmapType::BitVec) => {
|
||||
let bitvec = Bitmap::roaring_to_bitvec(roaring);
|
||||
bitvec.as_raw_slice().len()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deserializes a bitmap from a byte buffer.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `buf` - Input buffer containing serialized data
|
||||
/// * `bitmap_type` - Format of the serialized data
|
||||
pub fn deserialize_from(buf: &[u8], bitmap_type: BitmapType) -> std::io::Result<Self> {
|
||||
match bitmap_type {
|
||||
BitmapType::BitVec => {
|
||||
let bitvec = BitVec::from_slice(buf);
|
||||
Ok(Bitmap::BitVec(bitvec))
|
||||
}
|
||||
BitmapType::Roaring => {
|
||||
let roaring = RoaringBitmap::deserialize_from(buf)?;
|
||||
Ok(Bitmap::Roaring(roaring))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the union with another bitmap (in-place).
|
||||
///
|
||||
/// If the other bitmap is a different type, it will be converted to match
|
||||
/// the current bitmap's type.
|
||||
pub fn union(&mut self, other: Self) {
|
||||
if self.is_empty() {
|
||||
*self = other;
|
||||
return;
|
||||
}
|
||||
|
||||
match (self, other) {
|
||||
(Bitmap::BitVec(bitvec1), bitmap) => {
|
||||
let bitvec2 = bitmap.into_bitvec();
|
||||
if bitvec1.len() > bitvec2.len() {
|
||||
*bitvec1 |= bitvec2
|
||||
} else {
|
||||
*bitvec1 = bitvec2 | &*bitvec1;
|
||||
}
|
||||
}
|
||||
(Bitmap::Roaring(roaring1), bitmap) => {
|
||||
let roaring2 = bitmap.into_roaring();
|
||||
*roaring1 |= roaring2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the intersection with another bitmap (in-place).
|
||||
///
|
||||
/// If the other bitmap is a different type, it will be converted to match
|
||||
/// the current bitmap's type.
|
||||
pub fn intersect(&mut self, other: Self) {
|
||||
match (self, other) {
|
||||
(Bitmap::BitVec(bitvec1), bitmap) => {
|
||||
let mut bitvec2 = bitmap.into_bitvec();
|
||||
let len = (bitvec1.len() - bitvec1.trailing_zeros())
|
||||
.min(bitvec2.len() - bitvec2.trailing_zeros());
|
||||
bitvec1.truncate(len);
|
||||
bitvec2.truncate(len);
|
||||
*bitvec1 &= bitvec2;
|
||||
}
|
||||
(Bitmap::Roaring(roaring1), bitmap) => {
|
||||
let roaring2 = bitmap.into_roaring();
|
||||
*roaring1 &= roaring2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over the indices of set bits.
|
||||
pub fn iter_ones(&self) -> Box<dyn Iterator<Item = usize> + '_> {
|
||||
match self {
|
||||
Bitmap::BitVec(bitvec) => Box::new(bitvec.iter_ones()),
|
||||
Bitmap::Roaring(roaring) => Box::new(roaring.iter().map(|x| x as usize)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a bitmap from bytes in LSB0 (least significant bit first) order.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `bytes` - Input bytes in LSB0 order
|
||||
/// * `bitmap_type` - Type of bitmap to create
|
||||
pub fn from_lsb0_bytes(bytes: &[u8], bitmap_type: BitmapType) -> Self {
|
||||
match bitmap_type {
|
||||
BitmapType::BitVec => {
|
||||
let bitvec = BitVec::from_slice(bytes);
|
||||
Bitmap::BitVec(bitvec)
|
||||
}
|
||||
BitmapType::Roaring => {
|
||||
let roaring = RoaringBitmap::from_lsb0_bytes(0, bytes);
|
||||
Bitmap::Roaring(roaring)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes memory usage of the bitmap in bytes.
|
||||
pub fn memory_usage(&self) -> usize {
|
||||
match self {
|
||||
Bitmap::BitVec(bitvec) => bitvec.capacity(),
|
||||
Bitmap::Roaring(roaring) => {
|
||||
let stat = roaring.statistics();
|
||||
(stat.n_bytes_array_containers
|
||||
+ stat.n_bytes_bitset_containers
|
||||
+ stat.n_bytes_run_containers) as usize
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn into_bitvec(self) -> BitVec {
|
||||
match self {
|
||||
Bitmap::BitVec(bitvec) => bitvec,
|
||||
Bitmap::Roaring(roaring) => Self::roaring_to_bitvec(&roaring),
|
||||
}
|
||||
}
|
||||
|
||||
fn into_roaring(self) -> RoaringBitmap {
|
||||
match self {
|
||||
Bitmap::Roaring(roaring) => roaring,
|
||||
Bitmap::BitVec(bitvec) => Self::bitvec_to_roaring(bitvec),
|
||||
}
|
||||
}
|
||||
|
||||
fn roaring_to_bitvec(roaring: &RoaringBitmap) -> BitVec {
|
||||
let max_value = roaring.max().unwrap_or(0);
|
||||
let mut bitvec = BitVec::repeat(false, max_value as usize + 1);
|
||||
for i in roaring {
|
||||
bitvec.set(i as usize, true);
|
||||
}
|
||||
bitvec
|
||||
}
|
||||
|
||||
fn bitvec_to_roaring(mut bitvec: BitVec) -> RoaringBitmap {
|
||||
bitvec.resize(bitvec.capacity(), false);
|
||||
RoaringBitmap::from_lsb0_bytes(0, bitvec.as_raw_slice())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Bitmap {
|
||||
fn default() -> Self {
|
||||
Bitmap::new_roaring()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_full_bitmaps() {
|
||||
let bv = Bitmap::full_bitvec(10);
|
||||
assert_eq!(bv.count_ones(), 10);
|
||||
|
||||
let rb = Bitmap::full_roaring(10);
|
||||
assert_eq!(rb.count_ones(), 10);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialization_roundtrip() {
|
||||
let original = Bitmap::full_roaring(100);
|
||||
let mut buf = Vec::new();
|
||||
|
||||
// Serialize as Roaring
|
||||
original
|
||||
.serialize_into(BitmapType::Roaring, &mut buf)
|
||||
.unwrap();
|
||||
let deserialized = Bitmap::deserialize_from(&buf, BitmapType::Roaring).unwrap();
|
||||
assert_eq!(original, deserialized);
|
||||
|
||||
// Serialize as BitVec
|
||||
buf.clear();
|
||||
original
|
||||
.serialize_into(BitmapType::BitVec, &mut buf)
|
||||
.unwrap();
|
||||
let deserialized = Bitmap::deserialize_from(&buf, BitmapType::BitVec).unwrap();
|
||||
assert_eq!(original.count_ones(), deserialized.count_ones());
|
||||
}
|
||||
|
||||
#[test]
fn test_union_fulls() {
    // Unions `rhs` into `lhs` and reports how many bits end up set.
    let union_count = |mut lhs: Bitmap, rhs: Bitmap| {
        lhs.union(rhs);
        lhs.count_ones()
    };

    // BitVec with BitVec (0-2: 111 and 0-4: 11111), in both orders.
    assert_eq!(union_count(Bitmap::full_bitvec(3), Bitmap::full_bitvec(5)), 5);
    assert_eq!(union_count(Bitmap::full_bitvec(5), Bitmap::full_bitvec(3)), 5);

    // Roaring with Roaring, in both orders.
    assert_eq!(union_count(Bitmap::full_roaring(3), Bitmap::full_roaring(5)), 5);
    assert_eq!(union_count(Bitmap::full_roaring(5), Bitmap::full_roaring(3)), 5);

    // Cross-type unions: every backend/size combination.
    assert_eq!(union_count(Bitmap::full_roaring(5), Bitmap::full_bitvec(3)), 5);
    assert_eq!(union_count(Bitmap::full_bitvec(5), Bitmap::full_roaring(3)), 5);
    assert_eq!(union_count(Bitmap::full_roaring(3), Bitmap::full_bitvec(5)), 5);
    assert_eq!(union_count(Bitmap::full_bitvec(3), Bitmap::full_roaring(5)), 5);
}
|
||||
|
||||
#[test]
fn test_union_bitvec() {
    // Shorthand for a BitVec-backed bitmap built from LSB0 bytes.
    let bv = |bytes: &[u8]| Bitmap::from_lsb0_bytes(bytes, BitmapType::BitVec);
    // Unions `rhs` into `lhs` and hands back the updated left-hand side.
    let union_of = |mut lhs: Bitmap, rhs: Bitmap| {
        lhs.union(rhs);
        lhs
    };

    // Same length.
    assert_eq!(
        union_of(bv(&[0b10101010]), bv(&[0b01010101])),
        bv(&[0b11111111])
    );

    // Different lengths, shorter operand on either side.
    assert_eq!(
        union_of(bv(&[0b10101010]), bv(&[0b01010101, 0b00000001])),
        bv(&[0b11111111, 0b00000001])
    );
    assert_eq!(
        union_of(bv(&[0b10101010, 0b00000001]), bv(&[0b01010101])),
        bv(&[0b11111111, 0b00000001])
    );

    // Empty operands.
    assert!(union_of(Bitmap::new_bitvec(), Bitmap::new_bitvec()).is_empty());
    assert_eq!(
        union_of(Bitmap::new_bitvec(), bv(&[0b01010101])),
        bv(&[0b01010101])
    );
    assert_eq!(
        union_of(bv(&[0b01010101]), Bitmap::new_bitvec()),
        bv(&[0b01010101])
    );

    // Empty combined with full.
    assert_eq!(
        union_of(Bitmap::new_bitvec(), Bitmap::full_bitvec(8)),
        Bitmap::full_bitvec(8)
    );
    assert_eq!(
        union_of(Bitmap::full_bitvec(8), Bitmap::new_bitvec()),
        Bitmap::full_bitvec(8)
    );
}
|
||||
|
||||
#[test]
fn test_union_roaring() {
    // Shorthand for a Roaring-backed bitmap built from LSB0 bytes.
    let rb = |bytes: &[u8]| Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring);
    // Unions `rhs` into `lhs` and hands back the updated left-hand side.
    let union_of = |mut lhs: Bitmap, rhs: Bitmap| {
        lhs.union(rhs);
        lhs
    };

    // Same length.
    assert_eq!(
        union_of(rb(&[0b10101010]), rb(&[0b01010101])),
        rb(&[0b11111111])
    );

    // Different lengths, shorter operand on either side.
    assert_eq!(
        union_of(rb(&[0b10101010]), rb(&[0b01010101, 0b00000001])),
        rb(&[0b11111111, 0b00000001])
    );
    assert_eq!(
        union_of(rb(&[0b10101010, 0b00000001]), rb(&[0b01010101])),
        rb(&[0b11111111, 0b00000001])
    );

    // Empty operands.
    assert!(union_of(Bitmap::new_roaring(), Bitmap::new_roaring()).is_empty());
    assert_eq!(
        union_of(Bitmap::new_roaring(), rb(&[0b01010101])),
        rb(&[0b01010101])
    );
    assert_eq!(
        union_of(rb(&[0b01010101]), Bitmap::new_roaring()),
        rb(&[0b01010101])
    );

    // Empty combined with full bitmaps.
    assert_eq!(
        union_of(Bitmap::new_roaring(), Bitmap::full_roaring(8)),
        Bitmap::full_roaring(8)
    );
    assert_eq!(
        union_of(Bitmap::full_roaring(8), Bitmap::new_roaring()),
        Bitmap::full_roaring(8)
    );
}
|
||||
|
||||
#[test]
fn test_union_mixed() {
    // Shorthands for bitmaps of each backend built from LSB0 bytes.
    let rb = |bytes: &[u8]| Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring);
    let bv = |bytes: &[u8]| Bitmap::from_lsb0_bytes(bytes, BitmapType::BitVec);
    // Unions `rhs` into `lhs` and hands back the updated left-hand side.
    let union_of = |mut lhs: Bitmap, rhs: Bitmap| {
        lhs.union(rhs);
        lhs
    };

    // Complementary patterns across backends.
    assert_eq!(
        union_of(rb(&[0b10101010]), bv(&[0b01010101])),
        rb(&[0b11111111])
    );
    assert_eq!(
        union_of(bv(&[0b10101010]), rb(&[0b01010101])),
        bv(&[0b11111111])
    );

    // Partial pattern combined with a full bitmap of the other backend.
    assert_eq!(
        union_of(rb(&[0b10101010]), Bitmap::full_bitvec(8)),
        Bitmap::full_roaring(8)
    );
    assert_eq!(
        union_of(Bitmap::full_bitvec(8), rb(&[0b10101010])),
        Bitmap::full_bitvec(8)
    );

    // Empty combined with full.
    assert_eq!(
        union_of(Bitmap::new_roaring(), Bitmap::full_bitvec(8)),
        Bitmap::full_bitvec(8)
    );
    assert_eq!(
        union_of(Bitmap::full_bitvec(8), Bitmap::new_roaring()),
        Bitmap::full_bitvec(8)
    );

    // Both operands empty.
    assert!(union_of(Bitmap::new_roaring(), Bitmap::new_bitvec()).is_empty());
    assert!(union_of(Bitmap::new_bitvec(), Bitmap::new_roaring()).is_empty());

    // Empty Roaring combined with a BitVec pattern.
    assert_eq!(
        union_of(Bitmap::new_roaring(), bv(&[0b01010101])),
        bv(&[0b01010101])
    );
    assert_eq!(
        union_of(bv(&[0b01010101]), Bitmap::new_roaring()),
        bv(&[0b01010101])
    );

    // Empty BitVec combined with a Roaring pattern.
    assert_eq!(
        union_of(rb(&[0b01010101]), Bitmap::new_bitvec()),
        rb(&[0b01010101])
    );
    assert_eq!(
        union_of(Bitmap::new_bitvec(), rb(&[0b01010101])),
        rb(&[0b01010101])
    );
}
|
||||
|
||||
#[test]
fn test_intersect_fulls() {
    // Intersects `lhs` with `rhs` and reports how many bits remain set.
    let intersect_count = |mut lhs: Bitmap, rhs: Bitmap| {
        lhs.intersect(rhs);
        lhs.count_ones()
    };

    // BitVec with BitVec (0-2: 111 and 0-4: 11111), in both orders.
    assert_eq!(
        intersect_count(Bitmap::full_bitvec(3), Bitmap::full_bitvec(5)),
        3
    );
    assert_eq!(
        intersect_count(Bitmap::full_bitvec(5), Bitmap::full_bitvec(3)),
        3
    );

    // Roaring with Roaring, in both orders.
    assert_eq!(
        intersect_count(Bitmap::full_roaring(3), Bitmap::full_roaring(5)),
        3
    );
    assert_eq!(
        intersect_count(Bitmap::full_roaring(5), Bitmap::full_roaring(3)),
        3
    );

    // Cross-type intersections: every backend/size combination.
    assert_eq!(
        intersect_count(Bitmap::full_roaring(5), Bitmap::full_bitvec(3)),
        3
    );
    assert_eq!(
        intersect_count(Bitmap::full_bitvec(5), Bitmap::full_roaring(3)),
        3
    );
    assert_eq!(
        intersect_count(Bitmap::full_roaring(3), Bitmap::full_bitvec(5)),
        3
    );
    assert_eq!(
        intersect_count(Bitmap::full_bitvec(3), Bitmap::full_roaring(5)),
        3
    );
}
|
||||
|
||||
#[test]
fn test_intersect_bitvec() {
    // Shorthand for a BitVec-backed bitmap built from LSB0 bytes.
    let bv = |bytes: &[u8]| Bitmap::from_lsb0_bytes(bytes, BitmapType::BitVec);
    // Intersects `lhs` with `rhs` and hands back the updated left-hand side.
    let intersection_of = |mut lhs: Bitmap, rhs: Bitmap| {
        lhs.intersect(rhs);
        lhs
    };

    // Same length.
    assert_eq!(
        intersection_of(bv(&[0b11110000]), bv(&[0b10101010])),
        bv(&[0b10100000])
    );

    // Different lengths, shorter operand on either side.
    assert_eq!(
        intersection_of(bv(&[0b11110000]), bv(&[0b10101010, 0b00000001])),
        bv(&[0b10100000])
    );
    assert_eq!(
        intersection_of(bv(&[0b11110000, 0b00000001]), bv(&[0b10101010])),
        bv(&[0b10100000])
    );

    // Empty operands.
    assert!(intersection_of(Bitmap::new_bitvec(), Bitmap::new_bitvec()).is_empty());
    assert!(intersection_of(Bitmap::new_bitvec(), bv(&[0b10101010])).is_empty());
    assert!(intersection_of(bv(&[0b10101010]), Bitmap::new_bitvec()).is_empty());

    // Empty combined with full.
    assert!(intersection_of(Bitmap::new_bitvec(), Bitmap::full_bitvec(8)).is_empty());
    assert!(intersection_of(Bitmap::full_bitvec(8), Bitmap::new_bitvec()).is_empty());
}
|
||||
|
||||
#[test]
fn test_intersect_roaring() {
    // Shorthand for a Roaring-backed bitmap built from LSB0 bytes.
    let rb = |bytes: &[u8]| Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring);
    // Intersects `lhs` with `rhs` and hands back the updated left-hand side.
    let intersection_of = |mut lhs: Bitmap, rhs: Bitmap| {
        lhs.intersect(rhs);
        lhs
    };

    // Same length.
    assert_eq!(
        intersection_of(rb(&[0b11110000]), rb(&[0b10101010])),
        rb(&[0b10100000])
    );

    // Different lengths, shorter operand on either side.
    assert_eq!(
        intersection_of(rb(&[0b11110000]), rb(&[0b10101010, 0b00000001])),
        rb(&[0b10100000])
    );
    assert_eq!(
        intersection_of(rb(&[0b11110000, 0b00000001]), rb(&[0b10101010])),
        rb(&[0b10100000])
    );

    // Empty operands.
    assert!(intersection_of(Bitmap::new_roaring(), Bitmap::new_roaring()).is_empty());
    assert!(intersection_of(Bitmap::new_roaring(), rb(&[0b10101010])).is_empty());
    assert!(intersection_of(rb(&[0b10101010]), Bitmap::new_roaring()).is_empty());

    // Empty combined with full.
    assert!(intersection_of(Bitmap::new_roaring(), Bitmap::full_roaring(8)).is_empty());
    assert!(intersection_of(Bitmap::full_roaring(8), Bitmap::new_roaring()).is_empty());
}
|
||||
|
||||
#[test]
fn test_intersect_mixed() {
    // Shorthands for bitmaps of each backend built from LSB0 bytes.
    let rb = |bytes: &[u8]| Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring);
    let bv = |bytes: &[u8]| Bitmap::from_lsb0_bytes(bytes, BitmapType::BitVec);
    // Intersects `lhs` with `rhs` and hands back the updated left-hand side.
    let intersection_of = |mut lhs: Bitmap, rhs: Bitmap| {
        lhs.intersect(rhs);
        lhs
    };

    // Overlapping patterns across backends.
    assert_eq!(
        intersection_of(rb(&[0b11110000]), bv(&[0b10101010])),
        rb(&[0b10100000])
    );
    assert_eq!(
        intersection_of(bv(&[0b11110000]), rb(&[0b10101010])),
        bv(&[0b10100000])
    );

    // Partial pattern combined with a full bitmap of the other backend.
    assert_eq!(
        intersection_of(rb(&[0b11110000]), Bitmap::full_bitvec(8)),
        rb(&[0b11110000])
    );
    assert_eq!(
        intersection_of(Bitmap::full_bitvec(8), rb(&[0b11110000])),
        bv(&[0b11110000])
    );

    // Different lengths across backends, longer operand on either side.
    assert_eq!(
        intersection_of(rb(&[0b11110000]), bv(&[0b10101010, 0b00000001])),
        rb(&[0b10100000])
    );
    assert_eq!(
        intersection_of(bv(&[0b11110000, 0b00000001]), rb(&[0b10101010])),
        bv(&[0b10100000])
    );
    assert_eq!(
        intersection_of(rb(&[0b11110000, 0b00000001]), bv(&[0b10101010])),
        rb(&[0b10100000])
    );
    assert_eq!(
        intersection_of(bv(&[0b11110000]), rb(&[0b10101010, 0b00000001])),
        bv(&[0b10100000])
    );

    // Empty combined with full always yields an empty bitmap.
    assert!(intersection_of(Bitmap::new_roaring(), Bitmap::full_bitvec(8)).is_empty());
    assert!(intersection_of(Bitmap::full_bitvec(8), Bitmap::new_roaring()).is_empty());
    assert!(intersection_of(Bitmap::new_bitvec(), Bitmap::full_roaring(8)).is_empty());
    assert!(intersection_of(Bitmap::full_roaring(8), Bitmap::new_bitvec()).is_empty());

    // Empty combined with a partial pattern always yields an empty bitmap.
    assert!(intersection_of(Bitmap::new_roaring(), bv(&[0b10101010])).is_empty());
    assert!(intersection_of(bv(&[0b10101010]), Bitmap::new_roaring()).is_empty());
    assert!(intersection_of(Bitmap::new_bitvec(), rb(&[0b10101010])).is_empty());
    assert!(intersection_of(rb(&[0b10101010]), Bitmap::new_bitvec()).is_empty());
}
|
||||
|
||||
#[test]
fn test_insert_range() {
    // Multi-element range on the BitVec backend.
    let mut bv = Bitmap::new_bitvec();
    bv.insert_range(0..=5);
    assert_eq!(bv.iter_ones().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 5]);

    // Multi-element range on the Roaring backend. The assertion must read
    // `rb`: checking `bv` here would leave the Roaring path untested.
    let mut rb = Bitmap::new_roaring();
    rb.insert_range(0..=5);
    assert_eq!(rb.iter_ones().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 5]);

    // Single-element range that does not start at zero, BitVec backend.
    let mut bv = Bitmap::new_bitvec();
    bv.insert_range(10..=10);
    assert_eq!(bv.iter_ones().collect::<Vec<_>>(), vec![10]);

    // Single-element range that does not start at zero, Roaring backend.
    let mut rb = Bitmap::new_roaring();
    rb.insert_range(10..=10);
    assert_eq!(rb.iter_ones().collect::<Vec<_>>(), vec![10]);
}
|
||||
}
|
||||
@@ -17,6 +17,7 @@ pub mod sort_create;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::bitmap::BitmapType;
|
||||
use crate::inverted_index::error::Result;
|
||||
use crate::inverted_index::format::writer::InvertedIndexWriter;
|
||||
use crate::BytesRef;
|
||||
@@ -53,5 +54,9 @@ pub trait InvertedIndexCreator: Send {
|
||||
|
||||
/// Finalizes the index creation process, ensuring all data is properly indexed and stored
|
||||
/// in the provided writer
|
||||
async fn finish(&mut self, writer: &mut dyn InvertedIndexWriter) -> Result<()>;
|
||||
async fn finish(
|
||||
&mut self,
|
||||
writer: &mut dyn InvertedIndexWriter,
|
||||
bitmap_type: BitmapType,
|
||||
) -> Result<()>;
|
||||
}
|
||||
|
||||
@@ -17,22 +17,23 @@ mod intermediate_rw;
|
||||
mod merge_stream;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::BitVec;
|
||||
use futures::Stream;
|
||||
|
||||
use crate::bitmap::Bitmap;
|
||||
use crate::inverted_index::error::Result;
|
||||
use crate::inverted_index::format::writer::ValueStream;
|
||||
use crate::{Bytes, BytesRef};
|
||||
|
||||
/// A stream of sorted values along with their associated bitmap
|
||||
pub type SortedStream = Box<dyn Stream<Item = Result<(Bytes, BitVec)>> + Send + Unpin>;
|
||||
pub type SortedStream = Box<dyn Stream<Item = Result<(Bytes, Bitmap)>> + Send + Unpin>;
|
||||
|
||||
/// Output of a sorting operation, encapsulating a bitmap for null values and a stream of sorted items
|
||||
pub struct SortOutput {
|
||||
/// Bitmap indicating which segments have null values
|
||||
pub segment_null_bitmap: BitVec,
|
||||
pub segment_null_bitmap: Bitmap,
|
||||
|
||||
/// Stream of sorted items
|
||||
pub sorted_stream: SortedStream,
|
||||
pub sorted_stream: ValueStream,
|
||||
|
||||
/// Total number of rows in the sorted data
|
||||
pub total_row_count: usize,
|
||||
|
||||
@@ -20,11 +20,11 @@ use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::BitVec;
|
||||
use common_telemetry::{debug, error};
|
||||
use futures::stream;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::bitmap::Bitmap;
|
||||
use crate::external_provider::ExternalTempFileProvider;
|
||||
use crate::inverted_index::create::sort::intermediate_rw::{
|
||||
IntermediateReader, IntermediateWriter,
|
||||
@@ -45,18 +45,10 @@ pub struct ExternalSorter {
|
||||
temp_file_provider: Arc<dyn ExternalTempFileProvider>,
|
||||
|
||||
/// Bitmap indicating which segments have null values
|
||||
segment_null_bitmap: BitVec,
|
||||
segment_null_bitmap: Bitmap,
|
||||
|
||||
/// In-memory buffer to hold values and their corresponding bitmaps until memory threshold is exceeded
|
||||
values_buffer: BTreeMap<Bytes, BitVec>,
|
||||
|
||||
/// Count of rows in the last dumped buffer, used to streamline memory usage of `values_buffer`.
|
||||
///
|
||||
/// After data is dumped to external files, `last_dump_row_count` is updated to reflect the new starting point
|
||||
/// for `BitVec` indexing. This means each `BitVec` in `values_buffer` thereafter encodes positions relative to
|
||||
/// this count, not from 0. This mechanism effectively shrinks the memory footprint of each `BitVec`, helping manage
|
||||
/// memory use more efficiently by focusing only on newly ingested data post-dump.
|
||||
last_dump_row_count: usize,
|
||||
values_buffer: BTreeMap<Bytes, (Bitmap, usize)>,
|
||||
|
||||
/// Count of all rows ingested so far
|
||||
total_row_count: usize,
|
||||
@@ -93,14 +85,14 @@ impl Sorter for ExternalSorter {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let segment_index_range = self.segment_index_range(n, value.is_none());
|
||||
let segment_index_range = self.segment_index_range(n);
|
||||
self.total_row_count += n;
|
||||
|
||||
if let Some(value) = value {
|
||||
let memory_diff = self.push_not_null(value, segment_index_range);
|
||||
self.may_dump_buffer(memory_diff).await
|
||||
} else {
|
||||
set_bits(&mut self.segment_null_bitmap, segment_index_range);
|
||||
self.segment_null_bitmap.insert_range(segment_index_range);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -117,15 +109,10 @@ impl Sorter for ExternalSorter {
|
||||
// TODO(zhongzc): k-way merge instead of 2-way merge
|
||||
|
||||
let mut tree_nodes: VecDeque<SortedStream> = VecDeque::with_capacity(readers.len() + 1);
|
||||
let leading_zeros = self.last_dump_row_count / self.segment_row_count;
|
||||
tree_nodes.push_back(Box::new(stream::iter(
|
||||
mem::take(&mut self.values_buffer)
|
||||
.into_iter()
|
||||
.map(move |(value, mut bitmap)| {
|
||||
bitmap.resize(bitmap.len() + leading_zeros, false);
|
||||
bitmap.shift_right(leading_zeros);
|
||||
Ok((value, bitmap))
|
||||
}),
|
||||
.map(|(value, (bitmap, _))| Ok((value, bitmap))),
|
||||
)));
|
||||
for (_, reader) in readers {
|
||||
tree_nodes.push_back(IntermediateReader::new(reader).into_stream().await?);
|
||||
@@ -161,11 +148,10 @@ impl ExternalSorter {
|
||||
index_name,
|
||||
temp_file_provider,
|
||||
|
||||
segment_null_bitmap: BitVec::new(),
|
||||
segment_null_bitmap: Bitmap::new_bitvec(), // bitvec is more efficient for many null values
|
||||
values_buffer: BTreeMap::new(),
|
||||
|
||||
total_row_count: 0,
|
||||
last_dump_row_count: 0,
|
||||
segment_row_count,
|
||||
|
||||
current_memory_usage: 0,
|
||||
@@ -195,7 +181,7 @@ impl ExternalSorter {
|
||||
}
|
||||
|
||||
/// Pushes the non-null values to the values buffer and sets the bits within
|
||||
/// the specified range in the given BitVec to true.
|
||||
/// the specified range in the given bitmap to true.
|
||||
/// Returns the memory usage difference of the buffer after the operation.
|
||||
fn push_not_null(
|
||||
&mut self,
|
||||
@@ -203,20 +189,23 @@ impl ExternalSorter {
|
||||
segment_index_range: RangeInclusive<usize>,
|
||||
) -> usize {
|
||||
match self.values_buffer.get_mut(value) {
|
||||
Some(bitmap) => {
|
||||
let old_len = bitmap.as_raw_slice().len();
|
||||
set_bits(bitmap, segment_index_range);
|
||||
Some((bitmap, mem_usage)) => {
|
||||
bitmap.insert_range(segment_index_range);
|
||||
let new_usage = bitmap.memory_usage() + value.len();
|
||||
let diff = new_usage - *mem_usage;
|
||||
*mem_usage = new_usage;
|
||||
|
||||
bitmap.as_raw_slice().len() - old_len
|
||||
diff
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = BitVec::default();
|
||||
set_bits(&mut bitmap, segment_index_range);
|
||||
let mut bitmap = Bitmap::new_roaring();
|
||||
bitmap.insert_range(segment_index_range);
|
||||
|
||||
let mem_diff = bitmap.as_raw_slice().len() + value.len();
|
||||
self.values_buffer.insert(value.to_vec(), bitmap);
|
||||
let mem_usage = bitmap.memory_usage() + value.len();
|
||||
self.values_buffer
|
||||
.insert(value.to_vec(), (bitmap, mem_usage));
|
||||
|
||||
mem_diff
|
||||
mem_usage
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -257,12 +246,8 @@ impl ExternalSorter {
|
||||
.fetch_sub(memory_usage, Ordering::Relaxed);
|
||||
self.current_memory_usage = 0;
|
||||
|
||||
let bitmap_leading_zeros = self.last_dump_row_count / self.segment_row_count;
|
||||
self.last_dump_row_count =
|
||||
self.total_row_count - self.total_row_count % self.segment_row_count; // align to segment
|
||||
|
||||
let entries = values.len();
|
||||
IntermediateWriter::new(writer).write_all(values, bitmap_leading_zeros as _).await.inspect(|_|
|
||||
IntermediateWriter::new(writer).write_all(values.into_iter().map(|(k, (b, _))| (k, b))).await.inspect(|_|
|
||||
debug!("Dumped {entries} entries ({memory_usage} bytes) to intermediate file {file_id} for index {index_name}")
|
||||
).inspect_err(|e|
|
||||
error!(e; "Failed to dump {entries} entries to intermediate file {file_id} for index {index_name}")
|
||||
@@ -271,13 +256,8 @@ impl ExternalSorter {
|
||||
|
||||
/// Determines the segment index range for the row index range
|
||||
/// `[row_begin, row_begin + n - 1]`
|
||||
fn segment_index_range(&self, n: usize, is_null: bool) -> RangeInclusive<usize> {
|
||||
let row_begin = if is_null {
|
||||
self.total_row_count
|
||||
} else {
|
||||
self.total_row_count - self.last_dump_row_count
|
||||
};
|
||||
|
||||
fn segment_index_range(&self, n: usize) -> RangeInclusive<usize> {
|
||||
let row_begin = self.total_row_count;
|
||||
let start = self.segment_index(row_begin);
|
||||
let end = self.segment_index(row_begin + n - 1);
|
||||
start..=end
|
||||
@@ -289,16 +269,6 @@ impl ExternalSorter {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the bits within the specified range in the given `BitVec` to true
|
||||
fn set_bits(bitmap: &mut BitVec, index_range: RangeInclusive<usize>) {
|
||||
if *index_range.end() >= bitmap.len() {
|
||||
bitmap.resize(index_range.end() + 1, false);
|
||||
}
|
||||
for index in index_range {
|
||||
bitmap.set(index, true);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
@@ -330,7 +300,7 @@ mod tests {
|
||||
move |index_name, file_id| {
|
||||
assert_eq!(index_name, "test");
|
||||
let mut files = files.lock().unwrap();
|
||||
let (writer, reader) = duplex(8 * 1024);
|
||||
let (writer, reader) = duplex(1024 * 1024);
|
||||
files.insert(file_id.to_string(), Box::new(reader.compat()));
|
||||
Ok(Box::new(writer.compat_write()))
|
||||
}
|
||||
|
||||
@@ -19,29 +19,24 @@
|
||||
//! The serialization format is as follows:
|
||||
//!
|
||||
//! ```text
|
||||
//! [magic][bitmap leading zeros][item][item]...[item]
|
||||
//! [4] [4] [?]
|
||||
//! [magic][item][item]...[item]
|
||||
//! [4] [?]
|
||||
//!
|
||||
//! Each [item] is structured as:
|
||||
//! [value len][value][bitmap len][bitmap]
|
||||
//! [8] [?] [8] [?]
|
||||
//! ```
|
||||
//!
|
||||
//! The format starts with a 4-byte magic identifier, followed by a 4-byte
|
||||
//! bitmap leading zeros count, indicating how many leading zeros are in the
|
||||
//! fixed-size region of the bitmap. Following that, each item represents
|
||||
//! a value and its associated bitmap, serialized with their lengths for
|
||||
//! Each item represents a value and its associated bitmap, serialized with their lengths for
|
||||
//! easier deserialization.
|
||||
|
||||
mod codec_v1;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use asynchronous_codec::{FramedRead, FramedWrite};
|
||||
use common_base::BitVec;
|
||||
use futures::{stream, AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, StreamExt};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::bitmap::{Bitmap, BitmapType};
|
||||
use crate::inverted_index::create::sort::SortedStream;
|
||||
use crate::inverted_index::error::{
|
||||
CloseSnafu, FlushSnafu, ReadSnafu, Result, UnknownIntermediateCodecMagicSnafu, WriteSnafu,
|
||||
@@ -62,12 +57,13 @@ impl<W: AsyncWrite + Unpin> IntermediateWriter<W> {
|
||||
/// Serializes and writes all provided values to the wrapped writer
|
||||
pub async fn write_all(
|
||||
mut self,
|
||||
values: BTreeMap<Bytes, BitVec>,
|
||||
bitmap_leading_zeros: u32,
|
||||
values: impl IntoIterator<Item = (Bytes, Bitmap)>,
|
||||
) -> Result<()> {
|
||||
let (codec_magic, encoder) = (
|
||||
codec_v1::CODEC_V1_MAGIC,
|
||||
codec_v1::IntermediateItemEncoderV1,
|
||||
codec_v1::IntermediateItemEncoderV1 {
|
||||
bitmap_type: BitmapType::Roaring,
|
||||
},
|
||||
);
|
||||
|
||||
self.writer
|
||||
@@ -75,11 +71,6 @@ impl<W: AsyncWrite + Unpin> IntermediateWriter<W> {
|
||||
.await
|
||||
.context(WriteSnafu)?;
|
||||
|
||||
self.writer
|
||||
.write_all(&bitmap_leading_zeros.to_be_bytes())
|
||||
.await
|
||||
.context(WriteSnafu)?;
|
||||
|
||||
let value_stream = stream::iter(values.into_iter().map(Ok));
|
||||
let frame_write = FramedWrite::new(&mut self.writer, encoder);
|
||||
// `forward()` will flush and close the writer when the stream ends
|
||||
@@ -112,17 +103,9 @@ impl<R: AsyncRead + Unpin + Send + 'static> IntermediateReader<R> {
|
||||
.context(ReadSnafu)?;
|
||||
|
||||
let decoder = match &magic {
|
||||
codec_v1::CODEC_V1_MAGIC => {
|
||||
let bitmap_leading_zeros = {
|
||||
let mut buf = [0u8; 4];
|
||||
self.reader.read_exact(&mut buf).await.context(ReadSnafu)?;
|
||||
u32::from_be_bytes(buf)
|
||||
};
|
||||
|
||||
codec_v1::IntermediateItemDecoderV1 {
|
||||
bitmap_leading_zeros,
|
||||
}
|
||||
}
|
||||
codec_v1::CODEC_V1_MAGIC => codec_v1::IntermediateItemDecoderV1 {
|
||||
bitmap_type: BitmapType::Roaring,
|
||||
},
|
||||
_ => return UnknownIntermediateCodecMagicSnafu { magic }.fail(),
|
||||
};
|
||||
|
||||
@@ -132,6 +115,7 @@ impl<R: AsyncRead + Unpin + Send + 'static> IntermediateReader<R> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::{Seek, SeekFrom};
|
||||
|
||||
use futures::io::{AllowStdIo, Cursor};
|
||||
@@ -140,6 +124,10 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::inverted_index::error::Error;
|
||||
|
||||
fn bitmap(bytes: &[u8]) -> Bitmap {
|
||||
Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_intermediate_read_write_basic() {
|
||||
let file_r = tempfile().unwrap();
|
||||
@@ -148,12 +136,12 @@ mod tests {
|
||||
let buf_w = AllowStdIo::new(file_w);
|
||||
|
||||
let values = BTreeMap::from_iter([
|
||||
(Bytes::from("a"), BitVec::from_slice(&[0b10101010])),
|
||||
(Bytes::from("b"), BitVec::from_slice(&[0b01010101])),
|
||||
(Bytes::from("a"), bitmap(&[0b10101010])),
|
||||
(Bytes::from("b"), bitmap(&[0b01010101])),
|
||||
]);
|
||||
|
||||
let writer = IntermediateWriter::new(buf_w);
|
||||
writer.write_all(values.clone(), 0).await.unwrap();
|
||||
writer.write_all(values.clone()).await.unwrap();
|
||||
// reset the handle
|
||||
buf_r.seek(SeekFrom::Start(0)).unwrap();
|
||||
|
||||
@@ -161,48 +149,9 @@ mod tests {
|
||||
let mut stream = reader.into_stream().await.unwrap();
|
||||
|
||||
let a = stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(a, (Bytes::from("a"), BitVec::from_slice(&[0b10101010])));
|
||||
assert_eq!(a, (Bytes::from("a"), bitmap(&[0b10101010])));
|
||||
let b = stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(b, (Bytes::from("b"), BitVec::from_slice(&[0b01010101])));
|
||||
assert!(stream.next().await.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_intermediate_read_write_with_prefix_zeros() {
|
||||
let file_r = tempfile().unwrap();
|
||||
let file_w = file_r.try_clone().unwrap();
|
||||
let mut buf_r = AllowStdIo::new(file_r);
|
||||
let buf_w = AllowStdIo::new(file_w);
|
||||
|
||||
let values = BTreeMap::from_iter([
|
||||
(Bytes::from("a"), BitVec::from_slice(&[0b10101010])),
|
||||
(Bytes::from("b"), BitVec::from_slice(&[0b01010101])),
|
||||
]);
|
||||
|
||||
let writer = IntermediateWriter::new(buf_w);
|
||||
writer.write_all(values.clone(), 8).await.unwrap();
|
||||
// reset the handle
|
||||
buf_r.seek(SeekFrom::Start(0)).unwrap();
|
||||
|
||||
let reader = IntermediateReader::new(buf_r);
|
||||
let mut stream = reader.into_stream().await.unwrap();
|
||||
|
||||
let a = stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(
|
||||
a,
|
||||
(
|
||||
Bytes::from("a"),
|
||||
BitVec::from_slice(&[0b00000000, 0b10101010])
|
||||
)
|
||||
);
|
||||
let b = stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(
|
||||
b,
|
||||
(
|
||||
Bytes::from("b"),
|
||||
BitVec::from_slice(&[0b00000000, 0b01010101])
|
||||
)
|
||||
);
|
||||
assert_eq!(b, (Bytes::from("b"), bitmap(&[0b01010101])));
|
||||
assert!(stream.next().await.is_none());
|
||||
}
|
||||
|
||||
@@ -213,7 +162,7 @@ mod tests {
|
||||
let values = BTreeMap::new();
|
||||
|
||||
let writer = IntermediateWriter::new(&mut buf);
|
||||
writer.write_all(values.clone(), 0).await.unwrap();
|
||||
writer.write_all(values.clone()).await.unwrap();
|
||||
|
||||
let reader = IntermediateReader::new(Cursor::new(buf));
|
||||
let mut stream = reader.into_stream().await.unwrap();
|
||||
|
||||
@@ -16,9 +16,10 @@ use std::io;
|
||||
|
||||
use asynchronous_codec::{BytesMut, Decoder, Encoder};
|
||||
use bytes::{Buf, BufMut};
|
||||
use common_base::BitVec;
|
||||
use greptime_proto::v1::index::BitmapType;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::bitmap::Bitmap;
|
||||
use crate::inverted_index::error::{CommonIoSnafu, Error, Result};
|
||||
use crate::Bytes;
|
||||
|
||||
@@ -28,37 +29,42 @@ const U64_LENGTH: usize = std::mem::size_of::<u64>();
|
||||
pub const CODEC_V1_MAGIC: &[u8; 4] = b"im01";
|
||||
|
||||
/// Serializes items of external sorting intermediate files.
|
||||
pub struct IntermediateItemEncoderV1;
|
||||
pub struct IntermediateItemEncoderV1 {
|
||||
pub bitmap_type: BitmapType,
|
||||
}
|
||||
|
||||
/// [`FramedWrite`] requires the [`Encoder`] trait to be implemented.
|
||||
impl Encoder for IntermediateItemEncoderV1 {
|
||||
type Item<'a> = (Bytes, BitVec);
|
||||
type Item<'a> = (Bytes, Bitmap);
|
||||
type Error = Error;
|
||||
|
||||
fn encode(&mut self, item: (Bytes, BitVec), dst: &mut BytesMut) -> Result<()> {
|
||||
fn encode(&mut self, item: (Bytes, Bitmap), dst: &mut BytesMut) -> Result<()> {
|
||||
let value_bytes = item.0;
|
||||
let bitmap_bytes = item.1.into_vec();
|
||||
let bitmap_size = item.1.serialized_size(self.bitmap_type);
|
||||
|
||||
dst.reserve(U64_LENGTH * 2 + value_bytes.len() + bitmap_bytes.len());
|
||||
dst.reserve(U64_LENGTH * 2 + value_bytes.len() + bitmap_size);
|
||||
dst.put_u64_le(value_bytes.len() as u64);
|
||||
dst.extend_from_slice(&value_bytes);
|
||||
dst.put_u64_le(bitmap_bytes.len() as u64);
|
||||
dst.extend_from_slice(&bitmap_bytes);
|
||||
dst.put_u64_le(bitmap_size as u64);
|
||||
item.1
|
||||
.serialize_into(self.bitmap_type, &mut dst.writer())
|
||||
.context(CommonIoSnafu)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Deserializes items of external sorting intermediate files.
|
||||
pub struct IntermediateItemDecoderV1 {
|
||||
pub(crate) bitmap_leading_zeros: u32,
|
||||
pub bitmap_type: BitmapType,
|
||||
}
|
||||
|
||||
/// [`FramedRead`] requires the [`Decoder`] trait to be implemented.
|
||||
impl Decoder for IntermediateItemDecoderV1 {
|
||||
type Item = (Bytes, BitVec);
|
||||
type Item = (Bytes, Bitmap);
|
||||
type Error = Error;
|
||||
|
||||
/// Decodes the `src` into `(Bytes, BitVec)`. Returns `None` if
|
||||
/// Decodes the `src` into `(Bytes, RoaringBitmap)`. Returns `None` if
|
||||
/// the `src` does not contain enough data for a complete item.
|
||||
///
|
||||
/// Only after successful decoding, the `src` is advanced. Otherwise,
|
||||
@@ -92,8 +98,8 @@ impl Decoder for IntermediateItemDecoderV1 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut bitmap = BitVec::repeat(false, self.bitmap_leading_zeros as _);
|
||||
bitmap.extend_from_raw_slice(&buf[..bitmap_len]);
|
||||
let bitmap = Bitmap::deserialize_from(&buf[..bitmap_len], self.bitmap_type)
|
||||
.context(CommonIoSnafu)?;
|
||||
|
||||
let item = (value_bytes.to_vec(), bitmap);
|
||||
|
||||
@@ -113,25 +119,29 @@ impl From<io::Error> for Error {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::bit_vec::prelude::{bitvec, Lsb0};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn bitmap(bytes: &[u8]) -> Bitmap {
|
||||
Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_intermediate_codec_basic() {
|
||||
let mut encoder = IntermediateItemEncoderV1;
|
||||
let mut encoder = IntermediateItemEncoderV1 {
|
||||
bitmap_type: BitmapType::Roaring,
|
||||
};
|
||||
let mut buf = BytesMut::new();
|
||||
|
||||
let item = (b"hello".to_vec(), BitVec::from_slice(&[0b10101010]));
|
||||
let item = (b"hello".to_vec(), bitmap(&[0b10101010]));
|
||||
encoder.encode(item.clone(), &mut buf).unwrap();
|
||||
|
||||
let mut decoder = IntermediateItemDecoderV1 {
|
||||
bitmap_leading_zeros: 0,
|
||||
bitmap_type: BitmapType::Roaring,
|
||||
};
|
||||
assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item);
|
||||
assert_eq!(decoder.decode(&mut buf).unwrap(), None);
|
||||
|
||||
let item1 = (b"world".to_vec(), BitVec::from_slice(&[0b01010101]));
|
||||
let item1 = (b"world".to_vec(), bitmap(&[0b01010101]));
|
||||
encoder.encode(item.clone(), &mut buf).unwrap();
|
||||
encoder.encode(item1.clone(), &mut buf).unwrap();
|
||||
assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item);
|
||||
@@ -142,14 +152,16 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_intermediate_codec_empty_item() {
|
||||
let mut encoder = IntermediateItemEncoderV1;
|
||||
let mut encoder = IntermediateItemEncoderV1 {
|
||||
bitmap_type: BitmapType::Roaring,
|
||||
};
|
||||
let mut buf = BytesMut::new();
|
||||
|
||||
let item = (b"".to_vec(), BitVec::from_slice(&[]));
|
||||
let item = (b"".to_vec(), bitmap(&[]));
|
||||
encoder.encode(item.clone(), &mut buf).unwrap();
|
||||
|
||||
let mut decoder = IntermediateItemDecoderV1 {
|
||||
bitmap_leading_zeros: 0,
|
||||
bitmap_type: BitmapType::Roaring,
|
||||
};
|
||||
assert_eq!(decoder.decode(&mut buf).unwrap().unwrap(), item);
|
||||
assert_eq!(decoder.decode(&mut buf).unwrap(), None);
|
||||
@@ -158,17 +170,19 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_intermediate_codec_partial() {
|
||||
let mut encoder = IntermediateItemEncoderV1;
|
||||
let mut encoder = IntermediateItemEncoderV1 {
|
||||
bitmap_type: BitmapType::Roaring,
|
||||
};
|
||||
let mut buf = BytesMut::new();
|
||||
|
||||
let item = (b"hello".to_vec(), BitVec::from_slice(&[0b10101010]));
|
||||
let item = (b"hello".to_vec(), bitmap(&[0b10101010]));
|
||||
encoder.encode(item.clone(), &mut buf).unwrap();
|
||||
|
||||
let partial_length = U64_LENGTH + 3;
|
||||
let mut partial_bytes = buf.split_to(partial_length);
|
||||
|
||||
let mut decoder = IntermediateItemDecoderV1 {
|
||||
bitmap_leading_zeros: 0,
|
||||
bitmap_type: BitmapType::Roaring,
|
||||
};
|
||||
assert_eq!(decoder.decode(&mut partial_bytes).unwrap(), None); // not enough data
|
||||
partial_bytes.extend_from_slice(&buf[..]);
|
||||
@@ -176,25 +190,4 @@ mod tests {
|
||||
assert_eq!(decoder.decode(&mut partial_bytes).unwrap(), None);
|
||||
assert!(partial_bytes.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_intermediate_codec_prefix_zeros() {
|
||||
let mut encoder = IntermediateItemEncoderV1;
|
||||
let mut buf = BytesMut::new();
|
||||
|
||||
let item = (b"hello".to_vec(), bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]);
|
||||
encoder.encode(item.clone(), &mut buf).unwrap();
|
||||
|
||||
let mut decoder = IntermediateItemDecoderV1 {
|
||||
bitmap_leading_zeros: 3,
|
||||
};
|
||||
let decoded_item = decoder.decode(&mut buf).unwrap().unwrap();
|
||||
assert_eq!(decoded_item.0, b"hello");
|
||||
assert_eq!(
|
||||
decoded_item.1,
|
||||
bitvec![u8, Lsb0; 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0]
|
||||
);
|
||||
assert_eq!(decoder.decode(&mut buf).unwrap(), None);
|
||||
assert!(buf.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,10 +16,10 @@ use std::cmp::Ordering;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use common_base::BitVec;
|
||||
use futures::{ready, Stream, StreamExt};
|
||||
use pin_project::pin_project;
|
||||
|
||||
use crate::bitmap::Bitmap;
|
||||
use crate::inverted_index::create::sort::SortedStream;
|
||||
use crate::inverted_index::error::Result;
|
||||
use crate::Bytes;
|
||||
@@ -28,10 +28,10 @@ use crate::Bytes;
|
||||
#[pin_project]
|
||||
pub struct MergeSortedStream {
|
||||
stream1: Option<SortedStream>,
|
||||
peek1: Option<(Bytes, BitVec)>,
|
||||
peek1: Option<(Bytes, Bitmap)>,
|
||||
|
||||
stream2: Option<SortedStream>,
|
||||
peek2: Option<(Bytes, BitVec)>,
|
||||
peek2: Option<(Bytes, Bitmap)>,
|
||||
}
|
||||
|
||||
impl MergeSortedStream {
|
||||
@@ -49,7 +49,7 @@ impl MergeSortedStream {
|
||||
}
|
||||
|
||||
impl Stream for MergeSortedStream {
|
||||
type Item = Result<(Bytes, BitVec)>;
|
||||
type Item = Result<(Bytes, Bitmap)>;
|
||||
|
||||
/// Polls both streams and returns the next item from the stream that has the smaller next item.
|
||||
/// If both streams have the same next item, the bitmaps are unioned together.
|
||||
@@ -89,77 +89,77 @@ impl Stream for MergeSortedStream {
|
||||
}
|
||||
|
||||
/// Merges two bitmaps by bit-wise OR'ing them together, preserving all bits from both
|
||||
fn merge_bitmaps(bitmap1: BitVec, bitmap2: BitVec) -> BitVec {
|
||||
// make sure longer bitmap is on the left to avoid truncation
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
if bitmap1.len() > bitmap2.len() {
|
||||
bitmap1 | bitmap2
|
||||
} else {
|
||||
bitmap2 | bitmap1
|
||||
}
|
||||
fn merge_bitmaps(mut bitmap1: Bitmap, bitmap2: Bitmap) -> Bitmap {
|
||||
bitmap1.union(bitmap2);
|
||||
bitmap1
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use futures::stream;
|
||||
use greptime_proto::v1::index::BitmapType;
|
||||
|
||||
use super::*;
|
||||
use crate::inverted_index::error::Error;
|
||||
|
||||
fn sorted_stream_from_vec(vec: Vec<(Bytes, BitVec)>) -> SortedStream {
|
||||
fn bitmap(bytes: &[u8]) -> Bitmap {
|
||||
Bitmap::from_lsb0_bytes(bytes, BitmapType::Roaring)
|
||||
}
|
||||
|
||||
fn sorted_stream_from_vec(vec: Vec<(Bytes, Bitmap)>) -> SortedStream {
|
||||
Box::new(stream::iter(vec.into_iter().map(Ok::<_, Error>)))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_sorted_stream_non_overlapping() {
|
||||
let stream1 = sorted_stream_from_vec(vec![
|
||||
(Bytes::from("apple"), BitVec::from_slice(&[0b10101010])),
|
||||
(Bytes::from("orange"), BitVec::from_slice(&[0b01010101])),
|
||||
(Bytes::from("apple"), bitmap(&[0b10101010])),
|
||||
(Bytes::from("orange"), bitmap(&[0b01010101])),
|
||||
]);
|
||||
let stream2 = sorted_stream_from_vec(vec![
|
||||
(Bytes::from("banana"), BitVec::from_slice(&[0b10101010])),
|
||||
(Bytes::from("peach"), BitVec::from_slice(&[0b01010101])),
|
||||
(Bytes::from("banana"), bitmap(&[0b10101010])),
|
||||
(Bytes::from("peach"), bitmap(&[0b01010101])),
|
||||
]);
|
||||
|
||||
let mut merged_stream = MergeSortedStream::merge(stream1, stream2);
|
||||
|
||||
let item = merged_stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(item.0, Bytes::from("apple"));
|
||||
assert_eq!(item.1, BitVec::from_slice(&[0b10101010]));
|
||||
assert_eq!(item.1, bitmap(&[0b10101010]));
|
||||
let item = merged_stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(item.0, Bytes::from("banana"));
|
||||
assert_eq!(item.1, BitVec::from_slice(&[0b10101010]));
|
||||
assert_eq!(item.1, bitmap(&[0b10101010]));
|
||||
let item = merged_stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(item.0, Bytes::from("orange"));
|
||||
assert_eq!(item.1, BitVec::from_slice(&[0b01010101]));
|
||||
assert_eq!(item.1, bitmap(&[0b01010101]));
|
||||
let item = merged_stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(item.0, Bytes::from("peach"));
|
||||
assert_eq!(item.1, BitVec::from_slice(&[0b01010101]));
|
||||
assert_eq!(item.1, bitmap(&[0b01010101]));
|
||||
assert!(merged_stream.next().await.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_sorted_stream_overlapping() {
|
||||
let stream1 = sorted_stream_from_vec(vec![
|
||||
(Bytes::from("apple"), BitVec::from_slice(&[0b10101010])),
|
||||
(Bytes::from("orange"), BitVec::from_slice(&[0b10101010])),
|
||||
(Bytes::from("apple"), bitmap(&[0b10101010])),
|
||||
(Bytes::from("orange"), bitmap(&[0b10101010])),
|
||||
]);
|
||||
let stream2 = sorted_stream_from_vec(vec![
|
||||
(Bytes::from("apple"), BitVec::from_slice(&[0b01010101])),
|
||||
(Bytes::from("peach"), BitVec::from_slice(&[0b01010101])),
|
||||
(Bytes::from("apple"), bitmap(&[0b01010101])),
|
||||
(Bytes::from("peach"), bitmap(&[0b01010101])),
|
||||
]);
|
||||
|
||||
let mut merged_stream = MergeSortedStream::merge(stream1, stream2);
|
||||
|
||||
let item = merged_stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(item.0, Bytes::from("apple"));
|
||||
assert_eq!(item.1, BitVec::from_slice(&[0b11111111]));
|
||||
assert_eq!(item.1, bitmap(&[0b11111111]));
|
||||
let item = merged_stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(item.0, Bytes::from("orange"));
|
||||
assert_eq!(item.1, BitVec::from_slice(&[0b10101010]));
|
||||
assert_eq!(item.1, bitmap(&[0b10101010]));
|
||||
let item = merged_stream.next().await.unwrap().unwrap();
|
||||
assert_eq!(item.0, Bytes::from("peach"));
|
||||
assert_eq!(item.1, BitVec::from_slice(&[0b01010101]));
|
||||
assert_eq!(item.1, bitmap(&[0b01010101]));
|
||||
assert!(merged_stream.next().await.is_none());
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ use std::num::NonZeroUsize;
|
||||
use async_trait::async_trait;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::bitmap::BitmapType;
|
||||
use crate::inverted_index::create::sort::{SortOutput, Sorter};
|
||||
use crate::inverted_index::create::InvertedIndexCreator;
|
||||
use crate::inverted_index::error::{InconsistentRowCountSnafu, Result};
|
||||
@@ -68,7 +69,11 @@ impl InvertedIndexCreator for SortIndexCreator {
|
||||
}
|
||||
|
||||
/// Finalizes the sorting for all indexes and writes them using the inverted index writer
|
||||
async fn finish(&mut self, writer: &mut dyn InvertedIndexWriter) -> Result<()> {
|
||||
async fn finish(
|
||||
&mut self,
|
||||
writer: &mut dyn InvertedIndexWriter,
|
||||
bitmap_type: BitmapType,
|
||||
) -> Result<()> {
|
||||
let mut output_row_count = None;
|
||||
for (index_name, mut sorter) in self.sorters.drain() {
|
||||
let SortOutput {
|
||||
@@ -88,7 +93,7 @@ impl InvertedIndexCreator for SortIndexCreator {
|
||||
);
|
||||
|
||||
writer
|
||||
.add_index(index_name, segment_null_bitmap, sorted_stream)
|
||||
.add_index(index_name, segment_null_bitmap, sorted_stream, bitmap_type)
|
||||
.await?;
|
||||
}
|
||||
|
||||
@@ -117,9 +122,9 @@ mod tests {
|
||||
use futures::{stream, StreamExt};
|
||||
|
||||
use super::*;
|
||||
use crate::inverted_index::create::sort::SortedStream;
|
||||
use crate::bitmap::Bitmap;
|
||||
use crate::inverted_index::error::Error;
|
||||
use crate::inverted_index::format::writer::MockInvertedIndexWriter;
|
||||
use crate::inverted_index::format::writer::{MockInvertedIndexWriter, ValueStream};
|
||||
use crate::Bytes;
|
||||
|
||||
#[tokio::test]
|
||||
@@ -143,11 +148,10 @@ mod tests {
|
||||
}
|
||||
|
||||
let mut mock_writer = MockInvertedIndexWriter::new();
|
||||
mock_writer
|
||||
.expect_add_index()
|
||||
.times(3)
|
||||
.returning(|name, null_bitmap, stream| {
|
||||
mock_writer.expect_add_index().times(3).returning(
|
||||
|name, null_bitmap, stream, bitmap_type| {
|
||||
assert!(null_bitmap.is_empty());
|
||||
assert_eq!(bitmap_type, BitmapType::Roaring);
|
||||
match name.as_str() {
|
||||
"a" => assert_eq!(stream_to_values(stream), vec![b"1", b"2", b"3"]),
|
||||
"b" => assert_eq!(stream_to_values(stream), vec![b"4", b"5", b"6"]),
|
||||
@@ -155,7 +159,8 @@ mod tests {
|
||||
_ => panic!("unexpected index name: {}", name),
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
},
|
||||
);
|
||||
mock_writer
|
||||
.expect_finish()
|
||||
.times(1)
|
||||
@@ -165,7 +170,10 @@ mod tests {
|
||||
Ok(())
|
||||
});
|
||||
|
||||
creator.finish(&mut mock_writer).await.unwrap();
|
||||
creator
|
||||
.finish(&mut mock_writer, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -191,8 +199,9 @@ mod tests {
|
||||
let mut mock_writer = MockInvertedIndexWriter::new();
|
||||
mock_writer
|
||||
.expect_add_index()
|
||||
.returning(|name, null_bitmap, stream| {
|
||||
.returning(|name, null_bitmap, stream, bitmap_type| {
|
||||
assert!(null_bitmap.is_empty());
|
||||
assert_eq!(bitmap_type, BitmapType::Roaring);
|
||||
match name.as_str() {
|
||||
"a" => assert_eq!(stream_to_values(stream), vec![b"1", b"2", b"3"]),
|
||||
"b" => assert_eq!(stream_to_values(stream), vec![b"4", b"5", b"6"]),
|
||||
@@ -203,7 +212,7 @@ mod tests {
|
||||
});
|
||||
mock_writer.expect_finish().never();
|
||||
|
||||
let res = creator.finish(&mut mock_writer).await;
|
||||
let res = creator.finish(&mut mock_writer, BitmapType::Roaring).await;
|
||||
assert!(matches!(res, Err(Error::InconsistentRowCount { .. })));
|
||||
}
|
||||
|
||||
@@ -219,8 +228,9 @@ mod tests {
|
||||
let mut mock_writer = MockInvertedIndexWriter::new();
|
||||
mock_writer
|
||||
.expect_add_index()
|
||||
.returning(|name, null_bitmap, stream| {
|
||||
.returning(|name, null_bitmap, stream, bitmap_type| {
|
||||
assert!(null_bitmap.is_empty());
|
||||
assert_eq!(bitmap_type, BitmapType::Roaring);
|
||||
assert!(matches!(name.as_str(), "a" | "b" | "c"));
|
||||
assert!(stream_to_values(stream).is_empty());
|
||||
Ok(())
|
||||
@@ -234,7 +244,10 @@ mod tests {
|
||||
Ok(())
|
||||
});
|
||||
|
||||
creator.finish(&mut mock_writer).await.unwrap();
|
||||
creator
|
||||
.finish(&mut mock_writer, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
fn set_bit(bit_vec: &mut BitVec, index: usize) {
|
||||
@@ -283,20 +296,21 @@ mod tests {
|
||||
|
||||
async fn output(&mut self) -> Result<SortOutput> {
|
||||
let segment_null_bitmap = self.values.remove(&None).unwrap_or_default();
|
||||
let segment_null_bitmap = Bitmap::BitVec(segment_null_bitmap);
|
||||
|
||||
Ok(SortOutput {
|
||||
segment_null_bitmap,
|
||||
sorted_stream: Box::new(stream::iter(
|
||||
std::mem::take(&mut self.values)
|
||||
.into_iter()
|
||||
.map(|(v, b)| Ok((v.unwrap(), b))),
|
||||
.map(|(v, b)| Ok((v.unwrap(), Bitmap::BitVec(b)))),
|
||||
)),
|
||||
total_row_count: self.total_row_count,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn stream_to_values(stream: SortedStream) -> Vec<Bytes> {
|
||||
fn stream_to_values(stream: ValueStream) -> Vec<Bytes> {
|
||||
futures::executor::block_on(async {
|
||||
stream.map(|r| r.unwrap().0).collect::<Vec<Bytes>>().await
|
||||
})
|
||||
|
||||
@@ -110,6 +110,14 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to decode bitmap"))]
|
||||
DecodeBitmap {
|
||||
#[snafu(source)]
|
||||
error: IoError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to decode protobuf"))]
|
||||
DecodeProto {
|
||||
#[snafu(source)]
|
||||
@@ -240,6 +248,7 @@ impl ErrorExt for Error {
|
||||
| CommonIo { .. }
|
||||
| UnknownIntermediateCodecMagic { .. }
|
||||
| FstCompile { .. }
|
||||
| DecodeBitmap { .. }
|
||||
| InvalidFooterPayloadSize { .. }
|
||||
| BlobSizeTooSmall { .. } => StatusCode::Unexpected,
|
||||
|
||||
|
||||
@@ -18,11 +18,11 @@ use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_base::BitVec;
|
||||
use greptime_proto::v1::index::InvertedIndexMetas;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::inverted_index::error::{DecodeFstSnafu, Result};
|
||||
use crate::bitmap::{Bitmap, BitmapType};
|
||||
use crate::inverted_index::error::{DecodeBitmapSnafu, DecodeFstSnafu, Result};
|
||||
pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
|
||||
use crate::inverted_index::FstMap;
|
||||
|
||||
@@ -67,17 +67,25 @@ pub trait InvertedIndexReader: Send + Sync {
|
||||
}
|
||||
|
||||
/// Retrieves the bitmap from the given offset and size.
|
||||
async fn bitmap(&self, offset: u64, size: u32) -> Result<BitVec> {
|
||||
self.range_read(offset, size).await.map(BitVec::from_vec)
|
||||
async fn bitmap(&self, offset: u64, size: u32, bitmap_type: BitmapType) -> Result<Bitmap> {
|
||||
self.range_read(offset, size).await.and_then(|bytes| {
|
||||
Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
|
||||
})
|
||||
}
|
||||
|
||||
/// Retrieves the multiple bitmaps from the given ranges.
|
||||
async fn bitmap_deque(&mut self, ranges: &[Range<u64>]) -> Result<VecDeque<BitVec>> {
|
||||
Ok(self
|
||||
.read_vec(ranges)
|
||||
.await?
|
||||
async fn bitmap_deque(
|
||||
&mut self,
|
||||
ranges: &[(Range<u64>, BitmapType)],
|
||||
) -> Result<VecDeque<Bitmap>> {
|
||||
let (ranges, types): (Vec<_>, Vec<_>) = ranges.iter().cloned().unzip();
|
||||
let bytes = self.read_vec(&ranges).await?;
|
||||
bytes
|
||||
.into_iter()
|
||||
.map(|bytes| BitVec::from_slice(bytes.as_ref()))
|
||||
.collect::<VecDeque<_>>())
|
||||
.zip(types)
|
||||
.map(|(bytes, bitmap_type)| {
|
||||
Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
|
||||
})
|
||||
.collect::<Result<VecDeque<_>>>()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,14 +78,14 @@ impl<R: RangeReader + Sync> InvertedIndexReader for InvertedIndexBlobReader<R> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::bit_vec::prelude::*;
|
||||
use fst::MapBuilder;
|
||||
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
|
||||
use greptime_proto::v1::index::{BitmapType, InvertedIndexMeta, InvertedIndexMetas};
|
||||
use prost::Message;
|
||||
|
||||
use super::*;
|
||||
use crate::bitmap::Bitmap;
|
||||
|
||||
fn create_fake_fst() -> Vec<u8> {
|
||||
fn mock_fst() -> Vec<u8> {
|
||||
let mut fst_buf = Vec::new();
|
||||
let mut build = MapBuilder::new(&mut fst_buf).unwrap();
|
||||
build.insert("key1".as_bytes(), 1).unwrap();
|
||||
@@ -94,19 +94,27 @@ mod tests {
|
||||
fst_buf
|
||||
}
|
||||
|
||||
fn create_fake_bitmap() -> Vec<u8> {
|
||||
bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0, 1, 0].into_vec()
|
||||
fn mock_bitmap() -> Bitmap {
|
||||
Bitmap::from_lsb0_bytes(&[0b10101010, 0b10000000], BitmapType::Roaring)
|
||||
}
|
||||
|
||||
fn mock_bitmap_bytes() -> Vec<u8> {
|
||||
let mut buf = Vec::new();
|
||||
mock_bitmap()
|
||||
.serialize_into(BitmapType::Roaring, &mut buf)
|
||||
.unwrap();
|
||||
buf
|
||||
}
|
||||
|
||||
fn create_inverted_index_blob() -> Vec<u8> {
|
||||
let bitmap_size = create_fake_bitmap().len();
|
||||
let fst_size = create_fake_fst().len();
|
||||
let bitmap_size = mock_bitmap_bytes().len();
|
||||
let fst_size = mock_fst().len();
|
||||
|
||||
// first index
|
||||
let mut inverted_index = Vec::new();
|
||||
inverted_index.extend_from_slice(&create_fake_bitmap()); // value bitmap
|
||||
inverted_index.extend_from_slice(&create_fake_bitmap()); // null bitmap
|
||||
inverted_index.extend_from_slice(&create_fake_fst()); // fst
|
||||
inverted_index.extend_from_slice(&mock_bitmap_bytes()); // value bitmap
|
||||
inverted_index.extend_from_slice(&mock_bitmap_bytes()); // null bitmap
|
||||
inverted_index.extend_from_slice(&mock_fst()); // fst
|
||||
|
||||
let meta = InvertedIndexMeta {
|
||||
name: "tag0".to_string(),
|
||||
@@ -116,6 +124,7 @@ mod tests {
|
||||
null_bitmap_size: bitmap_size as _,
|
||||
relative_fst_offset: (bitmap_size * 2) as _,
|
||||
fst_size: fst_size as _,
|
||||
bitmap_type: BitmapType::Roaring as _,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -128,6 +137,7 @@ mod tests {
|
||||
null_bitmap_size: bitmap_size as _,
|
||||
relative_fst_offset: (bitmap_size * 2) as _,
|
||||
fst_size: fst_size as _,
|
||||
bitmap_type: BitmapType::Roaring as _,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -168,19 +178,19 @@ mod tests {
|
||||
let meta0 = metas.metas.get("tag0").unwrap();
|
||||
assert_eq!(meta0.name, "tag0");
|
||||
assert_eq!(meta0.base_offset, 0);
|
||||
assert_eq!(meta0.inverted_index_size, 54);
|
||||
assert_eq!(meta0.relative_null_bitmap_offset, 2);
|
||||
assert_eq!(meta0.null_bitmap_size, 2);
|
||||
assert_eq!(meta0.relative_fst_offset, 4);
|
||||
assert_eq!(meta0.inverted_index_size, 102);
|
||||
assert_eq!(meta0.relative_null_bitmap_offset, 26);
|
||||
assert_eq!(meta0.null_bitmap_size, 26);
|
||||
assert_eq!(meta0.relative_fst_offset, 52);
|
||||
assert_eq!(meta0.fst_size, 50);
|
||||
|
||||
let meta1 = metas.metas.get("tag1").unwrap();
|
||||
assert_eq!(meta1.name, "tag1");
|
||||
assert_eq!(meta1.base_offset, 54);
|
||||
assert_eq!(meta1.inverted_index_size, 54);
|
||||
assert_eq!(meta1.relative_null_bitmap_offset, 2);
|
||||
assert_eq!(meta1.null_bitmap_size, 2);
|
||||
assert_eq!(meta1.relative_fst_offset, 4);
|
||||
assert_eq!(meta1.base_offset, 102);
|
||||
assert_eq!(meta1.inverted_index_size, 102);
|
||||
assert_eq!(meta1.relative_null_bitmap_offset, 26);
|
||||
assert_eq!(meta1.null_bitmap_size, 26);
|
||||
assert_eq!(meta1.relative_fst_offset, 52);
|
||||
assert_eq!(meta1.fst_size, 50);
|
||||
}
|
||||
|
||||
@@ -224,17 +234,29 @@ mod tests {
|
||||
let metas = blob_reader.metadata().await.unwrap();
|
||||
let meta = metas.metas.get("tag0").unwrap();
|
||||
|
||||
let bitmap = blob_reader.bitmap(meta.base_offset, 2).await.unwrap();
|
||||
assert_eq!(bitmap.into_vec(), create_fake_bitmap());
|
||||
let bitmap = blob_reader.bitmap(meta.base_offset + 2, 2).await.unwrap();
|
||||
assert_eq!(bitmap.into_vec(), create_fake_bitmap());
|
||||
let bitmap = blob_reader
|
||||
.bitmap(meta.base_offset, 26, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, mock_bitmap());
|
||||
let bitmap = blob_reader
|
||||
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, mock_bitmap());
|
||||
|
||||
let metas = blob_reader.metadata().await.unwrap();
|
||||
let meta = metas.metas.get("tag1").unwrap();
|
||||
|
||||
let bitmap = blob_reader.bitmap(meta.base_offset, 2).await.unwrap();
|
||||
assert_eq!(bitmap.into_vec(), create_fake_bitmap());
|
||||
let bitmap = blob_reader.bitmap(meta.base_offset + 2, 2).await.unwrap();
|
||||
assert_eq!(bitmap.into_vec(), create_fake_bitmap());
|
||||
let bitmap = blob_reader
|
||||
.bitmap(meta.base_offset, 26, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, mock_bitmap());
|
||||
let bitmap = blob_reader
|
||||
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, mock_bitmap());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,14 +18,14 @@ mod single;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::BitVec;
|
||||
use futures::Stream;
|
||||
|
||||
use crate::bitmap::{Bitmap, BitmapType};
|
||||
use crate::inverted_index::error::Result;
|
||||
pub use crate::inverted_index::format::writer::blob::InvertedIndexBlobWriter;
|
||||
use crate::Bytes;
|
||||
|
||||
pub type ValueStream = Box<dyn Stream<Item = Result<(Bytes, BitVec)>> + Send + Unpin>;
|
||||
pub type ValueStream = Box<dyn Stream<Item = Result<(Bytes, Bitmap)>> + Send + Unpin>;
|
||||
|
||||
/// Trait for writing inverted index data to underlying storage.
|
||||
#[mockall::automock]
|
||||
@@ -37,11 +37,13 @@ pub trait InvertedIndexWriter: Send {
|
||||
/// * `null_bitmap` marks positions of null entries.
|
||||
/// * `values` is a stream of values and their locations, yielded lexicographically.
|
||||
/// Errors occur if the values are out of order.
|
||||
/// * `bitmap_type` is the type of bitmap to encode.
|
||||
async fn add_index(
|
||||
&mut self,
|
||||
name: String,
|
||||
null_bitmap: BitVec,
|
||||
null_bitmap: Bitmap,
|
||||
values: ValueStream,
|
||||
bitmap_type: BitmapType,
|
||||
) -> Result<()>;
|
||||
|
||||
/// Finalizes the index writing process, ensuring all data is written.
|
||||
|
||||
@@ -15,12 +15,12 @@
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::BitVec;
|
||||
use futures::{AsyncWrite, AsyncWriteExt};
|
||||
use greptime_proto::v1::index::InvertedIndexMetas;
|
||||
use prost::Message;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::bitmap::{Bitmap, BitmapType};
|
||||
use crate::inverted_index::error::{CloseSnafu, FlushSnafu, Result, WriteSnafu};
|
||||
use crate::inverted_index::format::writer::single::SingleIndexWriter;
|
||||
use crate::inverted_index::format::writer::{InvertedIndexWriter, ValueStream};
|
||||
@@ -43,8 +43,9 @@ impl<W: AsyncWrite + Send + Unpin> InvertedIndexWriter for InvertedIndexBlobWrit
|
||||
async fn add_index(
|
||||
&mut self,
|
||||
name: String,
|
||||
null_bitmap: BitVec,
|
||||
null_bitmap: Bitmap,
|
||||
values: ValueStream,
|
||||
bitmap_type: BitmapType,
|
||||
) -> Result<()> {
|
||||
let single_writer = SingleIndexWriter::new(
|
||||
name.clone(),
|
||||
@@ -52,6 +53,7 @@ impl<W: AsyncWrite + Send + Unpin> InvertedIndexWriter for InvertedIndexBlobWrit
|
||||
null_bitmap,
|
||||
values,
|
||||
&mut self.blob_writer,
|
||||
bitmap_type,
|
||||
);
|
||||
let metadata = single_writer.write().await?;
|
||||
|
||||
@@ -100,6 +102,7 @@ impl<W: AsyncWrite + Send + Unpin> InvertedIndexBlobWriter<W> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use futures::stream;
|
||||
use greptime_proto::v1::index::BitmapType;
|
||||
|
||||
use super::*;
|
||||
use crate::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader};
|
||||
@@ -132,24 +135,44 @@ mod tests {
|
||||
writer
|
||||
.add_index(
|
||||
"tag0".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001, 0b0000_0000], BitmapType::Roaring),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((
|
||||
Bytes::from("a"),
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001], BitmapType::Roaring),
|
||||
)),
|
||||
Ok((
|
||||
Bytes::from("b"),
|
||||
Bitmap::from_lsb0_bytes(&[0b0010_0000], BitmapType::Roaring),
|
||||
)),
|
||||
Ok((
|
||||
Bytes::from("c"),
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001], BitmapType::Roaring),
|
||||
)),
|
||||
])),
|
||||
BitmapType::Roaring,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.add_index(
|
||||
"tag1".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001, 0b0000_0000], BitmapType::Roaring),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((
|
||||
Bytes::from("x"),
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001], BitmapType::Roaring),
|
||||
)),
|
||||
Ok((
|
||||
Bytes::from("y"),
|
||||
Bitmap::from_lsb0_bytes(&[0b0010_0000], BitmapType::Roaring),
|
||||
)),
|
||||
Ok((
|
||||
Bytes::from("z"),
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001], BitmapType::Roaring),
|
||||
)),
|
||||
])),
|
||||
BitmapType::Roaring,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -181,22 +204,31 @@ mod tests {
|
||||
assert_eq!(fst0.len(), 3);
|
||||
let [offset, size] = unpack(fst0.get(b"a").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
assert_eq!(
|
||||
bitmap,
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001], BitmapType::Roaring)
|
||||
);
|
||||
let [offset, size] = unpack(fst0.get(b"b").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
assert_eq!(
|
||||
bitmap,
|
||||
Bitmap::from_lsb0_bytes(&[0b0010_0000], BitmapType::Roaring)
|
||||
);
|
||||
let [offset, size] = unpack(fst0.get(b"c").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
assert_eq!(
|
||||
bitmap,
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001], BitmapType::Roaring)
|
||||
);
|
||||
|
||||
// tag1
|
||||
let tag1 = metadata.metas.get("tag1").unwrap();
|
||||
@@ -215,21 +247,30 @@ mod tests {
|
||||
assert_eq!(fst1.len(), 3);
|
||||
let [offset, size] = unpack(fst1.get(b"x").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
assert_eq!(
|
||||
bitmap,
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001], BitmapType::Roaring)
|
||||
);
|
||||
let [offset, size] = unpack(fst1.get(b"y").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
assert_eq!(
|
||||
bitmap,
|
||||
Bitmap::from_lsb0_bytes(&[0b0010_0000], BitmapType::Roaring)
|
||||
);
|
||||
let [offset, size] = unpack(fst1.get(b"z").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
assert_eq!(
|
||||
bitmap,
|
||||
Bitmap::from_lsb0_bytes(&[0b0000_0001], BitmapType::Roaring)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user