Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-03 20:02:54 +00:00)

Compare commits: 37 commits, v0.4.0-nig...stream
| Author | SHA1 | Date |
|---|---|---|
| | f995204060 | |
| | 93561291e4 | |
| | 9f59d68391 | |
| | 51083b12bd | |
| | c80165c377 | |
| | 76d8709774 | |
| | 2cf7d6d569 | |
| | 045c8079e6 | |
| | 54f2f6495f | |
| | 2798d266f5 | |
| | 824d03a642 | |
| | 47f41371d0 | |
| | d702b6e5c4 | |
| | 13c02f3f92 | |
| | b52eb2313e | |
| | d422bc8401 | |
| | b8c50d00aa | |
| | a12ee5cab8 | |
| | a0d15b489a | |
| | baa372520d | |
| | 5df4d44761 | |
| | 8e9f2ffce4 | |
| | 1101e7bb18 | |
| | 5fbc941023 | |
| | 68600a2cf9 | |
| | 805f254d15 | |
| | 2a6c830ca7 | |
| | 22dea02485 | |
| | ef75e8f7c3 | |
| | 71fc3c42d9 | |
| | c02ac36ce8 | |
| | c112b9a763 | |
| | 96fd17aa0a | |
| | 6b8cf0bbf0 | |
| | e2522dff21 | |
| | d8f851bef2 | |
| | 63b22b2403 | |
@@ -32,6 +32,10 @@ inputs:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
working-dir:
description: Working directory to build the artifacts
required: false
@@ -59,4 +63,5 @@ runs:
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }}

@@ -33,6 +33,10 @@ inputs:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
working-dir:
description: Working directory to build the artifacts
required: false
@@ -69,6 +73,7 @@ runs:
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }}

- name: Build greptime without pyo3
@@ -85,6 +90,7 @@ runs:
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }}

- name: Clean up the target directory # Clean up the target directory for the centos7 base image, or it will still use the objects of last build.
@@ -106,4 +112,5 @@ runs:
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }}
35  .github/actions/upload-artifacts/action.yml  vendored
@@ -26,6 +26,18 @@ inputs:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
upload-max-retry-times:
description: Max retry times for uploading artifacts to S3
required: false
default: "20"
upload-retry-timeout:
description: Timeout for uploading artifacts to S3
required: false
default: "10" # minutes
working-dir:
description: Working directory to upload the artifacts
required: false
@@ -74,8 +86,8 @@ runs:
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
with:
max_attempts: 20
timeout_minutes: 5
max_attempts: ${{ inputs.upload-max-retry-times }}
timeout_minutes: ${{ inputs.upload-retry-timeout }}
# The bucket layout will be:
# releases/greptimedb
# ├── v0.1.0
@@ -92,3 +104,22 @@ runs:
aws s3 cp \
${{ inputs.artifacts-dir }}.sha256sum \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/${{ inputs.version }}/${{ inputs.artifacts-dir }}.sha256sum

- name: Upload latest artifacts to S3
if: ${{ inputs.upload-to-s3 == 'true' && inputs.upload-latest-artifacts == 'true' }} # We'll also upload the latest artifacts to S3 in the scheduled and formal release.
uses: nick-invision/retry@v2
env:
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
with:
max_attempts: ${{ inputs.upload-max-retry-times }}
timeout_minutes: ${{ inputs.upload-retry-timeout }}
command: |
cd ${{ inputs.working-dir }} && \
aws s3 cp \
${{ inputs.artifacts-dir }}.tar.gz \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/latest/${{ inputs.artifacts-dir }}.tar.gz && \
aws s3 cp \
${{ inputs.artifacts-dir }}.sha256sum \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/latest/${{ inputs.artifacts-dir }}.sha256sum
2  .github/workflows/nightly-build.yml  vendored
@@ -151,6 +151,7 @@ jobs:
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-latest-artifacts: false

build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
@@ -174,6 +175,7 @@ jobs:
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-latest-artifacts: false

release-images-to-dockerhub:
name: Build and push images to DockerHub
191  Cargo.lock  generated
@@ -8,6 +8,23 @@ version = "0.11.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3"
|
||||
|
||||
[[package]]
|
||||
name = "abomonation"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56e72913c99b1f927aa7bd59a41518fdd9995f63ffc8760f211609e0241c4fb2"
|
||||
|
||||
[[package]]
|
||||
name = "abomonation_derive"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e50e2a046af56a864c62d97b7153fda72c596e646be1b0c7963736821f6e1efa"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.20.0"
|
||||
@@ -767,7 +784,7 @@ version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdca6a10ecad987bda04e95606ef85a5417dcaac1a78455242d72e031e2b6b62"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.28",
|
||||
@@ -1475,7 +1492,7 @@ version = "3.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae6371b8bdc8b7d3959e9cf7b22d4435ef3e79e138688421ec654acf8c81b008"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"proc-macro-error",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -1488,7 +1505,7 @@ version = "4.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.28",
|
||||
@@ -1530,6 +1547,7 @@ dependencies = [
|
||||
"datanode",
|
||||
"datatypes",
|
||||
"derive-new",
|
||||
"derive_builder 0.12.0",
|
||||
"enum_dispatch",
|
||||
"futures-util",
|
||||
"moka 0.9.9",
|
||||
@@ -1626,6 +1644,14 @@ version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
|
||||
|
||||
[[package]]
|
||||
name = "columnation"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/frankmcsherry/columnation#eb8e20c10e748dcbfe6266be8e24e14422d3de0f"
|
||||
dependencies = [
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "comfy-table"
|
||||
version = "7.0.1"
|
||||
@@ -1689,7 +1715,7 @@ dependencies = [
|
||||
"paste",
|
||||
"regex",
|
||||
"snafu",
|
||||
"strum 0.21.0",
|
||||
"strum 0.25.0",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"url",
|
||||
@@ -1700,7 +1726,7 @@ name = "common-error"
|
||||
version = "0.4.0-nightly"
|
||||
dependencies = [
|
||||
"snafu",
|
||||
"strum 0.24.1",
|
||||
"strum 0.25.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1774,6 +1800,7 @@ dependencies = [
|
||||
"common-recordbatch",
|
||||
"common-runtime",
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"criterion 0.4.0",
|
||||
"dashmap",
|
||||
"datafusion",
|
||||
@@ -1835,6 +1862,7 @@ dependencies = [
|
||||
"datatypes",
|
||||
"etcd-client",
|
||||
"futures",
|
||||
"humantime-serde",
|
||||
"hyper",
|
||||
"lazy_static",
|
||||
"prost",
|
||||
@@ -2829,6 +2857,19 @@ version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
|
||||
|
||||
[[package]]
|
||||
name = "differential-dataflow"
|
||||
version = "0.12.0"
|
||||
source = "git+https://github.com/TimelyDataflow/differential-dataflow#2b9ac68aab9a1bf3fc3e4c12fcabea9c9d1ecc6a"
|
||||
dependencies = [
|
||||
"abomonation",
|
||||
"abomonation_derive",
|
||||
"fnv",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"timely",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
@@ -3247,6 +3288,19 @@ dependencies = [
|
||||
"spin 0.9.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flow"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"common-telemetry",
|
||||
"datafusion-expr",
|
||||
"datafusion-substrait",
|
||||
"datatypes",
|
||||
"differential-dataflow",
|
||||
"serde",
|
||||
"timely",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
@@ -4152,11 +4206,13 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=3489b4742150abe0a769faf1bb60fbb95b061fc8#3489b4742150abe0a769faf1bb60fbb95b061fc8"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=4a277f27caa035a801d5b9c020a0449777736614#4a277f27caa035a801d5b9c020a0449777736614"
|
||||
dependencies = [
|
||||
"prost",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"strum 0.25.0",
|
||||
"strum_macros 0.25.2",
|
||||
"tonic 0.9.2",
|
||||
"tonic-build",
|
||||
]
|
||||
@@ -4272,15 +4328,6 @@ dependencies = [
|
||||
"http",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
|
||||
dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
@@ -5344,6 +5391,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu",
|
||||
"store-api",
|
||||
"strum 0.25.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -5573,7 +5621,7 @@ dependencies = [
|
||||
"snafu",
|
||||
"storage",
|
||||
"store-api",
|
||||
"strum 0.21.0",
|
||||
"strum 0.25.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -5653,7 +5701,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56b0d8a0db9bf6d2213e11f2c701cb91387b0614361625ab7b9743b41aa4938f"
|
||||
dependencies = [
|
||||
"darling 0.20.3",
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"num-bigint",
|
||||
"proc-macro-crate 1.3.1",
|
||||
"proc-macro-error",
|
||||
@@ -7097,7 +7145,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"log",
|
||||
@@ -8917,7 +8965,7 @@ dependencies = [
|
||||
"snafu",
|
||||
"snap",
|
||||
"sql",
|
||||
"strum 0.24.1",
|
||||
"strum 0.25.0",
|
||||
"table",
|
||||
"tikv-jemalloc-ctl",
|
||||
"tokio",
|
||||
@@ -9139,7 +9187,7 @@ version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
@@ -9402,7 +9450,7 @@ checksum = "9966e64ae989e7e575b19d7265cb79d7fc3cbbdf179835cb0d716f294c2049c9"
|
||||
dependencies = [
|
||||
"dotenvy",
|
||||
"either",
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -9607,23 +9655,11 @@ version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aaf86bbcfd1fa9670b7a129f64fc0c9fcbbfe4f1bc4210e9e98fe71ffc12cde2"
|
||||
dependencies = [
|
||||
"strum_macros 0.21.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
dependencies = [
|
||||
"strum_macros 0.24.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
@@ -9634,25 +9670,13 @@ dependencies = [
|
||||
"strum_macros 0.25.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d06aaeeee809dbc59eb4556183dd927df67db1540de5be8d3ec0b6636358a5ec"
|
||||
dependencies = [
|
||||
"heck 0.3.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
@@ -9665,7 +9689,7 @@ version = "0.25.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
@@ -9715,7 +9739,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3ae64fb7ad0670c7d6d53d57b1b91beb2212afc30e164cc8edb02d6b2cff32a"
|
||||
dependencies = [
|
||||
"gix",
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"prettyplease 0.2.12",
|
||||
"prost",
|
||||
"prost-build",
|
||||
@@ -9737,7 +9761,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "658f6cbbd29a250869b87e1bb5a4b42db534cacfc1c03284f2536cd36b6c1617"
|
||||
dependencies = [
|
||||
"git2",
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"prettyplease 0.2.12",
|
||||
"prost",
|
||||
"prost-build",
|
||||
@@ -9818,6 +9842,18 @@ version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
|
||||
|
||||
[[package]]
|
||||
name = "synstructure"
|
||||
version = "0.12.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration"
|
||||
version = "0.5.1"
|
||||
@@ -10196,6 +10232,59 @@ dependencies = [
|
||||
"time-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "timely"
|
||||
version = "0.12.0"
|
||||
source = "git+https://github.com/TimelyDataflow/timely-dataflow#b990faba8ea59ec2a7450809215145f3e940234a"
|
||||
dependencies = [
|
||||
"abomonation",
|
||||
"abomonation_derive",
|
||||
"crossbeam-channel",
|
||||
"futures-util",
|
||||
"getopts",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"timely_bytes",
|
||||
"timely_communication",
|
||||
"timely_container",
|
||||
"timely_logging",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "timely_bytes"
|
||||
version = "0.12.0"
|
||||
source = "git+https://github.com/TimelyDataflow/timely-dataflow#b990faba8ea59ec2a7450809215145f3e940234a"
|
||||
|
||||
[[package]]
|
||||
name = "timely_communication"
|
||||
version = "0.12.0"
|
||||
source = "git+https://github.com/TimelyDataflow/timely-dataflow#b990faba8ea59ec2a7450809215145f3e940234a"
|
||||
dependencies = [
|
||||
"abomonation",
|
||||
"abomonation_derive",
|
||||
"bincode",
|
||||
"crossbeam-channel",
|
||||
"getopts",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"timely_bytes",
|
||||
"timely_logging",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "timely_container"
|
||||
version = "0.12.0"
|
||||
source = "git+https://github.com/TimelyDataflow/timely-dataflow#b990faba8ea59ec2a7450809215145f3e940234a"
|
||||
dependencies = [
|
||||
"columnation",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "timely_logging"
|
||||
version = "0.12.0"
|
||||
source = "git+https://github.com/TimelyDataflow/timely-dataflow#b990faba8ea59ec2a7450809215145f3e940234a"
|
||||
|
||||
[[package]]
|
||||
name = "timsort"
|
||||
version = "0.1.2"
|
||||
@@ -10776,7 +10865,7 @@ version = "0.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95d27d749378ceab6ec22188ed7ad102205c89ddb92ab662371c850ffc71aa1a"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"log",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -10794,7 +10883,7 @@ version = "0.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c8d9ecedde2fd77e975c38eeb9ca40b34ad0247b2259c6e6bbd2a8d6cc2444f"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck",
|
||||
"log",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
||||
@@ -46,6 +46,7 @@ members = [
"src/sql",
"src/storage",
"src/store-api",
"src/flow",
"src/table",
"src/table-procedure",
"tests-integration",
@@ -77,7 +78,8 @@ datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git
derive_builder = "0.12"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "3489b4742150abe0a769faf1bb60fbb95b061fc8" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "4a277f27caa035a801d5b9c020a0449777736614" }
humantime-serde = "1.1"
itertools = "0.10"
lazy_static = "1.4"
once_cell = "1.18"
@@ -93,6 +95,7 @@ snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "296a4f6c73b129d6f565a42a2e5e53c6bc2b9da4", features = [
"visitor",
] }
strum = { version = "0.25", features = ["derive"] }
tempfile = "3"
tokio = { version = "1.28", features = ["full"] }
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
@@ -37,7 +37,6 @@ use greptime_proto::v1;
use greptime_proto::v1::ddl_request::Expr;
use greptime_proto::v1::greptime_request::Request;
use greptime_proto::v1::query_request::Query;
use greptime_proto::v1::region::region_request;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{DdlRequest, IntervalMonthDayNano, QueryRequest, SemanticType};
use snafu::prelude::*;
@@ -56,6 +55,10 @@ impl ColumnDataTypeWrapper {
Ok(Self(datatype))
}

pub fn new(datatype: ColumnDataType) -> Self {
Self(datatype)
}

pub fn datatype(&self) -> ColumnDataType {
self.0
}
@@ -329,21 +332,6 @@ fn query_request_type(request: &QueryRequest) -> &'static str {
}
}

/// Returns the type name of the [RegionRequest].
pub fn region_request_type(request: &region_request::Request) -> &'static str {
match request {
region_request::Request::Inserts(_) => "region.inserts",
region_request::Request::Deletes(_) => "region.deletes",
region_request::Request::Create(_) => "region.create",
region_request::Request::Drop(_) => "region.drop ",
region_request::Request::Open(_) => "region.open",
region_request::Request::Close(_) => "region.close",
region_request::Request::Alter(_) => "region.alter",
region_request::Request::Flush(_) => "region.flush",
region_request::Request::Compact(_) => "region.compact",
}
}

/// Returns the type name of the [DdlRequest].
fn ddl_request_type(request: &DdlRequest) -> &'static str {
match request.expr {
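The hunk at `@@ -56,6 +55,10` adds an infallible constructor and a getter to `ColumnDataTypeWrapper`. A minimal sketch of how calling code might use the pair; the import paths and the `Int64` variant are illustrative assumptions, not taken from the diff:

```rust
use api::helper::ColumnDataTypeWrapper;
use api::v1::ColumnDataType;

fn main() {
    // Wrap a protobuf column data type directly, without going through
    // the fallible conversion path shown earlier in the file.
    let wrapper = ColumnDataTypeWrapper::new(ColumnDataType::Int64);

    // `datatype()` hands the inner enum back out by value; the wrapper
    // is a thin newtype around `ColumnDataType`.
    assert_eq!(wrapper.datatype(), ColumnDataType::Int64);
}
```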
@@ -136,7 +136,7 @@ impl LocalCatalogManager {
schema: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
table_id: SYSTEM_CATALOG_TABLE_ID,
table: self.system.information_schema.system.clone(),
table: self.system.information_schema.system.as_table_ref(),
};
self.catalogs.register_table(register_table_req).await?;
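`as_table_ref()` replaces the direct clone here because, later in this change, `SystemCatalogTable` stops implementing the `Table` trait and only hands out its inner handle. A simplified sketch of that newtype pattern; the trait and alias below are stand-ins, not the real `table` crate definitions:

```rust
use std::sync::Arc;

// Stand-ins for the `table` crate's `Table` trait and `TableRef` alias.
trait Table {}
type TableRef = Arc<dyn Table + Send + Sync>;

// Mirrors `pub struct SystemCatalogTable(TableRef)` from the diff.
struct SystemCatalogTable(TableRef);

impl SystemCatalogTable {
    // Callers that need a plain `TableRef` (e.g. when building the
    // register-table request above) clone the shared handle instead of
    // treating the wrapper itself as a `Table`.
    fn as_table_ref(&self) -> TableRef {
        self.0.clone()
    }
}

struct DummyTable;
impl Table for DummyTable {}

fn main() {
    let system = SystemCatalogTable(Arc::new(DummyTable));
    let _table: TableRef = system.as_table_ref();
}
```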
@@ -97,26 +97,7 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
}
|
||||
|
||||
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()> {
|
||||
let mut catalogs = self.catalogs.write().unwrap();
|
||||
let schema = catalogs
|
||||
.get_mut(&request.catalog)
|
||||
.with_context(|| CatalogNotFoundSnafu {
|
||||
catalog_name: &request.catalog,
|
||||
})?
|
||||
.get_mut(&request.schema)
|
||||
.with_context(|| SchemaNotFoundSnafu {
|
||||
catalog: &request.catalog,
|
||||
schema: &request.schema,
|
||||
})?;
|
||||
let result = schema.remove(&request.table_name);
|
||||
if result.is_some() {
|
||||
decrement_gauge!(
|
||||
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
|
||||
1.0,
|
||||
&[crate::metrics::db_label(&request.catalog, &request.schema)],
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
self.deregister_table_sync(request)
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
@@ -157,15 +138,7 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
}
|
||||
|
||||
async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool> {
|
||||
Ok(self
|
||||
.catalogs
|
||||
.read()
|
||||
.unwrap()
|
||||
.get(catalog)
|
||||
.with_context(|| CatalogNotFoundSnafu {
|
||||
catalog_name: catalog,
|
||||
})?
|
||||
.contains_key(schema))
|
||||
self.schema_exist_sync(catalog, schema)
|
||||
}
|
||||
|
||||
async fn table(
|
||||
@@ -187,7 +160,7 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
}
|
||||
|
||||
async fn catalog_exist(&self, catalog: &str) -> Result<bool> {
|
||||
Ok(self.catalogs.read().unwrap().get(catalog).is_some())
|
||||
self.catalog_exist_sync(catalog)
|
||||
}
|
||||
|
||||
async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
|
||||
@@ -245,7 +218,7 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
}
|
||||
|
||||
impl MemoryCatalogManager {
|
||||
/// Create a manager with some default setups
|
||||
/// Creates a manager with some default setups
|
||||
/// (e.g. default catalog/schema and information schema)
|
||||
pub fn with_default_setup() -> Arc<Self> {
|
||||
let manager = Arc::new(Self {
|
||||
@@ -267,19 +240,23 @@ impl MemoryCatalogManager {
|
||||
manager
|
||||
}
|
||||
|
||||
/// Registers a catalog and return the catalog already exist
|
||||
pub fn register_catalog_if_absent(&self, name: String) -> bool {
|
||||
let mut catalogs = self.catalogs.write().unwrap();
|
||||
let entry = catalogs.entry(name);
|
||||
match entry {
|
||||
Entry::Occupied(_) => true,
|
||||
Entry::Vacant(v) => {
|
||||
let _ = v.insert(HashMap::new());
|
||||
false
|
||||
}
|
||||
}
|
||||
fn schema_exist_sync(&self, catalog: &str, schema: &str) -> Result<bool> {
|
||||
Ok(self
|
||||
.catalogs
|
||||
.read()
|
||||
.unwrap()
|
||||
.get(catalog)
|
||||
.with_context(|| CatalogNotFoundSnafu {
|
||||
catalog_name: catalog,
|
||||
})?
|
||||
.contains_key(schema))
|
||||
}
|
||||
|
||||
fn catalog_exist_sync(&self, catalog: &str) -> Result<bool> {
|
||||
Ok(self.catalogs.read().unwrap().get(catalog).is_some())
|
||||
}
|
||||
|
||||
/// Registers a catalog if it does not exist and returns false if the schema exists.
|
||||
pub fn register_catalog_sync(self: &Arc<Self>, name: String) -> Result<bool> {
|
||||
let mut catalogs = self.catalogs.write().unwrap();
|
||||
|
||||
@@ -294,6 +271,32 @@ impl MemoryCatalogManager {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deregister_table_sync(&self, request: DeregisterTableRequest) -> Result<()> {
|
||||
let mut catalogs = self.catalogs.write().unwrap();
|
||||
let schema = catalogs
|
||||
.get_mut(&request.catalog)
|
||||
.with_context(|| CatalogNotFoundSnafu {
|
||||
catalog_name: &request.catalog,
|
||||
})?
|
||||
.get_mut(&request.schema)
|
||||
.with_context(|| SchemaNotFoundSnafu {
|
||||
catalog: &request.catalog,
|
||||
schema: &request.schema,
|
||||
})?;
|
||||
let result = schema.remove(&request.table_name);
|
||||
if result.is_some() {
|
||||
decrement_gauge!(
|
||||
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
|
||||
1.0,
|
||||
&[crate::metrics::db_label(&request.catalog, &request.schema)],
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Registers a schema if it does not exist.
|
||||
/// It returns an error if the catalog does not exist,
|
||||
/// and returns false if the schema exists.
|
||||
pub fn register_schema_sync(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
let mut catalogs = self.catalogs.write().unwrap();
|
||||
let catalog = catalogs
|
||||
@@ -312,6 +315,7 @@ impl MemoryCatalogManager {
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers a schema and returns an error if the catalog or schema does not exist.
|
||||
pub fn register_table_sync(&self, request: RegisterTableRequest) -> Result<bool> {
|
||||
let mut catalogs = self.catalogs.write().unwrap();
|
||||
let schema = catalogs
|
||||
@@ -353,9 +357,25 @@ impl MemoryCatalogManager {
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub fn new_with_table(table: TableRef) -> Arc<Self> {
|
||||
let manager = Self::with_default_setup();
|
||||
let catalog = &table.table_info().catalog_name;
|
||||
let schema = &table.table_info().schema_name;
|
||||
|
||||
if !manager.catalog_exist_sync(catalog).unwrap() {
|
||||
manager.register_catalog_sync(catalog.to_string()).unwrap();
|
||||
}
|
||||
|
||||
if !manager.schema_exist_sync(catalog, schema).unwrap() {
|
||||
manager
|
||||
.register_schema_sync(RegisterSchemaRequest {
|
||||
catalog: catalog.to_string(),
|
||||
schema: schema.to_string(),
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let request = RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
catalog: catalog.to_string(),
|
||||
schema: schema.to_string(),
|
||||
table_name: table.table_info().name.clone(),
|
||||
table_id: table.table_info().ident.table_id,
|
||||
table,
|
||||
@@ -524,10 +544,14 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_register_if_absent() {
|
||||
pub fn test_register_catalog_sync() {
|
||||
let list = MemoryCatalogManager::with_default_setup();
|
||||
assert!(!list.register_catalog_if_absent("test_catalog".to_string(),));
|
||||
assert!(list.register_catalog_if_absent("test_catalog".to_string()));
|
||||
assert!(list
|
||||
.register_catalog_sync("test_catalog".to_string())
|
||||
.unwrap());
|
||||
assert!(!list
|
||||
.register_catalog_sync("test_catalog".to_string())
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -226,7 +226,7 @@ async fn register_table(
engine: table_info.meta.engine.clone(),
};
region_alive_keepers
.register_table(table_ident, table)
.register_table(table_ident, table, memory_catalog_manager.clone())
.await?;
}
@@ -37,6 +37,8 @@ use tokio::task::JoinHandle;
|
||||
use tokio::time::{Duration, Instant};
|
||||
|
||||
use crate::error::{Result, TableEngineNotFoundSnafu};
|
||||
use crate::local::MemoryCatalogManager;
|
||||
use crate::DeregisterTableRequest;
|
||||
|
||||
/// [RegionAliveKeepers] manages all [RegionAliveKeeper] in a scope of tables.
|
||||
pub struct RegionAliveKeepers {
|
||||
@@ -70,7 +72,12 @@ impl RegionAliveKeepers {
|
||||
self.keepers.lock().await.get(&table_id).cloned()
|
||||
}
|
||||
|
||||
pub async fn register_table(&self, table_ident: TableIdent, table: TableRef) -> Result<()> {
|
||||
pub async fn register_table(
|
||||
&self,
|
||||
table_ident: TableIdent,
|
||||
table: TableRef,
|
||||
catalog_manager: Arc<MemoryCatalogManager>,
|
||||
) -> Result<()> {
|
||||
let table_id = table_ident.table_id;
|
||||
let keeper = self.find_keeper(table_id).await;
|
||||
if keeper.is_some() {
|
||||
@@ -86,6 +93,7 @@ impl RegionAliveKeepers {
|
||||
|
||||
let keeper = Arc::new(RegionAliveKeeper::new(
|
||||
table_engine,
|
||||
catalog_manager,
|
||||
table_ident.clone(),
|
||||
self.heartbeat_interval_millis,
|
||||
));
|
||||
@@ -203,6 +211,7 @@ impl HeartbeatResponseHandler for RegionAliveKeepers {
|
||||
/// Datanode, it will "extend" the region's "lease", with a deadline for [RegionAliveKeeper] to
|
||||
/// countdown.
|
||||
pub struct RegionAliveKeeper {
|
||||
catalog_manager: Arc<MemoryCatalogManager>,
|
||||
table_engine: TableEngineRef,
|
||||
table_ident: TableIdent,
|
||||
countdown_task_handles: Arc<Mutex<HashMap<RegionNumber, Arc<CountdownTaskHandle>>>>,
|
||||
@@ -213,10 +222,12 @@ pub struct RegionAliveKeeper {
|
||||
impl RegionAliveKeeper {
|
||||
fn new(
|
||||
table_engine: TableEngineRef,
|
||||
catalog_manager: Arc<MemoryCatalogManager>,
|
||||
table_ident: TableIdent,
|
||||
heartbeat_interval_millis: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
table_engine,
|
||||
table_ident,
|
||||
countdown_task_handles: Arc::new(Mutex::new(HashMap::new())),
|
||||
@@ -244,11 +255,29 @@ impl RegionAliveKeeper {
|
||||
let _ = x.lock().await.remove(®ion);
|
||||
} // Else the countdown task handles map could be dropped because the keeper is dropped.
|
||||
};
|
||||
let catalog_manager = self.catalog_manager.clone();
|
||||
let ident = self.table_ident.clone();
|
||||
let handle = Arc::new(CountdownTaskHandle::new(
|
||||
self.table_engine.clone(),
|
||||
self.table_ident.clone(),
|
||||
region,
|
||||
|| on_task_finished,
|
||||
move |result: Option<CloseTableResult>| {
|
||||
if matches!(result, Some(CloseTableResult::Released(_))) {
|
||||
let result = catalog_manager.deregister_table_sync(DeregisterTableRequest {
|
||||
catalog: ident.catalog.to_string(),
|
||||
schema: ident.schema.to_string(),
|
||||
table_name: ident.table.to_string(),
|
||||
});
|
||||
|
||||
info!(
|
||||
"Deregister table: {} after countdown task finished, result: {result:?}",
|
||||
ident.table_id
|
||||
);
|
||||
} else {
|
||||
debug!("Countdown task returns: {result:?}");
|
||||
}
|
||||
on_task_finished
|
||||
},
|
||||
));
|
||||
|
||||
let mut handles = self.countdown_task_handles.lock().await;
|
||||
@@ -347,7 +376,7 @@ impl CountdownTaskHandle {
|
||||
table_engine: TableEngineRef,
|
||||
table_ident: TableIdent,
|
||||
region: RegionNumber,
|
||||
on_task_finished: impl FnOnce() -> Fut + Send + 'static,
|
||||
on_task_finished: impl FnOnce(Option<CloseTableResult>) -> Fut + Send + 'static,
|
||||
) -> Self
|
||||
where
|
||||
Fut: Future<Output = ()> + Send,
|
||||
@@ -361,8 +390,8 @@ impl CountdownTaskHandle {
|
||||
rx,
|
||||
};
|
||||
let handler = common_runtime::spawn_bg(async move {
|
||||
countdown_task.run().await;
|
||||
on_task_finished().await;
|
||||
let result = countdown_task.run().await;
|
||||
on_task_finished(result).await;
|
||||
});
|
||||
|
||||
Self {
|
||||
@@ -414,7 +443,8 @@ struct CountdownTask {
|
||||
}
|
||||
|
||||
impl CountdownTask {
|
||||
async fn run(&mut self) {
|
||||
// returns true if
|
||||
async fn run(&mut self) -> Option<CloseTableResult> {
|
||||
// 30 years. See `Instant::far_future`.
|
||||
let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 30);
|
||||
|
||||
@@ -468,10 +498,11 @@ impl CountdownTask {
|
||||
"Region {region} of table {table_ident} is closed, result: {result:?}. \
|
||||
RegionAliveKeeper out.",
|
||||
);
|
||||
break;
|
||||
return Some(result);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
async fn close_region(&self) -> CloseTableResult {
|
||||
@@ -547,8 +578,9 @@ mod test {
|
||||
table_options: TableOptions::default(),
|
||||
engine: "MockTableEngine".to_string(),
|
||||
}));
|
||||
let catalog_manager = MemoryCatalogManager::new_with_table(table.clone());
|
||||
keepers
|
||||
.register_table(table_ident.clone(), table)
|
||||
.register_table(table_ident.clone(), table, catalog_manager)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(keepers
|
||||
@@ -684,7 +716,8 @@ mod test {
|
||||
table_id: 1024,
|
||||
engine: "mito".to_string(),
|
||||
};
|
||||
let keeper = RegionAliveKeeper::new(table_engine, table_ident, 1000);
|
||||
let catalog_manager = MemoryCatalogManager::with_default_setup();
|
||||
let keeper = RegionAliveKeeper::new(table_engine, catalog_manager, table_ident, 1000);
|
||||
|
||||
let region = 1;
|
||||
assert!(keeper.find_handle(®ion).await.is_none());
|
||||
@@ -727,7 +760,7 @@ mod test {
|
||||
table_engine.clone(),
|
||||
table_ident.clone(),
|
||||
1,
|
||||
|| async move { finished_clone.store(true, Ordering::Relaxed) },
|
||||
|_| async move { finished_clone.store(true, Ordering::Relaxed) },
|
||||
);
|
||||
let tx = handle.tx.clone();
|
||||
|
||||
@@ -749,7 +782,7 @@ mod test {
|
||||
|
||||
let finished = Arc::new(AtomicBool::new(false));
|
||||
let finished_clone = finished.clone();
|
||||
let handle = CountdownTaskHandle::new(table_engine, table_ident, 1, || async move {
|
||||
let handle = CountdownTaskHandle::new(table_engine, table_ident, 1, |_| async move {
|
||||
finished_clone.store(true, Ordering::Relaxed)
|
||||
});
|
||||
handle.tx.send(CountdownCommand::Start(100)).await.unwrap();
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -21,24 +20,23 @@ use common_catalog::consts::{
|
||||
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
|
||||
};
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::debug;
|
||||
use common_telemetry::{debug, warn};
|
||||
use common_time::util;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVector, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, RawSchema, SchemaRef};
|
||||
use datatypes::schema::{ColumnSchema, RawSchema};
|
||||
use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::storage::ScanRequest;
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
use table::metadata::{TableId, TableInfoRef, TableType};
|
||||
use table::requests::{
|
||||
CreateTableRequest, DeleteRequest, InsertRequest, OpenTableRequest, TableOptions,
|
||||
};
|
||||
use table::{Result as TableResult, Table, TableRef};
|
||||
use table::metadata::TableId;
|
||||
use table::requests::{CreateTableRequest, InsertRequest, OpenTableRequest, TableOptions};
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
|
||||
OpenSystemCatalogSnafu, Result, ValueDeserializeSnafu,
|
||||
self, CreateSystemCatalogSnafu, DeregisterTableSnafu, EmptyValueSnafu, Error,
|
||||
InsertCatalogRecordSnafu, InvalidEntryTypeSnafu, InvalidKeySnafu, OpenSystemCatalogSnafu,
|
||||
Result, ValueDeserializeSnafu,
|
||||
};
|
||||
use crate::DeregisterTableRequest;
|
||||
|
||||
@@ -48,42 +46,6 @@ pub const VALUE_INDEX: usize = 3;
|
||||
|
||||
pub struct SystemCatalogTable(TableRef);
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Table for SystemCatalogTable {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.0.schema()
|
||||
}
|
||||
|
||||
async fn scan_to_stream(&self, request: ScanRequest) -> TableResult<SendableRecordBatchStream> {
|
||||
self.0.scan_to_stream(request).await
|
||||
}
|
||||
|
||||
/// Insert values into table.
|
||||
async fn insert(&self, request: InsertRequest) -> TableResult<usize> {
|
||||
self.0.insert(request).await
|
||||
}
|
||||
|
||||
fn table_info(&self) -> TableInfoRef {
|
||||
self.0.table_info()
|
||||
}
|
||||
|
||||
fn table_type(&self) -> TableType {
|
||||
self.0.table_type()
|
||||
}
|
||||
|
||||
async fn delete(&self, request: DeleteRequest) -> TableResult<usize> {
|
||||
self.0.delete(request).await
|
||||
}
|
||||
|
||||
fn statistics(&self) -> Option<table::stats::TableStatistics> {
|
||||
self.0.statistics()
|
||||
}
|
||||
}
|
||||
|
||||
impl SystemCatalogTable {
|
||||
pub async fn new(engine: TableEngineRef) -> Result<Self> {
|
||||
let request = OpenTableRequest {
|
||||
@@ -126,6 +88,54 @@ impl SystemCatalogTable {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn register_table(
|
||||
&self,
|
||||
catalog: String,
|
||||
schema: String,
|
||||
table_name: String,
|
||||
table_id: TableId,
|
||||
engine: String,
|
||||
) -> Result<usize> {
|
||||
let insert_request =
|
||||
build_table_insert_request(catalog, schema, table_name, table_id, engine);
|
||||
self.0
|
||||
.insert(insert_request)
|
||||
.await
|
||||
.context(InsertCatalogRecordSnafu)
|
||||
}
|
||||
|
||||
pub(crate) async fn deregister_table(
|
||||
&self,
|
||||
request: &DeregisterTableRequest,
|
||||
table_id: TableId,
|
||||
) -> Result<()> {
|
||||
let deletion_request = build_table_deletion_request(request, table_id);
|
||||
self.0
|
||||
.insert(deletion_request)
|
||||
.await
|
||||
.map(|x| {
|
||||
if x != 1 {
|
||||
let table = common_catalog::format_full_table_name(
|
||||
&request.catalog,
|
||||
&request.schema,
|
||||
&request.table_name
|
||||
);
|
||||
warn!("Failed to delete table record from information_schema, unexpected returned result: {x}, table: {table}");
|
||||
}
|
||||
})
|
||||
.with_context(|_| DeregisterTableSnafu {
|
||||
request: request.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn register_schema(&self, catalog: String, schema: String) -> Result<usize> {
|
||||
let insert_request = build_schema_insert_request(catalog, schema);
|
||||
self.0
|
||||
.insert(insert_request)
|
||||
.await
|
||||
.context(InsertCatalogRecordSnafu)
|
||||
}
|
||||
|
||||
/// Create a stream of all entries inside system catalog table
|
||||
pub async fn records(&self) -> Result<SendableRecordBatchStream> {
|
||||
let full_projection = None;
|
||||
@@ -137,11 +147,16 @@ impl SystemCatalogTable {
|
||||
limit: None,
|
||||
};
|
||||
let stream = self
|
||||
.0
|
||||
.scan_to_stream(scan_req)
|
||||
.await
|
||||
.context(error::SystemCatalogTableScanSnafu)?;
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
pub fn as_table_ref(&self) -> TableRef {
|
||||
self.0.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Build system catalog table schema.
|
||||
@@ -541,14 +556,14 @@ mod tests {
|
||||
async fn test_system_table_type() {
|
||||
let (_dir, table_engine) = prepare_table_engine().await;
|
||||
let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
|
||||
assert_eq!(Base, system_table.table_type());
|
||||
assert_eq!(Base, system_table.as_table_ref().table_type());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_system_table_info() {
|
||||
let (_dir, table_engine) = prepare_table_engine().await;
|
||||
let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
|
||||
let info = system_table.table_info();
|
||||
let info = system_table.as_table_ref().table_info();
|
||||
assert_eq!(TableType::Base, info.table_type);
|
||||
assert_eq!(SYSTEM_CATALOG_TABLE_NAME, info.name);
|
||||
assert_eq!(SYSTEM_CATALOG_TABLE_ID, info.ident.table_id);
|
||||
@@ -561,14 +576,16 @@ mod tests {
|
||||
let (_, table_engine) = prepare_table_engine().await;
|
||||
let catalog_table = SystemCatalogTable::new(table_engine).await.unwrap();
|
||||
|
||||
let table_insertion = build_table_insert_request(
|
||||
DEFAULT_CATALOG_NAME.to_string(),
|
||||
DEFAULT_SCHEMA_NAME.to_string(),
|
||||
"my_table".to_string(),
|
||||
1,
|
||||
MITO_ENGINE.to_string(),
|
||||
);
|
||||
let result = catalog_table.insert(table_insertion).await.unwrap();
|
||||
let result = catalog_table
|
||||
.register_table(
|
||||
DEFAULT_CATALOG_NAME.to_string(),
|
||||
DEFAULT_SCHEMA_NAME.to_string(),
|
||||
"my_table".to_string(),
|
||||
1,
|
||||
MITO_ENGINE.to_string(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result, 1);
|
||||
|
||||
let records = catalog_table.records().await.unwrap();
|
||||
@@ -598,16 +615,17 @@ mod tests {
|
||||
});
|
||||
assert_eq!(entry, expected);
|
||||
|
||||
let table_deletion = build_table_deletion_request(
|
||||
&DeregisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "my_table".to_string(),
|
||||
},
|
||||
1,
|
||||
);
|
||||
let result = catalog_table.insert(table_deletion).await.unwrap();
|
||||
assert_eq!(result, 1);
|
||||
catalog_table
|
||||
.deregister_table(
|
||||
&DeregisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "my_table".to_string(),
|
||||
},
|
||||
1,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let records = catalog_table.records().await.unwrap();
|
||||
let batches = RecordBatches::try_collect(records).await.unwrap().take();
|
||||
|
||||
@@ -16,16 +16,9 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::logging;
|
||||
use snafu::ResultExt;
|
||||
use table::metadata::TableId;
|
||||
use table::Table;
|
||||
|
||||
use crate::error::{self, InsertCatalogRecordSnafu, Result as CatalogResult};
|
||||
use crate::system::{
|
||||
build_schema_insert_request, build_table_deletion_request, build_table_insert_request,
|
||||
SystemCatalogTable,
|
||||
};
|
||||
use crate::system::SystemCatalogTable;
|
||||
use crate::DeregisterTableRequest;
|
||||
|
||||
pub struct InformationSchema {
|
||||
@@ -54,36 +47,21 @@ impl SystemCatalog {
|
||||
table_id: TableId,
|
||||
engine: String,
|
||||
) -> crate::error::Result<usize> {
|
||||
let request = build_table_insert_request(catalog, schema, table_name, table_id, engine);
|
||||
self.information_schema
|
||||
.system
|
||||
.insert(request)
|
||||
.register_table(catalog, schema, table_name, table_id, engine)
|
||||
.await
|
||||
.context(InsertCatalogRecordSnafu)
|
||||
}
|
||||
|
||||
pub(crate) async fn deregister_table(
|
||||
&self,
|
||||
request: &DeregisterTableRequest,
|
||||
table_id: TableId,
|
||||
) -> CatalogResult<()> {
|
||||
) -> crate::error::Result<()> {
|
||||
self.information_schema
|
||||
.system
|
||||
.insert(build_table_deletion_request(request, table_id))
|
||||
.deregister_table(request, table_id)
|
||||
.await
|
||||
.map(|x| {
|
||||
if x != 1 {
|
||||
let table = common_catalog::format_full_table_name(
|
||||
&request.catalog,
|
||||
&request.schema,
|
||||
&request.table_name
|
||||
);
|
||||
logging::warn!("Failed to delete table record from information_schema, unexpected returned result: {x}, table: {table}");
|
||||
}
|
||||
})
|
||||
.with_context(|_| error::DeregisterTableSnafu {
|
||||
request: request.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn register_schema(
|
||||
@@ -91,11 +69,9 @@ impl SystemCatalog {
|
||||
catalog: String,
|
||||
schema: String,
|
||||
) -> crate::error::Result<usize> {
|
||||
let request = build_schema_insert_request(catalog, schema);
|
||||
self.information_schema
|
||||
.system
|
||||
.insert(request)
|
||||
.register_schema(catalog, schema)
|
||||
.await
|
||||
.context(InsertCatalogRecordSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ common-telemetry = { workspace = true }
common-time = { workspace = true }
datafusion.workspace = true
datatypes = { workspace = true }
derive_builder.workspace = true
enum_dispatch = "0.3"
futures-util.workspace = true
moka = { version = "0.9", features = ["future"] }
@@ -17,6 +17,7 @@ use std::sync::Arc;
use api::v1::greptime_database_client::GreptimeDatabaseClient;
use api::v1::health_check_client::HealthCheckClient;
use api::v1::prometheus_gateway_client::PrometheusGatewayClient;
use api::v1::region::region_client::RegionClient as PbRegionClient;
use api::v1::HealthCheckRequest;
use arrow_flight::flight_service_client::FlightServiceClient;
use common_grpc::channel_manager::ChannelManager;
@@ -82,11 +83,6 @@ impl Client {
Default::default()
}

pub fn with_manager(channel_manager: ChannelManager) -> Self {
let inner = Arc::new(Inner::with_manager(channel_manager));
Self { inner }
}

pub fn with_urls<U, A>(urls: A) -> Self
where
U: AsRef<str>,
@@ -157,6 +153,11 @@ impl Client {
})
}

pub(crate) fn raw_region_client(&self) -> Result<PbRegionClient<Channel>> {
let (_, channel) = self.find_channel()?;
Ok(PbRegionClient::new(channel))
}

pub fn make_prometheus_gateway_client(&self) -> Result<PrometheusGatewayClient<Channel>> {
let (_, channel) = self.find_channel()?;
Ok(PrometheusGatewayClient::new(channel))

@@ -18,6 +18,7 @@ mod database;
pub mod error;
pub mod load_balance;
mod metrics;
pub mod region;
mod stream_insert;

pub use api;

@@ -25,3 +25,4 @@ pub const METRIC_GRPC_FLUSH_TABLE: &str = "grpc.flush_table";
pub const METRIC_GRPC_COMPACT_TABLE: &str = "grpc.compact_table";
pub const METRIC_GRPC_TRUNCATE_TABLE: &str = "grpc.truncate_table";
pub const METRIC_GRPC_DO_GET: &str = "grpc.do_get";
pub(crate) const METRIC_REGION_REQUEST_GRPC: &str = "grpc.region_request";
146  src/client/src/region.rs  Normal file
@@ -0,0 +1,146 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::region::{region_request, RegionRequest, RegionRequestHeader, RegionResponse};
|
||||
use api::v1::ResponseHeader;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_telemetry::timer;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{IllegalDatabaseResponseSnafu, Result, ServerSnafu};
|
||||
use crate::{metrics, Client};
|
||||
|
||||
type AffectedRows = u64;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegionRequester {
|
||||
trace_id: Option<u64>,
|
||||
span_id: Option<u64>,
|
||||
client: Client,
|
||||
}
|
||||
|
||||
impl RegionRequester {
|
||||
pub fn new(client: Client) -> Self {
|
||||
// TODO(LFC): Pass in trace_id and span_id from some context when we have it.
|
||||
Self {
|
||||
trace_id: None,
|
||||
span_id: None,
|
||||
client,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn handle(self, request: region_request::Body) -> Result<AffectedRows> {
|
||||
let request_type = request.as_ref().to_string();
|
||||
|
||||
let request = RegionRequest {
|
||||
header: Some(RegionRequestHeader {
|
||||
trace_id: self.trace_id,
|
||||
span_id: self.span_id,
|
||||
}),
|
||||
body: Some(request),
|
||||
};
|
||||
|
||||
let _timer = timer!(
|
||||
metrics::METRIC_REGION_REQUEST_GRPC,
|
||||
&[("request_type", request_type)]
|
||||
);
|
||||
|
||||
let mut client = self.client.raw_region_client()?;
|
||||
|
||||
let RegionResponse {
|
||||
header,
|
||||
affected_rows,
|
||||
} = client.handle(request).await?.into_inner();
|
||||
|
||||
check_response_header(header)?;
|
||||
|
||||
Ok(affected_rows)
|
||||
}
|
||||
}
|
||||
|
||||
fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
|
||||
let status = header
|
||||
.and_then(|header| header.status)
|
||||
.context(IllegalDatabaseResponseSnafu {
|
||||
err_msg: "either response header or status is missing",
|
||||
})?;
|
||||
|
||||
if StatusCode::is_success(status.status_code) {
|
||||
Ok(())
|
||||
} else {
|
||||
let code =
|
||||
StatusCode::from_u32(status.status_code).context(IllegalDatabaseResponseSnafu {
|
||||
err_msg: format!("unknown server status: {:?}", status),
|
||||
})?;
|
||||
ServerSnafu {
|
||||
code,
|
||||
msg: status.err_msg,
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use api::v1::Status as PbStatus;
|
||||
|
||||
use super::*;
|
||||
use crate::Error::{IllegalDatabaseResponse, Server};
|
||||
|
||||
#[test]
|
||||
fn test_check_response_header() {
|
||||
let result = check_response_header(None);
|
||||
assert!(matches!(
|
||||
result.unwrap_err(),
|
||||
IllegalDatabaseResponse { .. }
|
||||
));
|
||||
|
||||
let result = check_response_header(Some(ResponseHeader { status: None }));
|
||||
assert!(matches!(
|
||||
result.unwrap_err(),
|
||||
IllegalDatabaseResponse { .. }
|
||||
));
|
||||
|
||||
let result = check_response_header(Some(ResponseHeader {
|
||||
status: Some(PbStatus {
|
||||
status_code: StatusCode::Success as u32,
|
||||
err_msg: "".to_string(),
|
||||
}),
|
||||
}));
|
||||
assert!(result.is_ok());
|
||||
|
||||
let result = check_response_header(Some(ResponseHeader {
|
||||
status: Some(PbStatus {
|
||||
status_code: u32::MAX,
|
||||
err_msg: "".to_string(),
|
||||
}),
|
||||
}));
|
||||
assert!(matches!(
|
||||
result.unwrap_err(),
|
||||
IllegalDatabaseResponse { .. }
|
||||
));
|
||||
|
||||
let result = check_response_header(Some(ResponseHeader {
|
||||
status: Some(PbStatus {
|
||||
status_code: StatusCode::Internal as u32,
|
||||
err_msg: "blabla".to_string(),
|
||||
}),
|
||||
}));
|
||||
let Server { code, msg } = result.unwrap_err() else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(code, StatusCode::Internal);
|
||||
assert_eq!(msg, "blabla");
|
||||
}
|
||||
}
|
||||
@@ -205,7 +205,7 @@ impl MigrateTableMetadata {

async fn migrate_schema_key(&self, key: &v1SchemaKey) -> Result<()> {
let new_key = SchemaNameKey::new(&key.catalog_name, &key.schema_name);
let schema_name_value = SchemaNameValue;
let schema_name_value = SchemaNameValue::default();

info!("Creating '{new_key}'");
@@ -35,6 +35,12 @@ pub const INFORMATION_SCHEMA_TABLES_TABLE_ID: u32 = 3;
pub const INFORMATION_SCHEMA_COLUMNS_TABLE_ID: u32 = 4;

pub const MITO_ENGINE: &str = "mito";
pub const MITO2_ENGINE: &str = "mito2";

pub fn default_engine() -> &'static str {
MITO_ENGINE
}

pub const IMMUTABLE_FILE_ENGINE: &str = "file";

pub const SEMANTIC_TYPE_PRIMARY_KEY: &str = "TAG";
@@ -27,7 +27,7 @@ orc-rust = "0.2"
paste = "1.0"
regex = "1.7"
snafu.workspace = true
strum = { version = "0.21", features = ["derive"] }
strum.workspace = true
tokio-util.workspace = true
tokio.workspace = true
url = "2.3"
@@ -6,4 +6,4 @@ license.workspace = true

[dependencies]
snafu = { version = "0.7", features = ["backtraces"] }
strum = { version = "0.24", features = ["std", "derive"] }
strum.workspace = true
@@ -14,6 +14,7 @@ common-error = { workspace = true }
common-recordbatch = { workspace = true }
common-runtime = { workspace = true }
common-telemetry = { workspace = true }
common-time = { workspace = true }
dashmap = "5.4"
datafusion.workspace = true
datatypes = { workspace = true }
@@ -75,6 +75,9 @@ pub enum Error {
location: Location,
source: datatypes::error::Error,
},

#[snafu(display("Not supported: {}", feat))]
NotSupported { feat: String },
}

impl ErrorExt for Error {
@@ -83,7 +86,8 @@ impl ErrorExt for Error {
Error::InvalidTlsConfig { .. }
| Error::InvalidConfigFilePath { .. }
| Error::TypeMismatch { .. }
| Error::InvalidFlightData { .. } => StatusCode::InvalidArguments,
| Error::InvalidFlightData { .. }
| Error::NotSupported { .. } => StatusCode::InvalidArguments,

Error::CreateChannel { .. }
| Error::Conversion { .. }
@@ -18,9 +18,11 @@ use std::fmt::Display;
use api::helper::values_with_capacity;
use api::v1::{Column, ColumnDataType, SemanticType};
use common_base::BitVec;
use common_time::timestamp::TimeUnit;
use snafu::ensure;

use crate::error::{Result, TypeMismatchSnafu};
use crate::Error;

type ColumnName = String;

@@ -259,6 +261,24 @@ impl Display for Precision {
}
}

impl TryFrom<Precision> for TimeUnit {
type Error = Error;

fn try_from(precision: Precision) -> std::result::Result<Self, Self::Error> {
Ok(match precision {
Precision::Second => TimeUnit::Second,
Precision::Millisecond => TimeUnit::Millisecond,
Precision::Microsecond => TimeUnit::Microsecond,
Precision::Nanosecond => TimeUnit::Nanosecond,
_ => {
return Err(Error::NotSupported {
feat: format!("convert {precision} into TimeUnit"),
})
}
})
}
}

#[cfg(test)]
mod tests {
use api::v1::{ColumnDataType, SemanticType};
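The new `TryFrom` impl maps a line-protocol `Precision` onto `TimeUnit` and rejects precisions with no timestamp counterpart via the `NotSupported` error added in `common-grpc`. A self-contained sketch of the same conversion pattern; the `Hour` variant and the plain string error are assumptions made for illustration:

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
enum TimeUnit { Second, Millisecond, Microsecond, Nanosecond }

// Simplified stand-in for the crate's `Precision` enum; the real one may
// carry more variants than shown here.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy)]
enum Precision { Second, Millisecond, Microsecond, Nanosecond, Hour }

impl TryFrom<Precision> for TimeUnit {
    type Error = String;

    fn try_from(precision: Precision) -> Result<Self, Self::Error> {
        Ok(match precision {
            Precision::Second => TimeUnit::Second,
            Precision::Millisecond => TimeUnit::Millisecond,
            Precision::Microsecond => TimeUnit::Microsecond,
            Precision::Nanosecond => TimeUnit::Nanosecond,
            // Anything without a timestamp counterpart is rejected,
            // mirroring the `Error::NotSupported` arm in the diff.
            other => return Err(format!("cannot convert {other:?} into TimeUnit")),
        })
    }
}

fn main() {
    assert_eq!(TimeUnit::try_from(Precision::Millisecond), Ok(TimeUnit::Millisecond));
    assert!(TimeUnit::try_from(Precision::Hour).is_err());
}
```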
@@ -15,6 +15,7 @@ common-telemetry = { workspace = true }
common-time = { workspace = true }
etcd-client.workspace = true
futures.workspace = true
humantime-serde.workspace = true
lazy_static.workspace = true
prost.workspace = true
regex.workspace = true
@@ -54,6 +54,13 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to parse value {} into key {}", value, key))]
ParseOption {
key: String,
value: String,
location: Location,
},

#[snafu(display("Corrupted table route data, err: {}", err_msg))]
RouteInfoCorrupted { err_msg: String, location: Location },

@@ -151,6 +158,7 @@ impl ErrorExt for Error {
IllegalServerState { .. } | EtcdTxnOpResponse { .. } => StatusCode::Internal,

SerdeJson { .. }
| ParseOption { .. }
| RouteInfoCorrupted { .. }
| InvalidProtoMsg { .. }
| InvalidTableMetadata { .. }
@@ -494,15 +494,35 @@ macro_rules! impl_table_meta_value {
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_optional_meta_value {
|
||||
($($val_ty: ty), *) => {
|
||||
$(
|
||||
impl $val_ty {
|
||||
pub fn try_from_raw_value(raw_value: &[u8]) -> Result<Option<Self>> {
|
||||
serde_json::from_slice(raw_value).context(SerdeJsonSnafu)
|
||||
}
|
||||
|
||||
pub fn try_as_raw_value(&self) -> Result<Vec<u8>> {
|
||||
serde_json::to_vec(self).context(SerdeJsonSnafu)
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
impl_table_meta_value! {
|
||||
CatalogNameValue,
|
||||
SchemaNameValue,
|
||||
TableNameValue,
|
||||
TableInfoValue,
|
||||
DatanodeTableValue,
|
||||
TableRouteValue
|
||||
}
|
||||
|
||||
impl_optional_meta_value! {
|
||||
CatalogNameValue,
|
||||
SchemaNameValue
|
||||
}
|
||||
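For illustration, a hedged sketch of what the methods generated by `impl_optional_meta_value!` look like from a caller's side. `DemoValue` is a made-up stand-in for `SchemaNameValue`, and `serde_json::Result` replaces the crate's snafu-based `Result`; the key point is that decoding tolerates a JSON `null` and returns `Ok(None)` instead of an error.

```rust
use serde::{Deserialize, Serialize};

// Stand-in for a value type the `impl_optional_meta_value!` macro would cover.
#[derive(Debug, Default, PartialEq, Serialize, Deserialize)]
struct DemoValue {
    ttl_secs: Option<u64>,
}

impl DemoValue {
    // Mirrors the generated `try_from_raw_value`: a JSON `null` decodes to `None`.
    fn try_from_raw_value(raw: &[u8]) -> serde_json::Result<Option<Self>> {
        serde_json::from_slice(raw)
    }

    // Mirrors the generated `try_as_raw_value`.
    fn try_as_raw_value(&self) -> serde_json::Result<Vec<u8>> {
        serde_json::to_vec(self)
    }
}

fn main() -> serde_json::Result<()> {
    let value = DemoValue { ttl_secs: Some(10) };
    let bytes = value.try_as_raw_value()?;
    assert_eq!(DemoValue::try_from_raw_value(&bytes)?, Some(value));
    assert_eq!(DemoValue::try_from_raw_value(b"null")?, None);
    Ok(())
}
```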
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
@@ -12,22 +12,27 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use futures::stream::BoxStream;
|
||||
use futures::StreamExt;
|
||||
use humantime_serde::re::humantime;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{self, Error, InvalidTableMetadataSnafu, Result};
|
||||
use crate::error::{self, Error, InvalidTableMetadataSnafu, ParseOptionSnafu, Result};
|
||||
use crate::key::{TableMetaKey, SCHEMA_NAME_KEY_PATTERN, SCHEMA_NAME_KEY_PREFIX};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE};
|
||||
use crate::rpc::store::{PutRequest, RangeRequest};
|
||||
use crate::rpc::KeyValue;
|
||||
|
||||
const OPT_KEY_TTL: &str = "ttl";
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct SchemaNameKey<'a> {
|
||||
pub catalog: &'a str,
|
||||
@@ -43,8 +48,33 @@ impl<'a> Default for SchemaNameKey<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SchemaNameValue;
|
||||
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SchemaNameValue {
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub ttl: Option<Duration>,
|
||||
}
|
||||
|
||||
impl TryFrom<&HashMap<String, String>> for SchemaNameValue {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &HashMap<String, String>) -> std::result::Result<Self, Self::Error> {
|
||||
let ttl = value
|
||||
.get(OPT_KEY_TTL)
|
||||
.map(|ttl_str| {
|
||||
ttl_str.parse::<humantime::Duration>().map_err(|_| {
|
||||
ParseOptionSnafu {
|
||||
key: OPT_KEY_TTL,
|
||||
value: ttl_str.clone(),
|
||||
}
|
||||
.build()
|
||||
})
|
||||
})
|
||||
.transpose()?
|
||||
.map(|ttl| ttl.into());
|
||||
Ok(Self { ttl })
|
||||
}
|
||||
}
|
||||
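A small, self-contained sketch of the `ttl` option parsing above, assuming only the `humantime` crate (which the real code reaches through `humantime_serde::re::humantime`). `parse_ttl` and its `String` error are illustrative stand-ins for `SchemaNameValue::try_from` and `ParseOptionSnafu`.

```rust
use std::collections::HashMap;
use std::time::Duration;

// Parses an optional "ttl" entry such as "10s" or "3h" into a `Duration`,
// mirroring the `TryFrom<&HashMap<String, String>>` impl above.
fn parse_ttl(opts: &HashMap<String, String>) -> Result<Option<Duration>, String> {
    opts.get("ttl")
        .map(|s| {
            humantime::parse_duration(s)
                .map_err(|e| format!("failed to parse value {s} into key ttl: {e}"))
        })
        .transpose()
}

fn main() {
    let mut opts = HashMap::new();
    opts.insert("ttl".to_string(), "10s".to_string());
    assert_eq!(parse_ttl(&opts), Ok(Some(Duration::from_secs(10))));
    assert_eq!(parse_ttl(&HashMap::new()), Ok(None));
}
```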
|
||||
impl<'a> SchemaNameKey<'a> {
|
||||
pub fn new(catalog: &'a str, schema: &'a str) -> Self {
|
||||
@@ -108,11 +138,15 @@ impl SchemaManager {
|
||||
}
|
||||
|
||||
/// Creates `SchemaNameKey`.
|
||||
pub async fn create(&self, schema: SchemaNameKey<'_>) -> Result<()> {
|
||||
pub async fn create(
|
||||
&self,
|
||||
schema: SchemaNameKey<'_>,
|
||||
value: Option<SchemaNameValue>,
|
||||
) -> Result<()> {
|
||||
let raw_key = schema.as_raw_key();
|
||||
let req = PutRequest::new()
|
||||
.with_key(raw_key)
|
||||
.with_value(SchemaNameValue.try_as_raw_value()?);
|
||||
.with_value(value.unwrap_or_default().try_as_raw_value()?);
|
||||
|
||||
self.kv_backend.put(req).await?;
|
||||
|
||||
@@ -125,6 +159,14 @@ impl SchemaManager {
|
||||
Ok(self.kv_backend.get(&raw_key).await?.is_some())
|
||||
}
|
||||
|
||||
pub async fn get(&self, schema: SchemaNameKey<'_>) -> Result<Option<SchemaNameValue>> {
|
||||
let raw_key = schema.as_raw_key();
|
||||
let value = self.kv_backend.get(&raw_key).await?;
|
||||
value
|
||||
.and_then(|v| SchemaNameValue::try_from_raw_value(v.value.as_ref()).transpose())
|
||||
.transpose()
|
||||
}
|
||||
|
||||
/// Returns a schema stream that lists all schemas belonging to the target `catalog`.
|
||||
pub async fn schema_names(&self, catalog: &str) -> BoxStream<'static, Result<String>> {
|
||||
let start_key = SchemaNameKey::range_start_key(catalog);
|
||||
@@ -143,25 +185,39 @@ impl SchemaManager {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
|
||||
#[test]
|
||||
fn test_serialization() {
|
||||
let key = SchemaNameKey::new("my-catalog", "my-schema");
|
||||
|
||||
assert_eq!(key.to_string(), "__schema_name/my-catalog/my-schema");
|
||||
|
||||
let parsed: SchemaNameKey<'_> = "__schema_name/my-catalog/my-schema".try_into().unwrap();
|
||||
|
||||
assert_eq!(key, parsed);
|
||||
|
||||
let value = SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(10)),
|
||||
};
|
||||
let mut opts: HashMap<String, String> = HashMap::new();
|
||||
opts.insert("ttl".to_string(), "10s".to_string());
|
||||
let from_value = SchemaNameValue::try_from(&opts).unwrap();
|
||||
assert_eq!(value, from_value);
|
||||
|
||||
let parsed = SchemaNameValue::try_from_raw_value("{\"ttl\":\"10s\"}".as_bytes()).unwrap();
|
||||
assert_eq!(Some(value), parsed);
|
||||
let none = SchemaNameValue::try_from_raw_value("null".as_bytes()).unwrap();
|
||||
assert!(none.is_none());
|
||||
let err_empty = SchemaNameValue::try_from_raw_value("".as_bytes());
|
||||
assert!(err_empty.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_key_exist() {
|
||||
let manager = SchemaManager::new(Arc::new(MemoryKvBackend::default()));
|
||||
let schema_key = SchemaNameKey::new("my-catalog", "my-schema");
|
||||
manager.create(schema_key).await.unwrap();
|
||||
manager.create(schema_key, None).await.unwrap();
|
||||
|
||||
assert!(manager.exist(schema_key).await.unwrap());
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ common-error = { workspace = true }
|
||||
common-runtime = { workspace = true }
|
||||
common-telemetry = { workspace = true }
|
||||
futures.workspace = true
|
||||
humantime-serde = "1.1"
|
||||
humantime-serde.workspace = true
|
||||
object-store = { workspace = true }
|
||||
serde.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
@@ -286,6 +286,7 @@ mod test {
|
||||
use snafu::IntoError;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::RecordBatches;
|
||||
|
||||
#[tokio::test]
|
||||
@@ -354,20 +355,24 @@ mod test {
|
||||
.into_error(BoxedError::new(MockError::new(StatusCode::Unknown)))),
|
||||
]));
|
||||
let adapter = AsyncRecordBatchStreamAdapter::new(schema.clone(), poll_err_stream);
|
||||
let result = RecordBatches::try_collect(Box::pin(adapter)).await;
|
||||
assert_eq!(
|
||||
result.unwrap_err().to_string(),
|
||||
"External error, source: Unknown",
|
||||
let err = RecordBatches::try_collect(Box::pin(adapter))
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(
|
||||
matches!(err, Error::External { .. }),
|
||||
"unexpected err {err}"
|
||||
);
|
||||
|
||||
let failed_to_init_stream =
|
||||
new_future_stream(Err(error::ExternalSnafu
|
||||
.into_error(BoxedError::new(MockError::new(StatusCode::Internal)))));
|
||||
let adapter = AsyncRecordBatchStreamAdapter::new(schema.clone(), failed_to_init_stream);
|
||||
let result = RecordBatches::try_collect(Box::pin(adapter)).await;
|
||||
assert_eq!(
|
||||
result.unwrap_err().to_string(),
|
||||
"External error, source: Internal",
|
||||
let err = RecordBatches::try_collect(Box::pin(adapter))
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(
|
||||
matches!(err, Error::External { .. }),
|
||||
"unexpected err {err}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ pub enum Error {
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("External error, source: {}", source))]
|
||||
#[snafu(display("External error, location: {}, source: {}", location, source))]
|
||||
External {
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
|
||||
@@ -202,13 +202,26 @@ impl Stream for SimpleRecordBatchStream {
|
||||
}
|
||||
|
||||
/// Adapt a [Stream] of [RecordBatch] to a [RecordBatchStream].
|
||||
pub struct RecordBatchStreamAdaptor {
|
||||
pub struct RecordBatchStreamAdaptor<S> {
|
||||
pub schema: SchemaRef,
|
||||
pub stream: Pin<Box<dyn Stream<Item = Result<RecordBatch>> + Send>>,
|
||||
pub stream: S,
|
||||
pub output_ordering: Option<Vec<OrderOption>>,
|
||||
}
|
||||
|
||||
impl RecordBatchStream for RecordBatchStreamAdaptor {
|
||||
impl<S> RecordBatchStreamAdaptor<S> {
|
||||
/// Creates a RecordBatchStreamAdaptor without an output ordering requirement.
|
||||
pub fn new(schema: SchemaRef, stream: S) -> RecordBatchStreamAdaptor<S> {
|
||||
RecordBatchStreamAdaptor {
|
||||
schema,
|
||||
stream,
|
||||
output_ordering: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Stream<Item = Result<RecordBatch>> + Unpin> RecordBatchStream
|
||||
for RecordBatchStreamAdaptor<S>
|
||||
{
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
@@ -218,7 +231,7 @@ impl RecordBatchStream for RecordBatchStreamAdaptor {
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for RecordBatchStreamAdaptor {
|
||||
impl<S: Stream<Item = Result<RecordBatch>> + Unpin> Stream for RecordBatchStreamAdaptor<S> {
|
||||
type Item = Result<RecordBatch>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
|
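The change above makes `RecordBatchStreamAdaptor` generic over its inner stream instead of holding a boxed trait object. Below is a hedged sketch of the same design with `SchemaRef` and `RecordBatch` replaced by toy stand-ins: the adaptor delegates `poll_next` to the concrete stream it wraps, so callers keep static dispatch and avoid the extra allocation.

```rust
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};

use futures::stream::{self, Stream, StreamExt};

// Illustrative stand-ins; the real adaptor wraps GreptimeDB's schema and record batches.
type SchemaRef = Arc<String>;
type RecordBatch = Vec<i64>;

// Generic over the inner stream type instead of boxing it, mirroring the diff above.
struct Adaptor<S> {
    schema: SchemaRef,
    stream: S,
}

impl<S: Stream<Item = RecordBatch> + Unpin> Stream for Adaptor<S> {
    type Item = RecordBatch;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // Delegate polling to the wrapped stream.
        Pin::new(&mut self.stream).poll_next(cx)
    }
}

fn main() {
    let adaptor = Adaptor {
        schema: Arc::new("ts, value".to_string()),
        stream: stream::iter(vec![vec![1, 2], vec![3]]),
    };
    let batches: Vec<RecordBatch> = futures::executor::block_on(adaptor.collect());
    assert_eq!(batches, vec![vec![1, 2], vec![3]]);
}
```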
||||
@@ -40,7 +40,7 @@ datatypes = { workspace = true }
|
||||
file-table-engine = { workspace = true }
|
||||
futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
humantime-serde = "1.1"
|
||||
humantime-serde.workspace = true
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
key-lock = "0.1"
|
||||
log-store = { workspace = true }
|
||||
|
||||
@@ -556,6 +556,16 @@ pub enum Error {
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to build region requests, location:{}, source: {}",
|
||||
location,
|
||||
source
|
||||
))]
|
||||
BuildRegionRequests {
|
||||
location: Location,
|
||||
source: store_api::metadata::MetadataError,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -569,6 +579,7 @@ impl ErrorExt for Error {
|
||||
| ExecuteStatement { source, .. }
|
||||
| ExecuteLogicalPlan { source, .. } => source.status_code(),
|
||||
|
||||
BuildRegionRequests { source, .. } => source.status_code(),
|
||||
HandleHeartbeatResponse { source, .. } => source.status_code(),
|
||||
|
||||
DecodeLogicalPlan { source, .. } => source.status_code(),
|
||||
|
||||
@@ -15,9 +15,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::error::Error as CatalogError;
|
||||
use catalog::error::{Error as CatalogError, Result as CatalogResult};
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::{CatalogManagerRef, RegisterTableRequest};
|
||||
use catalog::{CatalogManagerRef, RegisterSchemaRequest, RegisterTableRequest};
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_meta::error::Result as MetaResult;
|
||||
use common_meta::heartbeat::handler::{
|
||||
@@ -30,6 +30,7 @@ use store_api::storage::RegionNumber;
|
||||
use table::engine::manager::TableEngineManagerRef;
|
||||
use table::engine::EngineContext;
|
||||
use table::requests::OpenTableRequest;
|
||||
use table::Table;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
|
||||
@@ -157,6 +158,45 @@ impl OpenRegionHandler {
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
async fn register_table(
|
||||
&self,
|
||||
request: &OpenTableRequest,
|
||||
table: Arc<dyn Table>,
|
||||
) -> CatalogResult<bool> {
|
||||
if !self
|
||||
.catalog_manager
|
||||
.catalog_exist(&request.catalog_name)
|
||||
.await?
|
||||
{
|
||||
self.catalog_manager
|
||||
.clone()
|
||||
.register_catalog(request.catalog_name.to_string())
|
||||
.await?;
|
||||
}
|
||||
|
||||
if !self
|
||||
.catalog_manager
|
||||
.schema_exist(&request.catalog_name, &request.schema_name)
|
||||
.await?
|
||||
{
|
||||
self.catalog_manager
|
||||
.register_schema(RegisterSchemaRequest {
|
||||
catalog: request.catalog_name.to_string(),
|
||||
schema: request.schema_name.to_string(),
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
|
||||
let request = RegisterTableRequest {
|
||||
catalog: request.catalog_name.to_string(),
|
||||
schema: request.schema_name.to_string(),
|
||||
table_name: request.table_name.to_string(),
|
||||
table_id: request.table_id,
|
||||
table,
|
||||
};
|
||||
self.catalog_manager.register_table(request).await
|
||||
}
|
||||
|
||||
async fn open_region_inner(&self, engine: String, request: OpenTableRequest) -> Result<bool> {
|
||||
let OpenTableRequest {
|
||||
catalog_name,
|
||||
@@ -187,14 +227,8 @@ impl OpenRegionHandler {
|
||||
table_name: format_full_table_name(catalog_name, schema_name, table_name),
|
||||
})?
|
||||
{
|
||||
let request = RegisterTableRequest {
|
||||
catalog: request.catalog_name.clone(),
|
||||
schema: request.schema_name.clone(),
|
||||
table_name: request.table_name.clone(),
|
||||
table_id: request.table_id,
|
||||
table,
|
||||
};
|
||||
let result = self.catalog_manager.register_table(request).await;
|
||||
let result = self.register_table(&request, table).await;
|
||||
|
||||
match result {
|
||||
Ok(_) | Err(CatalogError::TableExists { .. }) => Ok(true),
|
||||
e => e.with_context(|_| error::RegisterTableSnafu {
|
||||
|
||||
@@ -365,6 +365,7 @@ mod test {
|
||||
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
|
||||
database_name: "my_database".to_string(),
|
||||
create_if_not_exists: true,
|
||||
options: Default::default(),
|
||||
})),
|
||||
});
|
||||
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
|
||||
@@ -418,6 +419,7 @@ mod test {
|
||||
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
|
||||
database_name: "my_database".to_string(),
|
||||
create_if_not_exists: true,
|
||||
options: Default::default(),
|
||||
})),
|
||||
});
|
||||
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
|
||||
@@ -485,6 +487,7 @@ mod test {
|
||||
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
|
||||
database_name: "my_database".to_string(),
|
||||
create_if_not_exists: true,
|
||||
options: Default::default(),
|
||||
})),
|
||||
});
|
||||
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
|
||||
@@ -589,6 +592,7 @@ mod test {
|
||||
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
|
||||
database_name: "my_database".to_string(),
|
||||
create_if_not_exists: true,
|
||||
options: Default::default(),
|
||||
})),
|
||||
});
|
||||
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
|
||||
@@ -661,6 +665,7 @@ mod test {
|
||||
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
|
||||
database_name: "my_database".to_string(),
|
||||
create_if_not_exists: true,
|
||||
options: Default::default(),
|
||||
})),
|
||||
});
|
||||
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
|
||||
|
||||
@@ -16,15 +16,18 @@ use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex, RwLock};
|
||||
|
||||
use api::v1::region::region_request::Request as RequestBody;
|
||||
use api::v1::region::{QueryRequest, RegionResponse};
|
||||
use api::v1::region::{region_request, QueryRequest, RegionResponse};
|
||||
use api::v1::{ResponseHeader, Status};
|
||||
use arrow_flight::{FlightData, Ticket};
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::logical_plan::Expr;
|
||||
use common_query::physical_plan::DfPhysicalPlanAdapter;
|
||||
use common_query::{DfPhysicalPlan, Output};
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_runtime::Runtime;
|
||||
use common_telemetry::info;
|
||||
use dashmap::DashMap;
|
||||
use datafusion::catalog::schema::SchemaProvider;
|
||||
@@ -35,10 +38,10 @@ use datafusion::execution::context::SessionState;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datafusion_expr::{Expr as DfExpr, TableType};
|
||||
use datatypes::arrow::datatypes::SchemaRef;
|
||||
use futures_util::future::try_join_all;
|
||||
use prost::Message;
|
||||
use query::QueryEngineRef;
|
||||
use servers::error as servers_error;
|
||||
use servers::error::Result as ServerResult;
|
||||
use servers::error::{self as servers_error, ExecuteGrpcRequestSnafu, Result as ServerResult};
|
||||
use servers::grpc::flight::{FlightCraft, FlightRecordBatchStream, TonicStream};
|
||||
use servers::grpc::region_server::RegionServerHandler;
|
||||
use session::context::QueryContext;
|
||||
@@ -52,9 +55,9 @@ use table::table::scan::StreamScanAdapter;
|
||||
use tonic::{Request, Response, Result as TonicResult};
|
||||
|
||||
use crate::error::{
|
||||
DecodeLogicalPlanSnafu, ExecuteLogicalPlanSnafu, GetRegionMetadataSnafu,
|
||||
HandleRegionRequestSnafu, RegionEngineNotFoundSnafu, RegionNotFoundSnafu, Result,
|
||||
UnsupportedOutputSnafu,
|
||||
BuildRegionRequestsSnafu, DecodeLogicalPlanSnafu, ExecuteLogicalPlanSnafu,
|
||||
GetRegionMetadataSnafu, HandleRegionRequestSnafu, RegionEngineNotFoundSnafu,
|
||||
RegionNotFoundSnafu, Result, UnsupportedOutputSnafu,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -63,9 +66,9 @@ pub struct RegionServer {
|
||||
}
|
||||
|
||||
impl RegionServer {
|
||||
pub fn new(query_engine: QueryEngineRef) -> Self {
|
||||
pub fn new(query_engine: QueryEngineRef, runtime: Arc<Runtime>) -> Self {
|
||||
Self {
|
||||
inner: Arc::new(RegionServerInner::new(query_engine)),
|
||||
inner: Arc::new(RegionServerInner::new(query_engine, runtime)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,8 +91,47 @@ impl RegionServer {
|
||||
|
||||
#[async_trait]
|
||||
impl RegionServerHandler for RegionServer {
|
||||
async fn handle(&self, _request: RequestBody) -> ServerResult<RegionResponse> {
|
||||
todo!()
|
||||
async fn handle(&self, request: region_request::Body) -> ServerResult<RegionResponse> {
|
||||
let requests = RegionRequest::try_from_request_body(request)
|
||||
.context(BuildRegionRequestsSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?;
|
||||
let join_tasks = requests.into_iter().map(|(region_id, req)| {
|
||||
let self_to_move = self.clone();
|
||||
self.inner
|
||||
.runtime
|
||||
.spawn(async move { self_to_move.handle_request(region_id, req).await })
|
||||
});
|
||||
|
||||
let results = try_join_all(join_tasks)
|
||||
.await
|
||||
.context(servers_error::JoinTaskSnafu)?;
|
||||
|
||||
// merge the results by simply summing up the affected rows.
|
||||
// only insert and delete requests produce multiple results.
|
||||
let mut affected_rows = 0;
|
||||
for result in results {
|
||||
match result
|
||||
.map_err(BoxedError::new)
|
||||
.context(servers_error::ExecuteGrpcRequestSnafu)?
|
||||
{
|
||||
Output::AffectedRows(rows) => affected_rows += rows,
|
||||
Output::Stream(_) | Output::RecordBatches(_) => {
|
||||
// TODO: change the output type to only contain `affected_rows`
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(RegionResponse {
|
||||
header: Some(ResponseHeader {
|
||||
status: Some(Status {
|
||||
status_code: StatusCode::Success as _,
|
||||
..Default::default()
|
||||
}),
|
||||
}),
|
||||
affected_rows: affected_rows as _,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
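A simplified sketch of the fan-out pattern in `handle` above: one task per region request is spawned on a runtime, the tasks are joined, and the affected-row counts are summed. `Output`, `handle_region`, and the use of `#[tokio::main]` are illustrative assumptions, not the server's actual types.

```rust
use futures::future::try_join_all;

// Stand-in for the per-region output: only the affected-row count matters here.
enum Output {
    AffectedRows(usize),
}

async fn handle_region(_region_id: u64, rows: usize) -> Result<Output, String> {
    // In the real server this dispatches to the region engine.
    Ok(Output::AffectedRows(rows))
}

#[tokio::main]
async fn main() -> Result<(), String> {
    let requests = vec![(1_u64, 2_usize), (2, 3), (3, 5)];

    // Fan out: one task per region request, joined afterwards.
    let tasks = requests
        .into_iter()
        .map(|(region_id, rows)| tokio::spawn(handle_region(region_id, rows)));
    let results = try_join_all(tasks).await.map_err(|e| e.to_string())?;

    // Merge by summing affected rows, as only insert/delete produce multiple results.
    let mut affected_rows = 0;
    for result in results {
        match result? {
            Output::AffectedRows(rows) => affected_rows += rows,
        }
    }
    assert_eq!(affected_rows, 10);
    Ok(())
}
```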
@@ -114,14 +156,16 @@ struct RegionServerInner {
|
||||
engines: RwLock<HashMap<String, RegionEngineRef>>,
|
||||
region_map: DashMap<RegionId, RegionEngineRef>,
|
||||
query_engine: QueryEngineRef,
|
||||
runtime: Arc<Runtime>,
|
||||
}
|
||||
|
||||
impl RegionServerInner {
|
||||
pub fn new(query_engine: QueryEngineRef) -> Self {
|
||||
pub fn new(query_engine: QueryEngineRef, runtime: Arc<Runtime>) -> Self {
|
||||
Self {
|
||||
engines: RwLock::new(HashMap::new()),
|
||||
region_map: DashMap::new(),
|
||||
query_engine,
|
||||
runtime,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ impl Services {
|
||||
.context(RuntimeResourceSnafu)?,
|
||||
);
|
||||
|
||||
let region_server = RegionServer::new(instance.query_engine());
|
||||
let region_server = RegionServer::new(instance.query_engine(), grpc_runtime.clone());
|
||||
let flight_handler = if enable_region_server {
|
||||
Some(Arc::new(region_server.clone()) as _)
|
||||
} else {
|
||||
|
||||
@@ -20,6 +20,7 @@ use api::v1::greptime_request::Request as GrpcRequest;
|
||||
use api::v1::meta::HeartbeatResponse;
|
||||
use api::v1::query_request::Query;
|
||||
use api::v1::QueryRequest;
|
||||
use catalog::local::MemoryCatalogManager;
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_meta::heartbeat::handler::{
|
||||
@@ -160,8 +161,10 @@ async fn test_open_region_handler() {
|
||||
let table_ident = ®ion_ident.table_ident;
|
||||
|
||||
let table = prepare_table(instance.inner()).await;
|
||||
|
||||
let dummy_catalog_manager = MemoryCatalogManager::with_default_setup();
|
||||
region_alive_keepers
|
||||
.register_table(table_ident.clone(), table)
|
||||
.register_table(table_ident.clone(), table, dummy_catalog_manager)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -182,8 +185,8 @@ async fn test_open_region_handler() {
|
||||
|
||||
// Opens a non-exist table
|
||||
let non_exist_table_ident = TableIdent {
|
||||
catalog: "greptime".to_string(),
|
||||
schema: "public".to_string(),
|
||||
catalog: "foo".to_string(),
|
||||
schema: "non-exist".to_string(),
|
||||
table: "non-exist".to_string(),
|
||||
table_id: 2024,
|
||||
engine: "mito".to_string(),
|
||||
|
||||
25
src/flow/Cargo.toml
Normal file
@@ -0,0 +1,25 @@
|
||||
[package]
|
||||
name = "flow"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
# use versions from crates.io for now to prevent version skew
|
||||
# disable default features (they include `abomonation`, which we don't need for IPC)
|
||||
# timely = {version = "0.12.0", default-features = false, features = ["bincode"]}
|
||||
# differential-dataflow = "0.12.0"
|
||||
# timely = "0.12.0"
|
||||
# differential-dataflow = "0.12.0"
|
||||
# TODO(discord9): fork later to pin the git dependency to a fixed version
|
||||
timely = { git = "https://github.com/TimelyDataflow/timely-dataflow", default-features = false, features = [
|
||||
"bincode",
|
||||
] }
|
||||
differential-dataflow = { git = "https://github.com/TimelyDataflow/differential-dataflow" } #, rev = "99fa67db" }
|
||||
datafusion-expr.workspace = true
|
||||
datafusion-substrait.workspace = true
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
datatypes = { path = "../datatypes" }
|
||||
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
3
src/flow/src/adapter/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
//! For getting data from sources and sending results to sinks,
|
||||
//! communicating with other parts of the database,
|
||||
//! and commanding the storage and computation layers.
|
||||
22
src/flow/src/compute/compute_state.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::expr::GlobalId;
|
||||
|
||||
/// Worker-local state that is maintained across dataflows.
|
||||
///
|
||||
/// This state is restricted to the COMPUTE state, the deterministic, idempotent work
|
||||
/// done between data ingress and egress.
|
||||
pub struct ComputeState {
|
||||
/// State kept for each installed compute collection.
|
||||
///
|
||||
/// Each collection has exactly one frontier.
|
||||
/// How the frontier is communicated depends on the collection type:
|
||||
/// * Frontiers of indexes are equal to the frontier of their corresponding traces in the
|
||||
/// `TraceManager`.
|
||||
/// * Persist sinks store their current frontier in `CollectionState::sink_write_frontier`.
|
||||
/// * Subscribes report their frontiers through the `subscribe_response_buffer`.
|
||||
pub collections: BTreeMap<GlobalId, CollectionState>,
|
||||
}
|
||||
|
||||
/// State maintained for a compute collection.
|
||||
pub struct CollectionState {}
|
||||
743
src/flow/src/compute/context.rs
Normal file
@@ -0,0 +1,743 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use differential_dataflow::lattice::Lattice;
|
||||
use differential_dataflow::operators::arrange::Arranged;
|
||||
use differential_dataflow::trace::wrappers::enter::TraceEnter;
|
||||
use differential_dataflow::trace::wrappers::frontier::TraceFrontier;
|
||||
use differential_dataflow::trace::{BatchReader, Cursor, TraceReader};
|
||||
use differential_dataflow::{Collection, Data};
|
||||
use timely::communication::message::RefOrMut;
|
||||
use timely::dataflow::operators::generic::OutputHandle;
|
||||
use timely::dataflow::operators::Capability;
|
||||
use timely::dataflow::scopes::Child;
|
||||
use timely::dataflow::{Scope, ScopeParent};
|
||||
use timely::progress::timestamp::Refines;
|
||||
use timely::progress::{Antichain, Timestamp};
|
||||
|
||||
use super::plan::Plan;
|
||||
use super::types::DataflowDescription;
|
||||
use crate::compute::render::RenderTimestamp;
|
||||
use crate::compute::typedefs::{TraceErrHandle, TraceRowHandle};
|
||||
use crate::expr::{GlobalId, Id, MapFilterProject, ScalarExpr};
|
||||
use crate::repr;
|
||||
use crate::repr::{Diff, Row};
|
||||
use crate::storage::errors::DataflowError;
|
||||
|
||||
// Local type definition to avoid the horror in signatures.
|
||||
pub(crate) type KeyArrangement<S, K, V> =
|
||||
Arranged<S, TraceRowHandle<K, V, <S as ScopeParent>::Timestamp, Diff>>;
|
||||
pub(crate) type Arrangement<S, V> = KeyArrangement<S, V, V>;
|
||||
pub(crate) type ErrArrangement<S> =
|
||||
Arranged<S, TraceErrHandle<DataflowError, <S as ScopeParent>::Timestamp, Diff>>;
|
||||
pub(crate) type ArrangementImport<S, V, T> = Arranged<
|
||||
S,
|
||||
TraceEnter<TraceFrontier<TraceRowHandle<V, V, T, Diff>>, <S as ScopeParent>::Timestamp>,
|
||||
>;
|
||||
pub(crate) type ErrArrangementImport<S, T> = Arranged<
|
||||
S,
|
||||
TraceEnter<
|
||||
TraceFrontier<TraceErrHandle<DataflowError, T, Diff>>,
|
||||
<S as ScopeParent>::Timestamp,
|
||||
>,
|
||||
>;
|
||||
|
||||
/// Describes flavor of arrangement: local or imported trace.
|
||||
#[derive(Clone)]
|
||||
pub enum ArrangementFlavor<S: Scope, V: Data, T = repr::Timestamp>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
/// A dataflow-local arrangement.
|
||||
Local(Arrangement<S, V>, ErrArrangement<S>),
|
||||
/// An imported trace from outside the dataflow.
|
||||
///
|
||||
/// The `GlobalId` identifier exists so that exports of this same trace
|
||||
/// can refer back to and depend on the original instance.
|
||||
Trace(
|
||||
GlobalId,
|
||||
ArrangementImport<S, V, T>,
|
||||
ErrArrangementImport<S, T>,
|
||||
),
|
||||
}
|
||||
|
||||
impl<S: Scope, T> ArrangementFlavor<S, Row, T>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
/// Presents `self` as a stream of updates.
|
||||
///
|
||||
/// This method presents the contents as they are, without further computation.
|
||||
/// If you have logic that could be applied to each record, consider using the
|
||||
/// `flat_map` methods which allows this and can reduce the work done.
|
||||
pub fn as_collection(&self) -> (Collection<S, Row, Diff>, Collection<S, DataflowError, Diff>) {
|
||||
match &self {
|
||||
ArrangementFlavor::Local(oks, errs) => (
|
||||
oks.as_collection(move |k: &Row, v: &Row| {
|
||||
// type annotated because rust-analyzer can't infer the type of these complex closures;
|
||||
// see https://github.com/rust-lang/rust-analyzer/issues/6338
|
||||
let mut k = k.clone();
|
||||
k.extend(v.clone().into_iter());
|
||||
k
|
||||
}),
|
||||
errs.as_collection(|k, &()| k.clone()),
|
||||
),
|
||||
ArrangementFlavor::Trace(_, oks, errs) => (
|
||||
oks.as_collection(move |k, v| {
|
||||
let mut k = k.clone();
|
||||
k.extend(v.clone().into_iter());
|
||||
k
|
||||
}),
|
||||
errs.as_collection(|k, &()| k.clone()),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs and applies logic to elements of `self` and returns the results.
|
||||
///
|
||||
/// `constructor` takes a permutation and produces the logic to apply on elements. The logic
|
||||
/// conceptually receives `(&Row, &Row)` pairs in the form of a slice. Only after borrowing
|
||||
/// the elements and applying the permutation the datums will be in the expected order.
|
||||
///
|
||||
/// If `key` is set, this is a promise that `logic` will produce no results on
|
||||
/// records for which the key does not evaluate to the value. This is used to
|
||||
/// leap directly to exactly those records.
|
||||
pub fn flat_map<I, C, L>(
|
||||
&self,
|
||||
key: Option<Row>,
|
||||
constructor: C,
|
||||
) -> (
|
||||
timely::dataflow::Stream<S, I::Item>,
|
||||
Collection<S, DataflowError, Diff>,
|
||||
)
|
||||
where
|
||||
I: IntoIterator,
|
||||
I::Item: Data,
|
||||
C: FnOnce() -> L,
|
||||
L: for<'a, 'b> FnMut(&'a [&'b RefOrMut<'b, Row>], &'a S::Timestamp, &'a Diff) -> I
|
||||
+ 'static,
|
||||
{
|
||||
// Set a number of tuples after which the operator should yield.
|
||||
// This allows us to remain responsive even when enumerating a substantial
|
||||
// arrangement, as well as provides time to accumulate our produced output.
|
||||
let refuel = 1000000;
|
||||
|
||||
match &self {
|
||||
ArrangementFlavor::Local(oks, errs) => {
|
||||
let mut logic = constructor();
|
||||
let oks = CollectionBundle::<S, Row, T>::flat_map_core(
|
||||
oks,
|
||||
key,
|
||||
move |k, v, t, d| logic(&[&k, &v], t, d),
|
||||
refuel,
|
||||
);
|
||||
let errs = errs.as_collection(|k, &()| k.clone());
|
||||
(oks, errs)
|
||||
}
|
||||
ArrangementFlavor::Trace(_, oks, errs) => {
|
||||
let mut logic = constructor();
|
||||
let oks = CollectionBundle::<S, Row, T>::flat_map_core(
|
||||
oks,
|
||||
key,
|
||||
move |k, v, t, d| logic(&[&k, &v], t, d),
|
||||
refuel,
|
||||
);
|
||||
let errs = errs.as_collection(|k, &()| k.clone());
|
||||
(oks, errs)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Scope, V: Data, T> ArrangementFlavor<S, V, T>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
pub fn scope(&self) -> S {
|
||||
match self {
|
||||
ArrangementFlavor::Local(oks, _errs) => oks.stream.scope(),
|
||||
ArrangementFlavor::Trace(_gid, oks, _errs) => oks.stream.scope(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Brings the arrangement flavor into a region.
|
||||
pub fn enter_region<'a>(
|
||||
&self,
|
||||
region: &Child<'a, S, S::Timestamp>,
|
||||
) -> ArrangementFlavor<Child<'a, S, S::Timestamp>, V, T> {
|
||||
match self {
|
||||
ArrangementFlavor::Local(oks, errs) => {
|
||||
ArrangementFlavor::Local(oks.enter_region(region), errs.enter_region(region))
|
||||
}
|
||||
ArrangementFlavor::Trace(gid, oks, errs) => {
|
||||
ArrangementFlavor::Trace(*gid, oks.enter_region(region), errs.enter_region(region))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, S: Scope, V: Data, T> ArrangementFlavor<Child<'a, S, S::Timestamp>, V, T>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
/// Extracts the arrangement flavor from a region.
|
||||
pub fn leave_region(&self) -> ArrangementFlavor<S, V, T> {
|
||||
match self {
|
||||
ArrangementFlavor::Local(oks, errs) => {
|
||||
ArrangementFlavor::Local(oks.leave_region(), errs.leave_region())
|
||||
}
|
||||
ArrangementFlavor::Trace(gid, oks, errs) => {
|
||||
ArrangementFlavor::Trace(*gid, oks.leave_region(), errs.leave_region())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Context<S, V: Data, T = repr::Timestamp>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S: Scope,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
/// The scope within which all managed collections exist.
|
||||
///
|
||||
/// It is an error to add any collections not contained in this scope.
|
||||
pub(crate) scope: S,
|
||||
/// The debug name of the dataflow associated with this context.
|
||||
pub debug_name: String,
|
||||
/// The Timely ID of the dataflow associated with this context.
|
||||
pub dataflow_id: usize,
|
||||
/// Frontier before which updates should not be emitted.
|
||||
///
|
||||
/// We *must* apply it to sinks, to ensure correct outputs.
|
||||
/// We *should* apply it to sources and imported traces, because it improves performance.
|
||||
pub since_frontier: Antichain<T>,
|
||||
/// Frontier after which updates should not be emitted.
|
||||
/// Used to limit the amount of work done when appropriate.
|
||||
pub until_frontier: Antichain<T>,
|
||||
/// Bindings of identifiers to collections.
|
||||
pub bindings: BTreeMap<Id, CollectionBundle<S, V, T>>,
|
||||
}
|
||||
|
||||
impl<S: Scope, V: Data> Context<S, V>
|
||||
where
|
||||
S::Timestamp: Lattice + Refines<repr::Timestamp>,
|
||||
{
|
||||
/// TODO(discord9): DataflowDesc & Plan, etc.
|
||||
/// Creates a new empty Context from the given dataflow
|
||||
pub fn for_dataflow_in<Plan>(dataflow: &DataflowDescription<Plan, ()>, scope: S) -> Self {
|
||||
let dataflow_id = scope.addr()[0];
|
||||
let since_frontier = dataflow
|
||||
.as_of
|
||||
.clone()
|
||||
.unwrap_or_else(|| Antichain::from_elem(Timestamp::minimum()));
|
||||
// TODO(discord9): get since_frontier and until_frontier from dataflow_desc
|
||||
Self {
|
||||
scope,
|
||||
debug_name: dataflow.debug_name.clone(),
|
||||
dataflow_id,
|
||||
since_frontier,
|
||||
until_frontier: dataflow.until.clone(),
|
||||
bindings: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Scope, V: Data, T: Lattice> Context<S, V, T>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
/// Insert a collection bundle by an identifier.
|
||||
///
|
||||
/// This is expected to be used to install external collections (sources, indexes, other views),
|
||||
/// as well as for `Let` bindings of local collections.
|
||||
pub fn insert_id(
|
||||
&mut self,
|
||||
id: Id,
|
||||
collection: CollectionBundle<S, V, T>,
|
||||
) -> Option<CollectionBundle<S, V, T>> {
|
||||
self.bindings.insert(id, collection)
|
||||
}
|
||||
|
||||
/// Remove a collection bundle by an identifier.
|
||||
///
|
||||
/// The primary use of this method is uninstalling `Let` bindings.
|
||||
pub fn remove_id(&mut self, id: Id) -> Option<CollectionBundle<S, V, T>> {
|
||||
self.bindings.remove(&id)
|
||||
}
|
||||
/// Melds a collection bundle to whatever exists.
|
||||
#[allow(clippy::map_entry)]
|
||||
pub fn update_id(&mut self, id: Id, collection: CollectionBundle<S, V, T>) {
|
||||
if !self.bindings.contains_key(&id) {
|
||||
self.bindings.insert(id, collection);
|
||||
} else {
|
||||
let binding = self
|
||||
.bindings
|
||||
.get_mut(&id)
|
||||
.expect("Binding verified to exist");
|
||||
if collection.collection.is_some() {
|
||||
binding.collection = collection.collection;
|
||||
}
|
||||
for (key, flavor) in collection.arranged.into_iter() {
|
||||
binding.arranged.insert(key, flavor);
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Look up a collection bundle by an identifier.
|
||||
pub fn lookup_id(&self, id: Id) -> Option<CollectionBundle<S, V, T>> {
|
||||
self.bindings.get(&id).cloned()
|
||||
}
|
||||
}
|
||||
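A toy sketch of the `update_id` melding semantics above, assuming a much-simplified `Bundle` in place of `CollectionBundle`: an existing binding keeps its raw collection unless the new bundle carries one, and arrangements are merged key by key.

```rust
use std::collections::BTreeMap;

// Simplified stand-in for a `CollectionBundle`: an optional raw collection
// plus arrangements keyed by their key expressions.
#[derive(Clone, Debug, PartialEq)]
struct Bundle {
    collection: Option<&'static str>,
    arranged: BTreeMap<&'static str, &'static str>,
}

// Mirrors `Context::update_id`: meld a new bundle into whatever is already bound.
fn update_id(bindings: &mut BTreeMap<u64, Bundle>, id: u64, bundle: Bundle) {
    match bindings.get_mut(&id) {
        None => {
            bindings.insert(id, bundle);
        }
        Some(existing) => {
            if bundle.collection.is_some() {
                existing.collection = bundle.collection;
            }
            existing.arranged.extend(bundle.arranged);
        }
    }
}

fn main() {
    let mut bindings = BTreeMap::new();
    update_id(
        &mut bindings,
        7,
        Bundle { collection: Some("raw"), arranged: BTreeMap::new() },
    );
    update_id(
        &mut bindings,
        7,
        Bundle { collection: None, arranged: BTreeMap::from([("key_a", "trace_a")]) },
    );
    let melded = &bindings[&7];
    assert_eq!(melded.collection, Some("raw"));
    assert_eq!(melded.arranged["key_a"], "trace_a");
}
```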
|
||||
type ResultCollection<S, V> = (Collection<S, V, Diff>, Collection<S, DataflowError, Diff>);
|
||||
|
||||
/// A bundle of the various ways a collection can be represented.
|
||||
///
|
||||
/// This type maintains the invariant that it does contain at least one valid
|
||||
/// source of data, either a collection or at least one arrangement.
|
||||
#[derive(Clone)]
|
||||
pub struct CollectionBundle<S, V, T = repr::Timestamp>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S: Scope,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
V: Data,
|
||||
{
|
||||
pub(crate) collection: Option<ResultCollection<S, V>>,
|
||||
/// TODO(discord9): impl: 1. ScalarExpr (could be from Substrait), 2. Arrangement
|
||||
pub(crate) arranged: BTreeMap<Vec<ScalarExpr>, ArrangementFlavor<S, V, T>>,
|
||||
}
|
||||
|
||||
impl<S: Scope, V: Data, T: Lattice> CollectionBundle<S, V, T>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
/// Construct a new collection bundle from update streams.
|
||||
pub fn from_collections(
|
||||
oks: Collection<S, V, Diff>,
|
||||
errs: Collection<S, DataflowError, Diff>,
|
||||
) -> Self {
|
||||
Self {
|
||||
collection: Some((oks, errs)),
|
||||
arranged: BTreeMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Inserts arrangements by the expressions on which they are keyed.
|
||||
pub fn from_expressions(
|
||||
exprs: Vec<ScalarExpr>,
|
||||
arrangements: ArrangementFlavor<S, V, T>,
|
||||
) -> Self {
|
||||
let mut arranged = BTreeMap::new();
|
||||
arranged.insert(exprs, arrangements);
|
||||
Self {
|
||||
collection: None,
|
||||
arranged,
|
||||
}
|
||||
}
|
||||
|
||||
/// Inserts arrangements by the columns on which they are keyed.
|
||||
pub fn from_columns<I: IntoIterator<Item = usize>>(
|
||||
columns: I,
|
||||
arrangements: ArrangementFlavor<S, V, T>,
|
||||
) -> Self {
|
||||
let mut keys = Vec::new();
|
||||
for column in columns {
|
||||
keys.push(ScalarExpr::Column(column));
|
||||
}
|
||||
Self::from_expressions(keys, arrangements)
|
||||
}
|
||||
|
||||
/// The scope containing the collection bundle.
|
||||
pub fn scope(&self) -> S {
|
||||
if let Some((oks, _errs)) = &self.collection {
|
||||
oks.inner.scope()
|
||||
} else {
|
||||
self.arranged
|
||||
.values()
|
||||
.next()
|
||||
.expect("Must contain a valid collection")
|
||||
.scope()
|
||||
}
|
||||
}
|
||||
|
||||
/// Brings the collection bundle into a region.
|
||||
pub fn enter_region<'a>(
|
||||
&self,
|
||||
region: &Child<'a, S, S::Timestamp>,
|
||||
) -> CollectionBundle<Child<'a, S, S::Timestamp>, V, T> {
|
||||
CollectionBundle {
|
||||
collection: self
|
||||
.collection
|
||||
.as_ref()
|
||||
.map(|(oks, errs)| (oks.enter_region(region), errs.enter_region(region))),
|
||||
arranged: self
|
||||
.arranged
|
||||
.iter()
|
||||
.map(|(key, bundle)| (key.clone(), bundle.enter_region(region)))
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, T> CollectionBundle<S, repr::Row, T>
|
||||
where
|
||||
T: timely::progress::Timestamp + Lattice,
|
||||
S: Scope,
|
||||
S::Timestamp: Refines<T> + Lattice + RenderTimestamp,
|
||||
{
|
||||
/// Presents `self` as a stream of updates, having been subjected to `mfp`.
|
||||
///
|
||||
/// This operator is able to apply the logic of `mfp` early, which can substantially
|
||||
/// reduce the amount of data produced when `mfp` is non-trivial.
|
||||
///
|
||||
/// The `key_val` argument, when present, indicates that a specific arrangement should
|
||||
/// be used, and if, in addition, the `val` component is present,
|
||||
/// that we can seek to the supplied row.
|
||||
pub fn as_collection_core(
|
||||
&self,
|
||||
mut mfp: MapFilterProject,
|
||||
key_val: Option<(Vec<ScalarExpr>, Option<Row>)>,
|
||||
until: Antichain<repr::Timestamp>,
|
||||
) -> (Collection<S, Row, Diff>, Collection<S, DataflowError, Diff>) {
|
||||
mfp.optimize();
|
||||
|
||||
let mfp_plan = mfp.into_plan().unwrap();
|
||||
|
||||
// If the MFP is trivial, we can just call `as_collection`.
|
||||
// In the case that we weren't going to apply the `key_val` optimization,
|
||||
// this path results in a slightly smaller and faster
|
||||
// dataflow graph, and is intended to fix
|
||||
let has_key_val = matches!(&key_val, Some((_key, Some(_val))));
|
||||
|
||||
if mfp_plan.is_identity() && !has_key_val {
|
||||
let key = key_val.map(|(k, _v)| k);
|
||||
return self.as_specific_collection(key.as_deref());
|
||||
}
|
||||
let (stream, errors) = self.flat_map(key_val, || {
|
||||
let until = std::rc::Rc::new(until);
|
||||
// this logic gets executed every time a new row arrives
|
||||
move |row_parts, time, diff| {
|
||||
let until = std::rc::Rc::clone(&until);
|
||||
let row_iters = row_parts
|
||||
.iter()
|
||||
.flat_map(|row| (**row).to_owned().into_iter());
|
||||
let mut datums_local = Vec::new();
|
||||
datums_local.extend(row_iters);
|
||||
let time = time.clone();
|
||||
let event_time: repr::Timestamp = *time.clone().event_time();
|
||||
mfp_plan
|
||||
.evaluate::<DataflowError, _>(
|
||||
&mut datums_local,
|
||||
event_time,
|
||||
*diff,
|
||||
move |time| !until.less_equal(time),
|
||||
)
|
||||
.map(move |x| match x {
|
||||
Ok((row, event_time, diff)) => {
|
||||
// Copy the whole time, and re-populate event time.
|
||||
let mut time: S::Timestamp = time.clone();
|
||||
*time.event_time() = event_time;
|
||||
Ok((row, time, diff))
|
||||
}
|
||||
Err((e, event_time, diff)) => {
|
||||
// Copy the whole time, and re-populate event time.
|
||||
let mut time: S::Timestamp = time.clone();
|
||||
*time.event_time() = event_time;
|
||||
Err((e, time, diff))
|
||||
}
|
||||
})
|
||||
}
|
||||
});
|
||||
|
||||
use timely::dataflow::operators::ok_err::OkErr;
|
||||
let (oks, errs) = stream.ok_err(|x| x);
|
||||
|
||||
use differential_dataflow::AsCollection;
|
||||
let oks = oks.as_collection();
|
||||
let errs = errs.as_collection();
|
||||
(oks, errors.concat(&errs))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, S: Scope, V: Data, T> CollectionBundle<Child<'a, S, S::Timestamp>, V, T>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
/// Extracts the collection bundle from a region.
|
||||
pub fn leave_region(&self) -> CollectionBundle<S, V, T> {
|
||||
CollectionBundle {
|
||||
collection: self
|
||||
.collection
|
||||
.as_ref()
|
||||
.map(|(oks, errs)| (oks.leave_region(), errs.leave_region())),
|
||||
arranged: self
|
||||
.arranged
|
||||
.iter()
|
||||
.map(|(key, bundle)| (key.clone(), bundle.leave_region()))
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Scope, T: Lattice> CollectionBundle<S, Row, T>
|
||||
where
|
||||
T: Timestamp + Lattice,
|
||||
S::Timestamp: Lattice + Refines<T>,
|
||||
{
|
||||
/// Asserts that the arrangement for a specific key
|
||||
/// (or the raw collection for no key) exists,
|
||||
/// and returns the corresponding collection.
|
||||
///
|
||||
/// This returns the collection as-is, without
|
||||
/// doing any unthinning transformation.
|
||||
/// Therefore, it should be used when the appropriate transformation
|
||||
/// was planned as part of a following MFP.
|
||||
pub fn as_specific_collection(
|
||||
&self,
|
||||
key: Option<&[ScalarExpr]>,
|
||||
) -> (Collection<S, Row, Diff>, Collection<S, DataflowError, Diff>) {
|
||||
// Any operator that uses this method was told to use a particular
|
||||
// collection during LIR planning, where we should have made
|
||||
// sure that that collection exists.
|
||||
//
|
||||
// If it doesn't, we panic.
|
||||
match key {
|
||||
None => self
|
||||
.collection
|
||||
.clone()
|
||||
.expect("The unarranged collection doesn't exist."),
|
||||
Some(key) => self
|
||||
.arranged
|
||||
.get(key)
|
||||
.unwrap_or_else(|| panic!("The collection arranged by {:?} doesn't exist.", key))
|
||||
.as_collection(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs and applies logic to elements of a collection and returns the results.
|
||||
///
|
||||
/// `constructor` takes a permutation and produces the logic to apply on elements. The logic
|
||||
/// conceptually receives `(&Row, &Row)` pairs in the form of a slice. Only after borrowing
|
||||
/// the elements and applying the permutation the datums will be in the expected order.
|
||||
///
|
||||
/// If `key_val` is set, this is a promise that `logic` will produce no results on
|
||||
/// records for which the key does not evaluate to the value. This is used when we
|
||||
/// have an arrangement by that key to leap directly to exactly those records.
|
||||
/// It is important that `logic` still guard against data that does not satisfy
|
||||
/// this constraint, as this method does not statically know that it will have
|
||||
/// that arrangement.
|
||||
pub fn flat_map<I, C, L>(
|
||||
&self,
|
||||
key_val: Option<(Vec<ScalarExpr>, Option<Row>)>,
|
||||
constructor: C,
|
||||
) -> (
|
||||
timely::dataflow::Stream<S, I::Item>,
|
||||
Collection<S, DataflowError, Diff>,
|
||||
)
|
||||
where
|
||||
I: IntoIterator,
|
||||
I::Item: Data,
|
||||
C: FnOnce() -> L,
|
||||
L: for<'a, 'b> FnMut(&'a [&'b RefOrMut<'b, Row>], &'a S::Timestamp, &'a Diff) -> I
|
||||
+ 'static,
|
||||
{
|
||||
// If `key_val` is set, we should use the corresponding arrangement.
|
||||
// If there isn't one, that implies an error in the contract between
|
||||
// key-production and available arrangements.
|
||||
if let Some((key, val)) = key_val {
|
||||
let flavor = self
|
||||
.arrangement(&key)
|
||||
.expect("Should have ensured during planning that this arrangement exists.");
|
||||
flavor.flat_map(val, constructor)
|
||||
} else {
|
||||
use timely::dataflow::operators::Map;
|
||||
let (oks, errs) = self
|
||||
.collection
|
||||
.clone()
|
||||
.expect("Invariant violated: CollectionBundle contains no collection.");
|
||||
let mut logic = constructor();
|
||||
(
|
||||
oks.inner
|
||||
.flat_map(move |(mut v, t, d)| logic(&[&RefOrMut::Mut(&mut v)], &t, &d)),
|
||||
errs,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Factored out common logic for using literal keys in general traces.
|
||||
///
|
||||
/// This logic is sufficiently interesting that we want to write it only
|
||||
/// once, and thereby avoid any skew in the two uses of the logic.
|
||||
///
|
||||
/// The function presents the contents of the trace as `(key, value, time, delta)` tuples,
|
||||
/// where key and value are rows.
|
||||
fn flat_map_core<Tr, I, L>(
|
||||
trace: &Arranged<S, Tr>,
|
||||
key: Option<Row>,
|
||||
mut logic: L,
|
||||
refuel: usize,
|
||||
) -> timely::dataflow::Stream<S, I::Item>
|
||||
where
|
||||
Tr: TraceReader<Key = Row, Val = Row, Time = S::Timestamp, R = repr::Diff>
|
||||
+ Clone
|
||||
+ 'static,
|
||||
I: IntoIterator,
|
||||
I::Item: Data,
|
||||
L: for<'a, 'b> FnMut(
|
||||
RefOrMut<'b, Row>,
|
||||
RefOrMut<'b, Row>,
|
||||
&'a S::Timestamp,
|
||||
&'a repr::Diff,
|
||||
) -> I
|
||||
+ 'static,
|
||||
{
|
||||
let mode = if key.is_some() { "index" } else { "scan" };
|
||||
let name = format!("ArrangementFlatMap({})", mode);
|
||||
use timely::dataflow::channels::pact::Pipeline;
|
||||
use timely::dataflow::operators::Operator;
|
||||
trace.stream.unary(Pipeline, &name, move |_, info| {
|
||||
// Acquire an activator to reschedule the operator when it has unfinished work.
|
||||
use timely::scheduling::Activator;
|
||||
let activations = trace.stream.scope().activations();
|
||||
let activator = Activator::new(&info.address[..], activations);
|
||||
// Maintain a list of work to do, cursor to navigate and process.
|
||||
let mut todo = std::collections::VecDeque::new();
|
||||
move |input, output| {
|
||||
// First, dequeue all batches.
|
||||
input.for_each(|time, data| {
|
||||
let capability = time.retain();
|
||||
for batch in data.iter() {
|
||||
// enqueue a capability, cursor, and batch.
|
||||
todo.push_back(PendingWork::new(
|
||||
capability.clone(),
|
||||
batch.cursor(),
|
||||
batch.clone(),
|
||||
));
|
||||
}
|
||||
});
|
||||
|
||||
// Second, make progress on `todo`.
|
||||
let mut fuel = refuel;
|
||||
while !todo.is_empty() && fuel > 0 {
|
||||
todo.front_mut()
|
||||
.unwrap()
|
||||
.do_work(&key, &mut logic, &mut fuel, output);
|
||||
if fuel > 0 {
|
||||
todo.pop_front();
|
||||
}
|
||||
}
|
||||
// If we have not finished all work, re-activate the operator.
|
||||
if !todo.is_empty() {
|
||||
activator.activate();
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Look up an arrangement by the expressions that form the key.
|
||||
///
|
||||
/// The result may be `None` if no such arrangement exists, or it may be one of many
|
||||
/// "arrangement flavors" that represent the types of arranged data we might have.
|
||||
pub fn arrangement(&self, key: &[ScalarExpr]) -> Option<ArrangementFlavor<S, Row, T>> {
|
||||
self.arranged.get(key).cloned()
|
||||
}
|
||||
}
|
||||
|
||||
struct PendingWork<C>
|
||||
where
|
||||
C: Cursor,
|
||||
C::Time: Timestamp,
|
||||
{
|
||||
capability: Capability<C::Time>,
|
||||
cursor: C,
|
||||
batch: C::Storage,
|
||||
}
|
||||
|
||||
/// Handle specialized to `Vec`-based container.
|
||||
type PendingOutputHandle<'a, C, I> = OutputHandle<
|
||||
'a,
|
||||
<C as Cursor>::Time,
|
||||
<I as IntoIterator>::Item,
|
||||
timely::dataflow::channels::pushers::Tee<<C as Cursor>::Time, <I as IntoIterator>::Item>,
|
||||
>;
|
||||
impl<C: Cursor> PendingWork<C>
|
||||
where
|
||||
C::Key: PartialEq,
|
||||
C::Time: Timestamp,
|
||||
{
|
||||
/// Create a new bundle of pending work, from the capability, cursor, and backing storage.
|
||||
fn new(capability: Capability<C::Time>, cursor: C, batch: C::Storage) -> Self {
|
||||
Self {
|
||||
capability,
|
||||
cursor,
|
||||
batch,
|
||||
}
|
||||
}
|
||||
/// Perform roughly `fuel` work through the cursor, applying `logic` and sending results to `output`.
|
||||
fn do_work<I, L>(
|
||||
&mut self,
|
||||
key: &Option<C::Key>,
|
||||
logic: &mut L,
|
||||
fuel: &mut usize,
|
||||
output: &mut PendingOutputHandle<'_, C, I>,
|
||||
) where
|
||||
I: IntoIterator,
|
||||
I::Item: Data,
|
||||
L: for<'a, 'b> FnMut(
|
||||
RefOrMut<'b, C::Key>,
|
||||
RefOrMut<'b, C::Val>,
|
||||
&'a C::Time,
|
||||
&'a C::R,
|
||||
) -> I
|
||||
+ 'static,
|
||||
{
|
||||
// Attempt to make progress on this batch.
|
||||
let mut work: usize = 0;
|
||||
let mut session = output.session(&self.capability);
|
||||
if let Some(key) = key {
|
||||
if self.cursor.get_key(&self.batch) != Some(key) {
|
||||
self.cursor.seek_key(&self.batch, key);
|
||||
}
|
||||
if self.cursor.get_key(&self.batch) == Some(key) {
|
||||
while let Some(val) = self.cursor.get_val(&self.batch) {
|
||||
self.cursor.map_times(&self.batch, |time, diff| {
|
||||
for datum in logic(RefOrMut::Ref(key), RefOrMut::Ref(val), time, diff) {
|
||||
session.give(datum);
|
||||
work += 1;
|
||||
}
|
||||
});
|
||||
self.cursor.step_val(&self.batch);
|
||||
if work >= *fuel {
|
||||
*fuel = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while let Some(key) = self.cursor.get_key(&self.batch) {
|
||||
while let Some(val) = self.cursor.get_val(&self.batch) {
|
||||
self.cursor.map_times(&self.batch, |time, diff| {
|
||||
for datum in logic(RefOrMut::Ref(key), RefOrMut::Ref(val), time, diff) {
|
||||
session.give(datum);
|
||||
work += 1;
|
||||
}
|
||||
});
|
||||
self.cursor.step_val(&self.batch);
|
||||
if work >= *fuel {
|
||||
*fuel = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
self.cursor.step_key(&self.batch);
|
||||
}
|
||||
}
|
||||
*fuel -= work;
|
||||
}
|
||||
}
|
||||
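The `refuel` budget in `flat_map_core` and the `fuel` accounting in `PendingWork::do_work` implement cooperative yielding: do at most a bounded amount of work per activation, and leave the rest queued so the operator can be rescheduled. A standalone sketch of that pattern, with `Pending` as a made-up stand-in for the cursor-based work items:

```rust
use std::collections::VecDeque;

// A batch of pending work: drain items, but stop once the fuel budget is spent
// so the caller can yield and be re-activated later.
struct Pending {
    items: Vec<u64>,
}

impl Pending {
    fn do_work(&mut self, fuel: &mut usize, out: &mut Vec<u64>) {
        let mut work = 0;
        while let Some(item) = self.items.pop() {
            out.push(item * 2);
            work += 1;
            if work >= *fuel {
                // Budget exhausted: mark the fuel as spent and keep the rest queued.
                *fuel = 0;
                return;
            }
        }
        *fuel -= work;
    }
}

fn main() {
    let mut todo = VecDeque::from([
        Pending { items: (0..5).collect() },
        Pending { items: (5..8).collect() },
    ]);
    let mut out = Vec::new();

    // One "activation" with a small budget; unfinished batches stay queued.
    let mut fuel = 6;
    while !todo.is_empty() && fuel > 0 {
        todo.front_mut().unwrap().do_work(&mut fuel, &mut out);
        if fuel > 0 {
            todo.pop_front();
        }
    }
    assert_eq!(out.len(), 6); // only six items processed this activation
    assert_eq!(todo.len(), 1); // the second batch waits for the next activation
}
```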
15
src/flow/src/compute/mod.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
//! For generating a dataflow from a logical plan and computing it
|
||||
mod compute_state;
|
||||
mod context;
|
||||
mod plan;
|
||||
mod render;
|
||||
mod typedefs;
|
||||
mod types;
|
||||
|
||||
pub use context::Context;
|
||||
|
||||
// TODO(discord9): make a simplified version of source/sink
|
||||
// sink: simply get rows out of the sink's collection/err collection and put them somewhere
|
||||
// (R, T, D) rows, of course, with a since/until frontier to limit them
|
||||
|
||||
// source: simply insert stuff into it
|
||||
10
src/flow/src/compute/plan/join/delta_join.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A delta query is implemented by a set of paths, one for each input.
|
||||
///
|
||||
/// Each delta query path responds to its input changes by repeated lookups
|
||||
/// in arrangements for other join inputs. These lookups require specific
|
||||
/// instructions about which expressions to use as keys. Along the way,
|
||||
/// various closures are applied to filter and project as early as possible.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub struct DeltaJoinPlan {}
|
||||
9
src/flow/src/compute/plan/join/linear_join.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// TODO(discord9): impl Join
|
||||
/// A plan for the execution of a linear join.
|
||||
///
|
||||
/// A linear join is a sequence of stages, each of which introduces
|
||||
/// a new collection. Each stage is represented by a [LinearStagePlan].
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub struct LinearJoinPlan {}
|
||||
15
src/flow/src/compute/plan/join/mod.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
mod delta_join;
|
||||
mod linear_join;
|
||||
pub use delta_join::DeltaJoinPlan;
|
||||
pub use linear_join::LinearJoinPlan;
|
||||
|
||||
/// TODO(discord9): impl Join
|
||||
/// A complete enumeration of possible join plans to render.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub enum JoinPlan {
|
||||
/// A join implemented by a linear join.
|
||||
Linear(LinearJoinPlan),
|
||||
/// A join implemented by a delta join.
|
||||
Delta(DeltaJoinPlan),
|
||||
}
|
||||
222
src/flow/src/compute/plan/mod.rs
Normal file
@@ -0,0 +1,222 @@
|
||||
mod join;
|
||||
mod reduce;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use join::JoinPlan;
|
||||
pub(crate) use reduce::{
|
||||
convert_indexes_to_skips, AccumulablePlan, BucketedPlan, KeyValPlan, ReducePlan,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::expr::{Id, LocalId, MapFilterProject, ScalarExpr, TableFunc};
|
||||
use crate::repr::{self, Diff, Row};
|
||||
use crate::storage::errors::EvalError;
|
||||
|
||||
/// The forms in which an operator's output is available;
|
||||
/// it can be considered the plan-time equivalent of
|
||||
/// `render::context::CollectionBundle`.
|
||||
///
|
||||
/// These forms are either "raw", representing an unarranged collection,
|
||||
/// or "arranged", representing one that has been arranged by some key.
|
||||
///
|
||||
/// The raw collection, if it exists, may be consumed directly.
|
||||
///
|
||||
/// The arranged collections are slightly more complicated:
|
||||
/// Each key here is attached to a description of how the corresponding
|
||||
/// arrangement is permuted to remove value columns
|
||||
/// that are redundant with key columns. Thus, the first element in each
|
||||
/// tuple of `arranged` is the arrangement key; the second is the map of
|
||||
/// logical output columns to columns in the key or value of the deduplicated
|
||||
/// representation, and the third is a "thinning expression",
|
||||
/// or list of columns to include in the value
|
||||
/// when arranging.
|
||||
///
|
||||
/// For example, assume a 5-column collection is to be arranged by the key
|
||||
/// `[Column(2), Column(0) + Column(3), Column(1)]`.
|
||||
/// Then `Column(1)` and `Column(2)` in the value are redundant with the key, and
|
||||
/// only columns 0, 3, and 4 need to be stored separately.
|
||||
/// The thinning expression will then be `[0, 3, 4]`.
|
||||
///
|
||||
/// The permutation represents how to recover the
|
||||
/// original values (logically `[Column(0), Column(1), Column(2), Column(3), Column(4)]`)
|
||||
/// from the key and value of the arrangement, logically
|
||||
/// `[Column(2), Column(0) + Column(3), Column(1), Column(0), Column(3), Column(4)]`.
|
||||
/// Thus, the permutation in this case should be `{0: 3, 1: 2, 2: 0, 3: 4, 4: 5}`.
|
||||
///
|
||||
/// Note that this description, while true at the time of writing, is merely illustrative;
|
||||
/// users of this struct should not rely on the exact strategy used for generating
|
||||
/// the permutations. As long as clients apply the thinning expression
|
||||
/// when creating arrangements, and permute by the hashmap when reading them,
|
||||
/// the contract of the function where they are generated (`expr::permutation_for_arrangement`)
|
||||
/// ensures that the correct values will be read.
|
||||
#[derive(Default, Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
|
||||
pub struct AvailableCollections {
|
||||
/// Whether the collection exists in unarranged form.
|
||||
pub raw: bool,
|
||||
/// The set of arrangements of the collection, along with a
|
||||
/// column permutation mapping
|
||||
pub arranged: Vec<KeyWithColumnPermutation>,
|
||||
}
|
||||
|
||||
pub type KeyWithColumnPermutation = (Vec<ScalarExpr>, BTreeMap<usize, usize>, Vec<usize>);
|
||||
|
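The worked example in the doc comment above can be reproduced with a few lines of standalone Rust. This is only an illustration of the documented invariant, not the crate's actual `permutation_for_arrangement` helper; `permutation_for_key` and its inputs are hypothetical names introduced here.

use std::collections::BTreeMap;

/// Illustration only: given the input arity and, for each key element, the
/// input column it is exactly equal to (if any), compute the thinning
/// expression and the permutation map described above.
fn permutation_for_key(
    arity: usize,
    key_equal_cols: &[Option<usize>],
) -> (BTreeMap<usize, usize>, Vec<usize>) {
    // Columns already recoverable from the key need not be stored in the value.
    let mut in_key = BTreeMap::new();
    for (pos, col) in key_equal_cols.iter().enumerate() {
        if let Some(col) = col {
            in_key.entry(*col).or_insert(pos);
        }
    }
    // The thinning keeps only the columns that are not redundant with the key.
    let thinning: Vec<usize> = (0..arity).filter(|c| !in_key.contains_key(c)).collect();
    // The permutation maps each logical column to its position in `key ++ thinned value`.
    let mut permutation = BTreeMap::new();
    for col in 0..arity {
        if let Some(pos) = in_key.get(&col) {
            permutation.insert(col, *pos);
        } else {
            let pos_in_value = thinning.iter().position(|c| *c == col).unwrap();
            permutation.insert(col, key_equal_cols.len() + pos_in_value);
        }
    }
    (permutation, thinning)
}

fn main() {
    // Key `[Column(2), Column(0) + Column(3), Column(1)]` over a 5-column collection:
    // only the first and third key elements are plain column references.
    let (permutation, thinning) = permutation_for_key(5, &[Some(2), None, Some(1)]);
    assert_eq!(thinning, vec![0, 3, 4]);
    assert_eq!(
        permutation,
        BTreeMap::from([(0, 3), (1, 2), (2, 0), (3, 4), (4, 5)])
    );
}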
impl AvailableCollections {
    /// Represent a collection that has no arrangements.
    pub fn new_raw() -> Self {
        Self {
            raw: true,
            arranged: vec![],
        }
    }

    /// Represent a collection that is arranged in the
    /// specified ways.
    pub fn new_arranged(arranged: Vec<KeyWithColumnPermutation>) -> Self {
        assert!(
            !arranged.is_empty(),
            "Invariant violated: at least one collection must exist"
        );
        Self {
            raw: false,
            arranged,
        }
    }
}

/// Rendering Plan
///
/// TODO(discord9): see if we ever need to support recursive plans
#[derive(Debug, Clone)]
pub enum Plan<T = repr::Timestamp> {
    /// A collection containing a pre-determined collection.
    Constant {
        rows: Result<Vec<(Row, T, Diff)>, EvalError>,
    },
    /// A reference to a bound collection.
    ///
    /// This is commonly either an external reference to an existing source or
    /// maintained arrangement, or an internal reference to a `Let` identifier.
    Get {
        id: Id,
        keys: AvailableCollections,
        plan: GetPlan,
    },
    /// Binds `value` to `id`, and then results in `body` with that binding.
    ///
    /// This stage has the effect of sharing `value` across multiple possible
    /// uses in `body`, and is the only mechanism we have for sharing collection
    /// information across parts of a dataflow.
    ///
    /// The binding is not available outside of `body`.
    Let {
        /// The local identifier to be used, available to `body` as `Id::Local(id)`.
        id: LocalId,
        /// The collection that should be bound to `id`.
        value: Box<Plan<T>>,
        /// The collection that results, which is allowed to contain `Get` stages
        /// that reference `Id::Local(id)`.
        body: Box<Plan<T>>,
    },
    /// Map, Filter, and Project operators.
    ///
    /// This stage contains work that we would ideally like to fuse to other plan
    /// stages, but for practical reasons cannot. For example: reduce, threshold,
    /// and topk stages are not able to absorb this operator.
    Mfp {
        /// The input collection.
        input: Box<Plan<T>>,
        /// Linear operator to apply to each record.
        mfp: MapFilterProject,
        /// Whether the input is from an arrangement, and if so,
        /// whether we can seek to a specific value therein
        input_key_val: Option<(Vec<ScalarExpr>, Option<Row>)>,
    },
    /// A variable number of output records for each input record.
    ///
    /// This stage is a bit of a catch-all for logic that does not easily fit in
    /// map stages. This includes table valued functions, but also functions of
    /// multiple arguments, and functions that modify the sign of updates.
    ///
    /// This stage allows a `MapFilterProject` operator to be fused to its output,
    /// and this can be very important as otherwise the output of `func` is just
    /// appended to the input record, for as many outputs as it has. This has the
    /// unpleasant default behavior of repeating potentially large records that
    /// are being unpacked, producing quadratic output in those cases. Instead,
    /// in these cases use a `mfp` member that projects away these large fields.
    FlatMap {
        /// The input collection.
        input: Box<Plan<T>>,
        /// The variable-record emitting function.
        func: TableFunc,
        /// Expressions that for each row prepare the arguments to `func`.
        exprs: Vec<ScalarExpr>,
        /// Linear operator to apply to each record produced by `func`.
        mfp: MapFilterProject,
        /// The particular arrangement of the input we expect to use,
        /// if any
        input_key: Option<Vec<ScalarExpr>>,
    },
    /// A multiway relational equijoin, with fused map, filter, and projection.
    ///
    /// This stage performs a multiway join among `inputs`, using the equality
    /// constraints expressed in `plan`. The plan also describes the implementation
    /// strategy we will use, and any pushed down per-record work.
    Join {
        /// An ordered list of inputs that will be joined.
        inputs: Vec<Plan<T>>,
        /// Detailed information about the implementation of the join.
        ///
        /// This includes information about the implementation strategy, but also
        /// any map, filter, project work that we might follow the join with, but
        /// potentially pushed down into the implementation of the join.
        plan: JoinPlan,
    },
    /// Aggregation by key.
    Reduce {
        /// The input collection.
        input: Box<Plan<T>>,
        /// A plan for changing input records into key, value pairs.
        key_val_plan: KeyValPlan,
        /// A plan for performing the reduce.
        ///
        /// The implementation of reduction has several different strategies based
        /// on the properties of the reduction, and the input itself. Please check
        /// out the documentation for this type for more detail.
        plan: ReducePlan,
        /// The particular arrangement of the input we expect to use,
        /// if any
        input_key: Option<Vec<ScalarExpr>>,
    },
}

/// TODO(discord9): impl GetPlan
#[derive(Debug, Clone)]
pub enum GetPlan {
    /// Simply pass input arrangements on to the next stage.
    PassArrangements,
    /// Using the supplied key, optionally seek the row, and apply the MFP.
    Arrangement(Vec<ScalarExpr>, Option<Row>, MapFilterProject),
    /// Scan the input collection (unarranged) and apply the MFP.
    Collection(MapFilterProject),
}
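As a rough sketch of how these variants compose (borrowing the constructors exercised by the tests in `render/mod.rs` later in this diff, and assuming it compiles inside this crate), a plan that binds an imported source and projects its first column could look like the following. This is an illustration only, not part of the diff.

use crate::compute::plan::{AvailableCollections, GetPlan, Plan};
use crate::expr::{GlobalId, Id, LocalId, MapFilterProject};

/// Sketch only: bind an imported 3-column source to a local id and keep column 0.
fn example_plan() -> Plan {
    // Read the raw (unarranged) source collection with a trivial MFP.
    let source = Plan::Get {
        id: Id::Global(GlobalId::System(0)),
        keys: AvailableCollections::new_raw(),
        plan: GetPlan::Collection(MapFilterProject::new(3)),
    };
    // Share it under a local binding and project away all but the first column.
    Plan::Let {
        id: LocalId(0),
        value: Box::new(source),
        body: Box::new(Plan::Mfp {
            input: Box::new(Plan::Get {
                id: Id::Local(LocalId(0)),
                keys: AvailableCollections::new_raw(),
                plan: GetPlan::PassArrangements,
            }),
            mfp: MapFilterProject::new(3).project([0]),
            input_key_val: None,
        }),
    }
}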
/// Returns bucket sizes, descending, suitable for hierarchical decomposition of an operator, based
/// on the expected number of rows that will have the same group key.
fn bucketing_of_expected_group_size(expected_group_size: Option<u64>) -> Vec<u64> {
    let mut buckets = vec![];
    let mut current = 16;

    // Plan for 4B records in the expected case if the user didn't specify a group size.
    let limit = expected_group_size.unwrap_or(4_000_000_000);

    // Distribute buckets in powers of 16, so that we can strike a balance between how many inputs
    // each layer gets from the preceding layer, while also limiting the number of layers.
    while current < limit {
        buckets.push(current);
        current = current.saturating_mul(16);
    }

    buckets.reverse();
    buckets
}
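To make the bucketing behaviour concrete, the values below follow directly from starting at 16 and multiplying by 16 until the limit (default 4B) is reached, then reversing. This is a sketch of a unit test one could place next to the function; it is not part of the diff.

#[cfg(test)]
mod bucketing_test {
    use super::bucketing_of_expected_group_size;

    #[test]
    fn buckets_descend_in_powers_of_16() {
        // Small expected group size: only two layers are needed.
        assert_eq!(bucketing_of_expected_group_size(Some(1_000)), vec![256, 16]);
        // Default 4B limit: seven layers, largest bucket first.
        assert_eq!(
            bucketing_of_expected_group_size(None),
            vec![268_435_456, 16_777_216, 1_048_576, 65_536, 4_096, 256, 16]
        );
    }
}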
233  src/flow/src/compute/plan/reduce.rs  Normal file
@@ -0,0 +1,233 @@
use serde::{Deserialize, Serialize};

use crate::expr::{AggregateExpr, AggregateFunc, MapFilterProject, SafeMfpPlan};

/// This enum represents the three potential types of aggregations.
#[derive(Copy, Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
pub enum ReductionType {
    /// Accumulable functions can be subtracted from (are invertible), and associative.
    /// We can compute these results by moving some data to the diff field under arbitrary
    /// changes to inputs. Examples include sum or count.
    Accumulable,
    /// Hierarchical functions are associative, which means we can split up the work of
    /// computing them across subsets. Note that hierarchical reductions should also
    /// reduce the data in some way, as otherwise rendering them hierarchically is not
    /// worth it. Examples include min or max.
    Hierarchical,
    /// Basic, for lack of a better word, are functions that are neither accumulable
    /// nor hierarchical. Examples include jsonb_agg.
    Basic,
}

/// Plan for extracting keys and values in preparation for a reduction.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct KeyValPlan {
    /// Extracts the columns used as the key.
    pub key_plan: SafeMfpPlan,
    /// Extracts the columns used to feed the aggregations.
    pub val_plan: SafeMfpPlan,
}

/// Transforms a vector containing indexes of needed columns into one containing
/// the "skips" an iterator over a Row would need to perform to see those values.
///
/// This function requires that all of the elements in `indexes` are strictly
/// increasing.
///
/// # Examples
///
/// ```
/// assert_eq!(convert_indexes_to_skips(vec![3, 6, 10, 15]), [3, 2, 3, 4])
/// ```
pub fn convert_indexes_to_skips(mut indexes: Vec<usize>) -> Vec<usize> {
    for i in 1..indexes.len() {
        assert!(
            indexes[i - 1] < indexes[i],
            "convert_indexes_to_skips needs indexes to be strictly increasing. Received: {:?}",
            indexes,
        );
    }

    for i in (1..indexes.len()).rev() {
        indexes[i] -= indexes[i - 1];
        indexes[i] -= 1;
    }

    indexes
}
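The "skips" returned above are meant to be fed to successive `nth()` calls on an iterator over a row, which is easy to check with plain standard-library types. The following is a sketch of a unit test next to the function, not part of the diff.

#[cfg(test)]
mod skips_test {
    use super::convert_indexes_to_skips;

    #[test]
    fn skips_select_the_requested_columns() {
        let skips = convert_indexes_to_skips(vec![3, 6, 10, 15]);
        assert_eq!(skips, vec![3, 2, 3, 4]);

        // Driving `nth()` with the skips visits exactly the requested columns.
        let row: Vec<u32> = (0..20).collect();
        let mut iter = row.into_iter();
        let picked: Vec<u32> = skips.iter().map(|&s| iter.nth(s).unwrap()).collect();
        assert_eq!(picked, vec![3, 6, 10, 15]);
    }
}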
/// A `ReducePlan` provides a concise description for how we will
/// execute a given reduce expression.
///
/// The provided reduce expression can have no
/// aggregations, in which case it's just a `Distinct` and otherwise
/// it's composed of a combination of accumulable, hierarchical and
/// basic aggregations.
///
/// We want to try to centralize as much decision making about the
/// shape / general computation of the rendered dataflow graph
/// in this plan, and then make actually rendering the graph
/// be as simple (and compiler verifiable) as possible.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub enum ReducePlan {
    /// Plan for not computing any aggregations, just determining the set of
    /// distinct keys.
    Distinct,
    /// Plan for computing only accumulable aggregations.
    Accumulable(AccumulablePlan),
    /// Plan for computing only hierarchical aggregations.
    Hierarchical(HierarchicalPlan),
    /// Plan for computing only basic aggregations.
    Basic(BasicPlan),
    /// Plan for computing a mix of different kinds of aggregations.
    /// We need to do extra work here to reassemble results back in the
    /// requested order.
    Collation(CollationPlan),
}

/// Plan for computing a set of accumulable aggregations.
///
/// We fuse all of the accumulable aggregations together
/// and compute them with one dataflow fragment. We need to
/// be careful to separate out the aggregations that
/// apply only to the distinct set of values. We need
/// to apply a distinct operator to those before we
/// combine them with everything else.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct AccumulablePlan {
    /// All of the aggregations we were asked to compute, stored
    /// in order.
    pub full_aggrs: Vec<AggregateExpr>,
    /// All of the non-distinct accumulable aggregates.
    /// Each element represents:
    /// (index of the aggregation among accumulable aggregations,
    /// index of the datum among inputs, aggregation expr)
    /// These will all be rendered together in one dataflow fragment.
    pub simple_aggrs: Vec<(usize, usize, AggregateExpr)>,
    /// Same as above but for all of the `DISTINCT` accumulable aggregations.
    pub distinct_aggrs: Vec<(usize, usize, AggregateExpr)>,
}

// TODO(discord9): others

/// Plan for computing a set of hierarchical aggregations.
///
/// In the append-only setting we can render them in-place
/// with monotonic plans, but otherwise, we need to render
/// them with a reduction tree that splits the inputs into
/// small, and then progressively larger, buckets
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub enum HierarchicalPlan {
    /// Plan hierarchical aggregations under monotonic inputs.
    Monotonic(MonotonicPlan),
    /// Plan for hierarchical aggregations under non-monotonic inputs.
    Bucketed(BucketedPlan),
}

/// Plan for computing a set of hierarchical aggregations with a
/// monotonic input.
///
/// Here, the aggregations will be rendered in place. We don't
/// need to worry about retractions because the inputs are
/// append only, so we can change our computation to
/// only retain the "best" value in the diff field, instead
/// of holding onto all values.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct MonotonicPlan {
    /// All of the aggregations we were asked to compute.
    pub aggr_funcs: Vec<AggregateFunc>,
    /// Set of "skips" or calls to `nth()` an iterator needs to do over
    /// the input to extract the relevant datums.
    pub skips: Vec<usize>,
    /// True if the input is logically but not physically monotonic,
    /// and the operator must first consolidate the inputs to remove
    /// potential negations.
    pub must_consolidate: bool,
}

/// Plan for computing a set of hierarchical aggregations
/// with non-monotonic inputs.
///
/// To perform hierarchical aggregations with stable runtimes
/// under updates we'll subdivide the group key into buckets, compute
/// the reduction in each of those subdivided buckets and then combine
/// the results into a coarser bucket (one that represents a larger
/// fraction of the original input) and redo the reduction in another
/// layer. Effectively, we'll construct a min / max heap out of a series
/// of reduce operators (each one is a separate layer).
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct BucketedPlan {
    /// All of the aggregations we were asked to compute.
    pub aggr_funcs: Vec<AggregateFunc>,
    /// Set of "skips" or calls to `nth()` an iterator needs to do over
    /// the input to extract the relevant datums.
    pub skips: Vec<usize>,
    /// The number of buckets in each layer of the reduction tree. Should
    /// be decreasing, and ideally, a power of two so that we can easily
    /// distribute values to buckets with `value.hashed() % buckets[layer]`.
    pub buckets: Vec<u64>,
}

#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub enum BasicPlan {}

#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct CollationPlan {}

/// Determines whether a function can be accumulated in an update's "difference" field,
/// and whether it can be subjected to recursive (hierarchical) aggregation.
///
/// Accumulable aggregations will be packed into differential dataflow's "difference" field,
/// which can be accumulated in-place using the addition operation on the type. Aggregations
/// that indicate they are accumulable will still need to provide an action that takes their
/// data and introduces it as a difference, and the post-processing when the accumulated value
/// is presented as data.
///
/// Hierarchical aggregations will be subjected to repeated aggregation on initially small but
/// increasingly large subsets of each key. This has the intended property that no invocation
/// is on a significantly large set of values (and so, no incremental update needs to reform
/// significant input data). Hierarchical aggregates can be rendered more efficiently if the
/// input stream is append-only as then we only need to retain the "currently winning" value.
/// Every hierarchical aggregate needs to supply a corresponding ReductionMonoid implementation.
fn reduction_type(func: &AggregateFunc) -> ReductionType {
    match func {
        AggregateFunc::SumInt16
        | AggregateFunc::SumInt32
        | AggregateFunc::SumInt64
        | AggregateFunc::SumUInt16
        | AggregateFunc::SumUInt32
        | AggregateFunc::SumUInt64
        | AggregateFunc::SumFloat32
        | AggregateFunc::SumFloat64
        | AggregateFunc::Count
        | AggregateFunc::Any
        | AggregateFunc::All => ReductionType::Accumulable,
        AggregateFunc::MaxInt16
        | AggregateFunc::MaxInt32
        | AggregateFunc::MaxInt64
        | AggregateFunc::MaxUInt16
        | AggregateFunc::MaxUInt32
        | AggregateFunc::MaxUInt64
        | AggregateFunc::MaxFloat32
        | AggregateFunc::MaxFloat64
        | AggregateFunc::MaxBool
        | AggregateFunc::MaxString
        | AggregateFunc::MaxDate
        | AggregateFunc::MaxTimestamp
        | AggregateFunc::MaxTimestampTz
        | AggregateFunc::MinInt16
        | AggregateFunc::MinInt32
        | AggregateFunc::MinInt64
        | AggregateFunc::MinUInt16
        | AggregateFunc::MinUInt32
        | AggregateFunc::MinUInt64
        | AggregateFunc::MinFloat32
        | AggregateFunc::MinFloat64
        | AggregateFunc::MinBool
        | AggregateFunc::MinString
        | AggregateFunc::MinDate
        | AggregateFunc::MinTimestamp
        | AggregateFunc::MinTimestampTz => ReductionType::Hierarchical,
        _ => ReductionType::Basic,
    }
}
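As a rough sketch of how `KeyValPlan` and `ReducePlan` fit together (reusing the constructors exercised by the render tests later in this diff, and assuming it compiles inside this crate), a `SELECT col2, COUNT(col0) FROM input GROUP BY col2` over a 3-column input could be described as follows. Illustration only, not part of the diff.

use crate::compute::plan::{AccumulablePlan, KeyValPlan, ReducePlan};
use crate::expr::{AggregateExpr, AggregateFunc, MapFilterProject, SafeMfpPlan, ScalarExpr};

/// Sketch only: count grouped by the third input column.
fn example_reduce_plan() -> (KeyValPlan, ReducePlan) {
    let key_val_plan = KeyValPlan {
        // The key is the third input column.
        key_plan: SafeMfpPlan {
            mfp: MapFilterProject::new(3).project([2]),
        },
        // The remaining columns feed the aggregations.
        val_plan: SafeMfpPlan {
            mfp: MapFilterProject::new(3).project([0, 1]),
        },
    };
    let count = AggregateExpr {
        func: AggregateFunc::Count,
        expr: ScalarExpr::Column(0),
        distinct: false,
    };
    let reduce_plan = ReducePlan::Accumulable(AccumulablePlan {
        full_aggrs: vec![count.clone()],
        // (index among accumulable aggregations, input datum index, aggregation expr)
        simple_aggrs: vec![(0, 0, count)],
        distinct_aggrs: vec![],
    });
    (key_val_plan, reduce_plan)
}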
60  src/flow/src/compute/render/error.rs  Normal file
@@ -0,0 +1,60 @@
use std::hash::Hash;

use differential_dataflow::ExchangeData;

use crate::repr::Row;

/// Used to make possibly-validating code generic: think of this as a kind of `MaybeResult`,
/// specialized for use in compute. Validation code will only run when the error constructor is
/// Some.
pub(super) trait MaybeValidatingRow<T, E>: ExchangeData + Hash {
    fn ok(t: T) -> Self;
    fn into_error() -> Option<fn(E) -> Self>;
}

impl<E> MaybeValidatingRow<Row, E> for Row {
    fn ok(t: Row) -> Self {
        t
    }

    fn into_error() -> Option<fn(E) -> Self> {
        None
    }
}

impl<E> MaybeValidatingRow<(), E> for () {
    fn ok(t: ()) -> Self {
        t
    }

    fn into_error() -> Option<fn(E) -> Self> {
        None
    }
}

impl<E, R> MaybeValidatingRow<Vec<R>, E> for Vec<R>
where
    R: ExchangeData + Hash,
{
    fn ok(t: Vec<R>) -> Self {
        t
    }

    fn into_error() -> Option<fn(E) -> Self> {
        None
    }
}

impl<T, E> MaybeValidatingRow<T, E> for Result<T, E>
where
    T: ExchangeData + Hash,
    E: ExchangeData + Hash,
{
    fn ok(row: T) -> Self {
        Ok(row)
    }

    fn into_error() -> Option<fn(E) -> Self> {
        Some(Err)
    }
}
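The point of the trait is that one operator implementation can either surface errors or silently drop them depending on the chosen output type. A minimal sketch, assuming it lives in the parent `render` module (so the `pub(super)` trait is in scope) and using the crate's `Row` and `EvalError` types:

use crate::repr::Row;
use crate::storage::errors::EvalError;

use self::error::MaybeValidatingRow;

/// Sketch only: emit an error value when the output type has an error
/// constructor (`Result<Row, EvalError>`), and drop it otherwise
/// (`Row`, `()` or `Vec<_>` outputs).
fn emit_or_drop<O>(value: Result<Row, EvalError>) -> Option<O>
where
    O: MaybeValidatingRow<Row, EvalError>,
{
    match value {
        Ok(row) => Some(O::ok(row)),
        Err(err) => O::into_error().map(|ctor| ctor(err)),
    }
}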
626  src/flow/src/compute/render/mod.rs  Normal file
@@ -0,0 +1,626 @@
//! For building the flow graph from a `Plan`.
//! This is basically the last step before actually running the flow graph.

use differential_dataflow::lattice::Lattice;
use differential_dataflow::AsCollection;
use timely::communication::Allocate;
use timely::dataflow::operators::capture::Extract;
use timely::dataflow::operators::{Capture, ToStream};
use timely::dataflow::Scope;
use timely::progress::timestamp::Refines;
use timely::progress::Timestamp;
use timely::worker::Worker as TimelyWorker;

use super::types::DataflowDescription;
use crate::compute::compute_state::ComputeState;
use crate::compute::context::CollectionBundle;
use crate::compute::plan::Plan;
use crate::compute::types::BuildDesc;
use crate::compute::Context;
use crate::expr::Id;
use crate::repr::{self, Row};
use crate::storage::errors::DataflowError;

mod error;
mod reduce;

/// Assemble the "compute" side of a dataflow, i.e. all but the sources.
///
/// This method imports sources from provided assets, and then builds the remaining
/// dataflow using "compute-local" assets like shared arrangements, and producing
/// both arrangements and sinks.
pub fn build_compute_dataflow<A: Allocate>(
    timely_worker: &mut TimelyWorker<A>,
    compute_state: &mut ComputeState,
    dataflow: DataflowDescription<Plan, ()>,
) {
    todo!()
}

pub trait RenderTimestamp: Timestamp + Lattice + Refines<repr::Timestamp> {
    /// The system timestamp component of the timestamp.
    ///
    /// This is useful for manipulating the system time, as when delaying
    /// updates for subsequent cancellation, as with monotonic reduction.
    fn system_time(&mut self) -> &mut repr::Timestamp;
    /// Effects a system delay in terms of the timestamp summary.
    fn system_delay(delay: repr::Timestamp) -> <Self as Timestamp>::Summary;
    /// The event timestamp component of the timestamp.
    fn event_time(&mut self) -> &mut repr::Timestamp;
    /// Effects an event delay in terms of the timestamp summary.
    fn event_delay(delay: repr::Timestamp) -> <Self as Timestamp>::Summary;
    /// Steps the timestamp back so that logical compaction to the output will
    /// not conflate `self` with any historical times.
    fn step_back(&self) -> Self;
}

impl RenderTimestamp for repr::Timestamp {
    fn system_time(&mut self) -> &mut repr::Timestamp {
        self
    }
    fn system_delay(delay: repr::Timestamp) -> <Self as Timestamp>::Summary {
        delay
    }
    fn event_time(&mut self) -> &mut repr::Timestamp {
        self
    }
    fn event_delay(delay: repr::Timestamp) -> <Self as Timestamp>::Summary {
        delay
    }
    fn step_back(&self) -> Self {
        self.saturating_sub(1)
    }
}

// This implementation block allows child timestamps to vary from parent timestamps.
impl<G> Context<G, Row>
where
    G: Scope,
    G::Timestamp: RenderTimestamp,
{
    /// Render the plan and insert the result into the context under the given GlobalId.
    pub(crate) fn build_object(&mut self, object: BuildDesc<Plan>) {
        // First, transform the relation expression into a render plan.
        let bundle = self.render_plan(object.plan);
        self.insert_id(Id::Global(object.id), bundle);
    }
}

impl<S> Context<S, Row>
where
    S: Scope,
    S::Timestamp: RenderTimestamp,
{
    /// Renders a plan to a differential dataflow, producing the collection of results.
    ///
    /// The return type reflects the uncertainty about the data representation, perhaps
    /// as a stream of data, perhaps as an arrangement, perhaps as a stream of batches.
    pub fn render_plan(&mut self, plan: Plan) -> CollectionBundle<S, Row> {
        match plan {
            Plan::Constant { rows } => {
                let (rows, errs) = match rows {
                    Ok(rows) => (rows, Vec::new()),
                    Err(err) => (Vec::new(), vec![err]),
                };
                let since_frontier = self.since_frontier.clone();
                let until = self.until_frontier.clone();
                let ok_collection = rows
                    .into_iter()
                    .filter_map(move |(row, mut time, diff)| {
                        time.advance_by(since_frontier.borrow());
                        if !until.less_equal(&time) {
                            Some((
                                row,
                                <S::Timestamp as Refines<repr::Timestamp>>::to_inner(time),
                                diff,
                            ))
                        } else {
                            None
                        }
                    })
                    .to_stream(&mut self.scope)
                    .as_collection();
                let mut error_time: repr::Timestamp = Timestamp::minimum();
                error_time.advance_by(self.since_frontier.borrow());
                let err_collection = errs
                    .into_iter()
                    .map(move |e| {
                        (
                            DataflowError::from(e),
                            <S::Timestamp as Refines<repr::Timestamp>>::to_inner(error_time),
                            1,
                        )
                    })
                    .to_stream(&mut self.scope)
                    .as_collection();
                CollectionBundle::from_collections(ok_collection, err_collection)
            }
            Plan::Get { id, keys, plan } => {
                // Recover the collection from `self` and then apply `mfp` to it.
                // If `mfp` happens to be trivial, we can just return the collection.
                let mut collection = self
                    .lookup_id(id)
                    .unwrap_or_else(|| panic!("Get({:?}) not found at render time", id));
                match plan {
                    crate::compute::plan::GetPlan::PassArrangements => {
                        // Assert that each of `keys` are present in `collection`.
                        if !keys
                            .arranged
                            .iter()
                            .all(|(key, _, _)| collection.arranged.contains_key(key))
                        {
                            let not_included: Vec<_> = keys
                                .arranged
                                .iter()
                                .filter(|(key, _, _)| !collection.arranged.contains_key(key))
                                .map(|(key, _, _)| key)
                                .collect();
                            panic!(
                                "These keys {:?} are not included in the collection's keys: {:?}",
                                not_included,
                                collection.arranged.keys().cloned().collect::<Vec<_>>()
                            );
                        }
                        assert!(keys.raw <= collection.collection.is_some());
                        // Retain only those keys we want to import.
                        collection.arranged.retain(|key, _val| {
                            keys.arranged.iter().any(|(key2, _, _)| key2 == key)
                        });
                        collection
                    }
                    crate::compute::plan::GetPlan::Arrangement(key, row, mfp) => {
                        let (oks, errs) = collection.as_collection_core(
                            mfp,
                            Some((key, row)),
                            self.until_frontier.clone(),
                        );
                        CollectionBundle::from_collections(oks, errs)
                    }
                    crate::compute::plan::GetPlan::Collection(mfp) => {
                        let (oks, errs) =
                            collection.as_collection_core(mfp, None, self.until_frontier.clone());
                        CollectionBundle::from_collections(oks, errs)
                    }
                }
            }
            Plan::Let { id, value, body } => {
                // Render `value` and bind it to `id`. Complain if this shadows an id.
                let value = self.render_plan(*value);
                let prebound = self.insert_id(Id::Local(id), value);
                assert!(prebound.is_none());

                let body = self.render_plan(*body);
                self.remove_id(Id::Local(id));
                body
            }
            Plan::Mfp {
                input,
                mfp,
                input_key_val,
            } => {
                let input = self.render_plan(*input);
                // If `mfp` is non-trivial, we should apply it and produce a collection.
                if mfp.is_identity() {
                    input
                } else {
                    let (oks, errs) =
                        input.as_collection_core(mfp, input_key_val, self.until_frontier.clone());
                    CollectionBundle::from_collections(oks, errs)
                }
            }
            Plan::Reduce {
                input,
                key_val_plan,
                plan,
                input_key,
            } => {
                let input = self.render_plan(*input);
                self.render_reduce(input, key_val_plan, plan, input_key)
            }
            _ => todo!("To be implemented"),
        }
    }
}
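The `Plan::Constant` arm above advances each row's time to the `since` frontier and drops anything at or beyond `until`. A minimal sketch of that logic with plain integers instead of timely frontiers (the function name is hypothetical, for illustration only):

/// Sketch only: mirrors the constant-row filtering above.
/// `advance_by(since)` means a constant row can never appear earlier than the
/// compaction frontier; `!until.less_equal(&time)` suppresses updates at or
/// past `until`, which is what makes `until = as_of + 1` behave like a batch.
fn constant_row_time(mut time: u64, since: u64, until: Option<u64>) -> Option<u64> {
    if time < since {
        time = since;
    }
    match until {
        Some(u) if u <= time => None,
        _ => Some(time),
    }
}

#[test]
fn constant_rows_respect_since_and_until() {
    assert_eq!(constant_row_time(0, 5, Some(10)), Some(5)); // advanced to `since`
    assert_eq!(constant_row_time(12, 5, Some(10)), None); // at/after `until`: dropped
}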
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::any::Any;
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::rc::Rc;
|
||||
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::Value;
|
||||
use differential_dataflow::input::{Input, InputSession};
|
||||
use differential_dataflow::Collection;
|
||||
use timely::dataflow::scopes::Child;
|
||||
use timely::dataflow::Stream;
|
||||
use timely::Config;
|
||||
|
||||
use super::*;
|
||||
use crate::compute::plan::{
|
||||
AccumulablePlan, AvailableCollections, GetPlan, KeyValPlan, ReducePlan,
|
||||
};
|
||||
use crate::expr::{
|
||||
AggregateExpr, BinaryFunc, GlobalId, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr,
|
||||
UnaryFunc,
|
||||
};
|
||||
use crate::repr::Diff;
|
||||
type OkStream<G> = Stream<G, (Row, repr::Timestamp, Diff)>;
|
||||
type ErrStream<G> = Stream<G, (DataflowError, repr::Timestamp, Diff)>;
|
||||
type OkCollection<G> = Collection<G, Row, Diff>;
|
||||
type ErrCollection<G> = Collection<G, DataflowError, Diff>;
|
||||
/// used as a token to prevent certain resources from being dropped
|
||||
type AnyToken = Rc<dyn Any>;
|
||||
struct MockSourceToken {
|
||||
handle: InputSession<repr::Timestamp, Row, Diff>,
|
||||
err_handle: InputSession<repr::Timestamp, DataflowError, Diff>,
|
||||
}
|
||||
|
||||
fn mock_input_session(input: &mut InputSession<repr::Timestamp, Row, Diff>, cnt: i64) {
|
||||
// TODO: mock a cpu usage monotonic input with timestamp
|
||||
// cpu, mem, ts
|
||||
// f32, f32, DateTime
|
||||
let schema = [
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
];
|
||||
let arrs = (0..cnt).map(|i| (i as f32 / cnt as f32, i as f32 / cnt as f32, i));
|
||||
// need more mechanism to make timestamp also timestamp here
|
||||
for (cpu, mem, ts) in arrs {
|
||||
input.update(
|
||||
Row::pack(vec![cpu.into(), mem.into(), Value::DateTime(ts.into())]),
|
||||
1,
|
||||
);
|
||||
input.advance_to(ts as u64)
|
||||
}
|
||||
input.flush();
|
||||
}
|
||||
|
||||
// a simple test to see if the dataflow can be built and run
|
||||
fn exec_dataflow(
|
||||
input_id: Vec<Id>,
|
||||
dataflow: DataflowDescription<Plan>,
|
||||
sink_ids: Vec<GlobalId>,
|
||||
output_keys: Vec<Option<Vec<ScalarExpr>>>,
|
||||
input_mock_length: i64,
|
||||
) {
|
||||
timely::execute(Config::thread(), move |worker| {
|
||||
println!("worker: {:?}", worker.index());
|
||||
let mut input = InputSession::<repr::Timestamp, Row, Diff>::new();
|
||||
worker.dataflow_named(
|
||||
"ProofOfConcept",
|
||||
|scope: &mut Child<'_, _, repr::Timestamp>| {
|
||||
let mut test_ctx =
|
||||
Context::<_, Row, _>::for_dataflow_in(&dataflow, scope.clone());
|
||||
|
||||
let ok_collection = input.to_collection(scope);
|
||||
let (err_handle, err_collection) = scope.new_collection();
|
||||
let input_collection =
|
||||
CollectionBundle::<_, _, repr::Timestamp>::from_collections(
|
||||
ok_collection,
|
||||
err_collection,
|
||||
);
|
||||
|
||||
// TODO: generate `import_sources` from `dataflow.source_imports`
|
||||
let import_sources: Vec<_> = input_id
|
||||
.clone()
|
||||
.into_iter()
|
||||
.zip(vec![input_collection])
|
||||
.collect();
|
||||
|
||||
// import sources
|
||||
for (id, collection) in import_sources {
|
||||
test_ctx.insert_id(id, collection);
|
||||
}
|
||||
|
||||
for build_desc in &dataflow.objects_to_build {
|
||||
test_ctx.build_object(build_desc.clone());
|
||||
}
|
||||
|
||||
dbg!(test_ctx.bindings.keys());
|
||||
|
||||
// TODO: export sinks
|
||||
|
||||
for (sink, output_key) in sink_ids.iter().zip(output_keys.iter()) {
|
||||
let sink = *sink;
|
||||
println!("Inspecting sink {:?}", sink.clone());
|
||||
let inspect = test_ctx.lookup_id(Id::Global(sink)).unwrap();
|
||||
dbg!(inspect.collection.is_some());
|
||||
dbg!(inspect.arranged.keys());
|
||||
let inspect = inspect.as_specific_collection(output_key.as_deref());
|
||||
inspect
|
||||
.0
|
||||
.inspect(move |x| println!("inspect {:?} {:?}", sink.clone(), x));
|
||||
}
|
||||
},
|
||||
);
|
||||
mock_input_session(&mut input, input_mock_length);
|
||||
})
|
||||
.expect("Computation terminated abnormally");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_simple_poc_reduce_group_by() {
|
||||
// 1. build dataflow with input collection connected
|
||||
// 2. give input
|
||||
// type annotations are needed to keep rust-analyzer from giving up on type deduction
|
||||
|
||||
// give the dataflow description directly for now;
// it will later be built from dataflow information provided by other nodes
|
||||
// key is the third column
|
||||
let place_holder =
|
||||
ScalarExpr::Literal(Ok(Value::Boolean(true)), ConcreteDataType::int64_datatype());
|
||||
|
||||
let count_col = |i: usize| AggregateExpr {
|
||||
func: crate::expr::AggregateFunc::Count,
|
||||
expr: ScalarExpr::Column(i),
|
||||
distinct: false,
|
||||
};
|
||||
let sum_col = |i: usize| AggregateExpr {
|
||||
func: crate::expr::AggregateFunc::SumFloat32,
|
||||
expr: ScalarExpr::Column(i),
|
||||
distinct: false,
|
||||
};
|
||||
// equal to `SELECT minute, SUM(cpu) FROM input GROUP BY ts/300 as minute;
|
||||
// cpu, mem, ts
|
||||
// --map--> cpu, mem, ts/300
|
||||
// --reduce--> ts/300, AVG(cpu), AVG(mem)
|
||||
let cast_datetime = ScalarExpr::CallUnary {
|
||||
func: UnaryFunc::CastDatetimeToInt64,
|
||||
expr: Box::new(ScalarExpr::Column(2)),
|
||||
};
|
||||
let ts_div_5 = ScalarExpr::CallBinary {
|
||||
func: BinaryFunc::DivInt64,
|
||||
expr1: Box::new(cast_datetime),
|
||||
expr2: Box::new(ScalarExpr::Literal(
|
||||
Ok(Value::Int64(5.into())),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
)),
|
||||
};
|
||||
let cast_int64_to_float32 = |i: usize| ScalarExpr::CallUnary {
|
||||
func: UnaryFunc::CastInt64ToFloat32,
|
||||
expr: Box::new(ScalarExpr::Column(i)),
|
||||
};
|
||||
let reduce_group_by_window = vec![
|
||||
// cpu, mem, ts
|
||||
// --reduce--> ts/300, SUM(cpu), SUM(mem), COUNT(cpu), COUNT(mem)
|
||||
// -- map --> ts/300, AVG(cpu), AVG(mem)
|
||||
BuildDesc {
|
||||
id: GlobalId::User(0),
|
||||
plan: Plan::Reduce {
|
||||
input: Box::new(Plan::Get {
|
||||
id: Id::Global(GlobalId::System(0)),
|
||||
keys: AvailableCollections::new_raw(),
|
||||
plan: GetPlan::Collection(
|
||||
MapFilterProject::new(3).map([ts_div_5]).project([0, 1, 3]),
|
||||
),
|
||||
}),
|
||||
key_val_plan: KeyValPlan {
|
||||
key_plan: SafeMfpPlan {
|
||||
mfp: MapFilterProject::new(3).project([2]),
|
||||
},
|
||||
val_plan: SafeMfpPlan {
|
||||
mfp: MapFilterProject::new(3).project([0, 1]),
|
||||
},
|
||||
},
|
||||
// --reduce--> ts/300(key), SUM(cpu), SUM(mem), COUNT(cpu), COUNT(mem)
|
||||
plan: ReducePlan::Accumulable(AccumulablePlan {
|
||||
full_aggrs: vec![sum_col(0), sum_col(1), count_col(0), count_col(1)],
|
||||
simple_aggrs: vec![
|
||||
(0, 0, sum_col(0)),
|
||||
(1, 1, sum_col(1)),
|
||||
(2, 0, count_col(0)),
|
||||
(3, 1, count_col(1)),
|
||||
],
|
||||
distinct_aggrs: vec![],
|
||||
}),
|
||||
input_key: None,
|
||||
},
|
||||
},
|
||||
// 0 1 2 3 4
|
||||
// ts/300(key), SUM(cpu), SUM(mem), COUNT(cpu), COUNT(mem),
|
||||
// -- map --> AVG(cpu), AVG(mem), ts/300
|
||||
BuildDesc {
|
||||
id: GlobalId::User(1),
|
||||
plan: Plan::Get {
|
||||
id: Id::Global(GlobalId::User(0)),
|
||||
// not used since plan is GetPlan::Arrangement
|
||||
keys: AvailableCollections::new_raw(),
|
||||
plan: GetPlan::Arrangement(
|
||||
vec![ScalarExpr::Column(0)],
|
||||
None,
|
||||
MapFilterProject::new(5)
|
||||
.map([
|
||||
ScalarExpr::CallBinary {
|
||||
func: BinaryFunc::DivFloat32,
|
||||
expr1: Box::new(ScalarExpr::Column(1)),
|
||||
expr2: Box::new(cast_int64_to_float32(3)),
|
||||
},
|
||||
ScalarExpr::CallBinary {
|
||||
func: BinaryFunc::DivFloat32,
|
||||
expr1: Box::new(ScalarExpr::Column(2)),
|
||||
expr2: Box::new(cast_int64_to_float32(4)),
|
||||
},
|
||||
])
|
||||
.project([0, 5, 6]),
|
||||
),
|
||||
},
|
||||
},
|
||||
];
|
||||
let input_id = vec![Id::Global(GlobalId::System(0))];
|
||||
let dataflow = {
|
||||
let mut dataflow = DataflowDescription::<Plan, ()>::new("test".to_string());
|
||||
dataflow.objects_to_build = reduce_group_by_window;
|
||||
dataflow
|
||||
};
|
||||
let sink_ids = [GlobalId::User(0), GlobalId::User(1)];
|
||||
exec_dataflow(
|
||||
input_id.clone(),
|
||||
dataflow.clone(),
|
||||
sink_ids.to_vec(),
|
||||
vec![Some(vec![ScalarExpr::Column(0)]), None],
|
||||
10,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_simple_poc_reduce_count() {
|
||||
// 1. build dataflow with input collection connected
|
||||
// 2. give input
|
||||
// type annotations are needed to keep rust-analyzer from giving up on type deduction
|
||||
|
||||
// give the dataflow description directly for now;
// it will later be built from dataflow information provided by other nodes
|
||||
// key is the third column
|
||||
let place_holder =
|
||||
ScalarExpr::Literal(Ok(Value::Boolean(true)), ConcreteDataType::int64_datatype());
|
||||
let key_plan = SafeMfpPlan {
|
||||
mfp: MapFilterProject::new(3)
|
||||
.map([place_holder.clone()])
|
||||
.project([3]),
|
||||
};
|
||||
let val_plan = SafeMfpPlan {
|
||||
mfp: MapFilterProject::new(3).project([0, 1, 2]),
|
||||
};
|
||||
let count = AggregateExpr {
|
||||
func: crate::expr::AggregateFunc::Count,
|
||||
expr: place_holder,
|
||||
distinct: false,
|
||||
};
|
||||
// equal to `SELECT COUNT(*) FROM input;`
|
||||
let reduce_group_by_window = vec![
|
||||
// count(true)
|
||||
BuildDesc {
|
||||
id: GlobalId::User(0),
|
||||
plan: Plan::Reduce {
|
||||
input: Box::new(Plan::Get {
|
||||
id: Id::Global(GlobalId::System(0)),
|
||||
keys: AvailableCollections::new_raw(),
|
||||
plan: GetPlan::Collection(MapFilterProject::new(3)),
|
||||
}),
|
||||
key_val_plan: KeyValPlan { key_plan, val_plan },
|
||||
plan: ReducePlan::Accumulable(AccumulablePlan {
|
||||
full_aggrs: vec![count.clone()],
|
||||
simple_aggrs: vec![(0, 0, count)],
|
||||
distinct_aggrs: vec![],
|
||||
}),
|
||||
input_key: None,
|
||||
},
|
||||
},
|
||||
// get second column
|
||||
BuildDesc {
|
||||
id: GlobalId::User(1),
|
||||
plan: Plan::Get {
|
||||
id: Id::Global(GlobalId::User(0)),
|
||||
// not used since plan is GetPlan::Arrangement
|
||||
keys: AvailableCollections::new_raw(),
|
||||
plan: GetPlan::Arrangement(
|
||||
vec![ScalarExpr::Column(0)],
|
||||
None,
|
||||
MapFilterProject::new(2).project([1]),
|
||||
),
|
||||
},
|
||||
},
|
||||
];
|
||||
let input_id = vec![Id::Global(GlobalId::System(0))];
|
||||
let dataflow = {
|
||||
let mut dataflow = DataflowDescription::<Plan, ()>::new("test".to_string());
|
||||
dataflow.objects_to_build = reduce_group_by_window;
|
||||
dataflow
|
||||
};
|
||||
let sink_ids = [GlobalId::User(1)];
|
||||
exec_dataflow(
|
||||
input_id.clone(),
|
||||
dataflow.clone(),
|
||||
sink_ids.to_vec(),
|
||||
vec![None],
|
||||
10,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_simple_poc_reduce_distinct() {
|
||||
// 1. build dataflow with input collection connected
|
||||
// 2. give input
|
||||
// type annotations are needed to keep rust-analyzer from giving up on type deduction
|
||||
|
||||
// give the dataflow description directly for now;
// it will later be built from dataflow information provided by other nodes
// windowing needs date_trunc, which is still WIP
|
||||
// key is the third column
|
||||
let key_plan = SafeMfpPlan {
|
||||
mfp: MapFilterProject::new(3).project([2]),
|
||||
};
|
||||
let val_plan = SafeMfpPlan {
|
||||
mfp: MapFilterProject::new(3).project([0, 1]),
|
||||
};
|
||||
// equal to `SELECT ts, COUNT(*) FROM input GROUP BY ts;`
|
||||
let reduce_plan = vec![BuildDesc {
|
||||
id: GlobalId::User(0),
|
||||
plan: Plan::Reduce {
|
||||
input: Box::new(Plan::Get {
|
||||
id: Id::Global(GlobalId::System(0)),
|
||||
keys: AvailableCollections::new_raw(),
|
||||
plan: GetPlan::Collection(MapFilterProject::new(3)),
|
||||
}),
|
||||
key_val_plan: KeyValPlan { key_plan, val_plan },
|
||||
plan: ReducePlan::Distinct,
|
||||
input_key: None,
|
||||
},
|
||||
}];
|
||||
let input_id = vec![Id::Global(GlobalId::System(0))];
|
||||
let dataflow = {
|
||||
let mut dataflow = DataflowDescription::<Plan, ()>::new("test".to_string());
|
||||
dataflow.objects_to_build = reduce_plan;
|
||||
dataflow
|
||||
};
|
||||
let sink_ids = [GlobalId::User(0)];
|
||||
exec_dataflow(
|
||||
input_id.clone(),
|
||||
dataflow.clone(),
|
||||
sink_ids.to_vec(),
|
||||
vec![Some(vec![ScalarExpr::Column(0)])],
|
||||
10,
|
||||
);
|
||||
}
|
||||
#[test]
|
||||
#[allow(clippy::print_stdout)]
|
||||
fn test_constant_plan_render() {
|
||||
let build_descs = vec![BuildDesc {
|
||||
id: GlobalId::User(0),
|
||||
plan: Plan::Constant {
|
||||
rows: Ok(vec![(Row::default(), 0, 1)]),
|
||||
},
|
||||
}];
|
||||
let dataflow = DataflowDescription::<Plan, ()>::new("test".to_string());
|
||||
|
||||
timely::execute_from_args(std::iter::empty::<String>(), move |worker| {
|
||||
println!("worker: {:?}", worker.index());
|
||||
let mut input = InputSession::<repr::Timestamp, Row, Diff>::new();
|
||||
worker.dataflow(|scope: &mut Child<'_, _, repr::Timestamp>| {
|
||||
let mut test_ctx = Context::<_, Row, _>::for_dataflow_in(&dataflow, scope.clone());
|
||||
for build_desc in &build_descs {
|
||||
test_ctx.build_object(build_desc.clone());
|
||||
}
|
||||
let input_collection = input.to_collection(scope);
|
||||
let err_collection = InputSession::new().to_collection(scope);
|
||||
let input_collection =
|
||||
CollectionBundle::from_collections(input_collection, err_collection);
|
||||
|
||||
// insert collection
|
||||
test_ctx.insert_id(Id::Local(LocalId(0)), input_collection);
|
||||
|
||||
let inspect = test_ctx
|
||||
.lookup_id(Id::Global(GlobalId::User(0)))
|
||||
.unwrap()
|
||||
.as_specific_collection(None);
|
||||
inspect.0.inspect(|x| println!("inspect {:?}", x));
|
||||
});
|
||||
// input.insert(Row::default());
|
||||
input.update(Row::default(), 1);
|
||||
input.advance_to(1);
|
||||
})
|
||||
.expect("Computation terminated abnormally");
|
||||
}
|
||||
}
|
||||
1001  src/flow/src/compute/render/reduce.rs  Normal file
(File diff suppressed because it is too large)
20  src/flow/src/compute/typedefs.rs  Normal file
@@ -0,0 +1,20 @@
use differential_dataflow::operators::arrange::TraceAgent;
use differential_dataflow::trace::implementations::ord::{OrdKeySpine, OrdValSpine};

use crate::repr::{Diff, Row, Timestamp};
use crate::storage::errors::DataflowError;

// TODO(discord9): consider using ColValSpine for columnation storage

/// T: Time, R: Diff, O: Offset
pub type RowSpine<K, V, T, R, O = usize> = OrdValSpine<K, V, T, R, O>;
/// T: Time, R: Diff, O: Offset
pub type RowKeySpine<K, T, R, O = usize> = OrdKeySpine<K, T, R, O>;
/// T: Time, R: Diff, O: Offset
pub type ErrSpine<K, T, R, O = usize> = OrdKeySpine<K, T, R, O>;
/// T: Time, R: Diff, O: Offset
pub type ErrValSpine<K, T, R, O = usize> = OrdValSpine<K, DataflowError, T, R, O>;
pub type TraceRowHandle<K, V, T, R> = TraceAgent<RowSpine<K, V, T, R>>;
pub type TraceErrHandle<K, T, R> = TraceAgent<ErrSpine<K, T, R>>;
pub type KeysValsHandle = TraceRowHandle<Row, Row, Timestamp, Diff>;
pub type ErrsHandle = TraceErrHandle<DataflowError, Timestamp, Diff>;
75  src/flow/src/compute/types/dataflow.rs  Normal file
@@ -0,0 +1,75 @@
use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};
use timely::progress::Antichain;

use crate::compute::plan::Plan;
use crate::compute::types::sinks::ComputeSinkDesc;
use crate::compute::types::sources::SourceInstanceDesc;
use crate::expr::{GlobalId, ScalarExpr};
use crate::repr::{self, RelationType};

/// A description of a dataflow to construct and results to surface.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct DataflowDescription<P, S: 'static = (), T = repr::Timestamp> {
    /// Source instantiations made available to the dataflow, paired with monotonicity information.
    pub source_imports: BTreeMap<GlobalId, (SourceInstanceDesc<S>, bool)>,
    /// Indexes made available to the dataflow.
    /// (id of new index, description of index, relation type of base source/view, monotonic)
    pub index_imports: BTreeMap<GlobalId, (IndexDesc, RelationType, bool)>,
    /// Views and indexes to be built and stored in the local context.
    /// Objects must be built in the specific order, as there may be
    /// dependencies of later objects on prior identifiers.
    pub objects_to_build: Vec<BuildDesc<P>>,
    /// Indexes to be made available to be shared with other dataflows
    /// (id of new index, description of index, relation type of base source/view)
    pub index_exports: BTreeMap<GlobalId, (IndexDesc, RelationType)>,
    /// Sinks to be created
    /// (id of new sink, description of sink)
    pub sink_exports: BTreeMap<GlobalId, ComputeSinkDesc<S, T>>,
    /// An optional frontier to which inputs should be advanced.
    ///
    /// If this is set, it should override the default setting determined by
    /// the upper bound of `since` frontiers contributing to the dataflow.
    /// It is an error for this to be set to a frontier not beyond that default.
    pub as_of: Option<Antichain<T>>,
    /// Frontier beyond which the dataflow should not execute.
    /// Specifically, updates at times greater or equal to this frontier are suppressed.
    /// This is often set to `as_of + 1` to enable "batch" computations.
    pub until: Antichain<T>,
    /// Human-readable name
    pub debug_name: String,
}

impl<P, T> DataflowDescription<P, (), T> {
    /// Creates a new dataflow description with a human-readable name.
    pub fn new(name: String) -> Self {
        Self {
            source_imports: Default::default(),
            index_imports: Default::default(),
            objects_to_build: Vec::new(),
            index_exports: Default::default(),
            sink_exports: Default::default(),
            as_of: Default::default(),
            until: Antichain::new(),
            debug_name: name,
        }
    }
}

/// An association of a global identifier to an expression.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct BuildDesc<P = Plan> {
    pub id: GlobalId,
    pub plan: P,
}

/// An index storing processed updates so they can be queried
/// or reused in other computations
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash)]
pub struct IndexDesc {
    /// Identity of the collection the index is on.
    pub on_id: GlobalId,
    /// Expressions to be arranged, in order of decreasing primacy.
    pub key: Vec<ScalarExpr>,
}
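A minimal sketch of assembling a `DataflowDescription`, mirroring how the render tests above build theirs (assumes it compiles inside this crate; the function name is hypothetical). Illustration only, not part of the diff.

use crate::compute::plan::Plan;
use crate::compute::types::{BuildDesc, DataflowDescription};
use crate::expr::GlobalId;
use crate::repr::Row;

/// Sketch only: a dataflow with a single constant object to build.
fn example_dataflow() -> DataflowDescription<Plan, ()> {
    let mut dataflow = DataflowDescription::<Plan, ()>::new("example".to_string());
    dataflow.objects_to_build = vec![BuildDesc {
        id: GlobalId::User(0),
        plan: Plan::Constant {
            // One default row at time 0 with diff +1.
            rows: Ok(vec![(Row::default(), 0, 1)]),
        },
    }];
    dataflow
}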
8  src/flow/src/compute/types/mod.rs  Normal file
@@ -0,0 +1,8 @@
use serde::{Deserialize, Serialize};

use crate::expr::GlobalId;
mod dataflow;
mod sinks;
mod sources;

pub(crate) use dataflow::{BuildDesc, DataflowDescription, IndexDesc};
28  src/flow/src/compute/types/sinks.rs  Normal file
@@ -0,0 +1,28 @@
use serde::{Deserialize, Serialize};
use timely::progress::Antichain;

use crate::expr::GlobalId;
use crate::repr::{self, RelationDesc};

/// A sink for updates to a relational collection.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct ComputeSinkDesc<S: 'static = (), T = repr::Timestamp> {
    pub from: GlobalId,
    pub from_desc: RelationDesc,
    pub connection: ComputeSinkConnection<S>,
    pub with_snapshot: bool,
    pub up_to: Antichain<T>,
}

#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub enum ComputeSinkConnection<S: 'static = ()> {
    // TODO(discord9): consider if ever needed
    Subscribe,
    Persist(PersistSinkConnection<S>),
}

#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct PersistSinkConnection<S> {
    pub value_desc: RelationDesc,
    pub storage_metadata: S,
}
26  src/flow/src/compute/types/sources.rs  Normal file
@@ -0,0 +1,26 @@
use serde::{Deserialize, Serialize};

use crate::expr::MapFilterProject;
use crate::repr::RelationType;

/// A description of an instantiation of a source.
///
/// This includes a description of the source, but additionally any
/// context-dependent options like the ability to apply filtering and
/// projection to the records as they emerge.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct SourceInstanceDesc<M> {
    /// Arguments for this instantiation of the source.
    pub arguments: SourceInstanceArguments,
    /// Additional metadata used by the storage client of a compute instance to read it.
    pub storage_metadata: M,
    /// The relation type of this source
    pub typ: RelationType,
}

/// Per-source construction arguments.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct SourceInstanceArguments {
    /// Linear operators to be applied record-by-record.
    pub operators: Option<MapFilterProject>,
}
224  src/flow/src/expr/func.rs  Normal file
@@ -0,0 +1,224 @@
use datatypes::value::Value;
use serde::{Deserialize, Serialize};

use super::ScalarExpr;
// TODO(discord9): more functions & eval
use crate::{repr::Row, storage::errors::EvalError};

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)]
pub enum UnaryFunc {
    Not,
    IsNull,
    IsTrue,
    IsFalse,
    CastDatetimeToInt64,
    CastInt64ToFloat32,
}

impl UnaryFunc {
    pub fn eval(&self, values: &[Value], expr: &ScalarExpr) -> Result<Value, EvalError> {
        let arg = expr.eval(values)?;
        match self {
            Self::CastDatetimeToInt64 => {
                let datetime = if let Value::DateTime(datetime) = arg {
                    Ok(datetime.val())
                } else {
                    Err(EvalError::TypeMismatch(format!(
                        "expected a datetime value, got {:?}",
                        arg
                    )))
                }?;
                Ok(Value::from(datetime))
            }
            Self::CastInt64ToFloat32 => {
                let int64 = if let Value::Int64(int64) = arg {
                    Ok(int64)
                } else {
                    Err(EvalError::TypeMismatch(format!(
                        "expected an int64 value, got {:?}",
                        arg
                    )))
                }?;
                Ok(Value::from(int64 as f32))
            }
            _ => todo!(),
        }
    }
}
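A minimal usage sketch (assuming `ScalarExpr::eval(&[Value])` as used inside `UnaryFunc::eval` above, and compiling inside this crate): evaluating `CAST(col2 AS INT64)` against one row's values, the same expression the render tests build for their time-bucketing step. Illustration only, not part of the diff.

use datatypes::value::Value;

use crate::expr::{ScalarExpr, UnaryFunc};
use crate::storage::errors::EvalError;

/// Sketch only: cast the datetime in column 2 of a row to an Int64 value.
fn example_cast(row: &[Value]) -> Result<Value, EvalError> {
    let cast = ScalarExpr::CallUnary {
        func: UnaryFunc::CastDatetimeToInt64,
        expr: Box::new(ScalarExpr::Column(2)),
    };
    cast.eval(row)
}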
/// TODO: support more binary functions for more types
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)]
|
||||
pub enum BinaryFunc {
|
||||
Eq,
|
||||
NotEq,
|
||||
Lt,
|
||||
Lte,
|
||||
Gt,
|
||||
Gte,
|
||||
AddInt16,
|
||||
AddInt32,
|
||||
AddInt64,
|
||||
AddUInt16,
|
||||
AddUInt32,
|
||||
AddUInt64,
|
||||
AddFloat32,
|
||||
AddFloat64,
|
||||
SubInt16,
|
||||
SubInt32,
|
||||
SubInt64,
|
||||
SubUInt16,
|
||||
SubUInt32,
|
||||
SubUInt64,
|
||||
SubFloat32,
|
||||
SubFloat64,
|
||||
MulInt16,
|
||||
MulInt32,
|
||||
MulInt64,
|
||||
MulUInt16,
|
||||
MulUInt32,
|
||||
MulUInt64,
|
||||
MulFloat32,
|
||||
MulFloat64,
|
||||
DivInt16,
|
||||
DivInt32,
|
||||
DivInt64,
|
||||
DivUInt16,
|
||||
DivUInt32,
|
||||
DivUInt64,
|
||||
DivFloat32,
|
||||
DivFloat64,
|
||||
ModInt16,
|
||||
ModInt32,
|
||||
ModInt64,
|
||||
ModUInt16,
|
||||
ModUInt32,
|
||||
ModUInt64,
|
||||
}
|
||||
|
||||
impl BinaryFunc {
|
||||
pub fn eval(
|
||||
&self,
|
||||
values: &[Value],
|
||||
expr1: &ScalarExpr,
|
||||
expr2: &ScalarExpr,
|
||||
) -> Result<Value, EvalError> {
|
||||
let left = expr1.eval(values)?;
|
||||
let right = expr2.eval(values)?;
|
||||
match self {
|
||||
Self::Eq => Ok(Value::from(left == right)),
|
||||
Self::NotEq => Ok(Value::from(left != right)),
|
||||
Self::Lt => Ok(Value::from(left < right)),
|
||||
Self::Lte => Ok(Value::from(left <= right)),
|
||||
Self::Gt => Ok(Value::from(left > right)),
|
||||
Self::Gte => Ok(Value::from(left >= right)),
|
||||
Self::AddInt16 => Ok(add::<i16>(left, right)?),
|
||||
Self::AddInt32 => Ok(add::<i32>(left, right)?),
|
||||
Self::AddInt64 => Ok(add::<i64>(left, right)?),
|
||||
Self::AddUInt16 => Ok(add::<u16>(left, right)?),
|
||||
Self::AddUInt32 => Ok(add::<u32>(left, right)?),
|
||||
Self::AddUInt64 => Ok(add::<u64>(left, right)?),
|
||||
Self::AddFloat32 => Ok(add::<f32>(left, right)?),
|
||||
Self::AddFloat64 => Ok(add::<f64>(left, right)?),
|
||||
|
||||
Self::SubInt16 => Ok(sub::<i16>(left, right)?),
|
||||
Self::SubInt32 => Ok(sub::<i32>(left, right)?),
|
||||
Self::SubInt64 => Ok(sub::<i64>(left, right)?),
|
||||
Self::SubUInt16 => Ok(sub::<u16>(left, right)?),
|
||||
Self::SubUInt32 => Ok(sub::<u32>(left, right)?),
|
||||
Self::SubUInt64 => Ok(sub::<u64>(left, right)?),
|
||||
Self::SubFloat32 => Ok(sub::<f32>(left, right)?),
|
||||
Self::SubFloat64 => Ok(sub::<f64>(left, right)?),
|
||||
|
||||
Self::MulInt16 => Ok(mul::<i16>(left, right)?),
|
||||
Self::MulInt32 => Ok(mul::<i32>(left, right)?),
|
||||
Self::MulInt64 => Ok(mul::<i64>(left, right)?),
|
||||
Self::MulUInt16 => Ok(mul::<u16>(left, right)?),
|
||||
Self::MulUInt32 => Ok(mul::<u32>(left, right)?),
|
||||
Self::MulUInt64 => Ok(mul::<u64>(left, right)?),
|
||||
Self::MulFloat32 => Ok(mul::<f32>(left, right)?),
|
||||
Self::MulFloat64 => Ok(mul::<f64>(left, right)?),
|
||||
|
||||
Self::DivInt16 => Ok(div::<i16>(left, right)?),
|
||||
Self::DivInt32 => Ok(div::<i32>(left, right)?),
|
||||
Self::DivInt64 => Ok(div::<i64>(left, right)?),
|
||||
Self::DivUInt16 => Ok(div::<u16>(left, right)?),
|
||||
Self::DivUInt32 => Ok(div::<u32>(left, right)?),
|
||||
Self::DivUInt64 => Ok(div::<u64>(left, right)?),
|
||||
Self::DivFloat32 => Ok(div::<f32>(left, right)?),
|
||||
Self::DivFloat64 => Ok(div::<f64>(left, right)?),
|
||||
|
||||
Self::ModInt16 => Ok(rem::<i16>(left, right)?),
|
||||
Self::ModInt32 => Ok(rem::<i32>(left, right)?),
|
||||
Self::ModInt64 => Ok(rem::<i64>(left, right)?),
|
||||
Self::ModUInt16 => Ok(rem::<u16>(left, right)?),
|
||||
Self::ModUInt32 => Ok(rem::<u32>(left, right)?),
|
||||
Self::ModUInt64 => Ok(rem::<u64>(left, right)?),
|
||||
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)]
|
||||
pub enum VariadicFunc {}
|
||||
|
||||
impl VariadicFunc {
|
||||
pub fn eval(&self, values: &[Value], exprs: &[ScalarExpr]) -> Result<Value, EvalError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
fn add<T>(left: Value, right: Value) -> Result<Value, EvalError>
|
||||
where
|
||||
T: TryFrom<Value> + std::ops::Add<Output = T>,
|
||||
<T as TryFrom<Value>>::Error: std::fmt::Debug,
|
||||
Value: From<T>,
|
||||
{
|
||||
let left = T::try_from(left).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
let right = T::try_from(right).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
Ok(Value::from(left + right))
|
||||
}
|
||||
|
||||
fn sub<T>(left: Value, right: Value) -> Result<Value, EvalError>
|
||||
where
|
||||
T: TryFrom<Value> + std::ops::Sub<Output = T>,
|
||||
<T as TryFrom<Value>>::Error: std::fmt::Debug,
|
||||
Value: From<T>,
|
||||
{
|
||||
let left = T::try_from(left).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
let right = T::try_from(right).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
Ok(Value::from(left - right))
|
||||
}
|
||||
|
||||
fn mul<T>(left: Value, right: Value) -> Result<Value, EvalError>
|
||||
where
|
||||
T: TryFrom<Value> + std::ops::Mul<Output = T>,
|
||||
<T as TryFrom<Value>>::Error: std::fmt::Debug,
|
||||
Value: From<T>,
|
||||
{
|
||||
let left = T::try_from(left).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
let right = T::try_from(right).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
Ok(Value::from(left * right))
|
||||
}
|
||||
|
||||
fn div<T>(left: Value, right: Value) -> Result<Value, EvalError>
|
||||
where
|
||||
T: TryFrom<Value> + std::ops::Div<Output = T>,
|
||||
<T as TryFrom<Value>>::Error: std::fmt::Debug,
|
||||
Value: From<T>,
|
||||
{
|
||||
let left = T::try_from(left).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
let right = T::try_from(right).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
Ok(Value::from(left / right))
|
||||
}
|
||||
|
||||
fn rem<T>(left: Value, right: Value) -> Result<Value, EvalError>
|
||||
where
|
||||
T: TryFrom<Value> + std::ops::Rem<Output = T>,
|
||||
<T as TryFrom<Value>>::Error: std::fmt::Debug,
|
||||
Value: From<T>,
|
||||
{
|
||||
let left = T::try_from(left).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
let right = T::try_from(right).map_err(|e| EvalError::TypeMismatch(format!("{:?}", e)))?;
|
||||
Ok(Value::from(left % right))
|
||||
}
|
||||
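The generic helpers above (`add`, `sub`, `mul`, `div`, `rem`) lean entirely on `Value`'s `TryFrom`/`From` conversions. A quick way to sanity-check them is a unit test in the same module; the block below is only an illustrative sketch, not part of this change (the module name is hypothetical):

#[cfg(test)]
mod arith_sketch {
    use datatypes::value::Value;

    use super::*;

    // Sanity check: operands are converted from `Value` into the concrete type,
    // combined, and wrapped back into a `Value`.
    #[test]
    fn add_and_rem_round_trip_through_value() {
        assert_eq!(
            add::<i32>(Value::from(1i32), Value::from(2i32)),
            Ok(Value::from(3i32))
        );
        assert_eq!(
            rem::<i64>(Value::from(7i64), Value::from(4i64)),
            Ok(Value::from(3i64))
        );
    }
}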
24
src/flow/src/expr/id.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
pub enum GlobalId {
|
||||
/// System namespace.
|
||||
System(u64),
|
||||
/// User namespace.
|
||||
User(u64),
|
||||
/// Transient namespace.
|
||||
Transient(u64),
|
||||
/// Dummy id for query being explained
|
||||
Explain,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub struct LocalId(pub(crate) u64);
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub enum Id {
|
||||
/// An identifier that refers to a local component of a dataflow.
|
||||
Local(LocalId),
|
||||
/// An identifier that refers to a global dataflow.
|
||||
Global(GlobalId),
|
||||
}
|
||||
381
src/flow/src/expr/linear.rs
Normal file
@@ -0,0 +1,381 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
use datatypes::value::Value;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::expr::{Id, LocalId, ScalarExpr};
|
||||
use crate::repr::{self, Diff, Row};
|
||||
use crate::storage::errors::EvalError;
|
||||
|
||||
/// A compound operator that can be applied row-by-row.
|
||||
///
|
||||
/// This operator integrates the map, filter, and project operators.
|
||||
/// It applies a sequence of map expressions, which are allowed to
|
||||
/// refer to previous expressions, interleaved with predicates which
|
||||
/// must be satisfied for an output to be produced. If all predicates
|
||||
/// evaluate to `Value::Boolean(true)` the data at the identified columns are
|
||||
/// collected and produced as output in a packed `Row`.
|
||||
///
|
||||
/// This operator is a "builder" and its contents may contain expressions
|
||||
/// that are not yet executable. For example, it may contain temporal
|
||||
/// expressions in `self.expressions`, even though this is not something
|
||||
/// we can directly evaluate. The plan creation methods will defensively
|
||||
/// ensure that the right thing happens.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct MapFilterProject {
|
||||
/// A sequence of expressions that should be appended to the row.
|
||||
///
|
||||
/// Many of these expressions may not be produced in the output,
|
||||
/// and may only be present as common subexpressions.
|
||||
pub expressions: Vec<ScalarExpr>,
|
||||
/// Expressions that must evaluate to `Value::Boolean(true)` for the output
|
||||
/// row to be produced.
|
||||
///
|
||||
/// Each entry is prepended with a column identifier indicating
|
||||
/// the column *before* which the predicate should first be applied.
|
||||
/// Most commonly this would be one plus the largest column identifier
|
||||
/// in the predicate's support, but it could be larger to implement
|
||||
/// guarded evaluation of predicates.
|
||||
///
|
||||
/// This list should be sorted by the first field.
|
||||
pub predicates: Vec<(usize, ScalarExpr)>,
|
||||
/// A sequence of column identifiers whose data form the output row.
|
||||
pub projection: Vec<usize>,
|
||||
/// The expected number of input columns.
|
||||
///
|
||||
/// This is needed to ensure correct identification of newly formed
|
||||
/// columns in the output.
|
||||
pub input_arity: usize,
|
||||
}
|
||||
|
||||
impl MapFilterProject {
|
||||
/// Create a no-op operator for an input of a supplied arity.
|
||||
pub fn new(input_arity: usize) -> Self {
|
||||
Self {
|
||||
expressions: Vec::new(),
|
||||
predicates: Vec::new(),
|
||||
projection: (0..input_arity).collect(),
|
||||
input_arity,
|
||||
}
|
||||
}
|
||||
|
||||
/// Given two mfps, return an mfp that applies one
|
||||
/// followed by the other.
|
||||
/// Note that the arguments are in the opposite order
|
||||
/// from how function composition is usually written in mathematics.
|
||||
pub fn compose(before: Self, after: Self) -> Self {
|
||||
let (m, f, p) = after.into_map_filter_project();
|
||||
before.map(m).filter(f).project(p)
|
||||
}
|
||||
|
||||
/// True if the operator describes the identity transformation.
|
||||
pub fn is_identity(&self) -> bool {
|
||||
self.expressions.is_empty()
|
||||
&& self.predicates.is_empty()
|
||||
&& self.projection.len() == self.input_arity
|
||||
&& self.projection.iter().enumerate().all(|(i, p)| i == *p)
|
||||
}
|
||||
|
||||
/// Retain only the indicated columns in the presented order.
|
||||
pub fn project<I>(mut self, columns: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = usize> + std::fmt::Debug,
|
||||
{
|
||||
self.projection = columns.into_iter().map(|c| self.projection[c]).collect();
|
||||
self
|
||||
}
|
||||
|
||||
/// Retain only rows satisfying these predicates.
|
||||
///
|
||||
/// This method introduces predicates as eagerly as they can be evaluated,
|
||||
/// which may not be desired for predicates that may cause exceptions.
|
||||
/// If fine manipulation is required, the predicates can be added manually.
|
||||
pub fn filter<I>(mut self, predicates: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = ScalarExpr>,
|
||||
{
|
||||
for mut predicate in predicates {
|
||||
// Correct column references.
|
||||
predicate.permute(&self.projection[..]);
|
||||
|
||||
// Validate column references.
|
||||
assert!(predicate
|
||||
.support()
|
||||
.into_iter()
|
||||
.all(|c| c < self.input_arity + self.expressions.len()));
|
||||
|
||||
// Insert predicate as eagerly as it can be evaluated:
|
||||
// just after the largest column in its support is formed.
|
||||
let max_support = predicate
|
||||
.support()
|
||||
.into_iter()
|
||||
.max()
|
||||
.map(|c| c + 1)
|
||||
.unwrap_or(0);
|
||||
self.predicates.push((max_support, predicate))
|
||||
}
|
||||
// Stable sort predicates by position at which they take effect.
|
||||
// We put literal errors at the end as a stop-gap to avoid erroring
|
||||
// before we are able to evaluate any predicates that might prevent it.
|
||||
self.predicates
|
||||
.sort_by_key(|(position, predicate)| (predicate.is_literal_err(), *position));
|
||||
self
|
||||
}
|
||||
|
||||
/// Append the result of evaluating expressions to each row.
|
||||
pub fn map<I>(mut self, expressions: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = ScalarExpr>,
|
||||
{
|
||||
for mut expression in expressions {
|
||||
// Correct column references.
|
||||
expression.permute(&self.projection[..]);
|
||||
|
||||
// Validate column references.
|
||||
assert!(expression
|
||||
.support()
|
||||
.into_iter()
|
||||
.all(|c| c < self.input_arity + self.expressions.len()));
|
||||
|
||||
// Introduce expression and produce as output.
|
||||
self.expressions.push(expression);
|
||||
self.projection
|
||||
.push(self.input_arity + self.expressions.len() - 1);
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
/// Like [`MapFilterProject::as_map_filter_project`], but consumes `self` rather than cloning.
|
||||
pub fn into_map_filter_project(self) -> (Vec<ScalarExpr>, Vec<ScalarExpr>, Vec<usize>) {
|
||||
let predicates = self
|
||||
.predicates
|
||||
.into_iter()
|
||||
.map(|(_pos, predicate)| predicate)
|
||||
.collect();
|
||||
(self.expressions, predicates, self.projection)
|
||||
}
|
||||
|
||||
/// As the arguments to `Map`, `Filter`, and `Project` operators.
|
||||
///
|
||||
/// In principle, this operator can be implemented as a sequence of
|
||||
/// more elemental operators, likely less efficiently.
|
||||
pub fn as_map_filter_project(&self) -> (Vec<ScalarExpr>, Vec<ScalarExpr>, Vec<usize>) {
|
||||
self.clone().into_map_filter_project()
|
||||
}
|
||||
}
|
||||
|
||||
impl MapFilterProject {
|
||||
pub fn optimize(&mut self) {
|
||||
// TODO(discord9): optimize later
|
||||
}
|
||||
|
||||
/// Convert the `MapFilterProject` into a staged evaluation plan.
|
||||
///
|
||||
/// The main behavior is to extract temporal predicates, which cannot be evaluated
|
||||
/// using the standard machinery.
|
||||
pub fn into_plan(self) -> Result<MfpPlan, String> {
|
||||
MfpPlan::create_from(self)
|
||||
}
|
||||
|
||||
/// Lists input columns whose values are used in outputs.
|
||||
///
|
||||
/// It is entirely appropriate to determine the demand of an instance
|
||||
/// and then both apply a projection to the subject of the instance and
|
||||
/// `self.permute` this instance.
|
||||
pub fn demand(&self) -> BTreeSet<usize> {
|
||||
let mut demanded = BTreeSet::new();
|
||||
for (_index, pred) in self.predicates.iter() {
|
||||
demanded.extend(pred.support());
|
||||
}
|
||||
demanded.extend(self.projection.iter().cloned());
|
||||
for index in (0..self.expressions.len()).rev() {
|
||||
if demanded.contains(&(self.input_arity + index)) {
|
||||
demanded.extend(self.expressions[index].support());
|
||||
}
|
||||
}
|
||||
demanded.retain(|col| col < &self.input_arity);
|
||||
demanded
|
||||
}
|
||||
|
||||
/// Update input column references, due to an input projection or permutation.
|
||||
///
|
||||
/// The `shuffle` argument remaps expected column identifiers to new locations,
|
||||
/// with the expectation that `shuffle` describes all input columns, and so the
|
||||
/// intermediate results will be able to start at position `shuffle.len()`.
|
||||
///
|
||||
/// The supplied `shuffle` may not list columns that are not "demanded" by the
|
||||
/// instance, and so we should ensure that `self` is optimized to not reference
|
||||
/// columns that are not demanded.
|
||||
pub fn permute(&mut self, mut shuffle: BTreeMap<usize, usize>, new_input_arity: usize) {
|
||||
let (mut map, mut filter, mut project) = self.as_map_filter_project();
|
||||
for index in 0..map.len() {
|
||||
// Intermediate columns are just shifted.
|
||||
shuffle.insert(self.input_arity + index, new_input_arity + index);
|
||||
}
|
||||
for expr in map.iter_mut() {
|
||||
expr.permute_map(&shuffle);
|
||||
}
|
||||
for pred in filter.iter_mut() {
|
||||
pred.permute_map(&shuffle);
|
||||
}
|
||||
for proj in project.iter_mut() {
|
||||
assert!(shuffle[proj] < new_input_arity + map.len());
|
||||
*proj = shuffle[proj];
|
||||
}
|
||||
*self = Self::new(new_input_arity)
|
||||
.map(map)
|
||||
.filter(filter)
|
||||
.project(project)
|
||||
}
|
||||
}
|
||||
|
||||
/// A wrapper type which indicates it is safe to simply evaluate all expressions.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub struct SafeMfpPlan {
|
||||
pub(crate) mfp: MapFilterProject,
|
||||
}
|
||||
|
||||
impl SafeMfpPlan {
|
||||
pub fn permute(&mut self, map: BTreeMap<usize, usize>, new_arity: usize) {
|
||||
self.mfp.permute(map, new_arity);
|
||||
}
|
||||
|
||||
/// Evaluates the linear operator on a supplied list of datums.
|
||||
///
|
||||
/// The arguments are the initial values associated with the row,
|
||||
/// and a reusable `row_buf` into which the projected output row
|
||||
/// is packed.
|
||||
///
|
||||
/// An `Ok` result will either be `None` if any predicate did not
|
||||
/// evaluate to `Value::Boolean(true)`, or the values of the columns listed
|
||||
/// by `self.projection` if all predicates passed. If an error
|
||||
/// occurs in the evaluation it is returned as an `Err` variant.
|
||||
/// As the evaluation exits early with failed predicates, it may
|
||||
/// miss some errors that would occur later in evaluation.
|
||||
///
|
||||
/// The `row_buf` is not cleared first, but emptied and refilled if the function
|
||||
/// returns `Ok(Some(row))`.
|
||||
#[inline(always)]
|
||||
pub fn evaluate_into(
|
||||
&self,
|
||||
values: &mut Vec<Value>,
|
||||
row_buf: &mut Row,
|
||||
) -> Result<Option<Row>, EvalError> {
|
||||
let passed_predicates = self.evaluate_inner(values)?;
|
||||
if !passed_predicates {
|
||||
Ok(None)
|
||||
} else {
|
||||
row_buf.clear();
|
||||
row_buf.extend(self.mfp.projection.iter().map(|c| values[*c].clone()));
|
||||
Ok(Some(row_buf.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
/// A version of [`SafeMfpPlan::evaluate_into`] which produces an iterator over `Value`
|
||||
/// as output.
|
||||
///
|
||||
/// This version can be useful when one wants to capture the resulting
|
||||
/// values without packing and then unpacking a row.
|
||||
#[inline(always)]
|
||||
pub fn evaluate_iter<'a>(
|
||||
&'a self,
|
||||
datums: &'a mut Vec<Value>,
|
||||
) -> Result<Option<impl Iterator<Item = Value> + 'a>, EvalError> {
|
||||
let passed_predicates = self.evaluate_inner(datums)?;
|
||||
if !passed_predicates {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(
|
||||
self.mfp.projection.iter().map(move |i| datums[*i].clone()),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Populates `values` with `self.expressions` and tests `self.predicates`.
|
||||
///
|
||||
/// This does not apply `self.projection`, which is up to the calling method.
|
||||
pub fn evaluate_inner(&self, values: &mut Vec<Value>) -> Result<bool, EvalError> {
|
||||
let mut expression = 0;
|
||||
for (support, predicate) in self.mfp.predicates.iter() {
|
||||
while self.mfp.input_arity + expression < *support {
|
||||
values.push(self.mfp.expressions[expression].eval(&values[..])?);
|
||||
expression += 1;
|
||||
}
|
||||
if predicate.eval(&values[..])? != Value::Boolean(true) {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
while expression < self.mfp.expressions.len() {
|
||||
values.push(self.mfp.expressions[expression].eval(&values[..])?);
|
||||
expression += 1;
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for SafeMfpPlan {
|
||||
type Target = MapFilterProject;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.mfp
|
||||
}
|
||||
}
|
||||
|
||||
/// Predicates partitioned into temporal and non-temporal.
|
||||
///
|
||||
/// Temporal predicates require some recognition to determine their
|
||||
/// structure, and it is best to do that once and re-use the results.
|
||||
///
|
||||
/// There are restrictions on the temporal predicates we currently support.
|
||||
/// They must directly constrain `MzNow` from below or above,
|
||||
/// by expressions that do not themselves contain `MzNow`.
|
||||
/// Conjunctions of such constraints are also ok.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct MfpPlan {
|
||||
/// Normal predicates to evaluate on `&[Value]` and expect `Value::Boolean(true)`.
|
||||
pub(crate) mfp: SafeMfpPlan,
|
||||
/// TODO(discord9): impl temporal filter later
|
||||
/// Expressions that when evaluated lower-bound `MzNow`.
|
||||
pub(crate) lower_bounds: Vec<ScalarExpr>,
|
||||
/// Expressions that when evaluated upper-bound `MzNow`.
|
||||
pub(crate) upper_bounds: Vec<ScalarExpr>,
|
||||
}
|
||||
|
||||
impl MfpPlan {
|
||||
pub fn create_from(mut mfp: MapFilterProject) -> Result<Self, String> {
|
||||
Ok(Self {
|
||||
mfp: SafeMfpPlan { mfp },
|
||||
lower_bounds: Vec::new(),
|
||||
upper_bounds: Vec::new(),
|
||||
})
|
||||
}
|
||||
pub fn evaluate<E: From<EvalError>, V: Fn(&repr::Timestamp) -> bool>(
|
||||
&self,
|
||||
values: &mut Vec<Value>,
|
||||
time: repr::Timestamp,
|
||||
diff: Diff,
|
||||
valid_time: V,
|
||||
) -> impl Iterator<Item = Result<(Row, repr::Timestamp, Diff), (E, repr::Timestamp, Diff)>>
|
||||
{
|
||||
match self.mfp.evaluate_inner(values) {
|
||||
Err(e) => {
|
||||
return Some(Err((e.into(), time, diff)))
|
||||
.into_iter()
|
||||
.chain(None.into_iter());
|
||||
}
|
||||
Ok(true) => {}
|
||||
Ok(false) => {
|
||||
return None.into_iter().chain(None.into_iter());
|
||||
}
|
||||
}
|
||||
// TODO(discord9): Temporal filter
|
||||
let ret = Row::pack(self.mfp.mfp.projection.iter().map(|c| values[*c].clone()));
|
||||
Some(Ok((ret, time, diff)))
|
||||
.into_iter()
|
||||
.chain(None.into_iter())
|
||||
}
|
||||
/// Indicates if the planned `MapFilterProject` emits exactly its inputs as outputs.
|
||||
pub fn is_identity(&self) -> bool {
|
||||
self.mfp.mfp.is_identity() && self.lower_bounds.is_empty() && self.upper_bounds.is_empty()
|
||||
}
|
||||
}
|
||||
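To make the builder flow above concrete, here is a hedged sketch (not part of the diff) that maps a sum column, filters on a comparison, and projects only the new column. It assumes the `ScalarExpr::CallBinary` and `BinaryFunc::AddInt32`/`Gte` variants shown elsewhere in this change; the function name is hypothetical.

// Hypothetical illustration of MapFilterProject + SafeMfpPlan; column 2 is the mapped sum.
fn mfp_sketch() -> Result<Option<Row>, EvalError> {
    use datatypes::value::Value;

    use crate::expr::{BinaryFunc, ScalarExpr};

    let sum = ScalarExpr::CallBinary {
        func: BinaryFunc::AddInt32,
        expr1: Box::new(ScalarExpr::Column(0)),
        expr2: Box::new(ScalarExpr::Column(1)),
    };
    let keep = ScalarExpr::CallBinary {
        func: BinaryFunc::Gte,
        expr1: Box::new(ScalarExpr::Column(0)),
        expr2: Box::new(ScalarExpr::Column(1)),
    };
    // Two input columns; append col0 + col1, keep rows where col0 >= col1, output only the sum.
    let mfp = MapFilterProject::new(2)
        .map(vec![sum])
        .filter(vec![keep])
        .project(vec![2]);
    let plan = SafeMfpPlan { mfp };

    let mut values = vec![Value::from(3i32), Value::from(2i32)];
    let mut row_buf = Row::default();
    // Returns Ok(Some(row)) with the packed sum here, or Ok(None) when the predicate fails.
    plan.evaluate_into(&mut values, &mut row_buf)
}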
207
src/flow/src/expr/mod.rs
Normal file
@@ -0,0 +1,207 @@
|
||||
//! for declaring the dataflow description, which is the last step before building the dataflow
|
||||
|
||||
mod func;
|
||||
mod id;
|
||||
mod linear;
|
||||
mod relation;
|
||||
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::Value;
|
||||
pub use id::{GlobalId, Id, LocalId};
|
||||
pub use linear::{MapFilterProject, SafeMfpPlan};
|
||||
pub(crate) use relation::{AggregateExpr, AggregateFunc, TableFunc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub(crate) use crate::expr::func::{BinaryFunc, UnaryFunc, VariadicFunc};
|
||||
use crate::storage::errors::EvalError;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum ScalarExpr {
|
||||
/// A column of the input row
|
||||
Column(usize),
|
||||
/// A literal value.
|
||||
Literal(Result<Value, EvalError>, ConcreteDataType),
|
||||
CallUnary {
|
||||
func: UnaryFunc,
|
||||
expr: Box<ScalarExpr>,
|
||||
},
|
||||
CallBinary {
|
||||
func: BinaryFunc,
|
||||
expr1: Box<ScalarExpr>,
|
||||
expr2: Box<ScalarExpr>,
|
||||
},
|
||||
CallVariadic {
|
||||
func: VariadicFunc,
|
||||
exprs: Vec<ScalarExpr>,
|
||||
},
|
||||
/// Conditionally evaluated expressions.
|
||||
///
|
||||
/// It is important that `then` and `els` only be evaluated if
|
||||
/// `cond` is true or false, respectively. This is the only way
|
||||
/// users can guard execution (other logical operators do not
|
||||
/// short-circuit) and we need to preserve that.
|
||||
If {
|
||||
cond: Box<ScalarExpr>,
|
||||
then: Box<ScalarExpr>,
|
||||
els: Box<ScalarExpr>,
|
||||
},
|
||||
}
|
||||
|
||||
impl ScalarExpr {
|
||||
pub fn eval(&self, values: &[Value]) -> Result<Value, EvalError> {
|
||||
match self {
|
||||
ScalarExpr::Column(index) => Ok(values[*index].clone()),
|
||||
ScalarExpr::Literal(row_res, _ty) => row_res.clone(),
|
||||
ScalarExpr::CallUnary { func, expr } => func.eval(values, expr),
|
||||
ScalarExpr::CallBinary { func, expr1, expr2 } => func.eval(values, expr1, expr2),
|
||||
ScalarExpr::CallVariadic { func, exprs } => func.eval(values, exprs),
|
||||
ScalarExpr::If { cond, then, els } => match cond.eval(values) {
|
||||
Ok(Value::Boolean(true)) => then.eval(values),
|
||||
Ok(Value::Boolean(false)) => els.eval(values),
|
||||
_ => Err(EvalError::InvalidArgument(
|
||||
"if condition must be boolean".to_string(),
|
||||
)),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Rewrites column indices with their value in `permutation`.
|
||||
///
|
||||
/// This method is applicable even when `permutation` is not a
|
||||
/// strict permutation, and it only needs to have entries for
|
||||
/// each column referenced in `self`.
|
||||
pub fn permute(&mut self, permutation: &[usize]) {
|
||||
#[allow(deprecated)]
|
||||
self.visit_mut_post_nolimit(&mut |e| {
|
||||
if let ScalarExpr::Column(old_i) = e {
|
||||
*old_i = permutation[*old_i];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Rewrites column indices with their value in `permutation`.
|
||||
///
|
||||
/// This method is applicable even when `permutation` is not a
|
||||
/// strict permutation, and it only needs to have entries for
|
||||
/// each column referenced in `self`.
|
||||
pub fn permute_map(&mut self, permutation: &BTreeMap<usize, usize>) {
|
||||
#[allow(deprecated)]
|
||||
self.visit_mut_post_nolimit(&mut |e| {
|
||||
if let ScalarExpr::Column(old_i) = e {
|
||||
*old_i = permutation[old_i];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
pub fn support(&self) -> BTreeSet<usize> {
|
||||
let mut support = BTreeSet::new();
|
||||
#[allow(deprecated)]
|
||||
self.visit_post_nolimit(&mut |e| {
|
||||
if let ScalarExpr::Column(i) = e {
|
||||
support.insert(*i);
|
||||
}
|
||||
});
|
||||
support
|
||||
}
|
||||
|
||||
pub fn as_literal(&self) -> Option<Result<Value, &EvalError>> {
|
||||
if let ScalarExpr::Literal(lit, _column_type) = self {
|
||||
Some(lit.as_ref().map(|row| row.clone()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_literal(&self) -> bool {
|
||||
matches!(self, ScalarExpr::Literal(_, _))
|
||||
}
|
||||
|
||||
pub fn is_literal_true(&self) -> bool {
|
||||
Some(Ok(Value::Boolean(true))) == self.as_literal()
|
||||
}
|
||||
|
||||
pub fn is_literal_false(&self) -> bool {
|
||||
Some(Ok(Value::Boolean(false))) == self.as_literal()
|
||||
}
|
||||
|
||||
pub fn is_literal_null(&self) -> bool {
|
||||
Some(Ok(Value::Null)) == self.as_literal()
|
||||
}
|
||||
|
||||
pub fn is_literal_ok(&self) -> bool {
|
||||
matches!(self, ScalarExpr::Literal(Ok(_), _typ))
|
||||
}
|
||||
|
||||
pub fn is_literal_err(&self) -> bool {
|
||||
matches!(self, ScalarExpr::Literal(Err(_), _typ))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarExpr {
|
||||
/// visit post-order without stack call limit, but may cause stack overflow
|
||||
fn visit_post_nolimit<F>(&self, f: &mut F)
|
||||
where
|
||||
F: FnMut(&Self),
|
||||
{
|
||||
self.visit_children(|e| e.visit_post_nolimit(f));
|
||||
f(self);
|
||||
}
|
||||
|
||||
fn visit_children<F>(&self, mut f: F)
|
||||
where
|
||||
F: FnMut(&Self),
|
||||
{
|
||||
match self {
|
||||
ScalarExpr::Column(_) | ScalarExpr::Literal(_, _) => (),
|
||||
ScalarExpr::CallUnary { func, expr } => f(expr),
|
||||
ScalarExpr::CallBinary { func, expr1, expr2 } => {
|
||||
f(expr1);
|
||||
f(expr2);
|
||||
}
|
||||
ScalarExpr::CallVariadic { func, exprs } => {
|
||||
for expr in exprs {
|
||||
f(expr);
|
||||
}
|
||||
}
|
||||
ScalarExpr::If { cond, then, els } => {
|
||||
f(cond);
|
||||
f(then);
|
||||
f(els);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_mut_post_nolimit<F>(&mut self, f: &mut F)
|
||||
where
|
||||
F: FnMut(&mut Self),
|
||||
{
|
||||
self.visit_mut_children(|e: &mut Self| e.visit_mut_post_nolimit(f));
|
||||
f(self);
|
||||
}
|
||||
|
||||
fn visit_mut_children<F>(&mut self, mut f: F)
|
||||
where
|
||||
F: FnMut(&mut Self),
|
||||
{
|
||||
match self {
|
||||
ScalarExpr::Column(_) | ScalarExpr::Literal(_, _) => (),
|
||||
ScalarExpr::CallUnary { func, expr } => f(expr),
|
||||
ScalarExpr::CallBinary { func, expr1, expr2 } => {
|
||||
f(expr1);
|
||||
f(expr2);
|
||||
}
|
||||
ScalarExpr::CallVariadic { func, exprs } => {
|
||||
for expr in exprs {
|
||||
f(expr);
|
||||
}
|
||||
}
|
||||
ScalarExpr::If { cond, then, els } => {
|
||||
f(cond);
|
||||
f(then);
|
||||
f(els);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
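As a small illustration of `support` and `permute`, a hypothetical test (not part of the diff) might look like the following; it reuses the `BinaryFunc::Gte` variant shown earlier:

#[cfg(test)]
mod scalar_expr_sketch {
    use super::*;

    #[test]
    fn permute_rewrites_column_references() {
        let mut expr = ScalarExpr::CallBinary {
            func: BinaryFunc::Gte,
            expr1: Box::new(ScalarExpr::Column(0)),
            expr2: Box::new(ScalarExpr::Column(2)),
        };
        assert_eq!(expr.support(), BTreeSet::from([0, 2]));
        // Column 0 maps to 3 and column 2 maps to 1; entry 1 of the permutation is unused.
        expr.permute(&[3, 9, 1]);
        assert_eq!(expr.support(), BTreeSet::from([1, 3]));
    }
}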
206
src/flow/src/expr/relation/func.rs
Normal file
@@ -0,0 +1,206 @@
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::{OrderedF32, OrderedF64, Value};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
pub enum AggregateFunc {
|
||||
MaxInt16,
|
||||
MaxInt32,
|
||||
MaxInt64,
|
||||
MaxUInt16,
|
||||
MaxUInt32,
|
||||
MaxUInt64,
|
||||
MaxFloat32,
|
||||
MaxFloat64,
|
||||
MaxBool,
|
||||
MaxString,
|
||||
MaxDate,
|
||||
MaxTimestamp,
|
||||
MaxTimestampTz,
|
||||
MinInt16,
|
||||
MinInt32,
|
||||
MinInt64,
|
||||
MinUInt16,
|
||||
MinUInt32,
|
||||
MinUInt64,
|
||||
MinFloat32,
|
||||
MinFloat64,
|
||||
MinBool,
|
||||
MinString,
|
||||
MinDate,
|
||||
MinTimestamp,
|
||||
MinTimestampTz,
|
||||
SumInt16,
|
||||
SumInt32,
|
||||
SumInt64,
|
||||
SumUInt16,
|
||||
SumUInt32,
|
||||
SumUInt64,
|
||||
SumFloat32,
|
||||
SumFloat64,
|
||||
Count,
|
||||
Any,
|
||||
All,
|
||||
}
|
||||
|
||||
impl AggregateFunc {
|
||||
pub fn eval<I>(&self, values: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
{
|
||||
// TODO: impl more functions, like min/max/sum for Timestamp etc.
|
||||
match self {
|
||||
AggregateFunc::MaxInt16 => max_value::<I, i16>(values),
|
||||
AggregateFunc::MaxInt32 => max_value::<I, i32>(values),
|
||||
AggregateFunc::MaxInt64 => max_value::<I, i64>(values),
|
||||
AggregateFunc::MaxUInt16 => max_value::<I, u16>(values),
|
||||
AggregateFunc::MaxUInt32 => max_value::<I, u32>(values),
|
||||
AggregateFunc::MaxUInt64 => max_value::<I, u64>(values),
|
||||
AggregateFunc::MaxFloat32 => max_value::<I, OrderedF32>(values),
|
||||
AggregateFunc::MaxFloat64 => max_value::<I, OrderedF64>(values),
|
||||
AggregateFunc::MaxBool => max_value::<I, bool>(values),
|
||||
AggregateFunc::MaxString => max_string(values),
|
||||
|
||||
AggregateFunc::MinInt16 => min_value::<I, i16>(values),
|
||||
AggregateFunc::MinInt32 => min_value::<I, i32>(values),
|
||||
AggregateFunc::MinInt64 => min_value::<I, i64>(values),
|
||||
AggregateFunc::MinUInt16 => min_value::<I, u16>(values),
|
||||
AggregateFunc::MinUInt32 => min_value::<I, u32>(values),
|
||||
AggregateFunc::MinUInt64 => min_value::<I, u64>(values),
|
||||
AggregateFunc::MinFloat32 => min_value::<I, OrderedF32>(values),
|
||||
AggregateFunc::MinFloat64 => min_value::<I, OrderedF64>(values),
|
||||
AggregateFunc::MinBool => min_value::<I, bool>(values),
|
||||
AggregateFunc::MinString => min_string(values),
|
||||
|
||||
AggregateFunc::SumInt16 => sum_value::<I, i16, i64>(values),
|
||||
AggregateFunc::SumInt32 => sum_value::<I, i32, i64>(values),
|
||||
AggregateFunc::SumInt64 => sum_value::<I, i64, i64>(values),
|
||||
AggregateFunc::SumUInt16 => sum_value::<I, u16, u64>(values),
|
||||
AggregateFunc::SumUInt32 => sum_value::<I, u32, u64>(values),
|
||||
AggregateFunc::SumUInt64 => sum_value::<I, u64, u64>(values),
|
||||
AggregateFunc::SumFloat32 => sum_value::<I, f32, f32>(values),
|
||||
AggregateFunc::SumFloat64 => sum_value::<I, f64, f64>(values),
|
||||
|
||||
AggregateFunc::Count => count(values),
|
||||
AggregateFunc::All => all(values),
|
||||
AggregateFunc::Any => any(values),
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn max_string<I>(values: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
{
|
||||
match values.into_iter().filter(|d| !d.is_null()).max_by(|a, b| {
|
||||
let a = a.as_value_ref();
|
||||
let a = a.as_string().expect("unexpected type").unwrap();
|
||||
let b = b.as_value_ref();
|
||||
let b = b.as_string().expect("unexpected type").unwrap();
|
||||
a.cmp(b)
|
||||
}) {
|
||||
Some(v) => v,
|
||||
None => Value::Null,
|
||||
}
|
||||
}
|
||||
|
||||
fn max_value<I, TypedValue>(values: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
TypedValue: TryFrom<Value> + Ord,
|
||||
<TypedValue as TryFrom<Value>>::Error: std::fmt::Debug,
|
||||
Value: From<Option<TypedValue>>,
|
||||
{
|
||||
let x: Option<TypedValue> = values
|
||||
.into_iter()
|
||||
.filter(|v| !v.is_null())
|
||||
.map(|v| TypedValue::try_from(v).expect("unexpected type"))
|
||||
.max();
|
||||
x.into()
|
||||
}
|
||||
|
||||
fn min_string<I>(values: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
{
|
||||
match values.into_iter().filter(|d| !d.is_null()).min_by(|a, b| {
|
||||
let a = a.as_value_ref();
|
||||
let a = a.as_string().expect("unexpected type").unwrap();
|
||||
let b = b.as_value_ref();
|
||||
let b = b.as_string().expect("unexpected type").unwrap();
|
||||
a.cmp(b)
|
||||
}) {
|
||||
Some(v) => v,
|
||||
None => Value::Null,
|
||||
}
|
||||
}
|
||||
|
||||
fn min_value<I, TypedValue>(values: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
TypedValue: TryFrom<Value> + Ord,
|
||||
<TypedValue as TryFrom<Value>>::Error: std::fmt::Debug,
|
||||
Value: From<Option<TypedValue>>,
|
||||
{
|
||||
let x: Option<TypedValue> = values
|
||||
.into_iter()
|
||||
.filter(|v| !v.is_null())
|
||||
.map(|v| TypedValue::try_from(v).expect("unexpected type"))
|
||||
.min();
|
||||
x.into()
|
||||
}
|
||||
|
||||
fn sum_value<I, ValueType, ResultType>(values: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
ValueType: TryFrom<Value>,
|
||||
<ValueType as TryFrom<Value>>::Error: std::fmt::Debug,
|
||||
Value: From<Option<ValueType>>,
|
||||
ResultType: From<ValueType> + std::iter::Sum + Into<Value>,
|
||||
{
|
||||
// If no row qualifies, then the result of COUNT is 0 (zero), and the result of any other aggregate function is the null value.
|
||||
let mut values = values.into_iter().filter(|v| !v.is_null()).peekable();
|
||||
if values.peek().is_none() {
|
||||
Value::Null
|
||||
} else {
|
||||
let x = values
|
||||
.map(|v| ResultType::from(ValueType::try_from(v).expect("unexpected type")))
|
||||
.sum::<ResultType>();
|
||||
x.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn count<I>(values: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
{
|
||||
let x = values.into_iter().filter(|v| !v.is_null()).count() as i64;
|
||||
Value::from(x)
|
||||
}
|
||||
|
||||
fn any<I>(datums: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
{
|
||||
datums
|
||||
.into_iter()
|
||||
.fold(Value::Boolean(false), |state, next| match (state, next) {
|
||||
(Value::Boolean(true), _) | (_, Value::Boolean(true)) => Value::Boolean(true),
|
||||
(Value::Null, _) | (_, Value::Null) => Value::Null,
|
||||
_ => Value::Boolean(false),
|
||||
})
|
||||
}
|
||||
|
||||
fn all<I>(datums: I) -> Value
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
{
|
||||
datums
|
||||
.into_iter()
|
||||
.fold(Value::Boolean(true), |state, next| match (state, next) {
|
||||
(Value::Boolean(false), _) | (_, Value::Boolean(false)) => Value::Boolean(false),
|
||||
(Value::Null, _) | (_, Value::Null) => Value::Null,
|
||||
_ => Value::Boolean(true),
|
||||
})
|
||||
}
|
||||
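A quick, hypothetical check of the helpers above (not part of the diff) shows how nulls are skipped and how sums widen into the accumulator type:

#[cfg(test)]
mod aggregate_sketch {
    use datatypes::value::Value;

    use super::*;

    #[test]
    fn sum_and_count_skip_nulls() {
        let vals = vec![Value::from(1i32), Value::Null, Value::from(2i32)];
        // SumInt32 accumulates into i64 and ignores the null.
        assert_eq!(AggregateFunc::SumInt32.eval(vals.clone()), Value::from(3i64));
        // Count only counts the non-null values.
        assert_eq!(AggregateFunc::Count.eval(vals), Value::from(2i64));
        // With no non-null input, every aggregate except Count yields Null.
        assert_eq!(AggregateFunc::SumInt32.eval(vec![Value::Null]), Value::Null);
    }
}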
22
src/flow/src/expr/relation/mod.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
pub(crate) use func::AggregateFunc;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::expr::ScalarExpr;
|
||||
|
||||
mod func;
|
||||
|
||||
/// A function that might emit multiple output records for one input row
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
pub enum TableFunc {}
|
||||
|
||||
/// Describes an aggregation expression.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct AggregateExpr {
|
||||
/// Names the aggregation function.
|
||||
pub func: AggregateFunc,
|
||||
/// An expression which extracts from each row the input to `func`.
|
||||
pub expr: ScalarExpr,
|
||||
/// Should the aggregation be applied only to distinct results in each group.
|
||||
#[serde(default)]
|
||||
pub distinct: bool,
|
||||
}
|
||||
9
src/flow/src/lib.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
#![allow(unused)]
|
||||
#![allow(clippy::mutable_key_type)]
|
||||
|
||||
mod adapter;
|
||||
mod compute;
|
||||
mod expr;
|
||||
mod repr;
|
||||
mod storage;
|
||||
mod util;
|
||||
62
src/flow/src/repr/mod.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
//! basically a wrapper around the `datatypes` crate
|
||||
//! for basic Data Representation
|
||||
use std::borrow::Borrow;
|
||||
use std::slice::SliceIndex;
|
||||
|
||||
use datatypes::value::Value;
|
||||
pub(crate) use relation::{RelationDesc, RelationType};
|
||||
use serde::{Deserialize, Serialize};
|
||||
/// System-wide Record count difference type.
|
||||
pub type Diff = i64;
|
||||
|
||||
mod relation;
|
||||
mod timestamp;
|
||||
|
||||
/// A row is a vector of values.
|
||||
///
|
||||
/// TODO(discord9): use a more efficient representation
|
||||
/// i.e. more compact, like raw u8 of \[tag0, value0, tag1, value1, ...\]
|
||||
|
||||
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default, Serialize, Deserialize)]
|
||||
pub struct Row {
|
||||
inner: Vec<Value>,
|
||||
}
|
||||
|
||||
impl Row {
|
||||
pub fn get(&self, idx: usize) -> Option<&Value> {
|
||||
self.inner.get(idx)
|
||||
}
|
||||
pub fn clear(&mut self) {
|
||||
self.inner.clear();
|
||||
}
|
||||
pub fn packer(&mut self) -> &mut Vec<Value> {
|
||||
self.inner.clear();
|
||||
&mut self.inner
|
||||
}
|
||||
pub fn pack<I>(iter: I) -> Row
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
{
|
||||
Self {
|
||||
inner: iter.into_iter().collect(),
|
||||
}
|
||||
}
|
||||
pub fn unpack(&self) -> Vec<Value> {
|
||||
self.inner.clone()
|
||||
}
|
||||
pub fn extend<I>(&mut self, iter: I)
|
||||
where
|
||||
I: IntoIterator<Item = Value>,
|
||||
{
|
||||
self.inner.extend(iter);
|
||||
}
|
||||
pub fn into_iter(self) -> impl Iterator<Item = Value> {
|
||||
self.inner.into_iter()
|
||||
}
|
||||
pub fn iter(&self) -> impl Iterator<Item = &Value> {
|
||||
self.inner.iter()
|
||||
}
|
||||
}
|
||||
|
||||
/// System-wide default timestamp type
|
||||
pub type Timestamp = u64;
|
||||
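A hedged sketch of the `Row` API above (a hypothetical test, not part of the diff):

#[cfg(test)]
mod row_sketch {
    use datatypes::value::Value;

    use super::*;

    #[test]
    fn pack_unpack_and_repack() {
        let mut row = Row::pack(vec![Value::from(1i64), Value::from(2i64)]);
        assert_eq!(row.get(0), Some(&Value::from(1i64)));
        assert_eq!(row.unpack(), vec![Value::from(1i64), Value::from(2i64)]);
        // `packer` clears the row and hands back the inner Vec for refilling.
        row.packer().push(Value::from(3i64));
        assert_eq!(row.unpack(), vec![Value::from(3i64)]);
    }
}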
342
src/flow/src/repr/relation.rs
Normal file
@@ -0,0 +1,342 @@
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// The type of a relation.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
pub struct RelationType {
|
||||
/// The type for each column, in order.
|
||||
pub column_types: Vec<ColumnType>,
|
||||
/// Sets of indices that are "keys" for the collection.
|
||||
///
|
||||
/// Each element in this list is a set of column indices, each with the
|
||||
/// property that the collection contains at most one record with each
|
||||
/// distinct set of values for each column. Alternately, for a specific set
|
||||
/// of values assigned to these columns there is at most one record.
|
||||
///
|
||||
/// A collection can contain multiple sets of keys, although it is common to
|
||||
/// have either zero or one sets of key indices.
|
||||
#[serde(default)]
|
||||
pub keys: Vec<Vec<usize>>,
|
||||
}
|
||||
|
||||
impl RelationType {
|
||||
/// Constructs a `RelationType` representing the relation with no columns and
|
||||
/// no keys.
|
||||
pub fn empty() -> Self {
|
||||
RelationType::new(vec![])
|
||||
}
|
||||
|
||||
/// Constructs a new `RelationType` from specified column types.
|
||||
///
|
||||
/// The `RelationType` will have no keys.
|
||||
pub fn new(column_types: Vec<ColumnType>) -> Self {
|
||||
RelationType {
|
||||
column_types,
|
||||
keys: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds a new key for the relation.
|
||||
pub fn with_key(mut self, mut indices: Vec<usize>) -> Self {
|
||||
indices.sort_unstable();
|
||||
if !self.keys.contains(&indices) {
|
||||
self.keys.push(indices);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_keys(mut self, keys: Vec<Vec<usize>>) -> Self {
|
||||
for key in keys {
|
||||
self = self.with_key(key)
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Computes the number of columns in the relation.
|
||||
pub fn arity(&self) -> usize {
|
||||
self.column_types.len()
|
||||
}
|
||||
|
||||
/// Gets the indices of the columns used when creating a default index.
|
||||
pub fn default_key(&self) -> Vec<usize> {
|
||||
if let Some(key) = self.keys.first() {
|
||||
if key.is_empty() {
|
||||
(0..self.column_types.len()).collect()
|
||||
} else {
|
||||
key.clone()
|
||||
}
|
||||
} else {
|
||||
(0..self.column_types.len()).collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// True if any collection described by `self` could safely be described by `other`.
|
||||
///
|
||||
/// In practice this means checking that the scalar types match exactly, and that the
|
||||
/// nullability of `self` is at least as strict as `other`, and that all keys of `other`
|
||||
/// contain some key of `self` (as a set of key columns is less strict than any subset).
|
||||
pub fn subtypes(&self, other: &RelationType) -> bool {
|
||||
let all_keys = other.keys.iter().all(|key1| {
|
||||
self.keys
|
||||
.iter()
|
||||
.any(|key2| key1.iter().all(|k| key2.contains(k)))
|
||||
});
|
||||
if !all_keys {
|
||||
return false;
|
||||
}
|
||||
|
||||
if self.column_types.len() != other.column_types.len() {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (col1, col2) in self.column_types.iter().zip(other.column_types.iter()) {
|
||||
if col1.nullable && !col2.nullable {
|
||||
return false;
|
||||
}
|
||||
if col1.scalar_type != col2.scalar_type {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// The type of a `Value`
|
||||
///
|
||||
/// [`ColumnType`] bundles information about the scalar type of a datum (e.g.,
|
||||
/// Int32 or String) with its nullability.
|
||||
///
|
||||
/// To construct a column type, either initialize the struct directly, or
|
||||
/// use the [`ScalarType::nullable`] method.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
pub struct ColumnType {
|
||||
/// The underlying scalar type (e.g., Int32 or String) of this column.
|
||||
pub scalar_type: ConcreteDataType,
|
||||
/// Whether this datum can be null.
|
||||
#[serde(default = "return_true")]
|
||||
pub nullable: bool,
|
||||
}
|
||||
|
||||
/// This method exists solely for the purpose of making ColumnType nullable by
|
||||
/// default in unit tests. The default value of a bool is false, and the only
|
||||
/// way to make an object take on any other value by default is to pass it a
|
||||
/// function that returns the desired default value. See
|
||||
/// <https://github.com/serde-rs/serde/issues/1030>
|
||||
#[inline(always)]
|
||||
fn return_true() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// A description of the shape of a relation.
|
||||
///
|
||||
/// It bundles a [`RelationType`] with the name of each column in the relation.
|
||||
/// Individual column names are optional.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// A `RelationDesc` is typically constructed via its builder API:
|
||||
///
|
||||
/// ```
|
||||
/// use mz_repr::{ColumnType, RelationDesc, ScalarType};
|
||||
///
|
||||
/// let desc = RelationDesc::empty()
|
||||
/// .with_column("id", ScalarType::Int64.nullable(false))
|
||||
/// .with_column("price", ScalarType::Float64.nullable(true));
|
||||
/// ```
|
||||
///
|
||||
/// In more complicated cases, like when constructing a `RelationDesc` in
|
||||
/// response to user input, it may be more convenient to construct a relation
|
||||
/// type first, and imbue it with column names to form a `RelationDesc` later:
|
||||
///
|
||||
/// ```
|
||||
/// use mz_repr::RelationDesc;
|
||||
///
|
||||
/// # fn plan_query(_: &str) -> mz_repr::RelationType { mz_repr::RelationType::new(vec![]) }
|
||||
/// let relation_type = plan_query("SELECT * FROM table");
|
||||
/// let names = (0..relation_type.arity()).map(|i| match i {
|
||||
/// 0 => "first",
|
||||
/// 1 => "second",
|
||||
/// _ => "unknown",
|
||||
/// });
|
||||
/// let desc = RelationDesc::new(relation_type, names);
|
||||
/// ```
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash)]
|
||||
pub struct RelationDesc {
|
||||
typ: RelationType,
|
||||
names: Vec<ColumnName>,
|
||||
}
|
||||
|
||||
impl RelationDesc {
|
||||
/// Constructs a new `RelationDesc` that represents the empty relation
|
||||
/// with no columns and no keys.
|
||||
pub fn empty() -> Self {
|
||||
RelationDesc {
|
||||
typ: RelationType::empty(),
|
||||
names: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a new `RelationDesc` from a `RelationType` and an iterator
|
||||
/// over column names.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the arity of the `RelationType` is not equal to the number of
|
||||
/// items in `names`.
|
||||
pub fn new<I, N>(typ: RelationType, names: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = N>,
|
||||
N: Into<ColumnName>,
|
||||
{
|
||||
let names: Vec<_> = names.into_iter().map(|name| name.into()).collect();
|
||||
assert_eq!(typ.column_types.len(), names.len());
|
||||
RelationDesc { typ, names }
|
||||
}
|
||||
|
||||
pub fn from_names_and_types<I, T, N>(iter: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = (N, T)>,
|
||||
T: Into<ColumnType>,
|
||||
N: Into<ColumnName>,
|
||||
{
|
||||
let (names, types): (Vec<_>, Vec<_>) = iter.into_iter().unzip();
|
||||
let types = types.into_iter().map(Into::into).collect();
|
||||
let typ = RelationType::new(types);
|
||||
Self::new(typ, names)
|
||||
}
|
||||
/// Concatenates a `RelationDesc` onto the end of this `RelationDesc`.
|
||||
pub fn concat(mut self, other: Self) -> Self {
|
||||
let self_len = self.typ.column_types.len();
|
||||
self.names.extend(other.names);
|
||||
self.typ.column_types.extend(other.typ.column_types);
|
||||
for k in other.typ.keys {
|
||||
let k = k.into_iter().map(|idx| idx + self_len).collect();
|
||||
self = self.with_key(k);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Appends a column with the specified name and type.
|
||||
pub fn with_column<N>(mut self, name: N, column_type: ColumnType) -> Self
|
||||
where
|
||||
N: Into<ColumnName>,
|
||||
{
|
||||
self.typ.column_types.push(column_type);
|
||||
self.names.push(name.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a new key for the relation.
|
||||
pub fn with_key(mut self, indices: Vec<usize>) -> Self {
|
||||
self.typ = self.typ.with_key(indices);
|
||||
self
|
||||
}
|
||||
|
||||
/// Drops all existing keys.
|
||||
pub fn without_keys(mut self) -> Self {
|
||||
self.typ.keys.clear();
|
||||
self
|
||||
}
|
||||
|
||||
/// Builds a new relation description with the column names replaced with
|
||||
/// new names.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the arity of the relation type does not match the number of
|
||||
/// items in `names`.
|
||||
pub fn with_names<I, N>(self, names: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = N>,
|
||||
N: Into<ColumnName>,
|
||||
{
|
||||
Self::new(self.typ, names)
|
||||
}
|
||||
|
||||
/// Computes the number of columns in the relation.
|
||||
pub fn arity(&self) -> usize {
|
||||
self.typ.arity()
|
||||
}
|
||||
|
||||
/// Returns the relation type underlying this relation description.
|
||||
pub fn typ(&self) -> &RelationType {
|
||||
&self.typ
|
||||
}
|
||||
|
||||
/// Returns an iterator over the columns in this relation.
|
||||
pub fn iter(&self) -> impl Iterator<Item = (&ColumnName, &ColumnType)> {
|
||||
self.iter_names().zip(self.iter_types())
|
||||
}
|
||||
|
||||
/// Returns an iterator over the types of the columns in this relation.
|
||||
pub fn iter_types(&self) -> impl Iterator<Item = &ColumnType> {
|
||||
self.typ.column_types.iter()
|
||||
}
|
||||
|
||||
/// Returns an iterator over the names of the columns in this relation.
|
||||
pub fn iter_names(&self) -> impl Iterator<Item = &ColumnName> {
|
||||
self.names.iter()
|
||||
}
|
||||
|
||||
/// Finds a column by name.
|
||||
///
|
||||
/// Returns the index and type of the column named `name`. If no column with
|
||||
/// the specified name exists, returns `None`. If multiple columns have the
|
||||
/// specified name, the leftmost column is returned.
|
||||
pub fn get_by_name(&self, name: &ColumnName) -> Option<(usize, &ColumnType)> {
|
||||
self.iter_names()
|
||||
.position(|n| n == name)
|
||||
.map(|i| (i, &self.typ.column_types[i]))
|
||||
}
|
||||
|
||||
/// Gets the name of the `i`th column.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if `i` is not a valid column index.
|
||||
pub fn get_name(&self, i: usize) -> &ColumnName {
|
||||
&self.names[i]
|
||||
}
|
||||
|
||||
/// Mutably gets the name of the `i`th column.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if `i` is not a valid column index.
|
||||
pub fn get_name_mut(&mut self, i: usize) -> &mut ColumnName {
|
||||
&mut self.names[i]
|
||||
}
|
||||
|
||||
/// Gets the name of the `i`th column if that column name is unambiguous.
|
||||
///
|
||||
/// If at least one other column has the same name as the `i`th column,
|
||||
/// returns `None`. If the `i`th column has no name, returns `None`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if `i` is not a valid column index.
|
||||
pub fn get_unambiguous_name(&self, i: usize) -> Option<&ColumnName> {
|
||||
let name = &self.names[i];
|
||||
if self.iter_names().filter(|n| *n == name).count() == 1 {
|
||||
Some(name)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The name of a column in a [`RelationDesc`].
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
pub struct ColumnName(pub(crate) String);
|
||||
|
||||
impl ColumnName {
|
||||
/// Returns this column name as a `str`.
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the string underlying this column name.
|
||||
pub fn as_mut_str(&mut self) -> &mut String {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
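The doc examples above are inherited from `mz_repr`; this crate has no `ScalarType::nullable` helper, so an equivalent construction would presumably build `ColumnType` values directly from `ConcreteDataType`. A hedged sketch (not part of the diff; the `int64_datatype`/`float64_datatype` constructors are assumed from the `datatypes` crate, and the function name is hypothetical):

// Hypothetical builder usage for this crate's RelationDesc.
fn desc_sketch() -> RelationDesc {
    let desc = RelationDesc::empty()
        .with_column(
            ColumnName("id".to_string()),
            ColumnType {
                scalar_type: ConcreteDataType::int64_datatype(),
                nullable: false,
            },
        )
        .with_column(
            ColumnName("price".to_string()),
            ColumnType {
                scalar_type: ConcreteDataType::float64_datatype(),
                nullable: true,
            },
        )
        // Declare column 0 ("id") as a key of the relation.
        .with_key(vec![0]);
    assert_eq!(desc.arity(), 2);
    desc
}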
1
src/flow/src/repr/timestamp.rs
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
28
src/flow/src/storage/errors.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
// TODO(discord9): more error types
|
||||
#[derive(Ord, PartialOrd, Clone, Debug, Eq, Deserialize, Serialize, PartialEq, Hash)]
|
||||
pub enum DataflowError {
|
||||
EvalError(Box<EvalError>),
|
||||
}
|
||||
|
||||
impl From<EvalError> for DataflowError {
|
||||
fn from(e: EvalError) -> Self {
|
||||
DataflowError::EvalError(Box::new(e))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Ord, PartialOrd, Clone, Debug, Eq, Deserialize, Serialize, PartialEq, Hash)]
|
||||
pub enum EvalError {
|
||||
DivisionByZero,
|
||||
TypeMismatch(String),
|
||||
InvalidArgument(String),
|
||||
Internal(String),
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tell_goal() {
|
||||
use differential_dataflow::ExchangeData;
|
||||
fn a<T: ExchangeData>(_: T) {}
|
||||
a(DataflowError::from(EvalError::DivisionByZero));
|
||||
}
|
||||
4
src/flow/src/storage/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
//! TODO: Storage Layer: wrap gRPC write requests to provide a definite collection for stream processing, be able to send read requests should random access be needed,
|
||||
//! and store the results of stream processing
|
||||
|
||||
pub(crate) mod errors;
|
||||
0
src/flow/src/storage/source.rs
Normal file
150
src/flow/src/util/buffer.rs
Normal file
@@ -0,0 +1,150 @@
|
||||
use differential_dataflow::consolidation::consolidate_updates;
|
||||
use differential_dataflow::difference::Semigroup;
|
||||
use differential_dataflow::Data;
|
||||
use timely::communication::Push;
|
||||
use timely::dataflow::channels::Bundle;
|
||||
use timely::dataflow::operators::generic::OutputHandle;
|
||||
use timely::dataflow::operators::{Capability, InputCapability};
|
||||
use timely::progress::Timestamp;
|
||||
|
||||
/// A buffer that consolidates updates
|
||||
///
|
||||
/// The buffer implements a wrapper around [OutputHandle] consolidating elements pushed to it. It is
|
||||
/// backed by a capacity-limited buffer, which means that compaction only occurs within the
|
||||
/// dimensions of the buffer, i.e. the number of unique keys is less than half of the buffer's
|
||||
/// capacity.
|
||||
///
|
||||
/// A cap is retained whenever the current time changes to be able to flush on drop or when the time
|
||||
/// changes again.
|
||||
///
|
||||
/// The buffer is filled with updates until it reaches its capacity. At this point, the updates are
|
||||
/// consolidated to free up space. This process repeats until the consolidation recovered less than
|
||||
/// half of the buffer's capacity, at which point the buffer will be shipped.
|
||||
///
|
||||
/// The buffer retains a capability to send data on flush. It will flush all data once dropped, if
|
||||
/// time changes, or if the buffer capacity is reached.
|
||||
pub struct ConsolidateBuffer<'a, 'b, T, D: Data, R: Semigroup, P>
|
||||
where
|
||||
P: Push<Bundle<T, (D, T, R)>> + 'a,
|
||||
T: Data + Timestamp + 'a,
|
||||
D: 'a,
|
||||
{
|
||||
// a buffer for records, to send at self.cap
|
||||
// Invariant: Buffer only contains data if cap is Some.
|
||||
buffer: Vec<(D, T, R)>,
|
||||
output_handle: &'b mut OutputHandle<'a, T, (D, T, R), P>,
|
||||
cap: Option<Capability<T>>,
|
||||
port: usize,
|
||||
previous_len: usize,
|
||||
}
|
||||
|
||||
impl<'a, 'b, T, D: Data, R: Semigroup, P> ConsolidateBuffer<'a, 'b, T, D, R, P>
|
||||
where
|
||||
T: Data + Timestamp + 'a,
|
||||
P: Push<Bundle<T, (D, T, R)>> + 'a,
|
||||
{
|
||||
/// Create a new [ConsolidateBuffer], wrapping the provided output handle.
|
||||
///
|
||||
/// * `output_handle`: The output to send data to.
|
||||
/// * `port`: The output port to retain capabilities for.
|
||||
pub fn new(output_handle: &'b mut OutputHandle<'a, T, (D, T, R), P>, port: usize) -> Self {
|
||||
Self {
|
||||
output_handle,
|
||||
port,
|
||||
cap: None,
|
||||
buffer: Vec::with_capacity(::timely::container::buffer::default_capacity::<(D, T, R)>()),
|
||||
previous_len: 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
/// Provides an iterator of elements to the buffer
|
||||
pub fn give_iterator<I: Iterator<Item = (D, T, R)>>(
|
||||
&mut self,
|
||||
cap: &InputCapability<T>,
|
||||
iter: I,
|
||||
) {
|
||||
for item in iter {
|
||||
self.give(cap, item);
|
||||
}
|
||||
}
|
||||
|
||||
/// Give an element to the buffer
|
||||
pub fn give(&mut self, cap: &InputCapability<T>, data: (D, T, R)) {
|
||||
// Retain a cap for the current time, which will be used on flush.
|
||||
if self.cap.as_ref().map_or(true, |t| t.time() != cap.time()) {
|
||||
// Flush on capability change
|
||||
self.flush();
|
||||
// Retain capability for the specified output port.
|
||||
self.cap = Some(cap.delayed_for_output(cap.time(), self.port));
|
||||
}
|
||||
self.give_internal(data);
|
||||
}
|
||||
|
||||
/// Give an element to the buffer, using a pre-fabricated capability. Note that the capability
|
||||
/// must be valid for the associated output.
|
||||
pub fn give_at(&mut self, cap: &Capability<T>, data: (D, T, R)) {
|
||||
// Retain a cap for the current time, which will be used on flush.
|
||||
if self.cap.as_ref().map_or(true, |t| t.time() != cap.time()) {
|
||||
// Flush on capability change
|
||||
self.flush();
|
||||
// Retain capability.
|
||||
self.cap = Some(cap.clone());
|
||||
}
|
||||
self.give_internal(data);
|
||||
}
|
||||
|
||||
/// Give an element and possibly flush the buffer. Note that this needs to have access
|
||||
/// to a capability, which the public functions ensure.
|
||||
fn give_internal(&mut self, data: (D, T, R)) {
|
||||
self.buffer.push(data);
|
||||
|
||||
// Limit, if possible, the lifetime of the allocations for data
|
||||
// and consolidate smaller buffers if we're in the lucky case
|
||||
// of a small domain for D
|
||||
if self.buffer.len() >= 2 * self.previous_len {
|
||||
// Consolidate while the consolidation frees at least half the buffer
|
||||
consolidate_updates(&mut self.buffer);
|
||||
if self.buffer.len() > self.buffer.capacity() / 2 {
|
||||
self.flush();
|
||||
} else {
|
||||
self.previous_len = self.buffer.len();
|
||||
}
|
||||
// At this point, it is an invariant across give calls that self.previous_len
|
||||
// will be in the interval [0, self.buffer.capacity() / 2]. So, we will enter
|
||||
// this if-statement block again when self.buffer.len() == self.buffer.capacity()
|
||||
// or earlier. If consolidation is not effective to keep self.buffer.len()
|
||||
// below half capacity, then flushing when more than half-full will
|
||||
// maintain the invariant.
|
||||
}
|
||||
}
|
||||
|
||||
/// Flush the internal buffer to the underlying session
|
||||
pub fn flush(&mut self) {
|
||||
if let Some(cap) = &self.cap {
|
||||
self.output_handle.session(cap).give_vec(&mut self.buffer);
|
||||
|
||||
// Ensure that the capacity is at least equal to the default in case
|
||||
// it was reduced by give_vec. Note that we cannot rely here on give_vec
|
||||
// returning us a buffer with zero capacity.
|
||||
if self.buffer.capacity() < ::timely::container::buffer::default_capacity::<(D, T, R)>()
|
||||
{
|
||||
let to_reserve = ::timely::container::buffer::default_capacity::<(D, T, R)>()
|
||||
- self.buffer.capacity();
|
||||
self.buffer.reserve_exact(to_reserve);
|
||||
}
|
||||
self.previous_len = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b, T, D: Data, R: Semigroup, P> Drop for ConsolidateBuffer<'a, 'b, T, D, R, P>
|
||||
where
|
||||
P: Push<Bundle<T, (D, T, R)>> + 'a,
|
||||
T: Data + Timestamp + 'a,
|
||||
D: 'a,
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
self.flush();
|
||||
}
|
||||
}
|
||||
7
src/flow/src/util/mod.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
//! utilities, including extensions to differential dataflow for error handling, etc.
|
||||
mod buffer;
|
||||
mod operator;
|
||||
mod reduce;
|
||||
|
||||
pub use operator::CollectionExt;
|
||||
pub use reduce::ReduceExt;
|
||||
257
src/flow/src/util/operator.rs
Normal file
@@ -0,0 +1,257 @@
|
||||
use differential_dataflow::difference::{Multiply, Semigroup};
|
||||
use differential_dataflow::lattice::Lattice;
|
||||
use differential_dataflow::operators::arrange::Arrange;
|
||||
use differential_dataflow::trace::{Batch, Trace, TraceReader};
|
||||
use differential_dataflow::{AsCollection, Collection};
|
||||
use timely::dataflow::channels::pact::{Exchange, ParallelizationContract, Pipeline};
|
||||
use timely::dataflow::channels::pushers::Tee;
|
||||
use timely::dataflow::operators::generic::builder_rc::OperatorBuilder as OperatorBuilderRc;
|
||||
use timely::dataflow::operators::generic::operator::{self, Operator};
|
||||
use timely::dataflow::operators::generic::{InputHandle, OperatorInfo, OutputHandle};
|
||||
use timely::dataflow::operators::Capability;
|
||||
use timely::dataflow::{Scope, Stream};
|
||||
use timely::{Data, ExchangeData};
|
||||
|
||||
use crate::util::buffer::ConsolidateBuffer;
|
||||
|
||||
pub trait StreamExt<G, D1>
|
||||
where
|
||||
D1: Data,
|
||||
G: Scope,
|
||||
{
|
||||
/// Like `timely::dataflow::operators::generic::operator::Operator::unary`,
|
||||
/// but the logic function can handle failures.
|
||||
///
|
||||
/// Creates a new dataflow operator that partitions its input stream by a
|
||||
/// parallelization strategy `pact` and repeatedly invokes `logic`, the
|
||||
/// function returned by the function passed as `constructor`. The `logic`
|
||||
/// function can read from the input stream and write to either of two output
|
||||
/// streams, where the first output stream represents successful
|
||||
/// computations and the second output stream represents failed
|
||||
/// computations.
|
||||
fn unary_fallible<D2, E, B, P>(
|
||||
&self,
|
||||
pact: P,
|
||||
name: &str,
|
||||
constructor: B,
|
||||
) -> (Stream<G, D2>, Stream<G, E>)
|
||||
where
|
||||
D2: Data,
|
||||
E: Data,
|
||||
B: FnOnce(
|
||||
Capability<G::Timestamp>,
|
||||
OperatorInfo,
|
||||
) -> Box<
|
||||
dyn FnMut(
|
||||
&mut InputHandle<G::Timestamp, D1, P::Puller>,
|
||||
&mut OutputHandle<G::Timestamp, D2, Tee<G::Timestamp, D2>>,
|
||||
&mut OutputHandle<G::Timestamp, E, Tee<G::Timestamp, E>>,
|
||||
) + 'static,
|
||||
>,
|
||||
P: ParallelizationContract<G::Timestamp, D1>;
|
||||
|
||||
/// Like [`timely::dataflow::operators::map::Map::flat_map`], but `logic`
|
||||
/// is allowed to fail. The first returned stream will contain the
|
||||
/// successful applications of `logic`, while the second returned stream
|
||||
/// will contain the failed applications.
|
||||
fn flat_map_fallible<D2, E, I, L>(&self, name: &str, logic: L) -> (Stream<G, D2>, Stream<G, E>)
|
||||
where
|
||||
D2: Data,
|
||||
E: Data,
|
||||
I: IntoIterator<Item = Result<D2, E>>,
|
||||
L: FnMut(D1) -> I + 'static;
|
||||
}
|
||||
|
||||
/// Extension methods for differential [`Collection`]s.
|
||||
pub trait CollectionExt<G, D1, R>
|
||||
where
|
||||
G: Scope,
|
||||
R: Semigroup,
|
||||
{
|
||||
/// Creates a new empty collection in `scope`.
|
||||
fn empty(scope: &G) -> Collection<G, D1, R>;
|
||||
|
||||
/// Like [`Collection::map`], but `logic` is allowed to fail. The first
|
||||
/// returned collection will contain successful applications of `logic`,
|
||||
/// while the second returned collection will contain the failed
|
||||
/// applications.
|
||||
fn map_fallible<D2, E, L>(
|
||||
&self,
|
||||
name: &str,
|
||||
mut logic: L,
|
||||
) -> (Collection<G, D2, R>, Collection<G, E, R>)
|
||||
where
|
||||
D2: Data,
|
||||
E: Data,
|
||||
L: FnMut(D1) -> Result<D2, E> + 'static,
|
||||
{
|
||||
self.flat_map_fallible(name, move |record| Some(logic(record)))
|
||||
}
|
||||
|
||||
/// Like [`Collection::flat_map`], but `logic` is allowed to fail. The first
|
||||
/// returned collection will contain the successful applications of `logic`,
|
||||
/// while the second returned collection will contain the failed
|
||||
/// applications.
|
||||
fn flat_map_fallible<D2, E, I, L>(
|
||||
&self,
|
||||
name: &str,
|
||||
logic: L,
|
||||
) -> (Collection<G, D2, R>, Collection<G, E, R>)
|
||||
where
|
||||
D2: Data,
|
||||
E: Data,
|
||||
I: IntoIterator<Item = Result<D2, E>>,
|
||||
L: FnMut(D1) -> I + 'static;
|
||||
|
||||
/// Replaces each record with another, with a new difference type.
|
||||
///
|
||||
/// This method is most commonly used to take records containing aggregatable data (e.g. numbers to be summed)
|
||||
/// and move the data into the difference component. This will allow differential dataflow to update in-place.
|
||||
fn explode_one<D2, R2, L>(&self, logic: L) -> Collection<G, D2, <R2 as Multiply<R>>::Output>
|
||||
where
|
||||
D2: differential_dataflow::Data,
|
||||
R2: Semigroup + Multiply<R>,
|
||||
<R2 as Multiply<R>>::Output: Data + Semigroup,
|
||||
L: FnMut(D1) -> (D2, R2) + 'static,
|
||||
G::Timestamp: Lattice;
|
||||
}
|
||||
|
||||
impl<G, D1> StreamExt<G, D1> for Stream<G, D1>
|
||||
where
|
||||
D1: Data,
|
||||
G: Scope,
|
||||
{
|
||||
fn unary_fallible<D2, E, B, P>(
|
||||
&self,
|
||||
pact: P,
|
||||
name: &str,
|
||||
constructor: B,
|
||||
) -> (Stream<G, D2>, Stream<G, E>)
|
||||
where
|
||||
D2: Data,
|
||||
E: Data,
|
||||
B: FnOnce(
|
||||
Capability<G::Timestamp>,
|
||||
OperatorInfo,
|
||||
) -> Box<
|
||||
dyn FnMut(
|
||||
&mut InputHandle<G::Timestamp, D1, P::Puller>,
|
||||
&mut OutputHandle<G::Timestamp, D2, Tee<G::Timestamp, D2>>,
|
||||
&mut OutputHandle<G::Timestamp, E, Tee<G::Timestamp, E>>,
|
||||
) + 'static,
|
||||
>,
|
||||
P: ParallelizationContract<G::Timestamp, D1>,
|
||||
{
|
||||
let mut builder = OperatorBuilderRc::new(name.into(), self.scope());
|
||||
builder.set_notify(false);
|
||||
|
||||
let operator_info = builder.operator_info();
|
||||
|
||||
let mut input = builder.new_input(self, pact);
|
||||
let (mut ok_output, ok_stream) = builder.new_output();
|
||||
let (mut err_output, err_stream) = builder.new_output();
|
||||
|
||||
builder.build(move |mut capabilities| {
|
||||
// `capabilities` should be a single-element vector.
|
||||
let capability = capabilities.pop().unwrap();
|
||||
let mut logic = constructor(capability, operator_info);
|
||||
move |_frontiers| {
|
||||
let mut ok_output_handle = ok_output.activate();
|
||||
let mut err_output_handle = err_output.activate();
|
||||
logic(&mut input, &mut ok_output_handle, &mut err_output_handle);
|
||||
}
|
||||
});
|
||||
|
||||
(ok_stream, err_stream)
|
||||
}
|
||||
|
||||
#[allow(clippy::redundant_closure)]
|
||||
fn flat_map_fallible<D2, E, I, L>(
|
||||
&self,
|
||||
name: &str,
|
||||
mut logic: L,
|
||||
) -> (Stream<G, D2>, Stream<G, E>)
|
||||
where
|
||||
D2: Data,
|
||||
E: Data,
|
||||
I: IntoIterator<Item = Result<D2, E>>,
|
||||
L: FnMut(D1) -> I + 'static,
|
||||
{
|
||||
let mut storage = Vec::new();
|
||||
self.unary_fallible(Pipeline, name, move |_, _| {
|
||||
Box::new(move |input, ok_output, err_output| {
|
||||
input.for_each(|time, data| {
|
||||
let mut ok_session = ok_output.session(&time);
|
||||
let mut err_session = err_output.session(&time);
|
||||
data.swap(&mut storage);
|
||||
for r in storage.drain(..).flat_map(|d1| logic(d1)) {
|
||||
match r {
|
||||
Ok(d2) => ok_session.give(d2),
|
||||
Err(e) => err_session.give(e),
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<G, D1, R> CollectionExt<G, D1, R> for Collection<G, D1, R>
|
||||
where
|
||||
G: Scope,
|
||||
G::Timestamp: Data,
|
||||
D1: Data,
|
||||
R: Semigroup,
|
||||
{
|
||||
fn empty(scope: &G) -> Collection<G, D1, R> {
|
||||
operator::empty(scope).as_collection()
|
||||
}
|
||||
|
||||
fn flat_map_fallible<D2, E, I, L>(
|
||||
&self,
|
||||
name: &str,
|
||||
mut logic: L,
|
||||
) -> (Collection<G, D2, R>, Collection<G, E, R>)
|
||||
where
|
||||
D2: Data,
|
||||
E: Data,
|
||||
I: IntoIterator<Item = Result<D2, E>>,
|
||||
L: FnMut(D1) -> I + 'static,
|
||||
{
|
||||
let (ok_stream, err_stream) = self.inner.flat_map_fallible(name, move |(d1, t, r)| {
|
||||
logic(d1).into_iter().map(move |res| match res {
|
||||
Ok(d2) => Ok((d2, t.clone(), r.clone())),
|
||||
Err(e) => Err((e, t.clone(), r.clone())),
|
||||
})
|
||||
});
|
||||
(ok_stream.as_collection(), err_stream.as_collection())
|
||||
}
|
||||
|
||||
fn explode_one<D2, R2, L>(&self, mut logic: L) -> Collection<G, D2, <R2 as Multiply<R>>::Output>
|
||||
where
|
||||
D2: differential_dataflow::Data,
|
||||
R2: Semigroup + Multiply<R>,
|
||||
<R2 as Multiply<R>>::Output: Data + Semigroup,
|
||||
L: FnMut(D1) -> (D2, R2) + 'static,
|
||||
G::Timestamp: Lattice,
|
||||
{
|
||||
self.inner
|
||||
.unary(Pipeline, "ExplodeOne", move |_, _| {
|
||||
let mut buffer = Vec::new();
|
||||
move |input, output| {
|
||||
let mut out = ConsolidateBuffer::new(output, 0);
|
||||
input.for_each(|time, data| {
|
||||
data.swap(&mut buffer);
|
||||
out.give_iterator(
|
||||
&time,
|
||||
buffer.drain(..).map(|(x, t, d)| {
|
||||
let (x, d2) = logic(x);
|
||||
(x, t, d2.multiply(&d))
|
||||
}),
|
||||
);
|
||||
});
|
||||
}
|
||||
})
|
||||
.as_collection()
|
||||
}
|
||||
}
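A minimal usage sketch (not part of the patch) of the `CollectionExt` trait above, written from inside the flow crate: `map_fallible` splits a collection into an "ok" and an "err" half, and `explode_one` then moves the parsed value into the difference component. The dataflow scaffolding and the input data are illustrative assumptions; only `crate::util::CollectionExt` comes from the code above.

use differential_dataflow::input::Input;

use crate::util::CollectionExt;

fn fallible_map_example() {
    timely::execute_from_args(std::env::args(), |worker| {
        worker.dataflow::<u64, _, _>(|scope| {
            let (_input, raw) = scope.new_collection_from(vec![
                "1".to_string(),
                "2".to_string(),
                "oops".to_string(),
            ]);

            // Successful parses flow into `oks`, failures into `errs`, each as its own collection.
            let (oks, errs) = raw.map_fallible("ParseNumbers", |s: String| {
                s.parse::<i64>().map_err(|e| e.to_string())
            });

            // Move the parsed value into the difference component so downstream
            // aggregations can update in place.
            let totals = oks.explode_one(|n| ((), n as isize));

            totals.inspect(|x| println!("total delta: {:?}", x));
            errs.inspect(|x| println!("parse error: {:?}", x));
        });
    })
    .unwrap();
}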
src/flow/src/util/reduce.rs (new file, 68 lines)
@@ -0,0 +1,68 @@
use differential_dataflow::difference::{Abelian, Semigroup};
|
||||
use differential_dataflow::lattice::Lattice;
|
||||
use differential_dataflow::operators::arrange::{Arranged, TraceAgent};
|
||||
use differential_dataflow::operators::reduce::ReduceCore;
|
||||
use differential_dataflow::trace::{Batch, Trace, TraceReader};
|
||||
use differential_dataflow::Data;
|
||||
use timely::dataflow::Scope;
|
||||
|
||||
/// Extension trait for `ReduceCore`, currently providing a reduction based
|
||||
/// on an operator-pair approach.
|
||||
pub trait ReduceExt<G: Scope, K: Data, V: Data, R: Semigroup>
|
||||
where
|
||||
G::Timestamp: Lattice + Ord,
|
||||
{
|
||||
/// This method produces a reduction pair based on the same input arrangement. Each reduction
|
||||
/// in the pair operates with its own logic and the two output arrangements from the reductions
|
||||
/// are produced as a result. The method is useful for reductions that need to present different
|
||||
/// output views on the same input data. An example is producing an error-free reduction output
|
||||
/// along with a separate error output indicating when the error-free output is valid.
|
||||
fn reduce_pair<L1, T1, L2, T2>(
|
||||
&self,
|
||||
name1: &str,
|
||||
name2: &str,
|
||||
logic1: L1,
|
||||
logic2: L2,
|
||||
) -> (Arranged<G, TraceAgent<T1>>, Arranged<G, TraceAgent<T2>>)
|
||||
where
|
||||
T1: Trace + TraceReader<Key = K, Time = G::Timestamp> + 'static,
|
||||
T1::Val: Data,
|
||||
T1::R: Abelian,
|
||||
T1::Batch: Batch,
|
||||
L1: FnMut(&K, &[(&V, R)], &mut Vec<(T1::Val, T1::R)>) + 'static,
|
||||
T2: Trace + TraceReader<Key = K, Time = G::Timestamp> + 'static,
|
||||
T2::Val: Data,
|
||||
T2::R: Abelian,
|
||||
T2::Batch: Batch,
|
||||
L2: FnMut(&K, &[(&V, R)], &mut Vec<(T2::Val, T2::R)>) + 'static;
|
||||
}
|
||||
|
||||
impl<G: Scope, K: Data, V: Data, Tr, R: Semigroup> ReduceExt<G, K, V, R> for Arranged<G, Tr>
|
||||
where
|
||||
G::Timestamp: Lattice + Ord,
|
||||
Tr: TraceReader<Key = K, Val = V, Time = G::Timestamp, R = R> + Clone + 'static,
|
||||
{
|
||||
fn reduce_pair<L1, T1, L2, T2>(
|
||||
&self,
|
||||
name1: &str,
|
||||
name2: &str,
|
||||
logic1: L1,
|
||||
logic2: L2,
|
||||
) -> (Arranged<G, TraceAgent<T1>>, Arranged<G, TraceAgent<T2>>)
|
||||
where
|
||||
T1: Trace + TraceReader<Key = K, Time = G::Timestamp> + 'static,
|
||||
T1::Val: Data,
|
||||
T1::R: Abelian,
|
||||
T1::Batch: Batch,
|
||||
L1: FnMut(&K, &[(&V, R)], &mut Vec<(T1::Val, T1::R)>) + 'static,
|
||||
T2: Trace + TraceReader<Key = K, Time = G::Timestamp> + 'static,
|
||||
T2::Val: Data,
|
||||
T2::R: Abelian,
|
||||
T2::Batch: Batch,
|
||||
L2: FnMut(&K, &[(&V, R)], &mut Vec<(T2::Val, T2::R)>) + 'static,
|
||||
{
|
||||
let arranged1 = self.reduce_abelian::<L1, T1>(name1, logic1);
|
||||
let arranged2 = self.reduce_abelian::<L2, T2>(name2, logic2);
|
||||
(arranged1, arranged2)
|
||||
}
|
||||
}
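A hedged sketch of the intended call shape for `reduce_pair` (not part of the patch): a per-key sum and a per-key count built from a single arrangement. The arrangement setup and the `OrdValSpine` trace type are assumptions based on the differential-dataflow APIs imported above.

use differential_dataflow::input::Input;
use differential_dataflow::operators::arrange::ArrangeByKey;
use differential_dataflow::trace::implementations::ord::OrdValSpine;

use crate::util::ReduceExt;

fn reduce_pair_example() {
    timely::execute_from_args(std::env::args(), |worker| {
        worker.dataflow::<u64, _, _>(|scope| {
            let (_input, pairs) = scope.new_collection_from(vec![
                ("a".to_string(), 1i64),
                ("a".to_string(), 2),
                ("b".to_string(), 3),
            ]);

            // Both reductions read the same arrangement instead of arranging twice.
            let arranged = pairs.arrange_by_key();
            let (sums, counts) = arranged
                .reduce_pair::<_, OrdValSpine<String, i64, u64, isize>, _, OrdValSpine<String, i64, u64, isize>>(
                    "Sum",
                    "Count",
                    |_key, input, output| {
                        // `input` is the list of (&value, diff) pairs for this key.
                        let sum: i64 = input.iter().map(|(v, d)| *(*v) * (*d as i64)).sum();
                        output.push((sum, 1));
                    },
                    |_key, input, output| {
                        output.push((input.len() as i64, 1));
                    },
                );

            sums.as_collection(|k, v| (k.clone(), *v))
                .inspect(|x| println!("sum: {:?}", x));
            counts
                .as_collection(|k, v| (k.clone(), *v))
                .inspect(|x| println!("count: {:?}", x));
        });
    })
    .unwrap();
}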
@@ -39,7 +39,7 @@ datatypes = { workspace = true }
|
||||
file-table-engine = { workspace = true }
|
||||
futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
humantime-serde = "1.1"
|
||||
humantime-serde.workspace = true
|
||||
itertools.workspace = true
|
||||
meta-client = { workspace = true }
|
||||
# Although it is not used, please do not delete it.
|
||||
|
||||
@@ -38,7 +38,7 @@ use catalog::remote::CachedMetaKvBackend;
|
||||
use catalog::CatalogManagerRef;
|
||||
use client::client_manager::DatanodeClients;
|
||||
use common_base::Plugins;
|
||||
use common_catalog::consts::MITO_ENGINE;
|
||||
use common_catalog::consts::default_engine;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
|
||||
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
|
||||
@@ -213,7 +213,6 @@ impl Instance {
|
||||
let create_expr_factory = CreateExprFactory;
|
||||
|
||||
let row_inserter = Arc::new(RowInserter::new(
|
||||
MITO_ENGINE.to_string(),
|
||||
catalog_manager.clone(),
|
||||
create_expr_factory,
|
||||
dist_instance.clone(),
|
||||
@@ -286,7 +285,6 @@ impl Instance {
|
||||
let grpc_query_handler = StandaloneGrpcQueryHandler::arc(dn_instance.clone());
|
||||
|
||||
let row_inserter = Arc::new(RowInserter::new(
|
||||
MITO_ENGINE.to_string(),
|
||||
catalog_manager.clone(),
|
||||
create_expr_factory,
|
||||
grpc_query_handler.clone(),
|
||||
@@ -366,7 +364,7 @@ impl Instance {
|
||||
catalog_name, schema_name, table_name,
|
||||
);
|
||||
let _ = self
|
||||
.create_table_by_columns(ctx, table_name, columns, MITO_ENGINE)
|
||||
.create_table_by_columns(ctx, table_name, columns, default_engine())
|
||||
.await?;
|
||||
info!(
|
||||
"Successfully created table on insertion: {}.{}.{}",
|
||||
|
||||
@@ -34,7 +34,7 @@ use client::Database;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_meta::key::schema_name::{SchemaNameKey, SchemaNameValue};
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::ddl::{DdlTask, SubmitDdlTaskRequest, SubmitDdlTaskResponse};
|
||||
use common_meta::rpc::router::{Partition, Partition as MetaPartition, RouteRequest};
|
||||
@@ -68,7 +68,7 @@ use crate::error::{
|
||||
self, AlterExprToRequestSnafu, CatalogSnafu, ColumnDataTypeSnafu, ColumnNotFoundSnafu,
|
||||
DeserializePartitionSnafu, InvokeDatanodeSnafu, NotSupportedSnafu, ParseSqlSnafu,
|
||||
RequestDatanodeSnafu, RequestMetaSnafu, Result, SchemaExistsSnafu, TableAlreadyExistSnafu,
|
||||
TableNotFoundSnafu, TableSnafu, UnrecognizedTableOptionSnafu,
|
||||
TableMetadataManagerSnafu, TableNotFoundSnafu, TableSnafu, UnrecognizedTableOptionSnafu,
|
||||
};
|
||||
use crate::expr_factory;
|
||||
use crate::instance::distributed::deleter::DistDeleter;
|
||||
@@ -104,6 +104,18 @@ impl DistInstance {
|
||||
partitions: Option<Partitions>,
|
||||
) -> Result<TableRef> {
|
||||
let _timer = common_telemetry::timer!(crate::metrics::DIST_CREATE_TABLE);
|
||||
// 1. get schema info
|
||||
let schema_value = self
|
||||
.catalog_manager
|
||||
.table_metadata_manager_ref()
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey::new(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
))
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let table_name = TableName::new(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
@@ -112,7 +124,7 @@ impl DistInstance {
|
||||
|
||||
let (partitions, partition_cols) = parse_partitions(create_table, partitions)?;
|
||||
|
||||
let mut table_info = create_table_info(create_table, partition_cols)?;
|
||||
let mut table_info = create_table_info(create_table, partition_cols, schema_value)?;
|
||||
|
||||
let resp = self
|
||||
.create_table_procedure(create_table, partitions, table_info.clone())
|
||||
@@ -340,6 +352,7 @@ impl DistInstance {
|
||||
let expr = CreateDatabaseExpr {
|
||||
database_name: stmt.name.to_string(),
|
||||
create_if_not_exists: stmt.if_not_exists,
|
||||
options: Default::default(),
|
||||
};
|
||||
self.handle_create_database(expr, query_ctx).await
|
||||
}
|
||||
@@ -477,10 +490,12 @@ impl DistInstance {
|
||||
}
|
||||
);
|
||||
|
||||
let schema_value =
|
||||
SchemaNameValue::try_from(&expr.options).context(error::TableMetadataManagerSnafu)?;
|
||||
self.catalog_manager
|
||||
.table_metadata_manager_ref()
|
||||
.schema_manager()
|
||||
.create(schema)
|
||||
.create(schema, Some(schema_value))
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)?;
|
||||
|
||||
@@ -745,7 +760,7 @@ fn create_partitions_stmt(partitions: Vec<PartitionInfo>) -> Result<Option<Parti
|
||||
.into_iter()
|
||||
.map(|info| {
|
||||
// Generate the partition name from the id
|
||||
let name = &format!("r{}", info.id.as_u64());
|
||||
let name = &format!("r{}", info.id.region_number());
|
||||
let bounds = info.partition.partition_bounds();
|
||||
let value_list = bounds
|
||||
.iter()
|
||||
@@ -772,6 +787,7 @@ fn create_partitions_stmt(partitions: Vec<PartitionInfo>) -> Result<Option<Parti
|
||||
fn create_table_info(
|
||||
create_table: &CreateTableExpr,
|
||||
partition_columns: Vec<String>,
|
||||
schema_opts: Option<SchemaNameValue>,
|
||||
) -> Result<RawTableInfo> {
|
||||
let mut column_schemas = Vec::with_capacity(create_table.column_defs.len());
|
||||
let mut column_name_to_index_map = HashMap::new();
|
||||
@@ -818,6 +834,10 @@ fn create_table_info(
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let table_options = TableOptions::try_from(&create_table.table_options)
|
||||
.context(UnrecognizedTableOptionSnafu)?;
|
||||
let table_options = merge_options(table_options, schema_opts);
|
||||
|
||||
let meta = RawTableMeta {
|
||||
schema: raw_schema,
|
||||
primary_key_indices,
|
||||
@@ -826,8 +846,7 @@ fn create_table_info(
|
||||
next_column_id: column_schemas.len() as u32,
|
||||
region_numbers: vec![],
|
||||
engine_options: HashMap::new(),
|
||||
options: TableOptions::try_from(&create_table.table_options)
|
||||
.context(UnrecognizedTableOptionSnafu)?,
|
||||
options: table_options,
|
||||
created_on: DateTime::default(),
|
||||
partition_key_indices,
|
||||
};
|
||||
@@ -854,6 +873,14 @@ fn create_table_info(
|
||||
Ok(table_info)
|
||||
}
|
||||
|
||||
fn merge_options(
|
||||
mut table_opts: TableOptions,
|
||||
schema_opts: Option<SchemaNameValue>,
|
||||
) -> TableOptions {
|
||||
table_opts.ttl = table_opts.ttl.or(schema_opts.and_then(|s| s.ttl));
|
||||
table_opts
|
||||
}
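The precedence implemented by `merge_options` is a plain `Option::or` fallback: an explicit table-level TTL wins, otherwise the schema-level TTL (if any) is inherited. A standalone sketch with a hypothetical `Opts` stand-in (the real `TableOptions` and `SchemaNameValue` types carry more fields):

use std::time::Duration;

#[derive(Debug, Default, PartialEq)]
struct Opts {
    ttl: Option<Duration>,
}

fn merge(mut table: Opts, schema: Option<Opts>) -> Opts {
    // Same shape as merge_options above: keep the table's TTL if set, otherwise fall back.
    table.ttl = table.ttl.or(schema.and_then(|s| s.ttl));
    table
}

fn main() {
    let merged = merge(
        Opts::default(),
        Some(Opts { ttl: Some(Duration::from_secs(3600)) }),
    );
    assert_eq!(merged.ttl, Some(Duration::from_secs(3600)));
}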
|
||||
|
||||
fn parse_partitions(
|
||||
create_table: &CreateTableExpr,
|
||||
partitions: Option<Partitions>,
|
||||
|
||||
@@ -205,7 +205,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
schema_manager
|
||||
.create(SchemaNameKey::default())
|
||||
.create(SchemaNameKey::default(), None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -154,9 +154,9 @@ impl PromStoreProtocolHandler for Instance {
|
||||
.as_ref()
|
||||
.check_permission(ctx.current_user(), PermissionReq::PromStoreWrite)
|
||||
.context(AuthSnafu)?;
|
||||
let (requests, samples) = prom_store::to_grpc_insert_requests(request)?;
|
||||
let (requests, samples) = prom_store::to_grpc_row_insert_requests(request)?;
|
||||
let _ = self
|
||||
.handle_inserts(requests, ctx)
|
||||
.handle_row_inserts(requests, ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteGrpcQuerySnafu)?;
|
||||
|
||||
@@ -17,6 +17,7 @@ use api::v1::ddl_request::Expr;
|
||||
use api::v1::greptime_request::Request;
|
||||
use api::v1::{AlterExpr, ColumnSchema, DdlRequest, Row, RowInsertRequest, RowInsertRequests};
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_catalog::consts::default_engine;
|
||||
use common_grpc_expr::util::{extract_new_columns, ColumnExpr};
|
||||
use common_query::Output;
|
||||
use common_telemetry::info;
|
||||
@@ -30,7 +31,6 @@ use crate::error::{CatalogSnafu, EmptyDataSnafu, Error, FindNewColumnsOnInsertio
|
||||
use crate::expr_factory::CreateExprFactory;
|
||||
|
||||
pub struct RowInserter {
|
||||
engine_name: String,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
create_expr_factory: CreateExprFactory,
|
||||
grpc_query_handler: GrpcQueryHandlerRef<Error>,
|
||||
@@ -38,13 +38,11 @@ pub struct RowInserter {
|
||||
|
||||
impl RowInserter {
|
||||
pub fn new(
|
||||
engine_name: String,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
create_expr_factory: CreateExprFactory,
|
||||
grpc_query_handler: GrpcQueryHandlerRef<Error>,
|
||||
) -> Self {
|
||||
Self {
|
||||
engine_name,
|
||||
catalog_manager,
|
||||
create_expr_factory,
|
||||
grpc_query_handler,
|
||||
@@ -105,7 +103,7 @@ impl RowInserter {
|
||||
let (column_schemas, _) = extract_schema_and_rows(req)?;
|
||||
let create_table_expr = self
|
||||
.create_expr_factory
|
||||
.create_table_expr_by_column_schemas(table_name, column_schemas, &self.engine_name)?;
|
||||
.create_table_expr_by_column_schemas(table_name, column_schemas, default_engine())?;
|
||||
|
||||
let req = Request::Ddl(DdlRequest {
|
||||
expr: Some(Expr::CreateTable(create_table_expr)),
|
||||
|
||||
@@ -44,6 +44,7 @@ serde_json = "1.0"
|
||||
servers = { workspace = true }
|
||||
snafu.workspace = true
|
||||
store-api = { workspace = true }
|
||||
strum.workspace = true
|
||||
table = { workspace = true }
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
tokio.workspace = true
|
||||
@@ -56,6 +57,7 @@ uuid.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
chrono.workspace = true
|
||||
client = { workspace = true, features = ["testing"] }
|
||||
common-procedure-test = { workspace = true }
|
||||
session = { workspace = true }
|
||||
tracing = "0.1"
|
||||
|
||||
@@ -516,6 +516,9 @@ pub enum Error {
|
||||
operation: String,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Primary key '{key}' not found when creating region request, at {location}"))]
|
||||
PrimaryKeyNotFound { key: String, location: Location },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -570,6 +573,7 @@ impl ErrorExt for Error {
|
||||
| Error::UnsupportedSelectorType { .. }
|
||||
| Error::InvalidArguments { .. }
|
||||
| Error::InvalidHeartbeatRequest { .. }
|
||||
| Error::PrimaryKeyNotFound { .. }
|
||||
| Error::TooManyPartitions { .. } => StatusCode::InvalidArguments,
|
||||
Error::LeaseKeyFromUtf8 { .. }
|
||||
| Error::LeaseValueFromUtf8 { .. }
|
||||
|
||||
@@ -90,7 +90,7 @@ impl MetadataService for DefaultMetadataService {
|
||||
if !exist {
|
||||
self.table_metadata_manager
|
||||
.schema_manager()
|
||||
.create(schema)
|
||||
.create(schema, None)
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)?;
|
||||
|
||||
|
||||
@@ -19,7 +19,4 @@ pub(crate) const METRIC_META_ROUTE_REQUEST: &str = "meta.route_request";
|
||||
pub(crate) const METRIC_META_HEARTBEAT_CONNECTION_NUM: &str = "meta.heartbeat_connection_num";
|
||||
pub(crate) const METRIC_META_HANDLER_EXECUTE: &str = "meta.handler_execute";
|
||||
|
||||
pub(crate) const METRIC_META_CREATE_TABLE_PROCEDURE_CREATE_META: &str =
|
||||
"meta.procedure.create_table.create_meta";
|
||||
pub(crate) const METRIC_META_CREATE_TABLE_PROCEDURE_CREATE_TABLE: &str =
|
||||
"meta.procedure.create_table.create_table";
|
||||
pub(crate) const METRIC_META_PROCEDURE_CREATE_TABLE: &str = "meta.procedure.create_table";
|
||||
|
||||
@@ -12,8 +12,13 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::region::region_request::Body as PbRegionRequest;
|
||||
use api::v1::region::{ColumnDef, CreateRequest as PbCreateRegionRequest};
|
||||
use api::v1::SemanticType;
|
||||
use async_trait::async_trait;
|
||||
use client::region::RegionRequester;
|
||||
use client::Database;
|
||||
use common_catalog::consts::MITO2_ENGINE;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_meta::key::table_name::TableNameKey;
|
||||
@@ -25,13 +30,16 @@ use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status};
|
||||
use common_telemetry::info;
|
||||
use futures::future::join_all;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use table::engine::TableReference;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::storage::RegionId;
|
||||
use strum::AsRefStr;
|
||||
use table::engine::{region_dir, TableReference};
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
|
||||
use super::utils::{handle_request_datanode_error, handle_retry_error};
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result, TableMetadataManagerSnafu};
|
||||
use crate::error::{self, PrimaryKeyNotFoundSnafu, Result, TableMetadataManagerSnafu};
|
||||
use crate::metrics;
|
||||
|
||||
pub struct CreateTableProcedure {
|
||||
context: DdlContext,
|
||||
@@ -69,6 +77,10 @@ impl CreateTableProcedure {
|
||||
&self.creator.data.task.table_info
|
||||
}
|
||||
|
||||
fn table_id(&self) -> TableId {
|
||||
self.table_info().ident.table_id
|
||||
}
|
||||
|
||||
pub fn region_routes(&self) -> &Vec<RegionRoute> {
|
||||
&self.creator.data.region_routes
|
||||
}
|
||||
@@ -99,17 +111,126 @@ impl CreateTableProcedure {
|
||||
return Ok(Status::Done);
|
||||
}
|
||||
|
||||
self.creator.data.state = CreateTableState::DatanodeCreateTable;
|
||||
self.creator.data.state = if expr.engine == MITO2_ENGINE {
|
||||
CreateTableState::DatanodeCreateRegions
|
||||
} else {
|
||||
CreateTableState::DatanodeCreateTable
|
||||
};
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
fn create_region_request_template(&self) -> Result<PbCreateRegionRequest> {
|
||||
let create_table_expr = &self.creator.data.task.create_table;
|
||||
|
||||
let column_defs = create_table_expr
|
||||
.column_defs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, c)| {
|
||||
let semantic_type = if create_table_expr.time_index == c.name {
|
||||
SemanticType::Timestamp
|
||||
} else if create_table_expr.primary_keys.contains(&c.name) {
|
||||
SemanticType::Tag
|
||||
} else {
|
||||
SemanticType::Field
|
||||
};
|
||||
|
||||
ColumnDef {
|
||||
name: c.name.clone(),
|
||||
column_id: i as u32,
|
||||
datatype: c.datatype,
|
||||
is_nullable: c.is_nullable,
|
||||
default_constraint: c.default_constraint.clone(),
|
||||
semantic_type: semantic_type as i32,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let primary_key = create_table_expr
|
||||
.primary_keys
|
||||
.iter()
|
||||
.map(|key| {
|
||||
column_defs
|
||||
.iter()
|
||||
.find_map(|c| {
|
||||
if &c.name == key {
|
||||
Some(c.column_id)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.context(PrimaryKeyNotFoundSnafu { key })
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
Ok(PbCreateRegionRequest {
|
||||
region_id: 0,
|
||||
engine: create_table_expr.engine.to_string(),
|
||||
column_defs,
|
||||
primary_key,
|
||||
create_if_not_exists: true,
|
||||
region_dir: "".to_string(),
|
||||
options: create_table_expr.table_options.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn on_datanode_create_regions(&mut self) -> Result<Status> {
|
||||
let create_table_data = &self.creator.data;
|
||||
let region_routes = &create_table_data.region_routes;
|
||||
|
||||
let create_table_expr = &create_table_data.task.create_table;
|
||||
let catalog = &create_table_expr.catalog_name;
|
||||
let schema = &create_table_expr.schema_name;
|
||||
|
||||
let request_template = self.create_region_request_template()?;
|
||||
|
||||
let leaders = find_leaders(region_routes);
|
||||
let mut create_table_tasks = Vec::with_capacity(leaders.len());
|
||||
|
||||
for datanode in leaders {
|
||||
let clients = self.context.datanode_clients.clone();
|
||||
|
||||
let regions = find_leader_regions(region_routes, &datanode);
|
||||
let requests = regions
|
||||
.iter()
|
||||
.map(|region_number| {
|
||||
let region_id = RegionId::new(self.table_id(), *region_number);
|
||||
|
||||
let mut create_table_request = request_template.clone();
|
||||
create_table_request.region_id = region_id.as_u64();
|
||||
create_table_request.region_dir = region_dir(catalog, schema, region_id);
|
||||
|
||||
PbRegionRequest::Create(create_table_request)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
create_table_tasks.push(common_runtime::spawn_bg(async move {
|
||||
for request in requests {
|
||||
let client = clients.get_client(&datanode).await;
|
||||
let requester = RegionRequester::new(client);
|
||||
|
||||
if let Err(err) = requester.handle(request).await {
|
||||
return Err(handle_request_datanode_error(datanode)(err));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}));
|
||||
}
|
||||
|
||||
join_all(create_table_tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.map(|e| e.context(error::JoinSnafu).flatten())
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
self.creator.data.state = CreateTableState::CreateMetadata;
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
async fn on_create_metadata(&self) -> Result<Status> {
|
||||
let _timer = common_telemetry::timer!(
|
||||
crate::metrics::METRIC_META_CREATE_TABLE_PROCEDURE_CREATE_META
|
||||
);
|
||||
|
||||
let table_id = self.table_info().ident.table_id as TableId;
|
||||
let table_id = self.table_id();
|
||||
let manager = &self.context.table_metadata_manager;
|
||||
|
||||
let raw_table_info = self.table_info().clone();
|
||||
@@ -124,15 +245,12 @@ impl CreateTableProcedure {
|
||||
}
|
||||
|
||||
async fn on_datanode_create_table(&mut self) -> Result<Status> {
|
||||
let _timer = common_telemetry::timer!(
|
||||
crate::metrics::METRIC_META_CREATE_TABLE_PROCEDURE_CREATE_TABLE
|
||||
);
|
||||
let region_routes = &self.creator.data.region_routes;
|
||||
let table_name = self.table_name();
|
||||
let clients = self.context.datanode_clients.clone();
|
||||
let leaders = find_leaders(region_routes);
|
||||
let mut joins = Vec::with_capacity(leaders.len());
|
||||
let table_id = self.table_info().ident.table_id;
|
||||
let table_id = self.table_id();
|
||||
|
||||
for datanode in leaders {
|
||||
let client = clients.get_client(&datanode).await;
|
||||
@@ -172,9 +290,17 @@ impl Procedure for CreateTableProcedure {
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
match self.creator.data.state {
|
||||
let state = &self.creator.data.state;
|
||||
|
||||
let _timer = common_telemetry::timer!(
|
||||
metrics::METRIC_META_PROCEDURE_CREATE_TABLE,
|
||||
&[("step", state.as_ref().to_string())]
|
||||
);
|
||||
|
||||
match state {
|
||||
CreateTableState::Prepare => self.on_prepare().await,
|
||||
CreateTableState::DatanodeCreateTable => self.on_datanode_create_table().await,
|
||||
CreateTableState::DatanodeCreateRegions => self.on_datanode_create_regions().await,
|
||||
CreateTableState::CreateMetadata => self.on_create_metadata().await,
|
||||
}
|
||||
.map_err(handle_retry_error)
|
||||
@@ -213,12 +339,14 @@ impl TableCreator {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, AsRefStr)]
|
||||
enum CreateTableState {
|
||||
/// Prepares to create the table
|
||||
Prepare,
|
||||
/// Datanode creates the table
|
||||
DatanodeCreateTable,
|
||||
/// Creates regions on the Datanode
|
||||
DatanodeCreateRegions,
|
||||
/// Creates metadata
|
||||
CreateMetadata,
|
||||
}
|
||||
@@ -236,3 +364,323 @@ impl CreateTableData {
|
||||
self.task.table_ref()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use api::v1::region::region_server::RegionServer;
|
||||
use api::v1::region::RegionResponse;
|
||||
use api::v1::{
|
||||
ColumnDataType, ColumnDef as PbColumnDef, CreateTableExpr, ResponseHeader,
|
||||
Status as PbStatus,
|
||||
};
|
||||
use chrono::DateTime;
|
||||
use client::client_manager::DatanodeClients;
|
||||
use client::Client;
|
||||
use common_grpc::channel_manager::ChannelManager;
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::peer::Peer;
|
||||
use common_runtime::{Builder as RuntimeBuilder, Runtime};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, RawSchema};
|
||||
use servers::grpc::region_server::{RegionServerHandler, RegionServerRequestHandler};
|
||||
use table::metadata::{RawTableMeta, TableIdent, TableType};
|
||||
use table::requests::TableOptions;
|
||||
use tokio::sync::mpsc;
|
||||
use tonic::transport::Server;
|
||||
use tower::service_fn;
|
||||
|
||||
use super::*;
|
||||
use crate::handler::{HeartbeatMailbox, Pushers};
|
||||
use crate::sequence::Sequence;
|
||||
use crate::service::store::kv::KvBackendAdapter;
|
||||
use crate::service::store::memory::MemStore;
|
||||
use crate::test_util::new_region_route;
|
||||
|
||||
fn create_table_procedure() -> CreateTableProcedure {
|
||||
let create_table_expr = CreateTableExpr {
|
||||
catalog_name: "my_catalog".to_string(),
|
||||
schema_name: "my_schema".to_string(),
|
||||
table_name: "my_table".to_string(),
|
||||
desc: "blabla".to_string(),
|
||||
column_defs: vec![
|
||||
PbColumnDef {
|
||||
name: "ts".to_string(),
|
||||
datatype: ColumnDataType::TimestampMillisecond as i32,
|
||||
is_nullable: false,
|
||||
default_constraint: vec![],
|
||||
},
|
||||
PbColumnDef {
|
||||
name: "my_tag1".to_string(),
|
||||
datatype: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: vec![],
|
||||
},
|
||||
PbColumnDef {
|
||||
name: "my_tag2".to_string(),
|
||||
datatype: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: vec![],
|
||||
},
|
||||
PbColumnDef {
|
||||
name: "my_field_column".to_string(),
|
||||
datatype: ColumnDataType::Int32 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: vec![],
|
||||
},
|
||||
],
|
||||
time_index: "ts".to_string(),
|
||||
primary_keys: vec!["my_tag2".to_string(), "my_tag1".to_string()],
|
||||
create_if_not_exists: false,
|
||||
table_options: HashMap::new(),
|
||||
table_id: None,
|
||||
region_numbers: vec![1, 2, 3],
|
||||
engine: MITO2_ENGINE.to_string(),
|
||||
};
|
||||
|
||||
let raw_table_info = RawTableInfo {
|
||||
ident: TableIdent::new(42),
|
||||
name: "my_table".to_string(),
|
||||
desc: Some("blabla".to_string()),
|
||||
catalog_name: "my_catalog".to_string(),
|
||||
schema_name: "my_schema".to_string(),
|
||||
meta: RawTableMeta {
|
||||
schema: RawSchema {
|
||||
column_schemas: vec![
|
||||
ColumnSchema::new(
|
||||
"ts".to_string(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"my_tag1".to_string(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"my_tag2".to_string(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"my_field_column".to_string(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
true,
|
||||
),
|
||||
],
|
||||
timestamp_index: Some(0),
|
||||
version: 0,
|
||||
},
|
||||
primary_key_indices: vec![1, 2],
|
||||
value_indices: vec![2],
|
||||
engine: MITO2_ENGINE.to_string(),
|
||||
next_column_id: 3,
|
||||
region_numbers: vec![1, 2, 3],
|
||||
engine_options: HashMap::new(),
|
||||
options: TableOptions::default(),
|
||||
created_on: DateTime::default(),
|
||||
partition_key_indices: vec![],
|
||||
},
|
||||
table_type: TableType::Base,
|
||||
};
|
||||
|
||||
let peers = vec![
|
||||
Peer::new(1, "127.0.0.1:4001"),
|
||||
Peer::new(2, "127.0.0.1:4002"),
|
||||
Peer::new(3, "127.0.0.1:4003"),
|
||||
];
|
||||
let region_routes = vec![
|
||||
new_region_route(1, &peers, 3),
|
||||
new_region_route(2, &peers, 2),
|
||||
new_region_route(3, &peers, 1),
|
||||
];
|
||||
|
||||
let kv_store = Arc::new(MemStore::new());
|
||||
|
||||
let mailbox_sequence = Sequence::new("test_heartbeat_mailbox", 0, 100, kv_store.clone());
|
||||
let mailbox = HeartbeatMailbox::create(Pushers::default(), mailbox_sequence);
|
||||
|
||||
CreateTableProcedure::new(
|
||||
1,
|
||||
CreateTableTask::new(create_table_expr, vec![], raw_table_info),
|
||||
region_routes,
|
||||
DdlContext {
|
||||
datanode_clients: Arc::new(DatanodeClients::default()),
|
||||
mailbox,
|
||||
server_addr: "127.0.0.1:4321".to_string(),
|
||||
table_metadata_manager: Arc::new(TableMetadataManager::new(
|
||||
KvBackendAdapter::wrap(kv_store),
|
||||
)),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_region_request_template() {
|
||||
let procedure = create_table_procedure();
|
||||
|
||||
let template = procedure.create_region_request_template().unwrap();
|
||||
|
||||
let expected = PbCreateRegionRequest {
|
||||
region_id: 0,
|
||||
engine: MITO2_ENGINE.to_string(),
|
||||
column_defs: vec![
|
||||
ColumnDef {
|
||||
name: "ts".to_string(),
|
||||
column_id: 0,
|
||||
datatype: ColumnDataType::TimestampMillisecond as i32,
|
||||
is_nullable: false,
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Timestamp as i32,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "my_tag1".to_string(),
|
||||
column_id: 1,
|
||||
datatype: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "my_tag2".to_string(),
|
||||
column_id: 2,
|
||||
datatype: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "my_field_column".to_string(),
|
||||
column_id: 3,
|
||||
datatype: ColumnDataType::Int32 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
},
|
||||
],
|
||||
primary_key: vec![2, 1],
|
||||
create_if_not_exists: true,
|
||||
region_dir: "".to_string(),
|
||||
options: HashMap::new(),
|
||||
};
|
||||
assert_eq!(template, expected);
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct TestingRegionServerHandler {
|
||||
runtime: Arc<Runtime>,
|
||||
create_region_notifier: mpsc::Sender<RegionId>,
|
||||
}
|
||||
|
||||
impl TestingRegionServerHandler {
|
||||
fn new(create_region_notifier: mpsc::Sender<RegionId>) -> Self {
|
||||
Self {
|
||||
runtime: Arc::new(RuntimeBuilder::default().worker_threads(2).build().unwrap()),
|
||||
create_region_notifier,
|
||||
}
|
||||
}
|
||||
|
||||
fn new_client(&self, datanode: &Peer) -> Client {
|
||||
let (client, server) = tokio::io::duplex(1024);
|
||||
|
||||
let handler =
|
||||
RegionServerRequestHandler::new(Arc::new(self.clone()), self.runtime.clone());
|
||||
|
||||
tokio::spawn(async move {
|
||||
Server::builder()
|
||||
.add_service(RegionServer::new(handler))
|
||||
.serve_with_incoming(futures::stream::iter(vec![Ok::<_, std::io::Error>(
|
||||
server,
|
||||
)]))
|
||||
.await
|
||||
});
|
||||
|
||||
let channel_manager = ChannelManager::new();
|
||||
let mut client = Some(client);
|
||||
channel_manager
|
||||
.reset_with_connector(
|
||||
datanode.addr.clone(),
|
||||
service_fn(move |_| {
|
||||
let client = client.take().unwrap();
|
||||
async move { Ok::<_, std::io::Error>(client) }
|
||||
}),
|
||||
)
|
||||
.unwrap();
|
||||
Client::with_manager_and_urls(channel_manager, vec![datanode.addr.clone()])
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl RegionServerHandler for TestingRegionServerHandler {
|
||||
async fn handle(&self, request: PbRegionRequest) -> servers::error::Result<RegionResponse> {
|
||||
let PbRegionRequest::Create(request) = request else {
|
||||
unreachable!()
|
||||
};
|
||||
let region_id = request.region_id.into();
|
||||
|
||||
self.create_region_notifier.send(region_id).await.unwrap();
|
||||
|
||||
Ok(RegionResponse {
|
||||
header: Some(ResponseHeader {
|
||||
status: Some(PbStatus {
|
||||
status_code: 0,
|
||||
err_msg: "".to_string(),
|
||||
}),
|
||||
}),
|
||||
affected_rows: 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_datanode_create_regions() {
|
||||
let mut procedure = create_table_procedure();
|
||||
|
||||
let (tx, mut rx) = mpsc::channel(10);
|
||||
|
||||
let region_server = TestingRegionServerHandler::new(tx);
|
||||
|
||||
let datanodes = find_leaders(&procedure.creator.data.region_routes);
|
||||
for peer in datanodes {
|
||||
let client = region_server.new_client(&peer);
|
||||
procedure
|
||||
.context
|
||||
.datanode_clients
|
||||
.insert_client(peer, client)
|
||||
.await;
|
||||
}
|
||||
|
||||
let expected_created_regions = Arc::new(Mutex::new(HashSet::from([
|
||||
RegionId::new(42, 1),
|
||||
RegionId::new(42, 2),
|
||||
RegionId::new(42, 3),
|
||||
])));
|
||||
let handle = tokio::spawn({
|
||||
let expected_created_regions = expected_created_regions.clone();
|
||||
let mut max_recv = expected_created_regions.lock().unwrap().len();
|
||||
async move {
|
||||
while let Some(region_id) = rx.recv().await {
|
||||
expected_created_regions.lock().unwrap().remove(®ion_id);
|
||||
|
||||
max_recv -= 1;
|
||||
if max_recv == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let status = procedure.on_datanode_create_regions().await.unwrap();
|
||||
assert!(matches!(status, Status::Executing { persist: true }));
|
||||
assert!(matches!(
|
||||
procedure.creator.data.state,
|
||||
CreateTableState::CreateMetadata
|
||||
));
|
||||
|
||||
handle.await.unwrap();
|
||||
|
||||
assert!(expected_created_regions.lock().unwrap().is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::key::datanode_table::DatanodeTableKey;
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_meta::{ClusterId, RegionIdent};
|
||||
use common_procedure::error::{
|
||||
@@ -168,6 +169,11 @@ impl RegionFailoverManager {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !self.failed_region_exists(failed_region).await? {
|
||||
// The failed region could have been failed over by another procedure.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let context = self.create_context();
|
||||
let procedure = RegionFailoverProcedure::new(failed_region.clone(), context);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
@@ -207,6 +213,27 @@ impl RegionFailoverManager {
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.is_some())
|
||||
}
|
||||
|
||||
async fn failed_region_exists(&self, failed_region: &RegionIdent) -> Result<bool> {
|
||||
let table_id = failed_region.table_ident.table_id;
|
||||
let datanode_id = failed_region.datanode_id;
|
||||
|
||||
let value = self
|
||||
.table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
.get(&DatanodeTableKey::new(datanode_id, table_id))
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
Ok(value
|
||||
.map(|value| {
|
||||
value
|
||||
.regions
|
||||
.iter()
|
||||
.any(|region| *region == failed_region.region_number)
|
||||
})
|
||||
.unwrap_or_default())
|
||||
}
|
||||
}
|
||||
|
||||
/// A "Node" in the state machine of region failover procedure.
|
||||
@@ -214,7 +241,7 @@ impl RegionFailoverManager {
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct Node {
|
||||
failed_region: RegionIdent,
|
||||
state: Option<Box<dyn State>>,
|
||||
state: Box<dyn State>,
|
||||
}
|
||||
|
||||
/// The "Context" of region failover procedure state machine.
|
||||
@@ -233,7 +260,7 @@ pub struct RegionFailoverContext {
|
||||
#[typetag::serde(tag = "region_failover_state")]
|
||||
trait State: Sync + Send + Debug {
|
||||
async fn next(
|
||||
mut self: Box<Self>,
|
||||
&mut self,
|
||||
ctx: &RegionFailoverContext,
|
||||
failed_region: &RegionIdent,
|
||||
) -> Result<Box<dyn State>>;
|
||||
@@ -304,7 +331,7 @@ impl RegionFailoverProcedure {
|
||||
let state = RegionFailoverStart::new();
|
||||
let node = Node {
|
||||
failed_region,
|
||||
state: Some(Box::new(state)),
|
||||
state: Box::new(state),
|
||||
};
|
||||
Self { node, context }
|
||||
}
|
||||
@@ -322,25 +349,18 @@ impl Procedure for RegionFailoverProcedure {
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
if let Some(state) = self.node.state.take() {
|
||||
let next_state = state
|
||||
.next(&self.context, &self.node.failed_region)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if matches!(e, Error::RetryLater { .. }) {
|
||||
ProcedureError::retry_later(e)
|
||||
} else {
|
||||
ProcedureError::external(e)
|
||||
}
|
||||
})?;
|
||||
self.node.state = Some(next_state);
|
||||
}
|
||||
Ok(self
|
||||
.node
|
||||
.state
|
||||
.as_ref()
|
||||
.map(|s| s.status())
|
||||
.unwrap_or(Status::Done))
|
||||
let state = &mut self.node.state;
|
||||
*state = state
|
||||
.next(&self.context, &self.node.failed_region)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if matches!(e, Error::RetryLater { .. }) {
|
||||
ProcedureError::retry_later(e)
|
||||
} else {
|
||||
ProcedureError::external(e)
|
||||
}
|
||||
})?;
|
||||
Ok(state.status())
|
||||
}
|
||||
|
||||
fn dump(&self) -> ProcedureResult<String> {
|
||||
@@ -362,6 +382,7 @@ impl Procedure for RegionFailoverProcedure {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use api::v1::meta::mailbox_message::Payload;
|
||||
use api::v1::meta::{HeartbeatResponse, MailboxMessage, Peer, RequestHeader};
|
||||
@@ -370,7 +391,8 @@ mod tests {
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::DatanodeId;
|
||||
use common_procedure::BoxedProcedure;
|
||||
use common_procedure::{BoxedProcedure, ProcedureId};
|
||||
use common_procedure_test::MockContextProvider;
|
||||
use rand::prelude::SliceRandom;
|
||||
use tokio::sync::mpsc::Receiver;
|
||||
|
||||
@@ -452,6 +474,11 @@ mod tests {
|
||||
Self { selector: None }
|
||||
}
|
||||
|
||||
fn with_selector(mut self, selector: SelectorRef) -> Self {
|
||||
self.selector = Some(selector);
|
||||
self
|
||||
}
|
||||
|
||||
pub async fn build(self) -> TestingEnv {
|
||||
let in_memory = Arc::new(MemStore::new());
|
||||
let kv_store: KvStoreRef = Arc::new(MemStore::new());
|
||||
@@ -531,8 +558,6 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_failover_procedure() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let mut env = TestingEnvBuilder::new().build().await;
|
||||
let failed_region = env.failed_region(1).await;
|
||||
|
||||
@@ -662,7 +687,7 @@ mod tests {
|
||||
let state = RegionFailoverStart::new();
|
||||
let node = Node {
|
||||
failed_region,
|
||||
state: Some(Box::new(state)),
|
||||
state: Box::new(state),
|
||||
};
|
||||
let procedure = RegionFailoverProcedure {
|
||||
node,
|
||||
@@ -677,7 +702,76 @@ mod tests {
|
||||
let n: Node = serde_json::from_str(&s).unwrap();
|
||||
assert_eq!(
|
||||
format!("{n:?}"),
|
||||
r#"Node { failed_region: RegionIdent { cluster_id: 0, datanode_id: 1, table_ident: TableIdent { catalog: "greptime", schema: "public", table: "my_table", table_id: 1, engine: "mito" }, region_number: 1 }, state: Some(RegionFailoverStart { failover_candidate: None }) }"#
|
||||
r#"Node { failed_region: RegionIdent { cluster_id: 0, datanode_id: 1, table_ident: TableIdent { catalog: "greptime", schema: "public", table: "my_table", table_id: 1, engine: "mito" }, region_number: 1 }, state: RegionFailoverStart { failover_candidate: None } }"#
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_state_not_changed_upon_failure() {
|
||||
struct MySelector {
|
||||
peers: Arc<Mutex<Vec<Option<Peer>>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Selector for MySelector {
|
||||
type Context = SelectorContext;
|
||||
type Output = Vec<Peer>;
|
||||
|
||||
async fn select(&self, _ns: Namespace, _ctx: &Self::Context) -> Result<Self::Output> {
|
||||
let mut peers = self.peers.lock().unwrap();
|
||||
Ok(if let Some(Some(peer)) = peers.pop() {
|
||||
vec![peer]
|
||||
} else {
|
||||
vec![]
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a valid peer the second time "select" is called.
|
||||
let selector = MySelector {
|
||||
peers: Arc::new(Mutex::new(vec![
|
||||
Some(Peer {
|
||||
id: 42,
|
||||
addr: "".to_string(),
|
||||
}),
|
||||
None,
|
||||
])),
|
||||
};
|
||||
|
||||
let env = TestingEnvBuilder::new()
|
||||
.with_selector(Arc::new(selector))
|
||||
.build()
|
||||
.await;
|
||||
let failed_region = env.failed_region(1).await;
|
||||
|
||||
let state = RegionFailoverStart::new();
|
||||
let node = Node {
|
||||
failed_region,
|
||||
state: Box::new(state),
|
||||
};
|
||||
let mut procedure = RegionFailoverProcedure {
|
||||
node,
|
||||
context: env.context,
|
||||
};
|
||||
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
provider: Arc::new(MockContextProvider::default()),
|
||||
};
|
||||
|
||||
let result = procedure.execute(&ctx).await;
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().is_retry_later());
|
||||
assert_eq!(
|
||||
r#"{"region_failover_state":"RegionFailoverStart","failover_candidate":null}"#,
|
||||
serde_json::to_string(&procedure.node.state).unwrap()
|
||||
);
|
||||
|
||||
let result = procedure.execute(&ctx).await;
|
||||
assert!(matches!(result, Ok(Status::Executing { persist: true })));
|
||||
assert_eq!(
|
||||
r#"{"region_failover_state":"DeactivateRegion","candidate":{"id":42,"addr":""},"region_lease_expiry_seconds":40}"#,
|
||||
serde_json::to_string(&procedure.node.state).unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ impl ActivateRegion {
|
||||
}
|
||||
|
||||
async fn handle_response(
|
||||
self,
|
||||
&self,
|
||||
mailbox_receiver: MailboxReceiver,
|
||||
failed_region: &RegionIdent,
|
||||
) -> Result<Box<dyn State>> {
|
||||
@@ -102,7 +102,7 @@ impl ActivateRegion {
|
||||
.fail();
|
||||
};
|
||||
if result {
|
||||
Ok(Box::new(UpdateRegionMetadata::new(self.candidate)))
|
||||
Ok(Box::new(UpdateRegionMetadata::new(self.candidate.clone())))
|
||||
} else {
|
||||
// It could be that the region simply cannot be opened by the candidate, so retrying
// would be in vain. Then why not just end the failover procedure? Because we
|
||||
@@ -131,7 +131,7 @@ impl ActivateRegion {
|
||||
#[typetag::serde]
|
||||
impl State for ActivateRegion {
|
||||
async fn next(
|
||||
mut self: Box<Self>,
|
||||
&mut self,
|
||||
ctx: &RegionFailoverContext,
|
||||
failed_region: &RegionIdent,
|
||||
) -> Result<Box<dyn State>> {
|
||||
|
||||
@@ -76,7 +76,7 @@ impl DeactivateRegion {
|
||||
}
|
||||
|
||||
async fn handle_response(
|
||||
self,
|
||||
&self,
|
||||
ctx: &RegionFailoverContext,
|
||||
mailbox_receiver: MailboxReceiver,
|
||||
failed_region: &RegionIdent,
|
||||
@@ -98,7 +98,7 @@ impl DeactivateRegion {
|
||||
.deregister_inactive_region(failed_region)
|
||||
.await?;
|
||||
|
||||
Ok(Box::new(ActivateRegion::new(self.candidate)))
|
||||
Ok(Box::new(ActivateRegion::new(self.candidate.clone())))
|
||||
} else {
|
||||
// Only under rare circumstances would a Datanode fail to close a Region.
|
||||
// So simply retry.
|
||||
@@ -114,7 +114,7 @@ impl DeactivateRegion {
|
||||
// resides might be unreachable. So we wait for the region lease to expire. The
|
||||
// region would be closed by its own [RegionAliveKeeper].
|
||||
self.wait_for_region_lease_expiry().await;
|
||||
Ok(Box::new(ActivateRegion::new(self.candidate)))
|
||||
Ok(Box::new(ActivateRegion::new(self.candidate.clone())))
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
@@ -132,7 +132,7 @@ impl DeactivateRegion {
|
||||
#[typetag::serde]
|
||||
impl State for DeactivateRegion {
|
||||
async fn next(
|
||||
mut self: Box<Self>,
|
||||
&mut self,
|
||||
ctx: &RegionFailoverContext,
|
||||
failed_region: &RegionIdent,
|
||||
) -> Result<Box<dyn State>> {
|
||||
@@ -144,7 +144,7 @@ impl State for DeactivateRegion {
|
||||
Err(Error::PusherNotFound { .. }) => {
|
||||
// See the comments above about the mailbox receive timeout situation.
|
||||
self.wait_for_region_lease_expiry().await;
|
||||
return Ok(Box::new(ActivateRegion::new(self.candidate)));
|
||||
return Ok(Box::new(ActivateRegion::new(self.candidate.clone())));
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
@@ -26,12 +26,8 @@ pub(super) struct RegionFailoverEnd;
|
||||
#[async_trait]
|
||||
#[typetag::serde]
|
||||
impl State for RegionFailoverEnd {
|
||||
async fn next(
|
||||
mut self: Box<Self>,
|
||||
_: &RegionFailoverContext,
|
||||
_: &RegionIdent,
|
||||
) -> Result<Box<dyn State>> {
|
||||
Ok(self)
|
||||
async fn next(&mut self, _: &RegionFailoverContext, _: &RegionIdent) -> Result<Box<dyn State>> {
|
||||
Ok(Box::new(RegionFailoverEnd))
|
||||
}
|
||||
|
||||
fn status(&self) -> Status {
|
||||
|
||||
@@ -91,7 +91,7 @@ impl RegionFailoverStart {
|
||||
#[typetag::serde]
|
||||
impl State for RegionFailoverStart {
|
||||
async fn next(
|
||||
mut self: Box<Self>,
|
||||
&mut self,
|
||||
ctx: &RegionFailoverContext,
|
||||
failed_region: &RegionIdent,
|
||||
) -> Result<Box<dyn State>> {
|
||||
|
||||
@@ -58,7 +58,7 @@ impl InvalidateCache {
|
||||
#[typetag::serde]
|
||||
impl State for InvalidateCache {
|
||||
async fn next(
|
||||
mut self: Box<Self>,
|
||||
&mut self,
|
||||
ctx: &RegionFailoverContext,
|
||||
failed_region: &RegionIdent,
|
||||
) -> Result<Box<dyn State>> {
|
||||
@@ -108,12 +108,11 @@ mod tests {
|
||||
let _ = heartbeat_receivers.insert(frontend_id, rx);
|
||||
}
|
||||
|
||||
let state = InvalidateCache;
|
||||
let table_ident: TableIdent = failed_region.clone().into();
|
||||
|
||||
// lexicographical order
|
||||
// frontend-4,5,6,7
|
||||
let next_state = Box::new(state)
|
||||
let next_state = InvalidateCache
|
||||
.next(&context, &failed_region)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -72,7 +72,7 @@ impl UpdateRegionMetadata {
|
||||
let mut new_region_routes = table_route_value.region_routes.clone();
|
||||
|
||||
for region_route in new_region_routes.iter_mut() {
|
||||
if region_route.region.id == failed_region.region_number as u64 {
|
||||
if region_route.region.id.region_number() == failed_region.region_number {
|
||||
region_route.leader_peer = Some(self.candidate.clone());
|
||||
break;
|
||||
}
|
||||
@@ -131,7 +131,7 @@ fn pretty_log_table_route_change(
|
||||
#[typetag::serde]
|
||||
impl State for UpdateRegionMetadata {
|
||||
async fn next(
|
||||
mut self: Box<Self>,
|
||||
&mut self,
|
||||
ctx: &RegionFailoverContext,
|
||||
failed_region: &RegionIdent,
|
||||
) -> Result<Box<dyn State>> {
|
||||
@@ -158,19 +158,16 @@ mod tests {
|
||||
|
||||
use super::super::tests::{TestingEnv, TestingEnvBuilder};
|
||||
use super::{State, *};
|
||||
use crate::table_routes::tests::new_region_route;
|
||||
use crate::test_util::new_region_route;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_next_state() {
|
||||
let env = TestingEnvBuilder::new().build().await;
|
||||
let failed_region = env.failed_region(1).await;
|
||||
|
||||
let state = UpdateRegionMetadata::new(Peer::new(2, ""));
|
||||
let mut state = UpdateRegionMetadata::new(Peer::new(2, ""));
|
||||
|
||||
let next_state = Box::new(state)
|
||||
.next(&env.context, &failed_region)
|
||||
.await
|
||||
.unwrap();
|
||||
let next_state = state.next(&env.context, &failed_region).await.unwrap();
|
||||
assert_eq!(format!("{next_state:?}"), "InvalidateCache");
|
||||
}
|
||||
|
||||
|
||||
@@ -144,7 +144,7 @@ async fn handle_create_table_task(
|
||||
.submit_create_table_task(cluster_id, create_table_task, region_routes)
|
||||
.await?;
|
||||
|
||||
info!("Table: {table_id} is dropped via procedure_id {id:?}");
|
||||
info!("Table: {table_id} is created via procedure_id {id:?}");
|
||||
|
||||
Ok(SubmitDdlTaskResponse {
|
||||
key: id.to_string().into(),
|
||||
@@ -178,8 +178,8 @@ async fn handle_create_region_routes(
|
||||
// If the peers are not enough, some peers will be used for multiple partitions.
|
||||
peers.truncate(partitions.len());
|
||||
|
||||
let id = table_id_sequence.next().await?;
|
||||
table_info.ident.table_id = id as u32;
|
||||
let table_id = table_id_sequence.next().await? as u32;
|
||||
table_info.ident.table_id = table_id;
|
||||
|
||||
ensure!(
|
||||
partitions.len() <= MAX_REGION_SEQ as usize,
|
||||
@@ -191,7 +191,7 @@ async fn handle_create_region_routes(
|
||||
.enumerate()
|
||||
.map(|(i, partition)| {
|
||||
let region = Region {
|
||||
id: RegionId::from_u64(i as u64),
|
||||
id: RegionId::new(table_id, i as u32),
|
||||
partition: Some(partition.into()),
|
||||
..Default::default()
|
||||
};
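The switch from `RegionId::from_u64(i as u64)` to `RegionId::new(table_id, i as u32)` matters because a `RegionId` packs the table id and the region number into one u64, which is also why the failover code above now compares `region_number()` instead of the raw id. A small sketch (not part of the patch) of the round trip, using only `RegionId` methods that already appear in this diff:

use store_api::storage::RegionId;

fn region_id_example() {
    let region_id = RegionId::new(42, 3);
    assert_eq!(region_id.region_number(), 3);

    // The packed u64 form is what region requests carry on the wire.
    let packed = region_id.as_u64();
    assert!(RegionId::from_u64(packed) == region_id);
}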
@@ -238,6 +238,8 @@ async fn handle_drop_table_task(
)
.await?;

info!("Table: {table_id} is dropped via procedure_id {id:?}");

Ok(SubmitDdlTaskResponse {
key: id.to_string().into(),
..Default::default()
@@ -289,7 +291,7 @@ async fn handle_alter_table_task(
)
.await?;

info!("Table: {table_id} is altering via procedure_id {id:?}");
info!("Table: {table_id} is altered via procedure_id {id:?}");

Ok(SubmitDdlTaskResponse {
key: id.to_string().into(),

@@ -72,7 +72,7 @@ pub(crate) async fn fetch_tables(

#[cfg(test)]
pub(crate) mod tests {
use std::collections::{BTreeMap, HashMap};
use std::collections::HashMap;

use chrono::DateTime;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
@@ -141,25 +141,4 @@ pub(crate) mod tests {
.await
.unwrap();
}

pub(crate) fn new_region_route(
region_number: u64,
peers: &[Peer],
leader_node: u64,
) -> RegionRoute {
let region = Region {
id: region_number.into(),
name: "".to_string(),
partition: None,
attrs: BTreeMap::new(),
};

let leader_peer = peers.iter().find(|peer| peer.id == leader_node).cloned();

RegionRoute {
region,
leader_peer,
follower_peers: vec![],
}
}
}

@@ -15,6 +15,8 @@
use std::sync::Arc;

use common_meta::key::TableMetadataManager;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use common_procedure::local::{LocalManager, ManagerConfig};

use crate::cluster::MetaPeerClientBuilder;
@@ -28,6 +30,21 @@ use crate::sequence::Sequence;
use crate::service::store::kv::KvBackendAdapter;
use crate::service::store::memory::MemStore;

pub(crate) fn new_region_route(region_id: u64, peers: &[Peer], leader_node: u64) -> RegionRoute {
let region = Region {
id: region_id.into(),
..Default::default()
};

let leader_peer = peers.iter().find(|peer| peer.id == leader_node).cloned();

RegionRoute {
region,
leader_peer,
follower_peers: vec![],
}
}

pub(crate) fn create_region_failover_manager() -> Arc<RegionFailoverManager> {
let kv_store = Arc::new(MemStore::new());

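A short, hypothetical usage sketch for the new_region_route test helper added above; Peer::new(id, addr) appears elsewhere in this diff, and only the fields visible in the helper are asserted on. This is not quoted from the test suite.

// Hypothetical test code, not part of the diff.
let peers = vec![Peer::new(1, "127.0.0.1:3001"), Peer::new(2, "127.0.0.1:3002")];

// Region 7 led by datanode 2: the leader is resolved from the peer list.
let with_leader = new_region_route(7, &peers, 2);
assert_eq!(with_leader.leader_peer.as_ref().map(|p| p.id), Some(2));
assert!(with_leader.follower_peers.is_empty());

// Datanode 3 is not in the peer list, so no leader is recorded.
let without_leader = new_region_route(8, &peers, 3);
assert!(without_leader.leader_peer.is_none());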
@@ -49,7 +49,7 @@ serde_json = "1.0"
snafu.workspace = true
storage = { workspace = true }
store-api = { workspace = true }
strum = "0.21"
strum.workspace = true
table = { workspace = true }
tokio-util.workspace = true
tokio.workspace = true

59 src/mito2/src/access_layer.rs Normal file
@@ -0,0 +1,59 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use object_store::{util, ObjectStore};
use snafu::ResultExt;

use crate::error::{DeleteSstSnafu, Result};
use crate::sst::file::FileId;

pub type AccessLayerRef = Arc<AccessLayer>;

/// Sst access layer.
pub struct AccessLayer {
sst_dir: String,
object_store: ObjectStore,
}

impl std::fmt::Debug for AccessLayer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("AccessLayer")
.field("sst_dir", &self.sst_dir)
.finish()
}
}

impl AccessLayer {
pub fn new(sst_dir: &str, object_store: ObjectStore) -> AccessLayer {
AccessLayer {
sst_dir: sst_dir.to_string(),
object_store,
}
}

fn sst_file_path(&self, file_name: &str) -> String {
util::join_path(&self.sst_dir, file_name)
}

/// Deletes a SST file with given file id.
pub async fn delete_sst(&self, file_id: FileId) -> Result<()> {
let path = self.sst_file_path(&file_id.as_parquet());
self.object_store
.delete(&path)
.await
.context(DeleteSstSnafu { file_id })
}
}
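A brief usage sketch for the new access layer: the helper below is hypothetical and only chains the AccessLayer::new and delete_sst calls defined above, assuming an ObjectStore handle and a FileId are already at hand.

use object_store::ObjectStore;

use crate::access_layer::AccessLayer;
use crate::error::Result;
use crate::sst::file::FileId;

/// Hypothetical helper: removes a single SST file from a region's SST directory.
async fn remove_obsolete_sst(
    object_store: ObjectStore,
    sst_dir: &str,
    file_id: FileId,
) -> Result<()> {
    // delete_sst joins the parquet file name produced by FileId::as_parquet onto
    // sst_dir and issues the delete through the shared object store.
    let access_layer = AccessLayer::new(sst_dir, object_store);
    access_layer.delete_sst(file_id).await
}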
@@ -19,16 +19,23 @@ mod tests;

use std::sync::Arc;

use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_query::Output;
use common_recordbatch::SendableRecordBatchStream;
use object_store::ObjectStore;
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};
use store_api::logstore::LogStore;
use store_api::metadata::RegionMetadataRef;
use store_api::region_engine::RegionEngine;
use store_api::region_request::RegionRequest;
use store_api::storage::RegionId;
use store_api::storage::{RegionId, ScanRequest};

use crate::config::MitoConfig;
use crate::error::{RecvSnafu, Result};
use crate::request::{RegionTask, RequestBody};
use crate::error::{RecvSnafu, RegionNotFoundSnafu, Result};
use crate::flush::WriteBufferManagerImpl;
use crate::read::scan_region::{ScanRegion, Scanner};
use crate::request::WorkerRequest;
use crate::worker::WorkerGroup;

/// Region engine implementation for timeseries data.
@@ -72,12 +79,24 @@ impl MitoEngine {
pub fn is_region_exists(&self, region_id: RegionId) -> bool {
self.inner.workers.is_region_exists(region_id)
}

/// Handles the scan `request` and returns a [Scanner] for the `request`.
fn handle_query(&self, region_id: RegionId, request: ScanRequest) -> Result<Scanner> {
self.inner.handle_query(region_id, request)
}

#[cfg(test)]
pub(crate) fn get_region(&self, id: RegionId) -> Option<crate::region::MitoRegionRef> {
self.inner.workers.get_region(id)
}
}

/// Inner struct of [MitoEngine].
struct EngineInner {
/// Region workers group.
workers: WorkerGroup,
/// Shared object store of all regions.
object_store: ObjectStore,
}

impl EngineInner {
@@ -87,8 +106,16 @@ impl EngineInner {
log_store: Arc<S>,
object_store: ObjectStore,
) -> EngineInner {
let write_buffer_manager = Arc::new(WriteBufferManagerImpl {});

EngineInner {
workers: WorkerGroup::start(config, log_store, object_store),
workers: WorkerGroup::start(
config,
log_store,
object_store.clone(),
write_buffer_manager,
),
object_store,
}
}

@@ -97,12 +124,76 @@ impl EngineInner {
self.workers.stop().await
}

/// Handles [RequestBody] and return its executed result.
/// Get metadata of a region.
///
/// Returns error if the region doesn't exist.
fn get_metadata(&self, region_id: RegionId) -> Result<RegionMetadataRef> {
// Reading a region doesn't need to go through the region worker thread.
let region = self
.workers
.get_region(region_id)
.context(RegionNotFoundSnafu { region_id })?;
Ok(region.metadata())
}

/// Handles [RegionRequest] and return its executed result.
async fn handle_request(&self, region_id: RegionId, request: RegionRequest) -> Result<Output> {
let body = RequestBody::try_from_region_request(region_id, request)?;
let (request, receiver) = RegionTask::from_request(region_id, body);
self.workers.submit_to_worker(request).await?;
let (request, receiver) = WorkerRequest::try_from_region_request(region_id, request)?;
self.workers.submit_to_worker(region_id, request).await?;

receiver.await.context(RecvSnafu)?
}

/// Handles the scan `request` and returns a [Scanner] for the `request`.
fn handle_query(&self, region_id: RegionId, request: ScanRequest) -> Result<Scanner> {
// Reading a region doesn't need to go through the region worker thread.
let region = self
.workers
.get_region(region_id)
.context(RegionNotFoundSnafu { region_id })?;
let version = region.version();
let scan_region = ScanRegion::new(
version,
region.region_dir.clone(),
self.object_store.clone(),
request,
);

scan_region.scanner()
}
}

#[async_trait]
impl RegionEngine for MitoEngine {
fn name(&self) -> &str {
"MitoEngine"
}

async fn handle_request(
&self,
region_id: RegionId,
request: RegionRequest,
) -> std::result::Result<Output, BoxedError> {
self.inner
.handle_request(region_id, request)
.await
.map_err(BoxedError::new)
}

/// Handle substrait query and return a stream of record batches
async fn handle_query(
&self,
_region_id: RegionId,
_request: ScanRequest,
) -> std::result::Result<SendableRecordBatchStream, BoxedError> {
todo!()
}

/// Retrieve region's metadata.
async fn get_metadata(
&self,
region_id: RegionId,
) -> std::result::Result<RegionMetadataRef, BoxedError> {
self.inner.get_metadata(region_id).map_err(BoxedError::new)
}
}
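Taken together, the engine changes above move reads off the region worker thread: handle_query looks the region up directly, builds a ScanRegion from the region's current version, and hands back a Scanner. A minimal caller-side sketch of that flow, mirroring the tests later in this diff; engine and region_id are assumed to exist, and error handling follows the tests' unwrap style.

// Illustrative fragment; the same calls appear verbatim in the tests below.
let scanner = engine.handle_query(region_id, ScanRequest::default()).unwrap(); // resolves the region, builds a ScanRegion
let stream = scanner.scan().await.unwrap();                                    // SendableRecordBatchStream
let batches = RecordBatches::try_collect(stream).await.unwrap();
println!("{}", batches.pretty_print().unwrap());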
@@ -19,17 +19,19 @@ use std::collections::HashMap;
use api::helper::ColumnDataTypeWrapper;
use api::v1::value::ValueData;
use api::v1::{Row, Rows};
use common_recordbatch::RecordBatches;
use store_api::metadata::ColumnMetadata;
use store_api::region_request::{RegionCloseRequest, RegionOpenRequest, RegionPutRequest};
use store_api::storage::RegionId;

use super::*;
use crate::error::Error;
use crate::region::version::VersionControlData;
use crate::test_util::{CreateRequestBuilder, TestEnv};

#[tokio::test]
async fn test_engine_new_stop() {
let env = TestEnv::with_prefix("engine-stop");
let mut env = TestEnv::with_prefix("engine-stop");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
@@ -56,7 +58,7 @@ async fn test_engine_new_stop() {

#[tokio::test]
async fn test_engine_create_new_region() {
let env = TestEnv::with_prefix("new-region");
let mut env = TestEnv::with_prefix("new-region");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
@@ -71,7 +73,7 @@ async fn test_engine_create_new_region() {

#[tokio::test]
async fn test_engine_create_region_if_not_exists() {
let env = TestEnv::with_prefix("create-not-exists");
let mut env = TestEnv::with_prefix("create-not-exists");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
@@ -90,7 +92,7 @@ async fn test_engine_create_region_if_not_exists() {

#[tokio::test]
async fn test_engine_create_existing_region() {
let env = TestEnv::with_prefix("create-existing");
let mut env = TestEnv::with_prefix("create-existing");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
@@ -113,7 +115,7 @@ async fn test_engine_create_existing_region() {

#[tokio::test]
async fn test_engine_open_empty() {
let env = TestEnv::with_prefix("open-empty");
let mut env = TestEnv::with_prefix("open-empty");
let engine = env.create_engine(MitoConfig::default()).await;

let err = engine
@@ -135,7 +137,7 @@ async fn test_engine_open_empty() {

#[tokio::test]
async fn test_engine_open_existing() {
let env = TestEnv::with_prefix("open-exiting");
let mut env = TestEnv::with_prefix("open-exiting");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
@@ -161,7 +163,7 @@ async fn test_engine_open_existing() {

#[tokio::test]
async fn test_engine_close_region() {
let env = TestEnv::with_prefix("close");
let mut env = TestEnv::with_prefix("close");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
@@ -193,7 +195,7 @@ async fn test_engine_close_region() {

#[tokio::test]
async fn test_engine_reopen_region() {
let env = TestEnv::with_prefix("reopen-region");
let mut env = TestEnv::with_prefix("reopen-region");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
@@ -235,8 +237,8 @@ fn column_metadata_to_column_schema(metadata: &ColumnMetadata) -> api::v1::Colum
}
}

fn build_rows(num_rows: usize) -> Vec<Row> {
(0..num_rows)
fn build_rows(start: usize, end: usize) -> Vec<Row> {
(start..end)
.map(|i| api::v1::Row {
values: vec![
api::v1::Value {
@@ -246,7 +248,7 @@ fn build_rows(num_rows: usize) -> Vec<Row> {
value_data: Some(ValueData::F64Value(i as f64)),
},
api::v1::Value {
value_data: Some(ValueData::TsMillisecondValue(i as i64)),
value_data: Some(ValueData::TsMillisecondValue(i as i64 * 1000)),
},
],
})
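The helper now takes an explicit (start, end) range so a logical series can be split across several put requests, and timestamps are scaled to whole seconds (i * 1000 milliseconds). A self-contained, illustrative stand-in using plain tuples instead of the api::v1 row types:

// Simplified stand-in for build_rows(start, end): one point per index.
fn build_points(start: usize, end: usize) -> Vec<(String, f64, i64)> {
    (start..end)
        .map(|i| (i.to_string(), i as f64, i as i64 * 1000)) // tag, field, ts in ms
        .collect()
}

fn main() {
    let points = build_points(0, 3);
    assert_eq!(points[2], ("2".to_string(), 2.0, 2000)); // 1970-01-01T00:00:02
    // build_points(0, 20) and build_points(20, 42) together cover the same 42
    // rows as build_points(0, 42), which is what the replay test below relies on.
}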
@@ -255,7 +257,7 @@ fn build_rows(num_rows: usize) -> Vec<Row> {

#[tokio::test]
async fn test_write_to_region() {
let env = TestEnv::with_prefix("write-to-region");
let mut env = TestEnv::with_prefix("write-to-region");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
@@ -274,7 +276,7 @@ async fn test_write_to_region() {
let num_rows = 42;
let rows = Rows {
schema: column_schemas,
rows: build_rows(num_rows),
rows: build_rows(0, num_rows),
};
let output = engine
.handle_request(region_id, RegionRequest::Put(RegionPutRequest { rows }))
@@ -285,3 +287,136 @@ async fn test_write_to_region() {
};
assert_eq!(num_rows, rows_inserted);
}

#[tokio::test]
async fn test_region_replay() {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::with_prefix("region-replay");
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
let request = CreateRequestBuilder::new().build();
let region_dir = request.region_dir.clone();

let column_schemas = request
.column_metadatas
.iter()
.map(column_metadata_to_column_schema)
.collect::<Vec<_>>();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();

let rows = Rows {
schema: column_schemas.clone(),
rows: build_rows(0, 20),
};
let output = engine
.handle_request(region_id, RegionRequest::Put(RegionPutRequest { rows }))
.await
.unwrap();
let Output::AffectedRows(rows_inserted) = output else {
unreachable!()
};
assert_eq!(20, rows_inserted);

let rows = Rows {
schema: column_schemas,
rows: build_rows(20, 42),
};
let output = engine
.handle_request(region_id, RegionRequest::Put(RegionPutRequest { rows }))
.await
.unwrap();
let Output::AffectedRows(rows_inserted) = output else {
unreachable!()
};
assert_eq!(22, rows_inserted);

engine.stop().await.unwrap();

let engine = MitoEngine::new(
MitoConfig::default(),
env.get_logstore().unwrap(),
env.get_object_store().unwrap(),
);

let open_region = engine
.handle_request(
region_id,
RegionRequest::Open(RegionOpenRequest {
engine: String::new(),
region_dir,
options: HashMap::default(),
}),
)
.await
.unwrap();
let Output::AffectedRows(rows) = open_region else {
unreachable!()
};
assert_eq!(0, rows);

let request = ScanRequest::default();
let scanner = engine.handle_query(region_id, request).unwrap();
let stream = scanner.scan().await.unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
assert_eq!(42, batches.iter().map(|b| b.num_rows()).sum::<usize>());

let region = engine.get_region(region_id).unwrap();
let VersionControlData {
committed_sequence,
last_entry_id,
..
} = region.version_control.current();

assert_eq!(42, committed_sequence);
assert_eq!(2, last_entry_id);

engine.stop().await.unwrap();
}
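A quick sanity check of the numbers test_region_replay asserts after reopening the region: 20 + 22 written rows give a committed sequence of 42, and the asserted last_entry_id of 2 suggests each put request became a single WAL entry (an assumption of this sketch, not something the diff states).

fn main() {
    // One sequence number per written row; assume one WAL entry per put request.
    let batch_sizes = [20u64, 22];
    let committed_sequence: u64 = batch_sizes.iter().sum();
    let last_entry_id = batch_sizes.len() as u64;

    assert_eq!(committed_sequence, 42);
    assert_eq!(last_entry_id, 2);
}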

// TODO(yingwen): build_rows() only generate one point for each series. We need to add tests
// for series with multiple points and other cases.
#[tokio::test]
async fn test_write_query_region() {
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
let request = CreateRequestBuilder::new().build();

let column_schemas = request
.column_metadatas
.iter()
.map(column_metadata_to_column_schema)
.collect::<Vec<_>>();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();

let rows = Rows {
schema: column_schemas,
rows: build_rows(0, 3),
};
engine
.handle_request(region_id, RegionRequest::Put(RegionPutRequest { rows }))
.await
.unwrap();

let request = ScanRequest::default();
let scanner = engine.handle_query(region_id, request).unwrap();
let stream = scanner.scan().await.unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
let expected = "\
+-------+---------+---------------------+
| tag_0 | field_0 | ts                  |
+-------+---------+---------------------+
| 0     | 0.0     | 1970-01-01T00:00:00 |
| 1     | 1.0     | 1970-01-01T00:00:01 |
| 2     | 2.0     | 1970-01-01T00:00:02 |
+-------+---------+---------------------+";
assert_eq!(expected, batches.pretty_print().unwrap());
}
Some files were not shown because too many files have changed in this diff.