Mirror of https://github.com/GreptimeTeam/greptimedb.git
Synced 2026-01-05 21:02:58 +00:00

Compare commits: v0.12.0-ni ... v0.12.0-ni (25 commits)

| SHA1 |
|---|
| 480b05c590 |
| 0de0fd80b0 |
| 059cb6fdc3 |
| 29218b5fe7 |
| 59e6ec0395 |
| 79ee230f2a |
| 0e4bd59fac |
| 6eccadbf73 |
| f29a1c56e9 |
| 88c3d331a1 |
| 79acc9911e |
| 0a169980b7 |
| c80d2a3222 |
| 116bdaf690 |
| 6341fb86c7 |
| fa09e181be |
| ab4663ec2b |
| fac22575aa |
| 0e249f69cd |
| 5d1761f3e5 |
| dba6da4d00 |
| 59b31372aa |
| d6b8672e63 |
| deaa1f9578 |
| f378d218e9 |
Cargo.lock (generated, 669 lines changed): file diff suppressed because it is too large.
@@ -138,8 +138,8 @@ itertools = "0.10"
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "8c8d2fc294a39f3ff08909d60f718639cfba3875", default-features = false }
lazy_static = "1.4"
local-ip-address = "0.6"
loki-api = { git = "https://github.com/shuiyisong/tracing-loki", branch = "chore/prost_version" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
loki-proto = { git = "https://github.com/GreptimeTeam/loki-proto.git", rev = "1434ecf23a2654025d86188fb5205e7a74b225d3" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "5618e779cf2bb4755b499c630fba4c35e91898cb" }
mockall = "0.11.4"
moka = "0.12"
nalgebra = "0.33"
@@ -278,12 +278,10 @@ tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls", rev = "46
# This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
# see https://github.com/aws/aws-lc-rs/pull/526
# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
# Apply a fix for pprof for unaligned pointer access
pprof = { git = "https://github.com/GreptimeTeam/pprof-rs", rev = "1bd1e21" }

[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "a10facb353b41460eeb98578868ebf19c2084fac"
rev = "5618e779cf2bb4755b499c630fba4c35e91898cb"

[profile.release]
debug = 1
@@ -1,3 +1,6 @@
[target.aarch64-unknown-linux-gnu]
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:0.2.5"

[build]
pre-build = [
"dpkg --add-architecture $CROSS_DEB_ARCH",
@@ -5,3 +8,8 @@ pre-build = [
"curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/",
"chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google",
]

[build.env]
passthrough = [
"JEMALLOC_SYS_WITH_LG_PAGE",
]
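The `JEMALLOC_SYS_WITH_LG_PAGE` passthrough matters when cross-building for aarch64 hosts whose kernels use pages larger than 4 KiB, since jemalloc bakes its page-size assumption in at build time. A minimal sketch of how the variable is typically supplied to `cross` follows; the value `16` (64 KiB pages) is an illustrative choice, not something this diff sets:

```bash
# Cross-compile for aarch64, telling jemalloc to assume 64 KiB pages.
# Requires `cross` (https://github.com/cross-rs/cross) to be installed.
JEMALLOC_SYS_WITH_LG_PAGE=16 cross build --release --target aarch64-unknown-linux-gnu
```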
@@ -13,7 +13,7 @@
<a href="https://greptime.com/product/cloud">GreptimeCloud</a> |
<a href="https://docs.greptime.com/">User Guide</a> |
<a href="https://greptimedb.rs/">API Docs</a> |
<a href="https://github.com/GreptimeTeam/greptimedb/issues/3412">Roadmap 2024</a>
<a href="https://github.com/GreptimeTeam/greptimedb/issues/5446">Roadmap 2025</a>
</h4>

<a href="https://github.com/GreptimeTeam/greptimedb/releases/latest">
@@ -116,7 +116,7 @@ docker run -p 127.0.0.1:4000-4003:4000-4003 \
--name greptime --rm \
greptime/greptimedb:latest standalone start \
--http-addr 0.0.0.0:4000 \
--rpc-addr 0.0.0.0:4001 \
--rpc-bind-addr 0.0.0.0:4001 \
--mysql-addr 0.0.0.0:4002 \
--postgres-addr 0.0.0.0:4003
```
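Once the container from the command above is running, any MySQL-compatible client can reach the mapped MySQL port (4002 above). A minimal, illustrative check with the stock `mysql` client, not something prescribed by this diff:

```bash
# Connect to the standalone instance started above via its MySQL port.
mysql -h 127.0.0.1 -P 4002
```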
@@ -29,7 +29,7 @@
|
||||
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
|
||||
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
|
||||
| `grpc.tls.mode` | String | `disable` | TLS mode. |
|
||||
@@ -65,8 +65,8 @@
|
||||
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
|
||||
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `1m` | The interval to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
@@ -88,8 +88,9 @@
|
||||
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
|
||||
| `metadata_store` | -- | -- | Metadata storage options. |
|
||||
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
|
||||
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
|
||||
| `metadata_store.file_size` | String | `64MB` | The size of the metadata store log file. |
|
||||
| `metadata_store.purge_threshold` | String | `256MB` | The threshold of the metadata store size to trigger a purge. |
|
||||
| `metadata_store.purge_interval` | String | `1m` | The interval of the metadata store to trigger a purge. |
|
||||
| `procedure` | -- | -- | Procedure storage options. |
|
||||
| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
|
||||
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
|
||||
@@ -221,8 +222,8 @@
|
||||
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
|
||||
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.hostname` | String | `127.0.0.1:4001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
|
||||
| `grpc.tls.mode` | String | `disable` | TLS mode. |
|
||||
@@ -300,7 +301,7 @@
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
|
||||
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
|
||||
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
|
||||
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
|
||||
| `store_addrs` | Array | -- | Store server address default to etcd store.<br/>For postgres store, the format is:<br/>"password=password dbname=postgres user=postgres host=localhost port=5432"<br/>For etcd store, the format is:<br/>"127.0.0.1:2379" |
|
||||
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
|
||||
| `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store` |
|
||||
@@ -376,19 +377,14 @@
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
|
||||
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
|
||||
| `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
|
||||
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
|
||||
| `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
|
||||
| `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
|
||||
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
|
||||
| `grpc.hostname` | String | `127.0.0.1:3001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
|
||||
| `grpc.server_addr` | String | `127.0.0.1:3001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
|
||||
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
|
||||
@@ -549,8 +545,8 @@
|
||||
| `flow` | -- | -- | flow engine options. |
|
||||
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
|
||||
| `grpc.hostname` | String | `127.0.0.1` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
|
||||
| `grpc.server_addr` | String | `127.0.0.1:6800` | The address advertised to the metasrv,<br/>and used for connections from outside the host |
|
||||
| `grpc.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
|
||||
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
|
||||
|
||||
@@ -19,26 +19,6 @@ init_regions_parallelism = 16
## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0

## Deprecated, use `grpc.addr` instead.
## @toml2docs:none-default
rpc_addr = "127.0.0.1:3001"

## Deprecated, use `grpc.hostname` instead.
## @toml2docs:none-default
rpc_hostname = "127.0.0.1"

## Deprecated, use `grpc.runtime_size` instead.
## @toml2docs:none-default
rpc_runtime_size = 8

## Deprecated, use `grpc.rpc_max_recv_message_size` instead.
## @toml2docs:none-default
rpc_max_recv_message_size = "512MB"

## Deprecated, use `grpc.rpc_max_send_message_size` instead.
## @toml2docs:none-default
rpc_max_send_message_size = "512MB"

## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true

@@ -56,10 +36,11 @@ body_limit = "64MB"
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:3001"
## The hostname advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1:3001"
bind_addr = "127.0.0.1:3001"
## The address advertised to the metasrv, and used for connections from outside the host.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
server_addr = "127.0.0.1:3001"
## The number of server worker threads.
runtime_size = 8
## The maximum receive message size for gRPC server.
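To summarize the two hunks above: the deprecated top-level `rpc_*` keys are dropped from the datanode example config, and their replacements live under `[grpc]` with the new `bind_addr`/`server_addr` names. A hedged before/after sketch, using the example defaults shown above rather than recommended values:

```toml
# Before (deprecated top-level keys, removed from the example file)
rpc_addr = "127.0.0.1:3001"
rpc_hostname = "127.0.0.1"
rpc_runtime_size = 8

# After (equivalent settings under the [grpc] table)
[grpc]
bind_addr = "127.0.0.1:3001"   # replaces rpc_addr / grpc.addr
server_addr = "127.0.0.1:3001" # replaces rpc_hostname / grpc.hostname
runtime_size = 8               # replaces rpc_runtime_size
```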
@@ -14,10 +14,10 @@ node_id = 14
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:6800"
## The hostname advertised to the metasrv,
bind_addr = "127.0.0.1:6800"
## The address advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1"
server_addr = "127.0.0.1:6800"
## The number of server worker threads.
runtime_size = 2
## The maximum receive message size for gRPC server.
@@ -41,10 +41,11 @@ cors_allowed_origins = ["https://example.com"]
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:4001"
## The hostname advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1:4001"
bind_addr = "127.0.0.1:4001"
## The address advertised to the metasrv, and used for connections from outside the host.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
server_addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
@@ -4,7 +4,9 @@ data_home = "/tmp/metasrv/"
## The bind address of metasrv.
bind_addr = "127.0.0.1:3002"

## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
## The communication server address for the frontend and datanode to connect to metasrv.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `bind_addr`.
server_addr = "127.0.0.1:3002"

## Store server address default to etcd store.
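As an illustration of the bind/advertise split above, a multi-host metasrv deployment would typically set something like the following; the host name `metasrv.internal` and the etcd endpoint are placeholders, not values taken from this diff:

```toml
# Listen on all interfaces, but advertise a resolvable address to frontends and datanodes.
bind_addr = "0.0.0.0:3002"
server_addr = "metasrv.internal:3002"
store_addrs = ["etcd0:2379"]
```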
@@ -49,7 +49,7 @@ cors_allowed_origins = ["https://example.com"]
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:4001"
bind_addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
@@ -159,11 +159,11 @@ dir = "/tmp/greptimedb/wal"
## **It's only used when the provider is `raft_engine`**.
file_size = "128MB"

## The threshold of the WAL size to trigger a flush.
## The threshold of the WAL size to trigger a purge.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "1GB"

## The interval to trigger a flush.
## The interval to trigger a purge.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "1m"
@@ -278,10 +278,12 @@ overwrite_entry_start_id = false

## Metadata storage options.
[metadata_store]
## Kv file size in bytes.
file_size = "256MB"
## Kv purge threshold.
purge_threshold = "4GB"
## The size of the metadata store log file.
file_size = "64MB"
## The threshold of the metadata store size to trigger a purge.
purge_threshold = "256MB"
## The interval of the metadata store to trigger a purge.
purge_interval = "1m"

## Procedure storage options.
[procedure]
@@ -43,8 +43,8 @@ services:
command:
- metasrv
- start
- --bind-addr=0.0.0.0:3002
- --server-addr=metasrv:3002
- --rpc-bind-addr=0.0.0.0:3002
- --rpc-server-addr=metasrv:3002
- --store-addrs=etcd0:2379
- --http-addr=0.0.0.0:3000
healthcheck:
@@ -68,8 +68,8 @@ services:
- datanode
- start
- --node-id=0
- --rpc-addr=0.0.0.0:3001
- --rpc-hostname=datanode0:3001
- --rpc-bind-addr=0.0.0.0:3001
- --rpc-server-addr=datanode0:3001
- --metasrv-addrs=metasrv:3002
- --http-addr=0.0.0.0:5000
volumes:
@@ -98,7 +98,7 @@ services:
- start
- --metasrv-addrs=metasrv:3002
- --http-addr=0.0.0.0:4000
- --rpc-addr=0.0.0.0:4001
- --rpc-bind-addr=0.0.0.0:4001
- --mysql-addr=0.0.0.0:4002
- --postgres-addr=0.0.0.0:4003
healthcheck:
@@ -123,8 +123,8 @@ services:
- start
- --node-id=0
- --metasrv-addrs=metasrv:3002
- --rpc-addr=0.0.0.0:4004
- --rpc-hostname=flownode0:4004
- --rpc-bind-addr=0.0.0.0:4004
- --rpc-server-addr=flownode0:4004
- --http-addr=0.0.0.0:4005
depends_on:
frontend0:
@@ -4,6 +4,16 @@ This crate provides an easy approach to dump memory profiling info.

## Prerequisites
### jemalloc
jeprof is already compiled in the target directory of GreptimeDB. You can find the binary and use it.
```
# find jeprof binary
find . -name 'jeprof'
# add executable permission
chmod +x <path_to_jeprof>
```
The path is usually under `./target/${PROFILE}/build/tikv-jemalloc-sys-${HASH}/out/build/bin/jeprof`.
The default version of jemalloc installed from the package manager may not have the `--collapsed` option.
You may need to check whether the `jeprof` version is >= `5.3.0` if you want to install it from the package manager.
```bash
# for macOS
brew install jemalloc
@@ -23,7 +33,11 @@ curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph
Start GreptimeDB instance with environment variables:

```bash
# for Linux
MALLOC_CONF=prof:true ./target/debug/greptime standalone start

# for macOS
_RJEM_MALLOC_CONF=prof:true ./target/debug/greptime standalone start
```

Dump memory profiling data through HTTP API:
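Once a heap profile has been dumped to a file (the HTTP dump command itself lies outside this hunk), it can be rendered as a flame graph with the `jeprof` binary and the `flamegraph.pl` script fetched earlier in this README. The file names below are illustrative assumptions, not part of this diff:

```bash
# Collapse the dumped heap profile and render it as an SVG flame graph.
# `greptime.hprof` is an assumed name for the dumped profile file.
jeprof --collapsed ./target/debug/greptime greptime.hprof | ./flamegraph.pl > mem-flamegraph.svg
```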
flake.nix (20 lines changed)
@@ -18,7 +18,11 @@
libgit2
libz
];

lib = nixpkgs.lib;
rustToolchain = fenix.packages.${system}.fromToolchainName {
name = (lib.importTOML ./rust-toolchain.toml).toolchain.channel;
sha256 = "sha256-f/CVA1EC61EWbh0SjaRNhLL0Ypx2ObupbzigZp8NmL4=";
};
in
{
devShells.default = pkgs.mkShell {
@@ -30,14 +34,20 @@
protobuf
gnumake
mold
(fenix.packages.${system}.fromToolchainFile {
dir = ./.;
sha256 = "sha256-f/CVA1EC61EWbh0SjaRNhLL0Ypx2ObupbzigZp8NmL4=";
})
(rustToolchain.withComponents [
"cargo"
"clippy"
"rust-src"
"rustc"
"rustfmt"
"rust-analyzer"
"llvm-tools"
])
cargo-nextest
cargo-llvm-cov
taplo
curl
gnuplot ## for cargo bench
];

LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;

@@ -1,3 +1,2 @@
[toolchain]
channel = "nightly-2024-12-25"
components = ["rust-analyzer", "llvm-tools"]
@@ -28,6 +28,7 @@ use common_meta::kv_backend::postgres::PgStore;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use common_telemetry::info;
|
||||
use common_wal::options::WalOptions;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, RawSchema};
|
||||
use rand::Rng;
|
||||
@@ -184,7 +185,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
|
||||
region_routes
|
||||
}
|
||||
|
||||
fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, String> {
|
||||
fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, WalOptions> {
|
||||
// TODO(niebayes): construct region wal options for benchmark.
|
||||
let _ = regions;
|
||||
HashMap::default()
|
||||
|
||||
@@ -49,7 +49,12 @@ impl TableMetadataBencher {
|
||||
|
||||
let regions: Vec<_> = (0..64).collect();
|
||||
let region_routes = create_region_routes(regions.clone());
|
||||
let region_wal_options = create_region_wal_options(regions);
|
||||
let region_wal_options = create_region_wal_options(regions)
|
||||
.into_iter()
|
||||
.map(|(region_id, wal_options)| {
|
||||
(region_id, serde_json::to_string(&wal_options).unwrap())
|
||||
})
|
||||
.collect();
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
@@ -109,9 +114,17 @@ impl TableMetadataBencher {
|
||||
let table_info = table_info.unwrap();
|
||||
let table_route = table_route.unwrap();
|
||||
let table_id = table_info.table_info.ident.table_id;
|
||||
|
||||
let regions: Vec<_> = (0..64).collect();
|
||||
let region_wal_options = create_region_wal_options(regions);
|
||||
let _ = self
|
||||
.table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_info.table_name(), &table_route)
|
||||
.delete_table_metadata(
|
||||
table_id,
|
||||
&table_info.table_name(),
|
||||
&table_route,
|
||||
®ion_wal_options,
|
||||
)
|
||||
.await;
|
||||
start.elapsed()
|
||||
},
|
||||
|
||||
@@ -17,6 +17,7 @@ use std::time::Duration;
|
||||
use base64::engine::general_purpose;
|
||||
use base64::Engine;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use humantime::format_duration;
|
||||
use serde_json::Value;
|
||||
use servers::http::header::constants::GREPTIME_DB_HEADER_TIMEOUT;
|
||||
@@ -24,7 +25,9 @@ use servers::http::result::greptime_result_v1::GreptimedbV1Response;
|
||||
use servers::http::GreptimeQueryOutput;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{HttpQuerySqlSnafu, Result, SerdeJsonSnafu};
|
||||
use crate::error::{
|
||||
BuildClientSnafu, HttpQuerySqlSnafu, ParseProxyOptsSnafu, Result, SerdeJsonSnafu,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
pub struct DatabaseClient {
@@ -32,6 +35,23 @@ pub struct DatabaseClient {
    catalog: String,
    auth_header: Option<String>,
    timeout: Duration,
    proxy: Option<reqwest::Proxy>,
}

pub fn parse_proxy_opts(
    proxy: Option<String>,
    no_proxy: bool,
) -> std::result::Result<Option<reqwest::Proxy>, BoxedError> {
    if no_proxy {
        return Ok(None);
    }
    proxy
        .map(|proxy| {
            reqwest::Proxy::all(proxy)
                .context(ParseProxyOptsSnafu)
                .map_err(BoxedError::new)
        })
        .transpose()
}
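A brief sketch of how the three proxy cases resolve with the helper above; the proxy URL is an illustrative placeholder and the wrapper function exists only for this example:

```rust
fn proxy_examples() -> Result<(), BoxedError> {
    // Explicit proxy: --proxy http://proxy.internal:3128
    assert!(parse_proxy_opts(Some(String::from("http://proxy.internal:3128")), false)?.is_some());
    // --no-proxy wins over --proxy: no proxy is configured at all.
    assert!(parse_proxy_opts(Some(String::from("http://proxy.internal:3128")), true)?.is_none());
    // Neither flag set: None, which later means "let reqwest use the system proxy, if any".
    assert!(parse_proxy_opts(None, false)?.is_none());
    Ok(())
}
```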
impl DatabaseClient {
|
||||
@@ -40,6 +60,7 @@ impl DatabaseClient {
|
||||
catalog: String,
|
||||
auth_basic: Option<String>,
|
||||
timeout: Duration,
|
||||
proxy: Option<reqwest::Proxy>,
|
||||
) -> Self {
|
||||
let auth_header = if let Some(basic) = auth_basic {
|
||||
let encoded = general_purpose::STANDARD.encode(basic);
|
||||
@@ -48,11 +69,18 @@ impl DatabaseClient {
|
||||
None
|
||||
};
|
||||
|
||||
if let Some(ref proxy) = proxy {
|
||||
common_telemetry::info!("Using proxy: {:?}", proxy);
|
||||
} else {
|
||||
common_telemetry::info!("Using system proxy(if any)");
|
||||
}
|
||||
|
||||
Self {
|
||||
addr,
|
||||
catalog,
|
||||
auth_header,
|
||||
timeout,
|
||||
proxy,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,7 +95,13 @@ impl DatabaseClient {
|
||||
("db", format!("{}-{}", self.catalog, schema)),
|
||||
("sql", sql.to_string()),
|
||||
];
|
||||
let mut request = reqwest::Client::new()
|
||||
let client = self
|
||||
.proxy
|
||||
.clone()
|
||||
.map(|proxy| reqwest::Client::builder().proxy(proxy).build())
|
||||
.unwrap_or_else(|| Ok(reqwest::Client::new()))
|
||||
.context(BuildClientSnafu)?;
|
||||
let mut request = client
|
||||
.post(&url)
|
||||
.form(¶ms)
|
||||
.header("Content-Type", "application/x-www-form-urlencoded");
|
||||
|
||||
@@ -86,6 +86,22 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse proxy options: {}", error))]
|
||||
ParseProxyOpts {
|
||||
#[snafu(source)]
|
||||
error: reqwest::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build reqwest client: {}", error))]
|
||||
BuildClient {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: reqwest::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid REPL command: {reason}"))]
|
||||
InvalidReplCommand { reason: String },
|
||||
|
||||
@@ -278,7 +294,8 @@ impl ErrorExt for Error {
|
||||
| Error::InitTimezone { .. }
|
||||
| Error::ConnectEtcd { .. }
|
||||
| Error::CreateDir { .. }
|
||||
| Error::EmptyResult { .. } => StatusCode::InvalidArguments,
|
||||
| Error::EmptyResult { .. }
|
||||
| Error::ParseProxyOpts { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
Error::StartProcedureManager { source, .. }
|
||||
| Error::StopProcedureManager { source, .. } => source.status_code(),
|
||||
@@ -298,7 +315,8 @@ impl ErrorExt for Error {
|
||||
Error::SerdeJson { .. }
|
||||
| Error::FileIo { .. }
|
||||
| Error::SpawnThread { .. }
|
||||
| Error::InitTlsProvider { .. } => StatusCode::Unexpected,
|
||||
| Error::InitTlsProvider { .. }
|
||||
| Error::BuildClient { .. } => StatusCode::Unexpected,
|
||||
|
||||
Error::Other { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ use tokio::io::{AsyncWriteExt, BufWriter};
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::database::DatabaseClient;
|
||||
use crate::database::{parse_proxy_opts, DatabaseClient};
|
||||
use crate::error::{EmptyResultSnafu, Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
|
||||
use crate::{database, Tool};
|
||||
|
||||
@@ -91,19 +91,30 @@ pub struct ExportCommand {
|
||||
/// The default behavior will disable server-side default timeout(i.e. `0s`).
|
||||
#[clap(long, value_parser = humantime::parse_duration)]
|
||||
timeout: Option<Duration>,
|
||||
|
||||
/// The proxy server address to connect, if set, will override the system proxy.
|
||||
///
|
||||
/// The default behavior will use the system proxy if neither `proxy` nor `no_proxy` is set.
|
||||
#[clap(long)]
|
||||
proxy: Option<String>,
|
||||
|
||||
/// Disable proxy server, if set, will not use any proxy.
|
||||
#[clap(long)]
|
||||
no_proxy: bool,
|
||||
}
|
||||
|
||||
impl ExportCommand {
|
||||
pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
|
||||
let (catalog, schema) =
|
||||
database::split_database(&self.database).map_err(BoxedError::new)?;
|
||||
|
||||
let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
|
||||
let database_client = DatabaseClient::new(
|
||||
self.addr.clone(),
|
||||
catalog.clone(),
|
||||
self.auth_basic.clone(),
|
||||
// Treats `None` as `0s` to disable server-side default timeout.
|
||||
self.timeout.unwrap_or_default(),
|
||||
proxy,
|
||||
);
|
||||
|
||||
Ok(Box::new(Export {
|
||||
|
||||
@@ -25,7 +25,7 @@ use snafu::{OptionExt, ResultExt};
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::database::DatabaseClient;
|
||||
use crate::database::{parse_proxy_opts, DatabaseClient};
|
||||
use crate::error::{Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
|
||||
use crate::{database, Tool};
|
||||
|
||||
@@ -76,18 +76,30 @@ pub struct ImportCommand {
|
||||
/// The default behavior will disable server-side default timeout(i.e. `0s`).
|
||||
#[clap(long, value_parser = humantime::parse_duration)]
|
||||
timeout: Option<Duration>,
|
||||
|
||||
/// The proxy server address to connect, if set, will override the system proxy.
|
||||
///
|
||||
/// The default behavior will use the system proxy if neither `proxy` nor `no_proxy` is set.
|
||||
#[clap(long)]
|
||||
proxy: Option<String>,
|
||||
|
||||
/// Disable proxy server, if set, will not use any proxy.
|
||||
#[clap(long, default_value = "false")]
|
||||
no_proxy: bool,
|
||||
}
|
||||
|
||||
impl ImportCommand {
|
||||
pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
|
||||
let (catalog, schema) =
|
||||
database::split_database(&self.database).map_err(BoxedError::new)?;
|
||||
let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
|
||||
let database_client = DatabaseClient::new(
|
||||
self.addr.clone(),
|
||||
catalog.clone(),
|
||||
self.auth_basic.clone(),
|
||||
// Treats `None` as `0s` to disable server-side default timeout.
|
||||
self.timeout.unwrap_or_default(),
|
||||
proxy,
|
||||
);
|
||||
|
||||
Ok(Box::new(Import {
|
||||
|
||||
@@ -126,10 +126,14 @@ impl SubCommand {
|
||||
struct StartCommand {
|
||||
#[clap(long)]
|
||||
node_id: Option<u64>,
|
||||
#[clap(long)]
|
||||
rpc_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
rpc_hostname: Option<String>,
|
||||
/// The address to bind the gRPC server.
|
||||
#[clap(long, alias = "rpc-addr")]
|
||||
rpc_bind_addr: Option<String>,
|
||||
/// The address advertised to the metasrv, and used for connections from outside the host.
|
||||
/// If left empty or unset, the server will automatically use the IP address of the first network interface
|
||||
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
|
||||
#[clap(long, alias = "rpc-hostname")]
|
||||
rpc_server_addr: Option<String>,
|
||||
#[clap(long, value_delimiter = ',', num_args = 1..)]
|
||||
metasrv_addrs: Option<Vec<String>>,
|
||||
#[clap(short, long)]
|
||||
@@ -181,18 +185,18 @@ impl StartCommand {
|
||||
tokio_console_addr: global_options.tokio_console_addr.clone(),
|
||||
};
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
opts.grpc.addr.clone_from(addr);
|
||||
if let Some(addr) = &self.rpc_bind_addr {
|
||||
opts.grpc.bind_addr.clone_from(addr);
|
||||
} else if let Some(addr) = &opts.rpc_addr {
|
||||
warn!("Use the deprecated attribute `DatanodeOptions.rpc_addr`, please use `grpc.addr` instead.");
|
||||
opts.grpc.addr.clone_from(addr);
|
||||
opts.grpc.bind_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(hostname) = &self.rpc_hostname {
|
||||
opts.grpc.hostname.clone_from(hostname);
|
||||
} else if let Some(hostname) = &opts.rpc_hostname {
|
||||
if let Some(server_addr) = &self.rpc_server_addr {
|
||||
opts.grpc.server_addr.clone_from(server_addr);
|
||||
} else if let Some(server_addr) = &opts.rpc_hostname {
|
||||
warn!("Use the deprecated attribute `DatanodeOptions.rpc_hostname`, please use `grpc.hostname` instead.");
|
||||
opts.grpc.hostname.clone_from(hostname);
|
||||
opts.grpc.server_addr.clone_from(server_addr);
|
||||
}
|
||||
|
||||
if let Some(runtime_size) = opts.rpc_runtime_size {
|
||||
@@ -277,7 +281,7 @@ impl StartCommand {
|
||||
|
||||
let plugin_opts = opts.plugins;
|
||||
let mut opts = opts.component;
|
||||
opts.grpc.detect_hostname();
|
||||
opts.grpc.detect_server_addr();
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
|
||||
.await
|
||||
@@ -357,8 +361,8 @@ mod tests {
|
||||
rpc_addr = "127.0.0.1:4001"
|
||||
rpc_hostname = "192.168.0.1"
|
||||
[grpc]
|
||||
addr = "127.0.0.1:3001"
|
||||
hostname = "127.0.0.1"
|
||||
bind_addr = "127.0.0.1:3001"
|
||||
server_addr = "127.0.0.1"
|
||||
runtime_size = 8
|
||||
"#;
|
||||
write!(file, "{}", toml_str).unwrap();
|
||||
@@ -369,8 +373,8 @@ mod tests {
|
||||
};
|
||||
|
||||
let options = cmd.load_options(&Default::default()).unwrap().component;
|
||||
assert_eq!("127.0.0.1:4001".to_string(), options.grpc.addr);
|
||||
assert_eq!("192.168.0.1".to_string(), options.grpc.hostname);
|
||||
assert_eq!("127.0.0.1:4001".to_string(), options.grpc.bind_addr);
|
||||
assert_eq!("192.168.0.1".to_string(), options.grpc.server_addr);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -431,7 +435,7 @@ mod tests {
|
||||
|
||||
let options = cmd.load_options(&Default::default()).unwrap().component;
|
||||
|
||||
assert_eq!("127.0.0.1:3001".to_string(), options.grpc.addr);
|
||||
assert_eq!("127.0.0.1:3001".to_string(), options.grpc.bind_addr);
|
||||
assert_eq!(Some(42), options.node_id);
|
||||
|
||||
let DatanodeWalConfig::RaftEngine(raft_engine_config) = options.wal else {
|
||||
@@ -645,7 +649,7 @@ mod tests {
|
||||
opts.http.addr,
|
||||
DatanodeOptions::default().component.http.addr
|
||||
);
|
||||
assert_eq!(opts.grpc.hostname, "10.103.174.219");
|
||||
assert_eq!(opts.grpc.server_addr, "10.103.174.219");
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
@@ -129,11 +129,13 @@ struct StartCommand {
|
||||
#[clap(long)]
|
||||
node_id: Option<u64>,
|
||||
/// Bind address for the gRPC server.
|
||||
#[clap(long)]
|
||||
rpc_addr: Option<String>,
|
||||
/// Hostname for the gRPC server.
|
||||
#[clap(long)]
|
||||
rpc_hostname: Option<String>,
|
||||
#[clap(long, alias = "rpc-addr")]
|
||||
rpc_bind_addr: Option<String>,
|
||||
/// The address advertised to the metasrv, and used for connections from outside the host.
|
||||
/// If left empty or unset, the server will automatically use the IP address of the first network interface
|
||||
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
|
||||
#[clap(long, alias = "rpc-hostname")]
|
||||
rpc_server_addr: Option<String>,
|
||||
/// Metasrv address list;
|
||||
#[clap(long, value_delimiter = ',', num_args = 1..)]
|
||||
metasrv_addrs: Option<Vec<String>>,
|
||||
@@ -184,12 +186,12 @@ impl StartCommand {
|
||||
tokio_console_addr: global_options.tokio_console_addr.clone(),
|
||||
};
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
opts.grpc.addr.clone_from(addr);
|
||||
if let Some(addr) = &self.rpc_bind_addr {
|
||||
opts.grpc.bind_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(hostname) = &self.rpc_hostname {
|
||||
opts.grpc.hostname.clone_from(hostname);
|
||||
if let Some(server_addr) = &self.rpc_server_addr {
|
||||
opts.grpc.server_addr.clone_from(server_addr);
|
||||
}
|
||||
|
||||
if let Some(node_id) = self.node_id {
|
||||
@@ -237,7 +239,7 @@ impl StartCommand {
|
||||
info!("Flownode options: {:#?}", opts);
|
||||
|
||||
let mut opts = opts.component;
|
||||
opts.grpc.detect_hostname();
|
||||
opts.grpc.detect_server_addr();
|
||||
|
||||
// TODO(discord9): make it not optionale after cluster id is required
|
||||
let cluster_id = opts.cluster_id.unwrap_or(0);
|
||||
|
||||
@@ -136,13 +136,19 @@ impl SubCommand {
|
||||
|
||||
#[derive(Debug, Default, Parser)]
|
||||
pub struct StartCommand {
|
||||
/// The address to bind the gRPC server.
|
||||
#[clap(long, alias = "rpc-addr")]
|
||||
rpc_bind_addr: Option<String>,
|
||||
/// The address advertised to the metasrv, and used for connections from outside the host.
|
||||
/// If left empty or unset, the server will automatically use the IP address of the first network interface
|
||||
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
|
||||
#[clap(long, alias = "rpc-hostname")]
|
||||
rpc_server_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
http_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
http_timeout: Option<u64>,
|
||||
#[clap(long)]
|
||||
rpc_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
mysql_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
postgres_addr: Option<String>,
|
||||
@@ -218,11 +224,15 @@ impl StartCommand {
|
||||
opts.http.disable_dashboard = disable_dashboard;
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
opts.grpc.addr.clone_from(addr);
|
||||
if let Some(addr) = &self.rpc_bind_addr {
|
||||
opts.grpc.bind_addr.clone_from(addr);
|
||||
opts.grpc.tls = tls_opts.clone();
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_server_addr {
|
||||
opts.grpc.server_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.mysql_addr {
|
||||
opts.mysql.enable = true;
|
||||
opts.mysql.addr.clone_from(addr);
|
||||
@@ -269,7 +279,7 @@ impl StartCommand {
|
||||
|
||||
let plugin_opts = opts.plugins;
|
||||
let mut opts = opts.component;
|
||||
opts.grpc.detect_hostname();
|
||||
opts.grpc.detect_server_addr();
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
|
||||
.await
|
||||
@@ -413,7 +423,7 @@ mod tests {
|
||||
|
||||
let default_opts = FrontendOptions::default().component;
|
||||
|
||||
assert_eq!(opts.grpc.addr, default_opts.grpc.addr);
|
||||
assert_eq!(opts.grpc.bind_addr, default_opts.grpc.bind_addr);
|
||||
assert!(opts.mysql.enable);
|
||||
assert_eq!(opts.mysql.runtime_size, default_opts.mysql.runtime_size);
|
||||
assert!(opts.postgres.enable);
|
||||
@@ -604,7 +614,7 @@ mod tests {
|
||||
assert_eq!(fe_opts.http.addr, "127.0.0.1:14000");
|
||||
|
||||
// Should be default value.
|
||||
assert_eq!(fe_opts.grpc.addr, GrpcOptions::default().addr);
|
||||
assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
@@ -133,11 +133,15 @@ impl SubCommand {
|
||||
|
||||
#[derive(Debug, Default, Parser)]
|
||||
struct StartCommand {
|
||||
#[clap(long)]
|
||||
bind_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
server_addr: Option<String>,
|
||||
#[clap(long, aliases = ["store-addr"], value_delimiter = ',', num_args = 1..)]
|
||||
/// The address to bind the gRPC server.
|
||||
#[clap(long, alias = "bind-addr")]
|
||||
rpc_bind_addr: Option<String>,
|
||||
/// The communication server address for the frontend and datanode to connect to metasrv.
|
||||
/// If left empty or unset, the server will automatically use the IP address of the first network interface
|
||||
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
|
||||
#[clap(long, alias = "server-addr")]
|
||||
rpc_server_addr: Option<String>,
|
||||
#[clap(long, alias = "store-addr", value_delimiter = ',', num_args = 1..)]
|
||||
store_addrs: Option<Vec<String>>,
|
||||
#[clap(short, long)]
|
||||
config_file: Option<String>,
|
||||
@@ -201,11 +205,11 @@ impl StartCommand {
|
||||
tokio_console_addr: global_options.tokio_console_addr.clone(),
|
||||
};
|
||||
|
||||
if let Some(addr) = &self.bind_addr {
|
||||
if let Some(addr) = &self.rpc_bind_addr {
|
||||
opts.bind_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.server_addr {
|
||||
if let Some(addr) = &self.rpc_server_addr {
|
||||
opts.server_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
@@ -269,11 +273,13 @@ impl StartCommand {
|
||||
log_versions(version(), short_version(), APP_NAME);
|
||||
|
||||
info!("Metasrv start command: {:#?}", self);
|
||||
info!("Metasrv options: {:#?}", opts);
|
||||
|
||||
let plugin_opts = opts.plugins;
|
||||
let mut opts = opts.component;
|
||||
opts.detect_server_addr();
|
||||
|
||||
info!("Metasrv options: {:#?}", opts);
|
||||
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_metasrv_plugins(&mut plugins, &plugin_opts, &opts)
|
||||
.await
|
||||
@@ -306,8 +312,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_read_from_cmd() {
|
||||
let cmd = StartCommand {
|
||||
bind_addr: Some("127.0.0.1:3002".to_string()),
|
||||
server_addr: Some("127.0.0.1:3002".to_string()),
|
||||
rpc_bind_addr: Some("127.0.0.1:3002".to_string()),
|
||||
rpc_server_addr: Some("127.0.0.1:3002".to_string()),
|
||||
store_addrs: Some(vec!["127.0.0.1:2380".to_string()]),
|
||||
selector: Some("LoadBased".to_string()),
|
||||
..Default::default()
|
||||
@@ -381,8 +387,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_load_log_options_from_cli() {
|
||||
let cmd = StartCommand {
|
||||
bind_addr: Some("127.0.0.1:3002".to_string()),
|
||||
server_addr: Some("127.0.0.1:3002".to_string()),
|
||||
rpc_bind_addr: Some("127.0.0.1:3002".to_string()),
|
||||
rpc_server_addr: Some("127.0.0.1:3002".to_string()),
|
||||
store_addrs: Some(vec!["127.0.0.1:2380".to_string()]),
|
||||
selector: Some("LoadBased".to_string()),
|
||||
..Default::default()
|
||||
|
||||
@@ -329,8 +329,8 @@ impl App for Instance {
|
||||
pub struct StartCommand {
|
||||
#[clap(long)]
|
||||
http_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
rpc_addr: Option<String>,
|
||||
#[clap(long, alias = "rpc-addr")]
|
||||
rpc_bind_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
mysql_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
@@ -407,9 +407,9 @@ impl StartCommand {
|
||||
opts.storage.data_home.clone_from(data_home);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
if let Some(addr) = &self.rpc_bind_addr {
|
||||
// frontend grpc addr conflict with datanode default grpc addr
|
||||
let datanode_grpc_addr = DatanodeOptions::default().grpc.addr;
|
||||
let datanode_grpc_addr = DatanodeOptions::default().grpc.bind_addr;
|
||||
if addr.eq(&datanode_grpc_addr) {
|
||||
return IllegalConfigSnafu {
|
||||
msg: format!(
|
||||
@@ -417,7 +417,7 @@ impl StartCommand {
|
||||
),
|
||||
}.fail();
|
||||
}
|
||||
opts.grpc.addr.clone_from(addr)
|
||||
opts.grpc.bind_addr.clone_from(addr)
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.mysql_addr {
|
||||
@@ -464,7 +464,7 @@ impl StartCommand {
|
||||
let mut plugins = Plugins::new();
|
||||
let plugin_opts = opts.plugins;
|
||||
let mut opts = opts.component;
|
||||
opts.grpc.detect_hostname();
|
||||
opts.grpc.detect_server_addr();
|
||||
let fe_opts = opts.frontend_options();
|
||||
let dn_opts = opts.datanode_options();
|
||||
|
||||
@@ -486,8 +486,8 @@ impl StartCommand {
|
||||
let metadata_dir = metadata_store_dir(data_home);
|
||||
let (kv_backend, procedure_manager) = FeInstance::try_build_standalone_components(
|
||||
metadata_dir,
|
||||
opts.metadata_store.clone(),
|
||||
opts.procedure.clone(),
|
||||
opts.metadata_store,
|
||||
opts.procedure,
|
||||
)
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
@@ -907,7 +907,7 @@ mod tests {
|
||||
assert_eq!("127.0.0.1:4000".to_string(), fe_opts.http.addr);
|
||||
assert_eq!(Duration::from_secs(33), fe_opts.http.timeout);
|
||||
assert_eq!(ReadableSize::mb(128), fe_opts.http.body_limit);
|
||||
assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.addr);
|
||||
assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.bind_addr);
|
||||
assert!(fe_opts.mysql.enable);
|
||||
assert_eq!("127.0.0.1:4002", fe_opts.mysql.addr);
|
||||
assert_eq!(2, fe_opts.mysql.runtime_size);
|
||||
@@ -1037,7 +1037,7 @@ mod tests {
|
||||
assert_eq!(ReadableSize::mb(64), fe_opts.http.body_limit);
|
||||
|
||||
// Should be default value.
|
||||
assert_eq!(fe_opts.grpc.addr, GrpcOptions::default().addr);
|
||||
assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
@@ -63,7 +63,7 @@ mod tests {
|
||||
.args([
|
||||
"datanode",
|
||||
"start",
|
||||
"--rpc-addr=0.0.0.0:4321",
|
||||
"--rpc-bind-addr=0.0.0.0:4321",
|
||||
"--node-id=1",
|
||||
&format!("--data-home={}", data_home.path().display()),
|
||||
&format!("--wal-dir={}", wal_dir.path().display()),
|
||||
@@ -80,7 +80,7 @@ mod tests {
|
||||
"--log-level=off",
|
||||
"cli",
|
||||
"attach",
|
||||
"--grpc-addr=0.0.0.0:4321",
|
||||
"--grpc-bind-addr=0.0.0.0:4321",
|
||||
// history commands can sneaky into stdout and mess up our tests, so disable it
|
||||
"--disable-helper",
|
||||
]);
|
||||
|
||||
@@ -17,9 +17,6 @@ use std::time::Duration;
|
||||
use cmd::options::GreptimeOptions;
|
||||
use cmd::standalone::StandaloneOptions;
|
||||
use common_config::Configurable;
|
||||
use common_grpc::channel_manager::{
|
||||
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
|
||||
};
|
||||
use common_options::datanode::{ClientOptions, DatanodeClientOptions};
|
||||
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_ENDPOINT};
|
||||
use common_wal::config::raft_engine::RaftEngineConfig;
|
||||
@@ -91,13 +88,8 @@ fn test_load_datanode_example_config() {
|
||||
..Default::default()
|
||||
},
|
||||
grpc: GrpcOptions::default()
|
||||
.with_addr("127.0.0.1:3001")
|
||||
.with_hostname("127.0.0.1:3001"),
|
||||
rpc_addr: Some("127.0.0.1:3001".to_string()),
|
||||
rpc_hostname: Some("127.0.0.1".to_string()),
|
||||
rpc_runtime_size: Some(8),
|
||||
rpc_max_recv_message_size: Some(DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE),
|
||||
rpc_max_send_message_size: Some(DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE),
|
||||
.with_bind_addr("127.0.0.1:3001")
|
||||
.with_server_addr("127.0.0.1:3001"),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
@@ -144,7 +136,9 @@ fn test_load_frontend_example_config() {
|
||||
remote_write: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
grpc: GrpcOptions::default().with_hostname("127.0.0.1:4001"),
|
||||
grpc: GrpcOptions::default()
|
||||
.with_bind_addr("127.0.0.1:4001")
|
||||
.with_server_addr("127.0.0.1:4001"),
|
||||
http: HttpOptions {
|
||||
cors_allowed_origins: vec!["https://example.com".to_string()],
|
||||
..Default::default()
|
||||
|
||||
@@ -12,9 +12,11 @@ common-base.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
config.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
num_cpus.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_with.workspace = true
|
||||
snafu.workspace = true
|
||||
sysinfo.workspace = true
|
||||
toml.workspace = true
|
||||
|
||||
@@ -16,6 +16,8 @@ pub mod config;
|
||||
pub mod error;
|
||||
pub mod utils;
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
pub use config::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -34,22 +36,27 @@ pub enum Mode {
|
||||
Distributed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(default)]
|
||||
pub struct KvBackendConfig {
|
||||
// Kv file size in bytes
|
||||
/// The size of the metadata store backend log file.
|
||||
pub file_size: ReadableSize,
|
||||
// Kv purge threshold in bytes
|
||||
/// The threshold of the metadata store size to trigger a purge.
|
||||
pub purge_threshold: ReadableSize,
|
||||
/// The interval of the metadata store to trigger a purge.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub purge_interval: Duration,
|
||||
}
|
||||
|
||||
impl Default for KvBackendConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
// log file size 256MB
|
||||
file_size: ReadableSize::mb(256),
|
||||
// purge threshold 4GB
|
||||
purge_threshold: ReadableSize::gb(4),
|
||||
// The log file size 64MB
|
||||
file_size: ReadableSize::mb(64),
|
||||
// The log purge threshold 256MB
|
||||
purge_threshold: ReadableSize::mb(256),
|
||||
// The log purge interval 1m
|
||||
purge_interval: Duration::from_secs(60),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,11 +20,12 @@ pub mod impl_conv;
|
||||
pub(crate) mod product;
|
||||
mod scalar_add;
|
||||
mod scalar_mul;
|
||||
mod sub;
|
||||
pub(crate) mod sum;
|
||||
mod vector_add;
|
||||
mod vector_div;
|
||||
mod vector_mul;
|
||||
mod vector_norm;
|
||||
mod vector_sub;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -48,10 +49,11 @@ impl VectorFunction {
|
||||
registry.register(Arc::new(scalar_mul::ScalarMulFunction));
|
||||
|
||||
// vector calculation
|
||||
registry.register(Arc::new(vector_add::VectorAddFunction));
|
||||
registry.register(Arc::new(vector_sub::VectorSubFunction));
|
||||
registry.register(Arc::new(vector_mul::VectorMulFunction));
|
||||
registry.register(Arc::new(vector_norm::VectorNormFunction));
|
||||
registry.register(Arc::new(vector_div::VectorDivFunction));
|
||||
registry.register(Arc::new(sub::SubFunction));
|
||||
registry.register(Arc::new(vector_norm::VectorNormFunction));
|
||||
registry.register(Arc::new(elem_sum::ElemSumFunction));
|
||||
registry.register(Arc::new(elem_product::ElemProductFunction));
|
||||
}
|
||||
|
||||
214
src/common/function/src/scalars/vector/vector_add.rs
Normal file
214
src/common/function/src/scalars/vector/vector_add.rs
Normal file
@@ -0,0 +1,214 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_query::error::InvalidFuncArgsSnafu;
|
||||
use common_query::prelude::Signature;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
|
||||
use nalgebra::DVectorView;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::helper;
|
||||
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
|
||||
|
||||
const NAME: &str = "vec_add";
|
||||
|
||||
/// Adds corresponding elements of two vectors, returns a vector.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```sql
|
||||
/// SELECT vec_to_string(vec_add("[1.0, 1.0]", "[1.0, 2.0]")) as result;
|
||||
///
|
||||
/// +---------------------------------------------------------------+
|
||||
/// | vec_to_string(vec_add(Utf8("[1.0, 1.0]"),Utf8("[1.0, 2.0]"))) |
|
||||
/// +---------------------------------------------------------------+
|
||||
/// | [2,3] |
|
||||
/// +---------------------------------------------------------------+
|
||||
///
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct VectorAddFunction;
|
||||
|
||||
impl Function for VectorAddFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(
|
||||
&self,
|
||||
_input_types: &[ConcreteDataType],
|
||||
) -> common_query::error::Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::binary_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
helper::one_of_sigs2(
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::binary_datatype(),
|
||||
],
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::binary_datatype(),
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(
|
||||
&self,
|
||||
_func_ctx: FunctionContext,
|
||||
columns: &[VectorRef],
|
||||
) -> common_query::error::Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 2,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly two, have: {}",
|
||||
columns.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
let arg0 = &columns[0];
|
||||
let arg1 = &columns[1];
|
||||
|
||||
ensure!(
|
||||
arg0.len() == arg1.len(),
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The lengths of the vector are not aligned, args 0: {}, args 1: {}",
|
||||
arg0.len(),
|
||||
arg1.len(),
|
||||
)
|
||||
}
|
||||
);
|
||||
|
||||
let len = arg0.len();
|
||||
let mut result = BinaryVectorBuilder::with_capacity(len);
|
||||
if len == 0 {
|
||||
return Ok(result.to_vector());
|
||||
}
|
||||
|
||||
let arg0_const = as_veclit_if_const(arg0)?;
|
||||
let arg1_const = as_veclit_if_const(arg1)?;
|
||||
|
||||
for i in 0..len {
|
||||
let arg0 = match arg0_const.as_ref() {
|
||||
Some(arg0) => Some(Cow::Borrowed(arg0.as_ref())),
|
||||
None => as_veclit(arg0.get_ref(i))?,
|
||||
};
|
||||
let arg1 = match arg1_const.as_ref() {
|
||||
Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
|
||||
None => as_veclit(arg1.get_ref(i))?,
|
||||
};
|
||||
let (Some(arg0), Some(arg1)) = (arg0, arg1) else {
|
||||
result.push_null();
|
||||
continue;
|
||||
};
|
||||
let vec0 = DVectorView::from_slice(&arg0, arg0.len());
|
||||
let vec1 = DVectorView::from_slice(&arg1, arg1.len());
|
||||
|
||||
let vec_res = vec0 + vec1;
|
||||
let veclit = vec_res.as_slice();
|
||||
let binlit = veclit_to_binlit(veclit);
|
||||
result.push(Some(&binlit));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for VectorAddFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Error;
|
||||
use datatypes::vectors::StringVector;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_sub() {
|
||||
let func = VectorAddFunction;
|
||||
|
||||
let input0 = Arc::new(StringVector::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
None,
|
||||
Some("[2.0,3.0,3.0]".to_string()),
|
||||
]));
|
||||
let input1 = Arc::new(StringVector::from(vec![
|
||||
Some("[1.0,1.0,1.0]".to_string()),
|
||||
Some("[6.0,5.0,4.0]".to_string()),
|
||||
Some("[3.0,2.0,2.0]".to_string()),
|
||||
None,
|
||||
]));
|
||||
|
||||
let result = func
|
||||
.eval(FunctionContext::default(), &[input0, input1])
|
||||
.unwrap();
|
||||
|
||||
let result = result.as_ref();
|
||||
assert_eq!(result.len(), 4);
|
||||
assert_eq!(
|
||||
result.get_ref(0).as_binary().unwrap(),
|
||||
Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice())
|
||||
);
|
||||
assert_eq!(
|
||||
result.get_ref(1).as_binary().unwrap(),
|
||||
Some(veclit_to_binlit(&[10.0, 10.0, 10.0]).as_slice())
|
||||
);
|
||||
assert!(result.get_ref(2).is_null());
|
||||
assert!(result.get_ref(3).is_null());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_add_error() {
|
||||
let func = VectorAddFunction;
|
||||
|
||||
let input0 = Arc::new(StringVector::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
None,
|
||||
Some("[2.0,3.0,3.0]".to_string()),
|
||||
]));
|
||||
let input1 = Arc::new(StringVector::from(vec![
|
||||
Some("[1.0,1.0,1.0]".to_string()),
|
||||
Some("[6.0,5.0,4.0]".to_string()),
|
||||
Some("[3.0,2.0,2.0]".to_string()),
|
||||
]));
|
||||
|
||||
let result = func.eval(FunctionContext::default(), &[input0, input1]);
|
||||
|
||||
match result {
|
||||
Err(Error::InvalidFuncArgs { err_msg, .. }) => {
|
||||
assert_eq!(
|
||||
err_msg,
|
||||
"The lengths of the vector are not aligned, args 0: 4, args 1: 3"
|
||||
)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
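As an aside for readers of this hunk (illustration only, not part of the change; `add_veclits` is a made-up name): the per-row arithmetic in `eval` boils down to an element-wise sum of two f32 slices via nalgebra, roughly:

use nalgebra::DVectorView;

// Element-wise addition of two f32 slices, mirroring the `vec0 + vec1` step in eval().
// Both slices are assumed to have the same dimension; nalgebra panics on a mismatch.
fn add_veclits(a: &[f32], b: &[f32]) -> Vec<f32> {
    let va = DVectorView::from_slice(a, a.len());
    let vb = DVectorView::from_slice(b, b.len());
    (va + vb).as_slice().to_vec()
}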
@@ -42,19 +42,10 @@ const NAME: &str = "vec_sub";
|
||||
/// | [0,-1] |
|
||||
/// +---------------------------------------------------------------+
|
||||
///
|
||||
/// -- Negative scalar to simulate subtraction
|
||||
/// SELECT vec_to_string(vec_sub('[-1.0, -1.0]', '[1.0, 2.0]'));
|
||||
///
|
||||
/// +-----------------------------------------------------------------+
|
||||
/// | vec_to_string(vec_sub(Utf8("[-1.0, -1.0]"),Utf8("[1.0, 2.0]"))) |
|
||||
/// +-----------------------------------------------------------------+
|
||||
/// | [-2,-3] |
|
||||
/// +-----------------------------------------------------------------+
|
||||
///
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct SubFunction;
|
||||
pub struct VectorSubFunction;
|
||||
|
||||
impl Function for SubFunction {
|
||||
impl Function for VectorSubFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
@@ -142,7 +133,7 @@ impl Function for SubFunction {
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SubFunction {
|
||||
impl Display for VectorSubFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
@@ -159,7 +150,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_sub() {
|
||||
let func = SubFunction;
|
||||
let func = VectorSubFunction;
|
||||
|
||||
let input0 = Arc::new(StringVector::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
@@ -194,7 +185,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_sub_error() {
|
||||
let func = SubFunction;
|
||||
let func = VectorSubFunction;
|
||||
|
||||
let input0 = Arc::new(StringVector::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_procedure::Status;
|
||||
use common_telemetry::info;
|
||||
@@ -25,6 +26,7 @@ use super::cursor::DropDatabaseCursor;
|
||||
use super::{DropDatabaseContext, DropTableTarget};
|
||||
use crate::ddl::drop_database::State;
|
||||
use crate::ddl::drop_table::executor::DropTableExecutor;
|
||||
use crate::ddl::utils::extract_region_wal_options;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
@@ -107,8 +109,22 @@ impl State for DropDatabaseExecutor {
|
||||
self.physical_table_id,
|
||||
self.physical_region_routes.clone(),
|
||||
);
|
||||
|
||||
// Deletes topic-region mapping if dropping physical table
|
||||
let region_wal_options =
|
||||
if let TableRouteValue::Physical(table_route_value) = &table_route_value {
|
||||
let datanode_table_values = ddl_ctx
|
||||
.table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
.regions(self.physical_table_id, table_route_value)
|
||||
.await?;
|
||||
extract_region_wal_options(&datanode_table_values)?
|
||||
} else {
|
||||
HashMap::new()
|
||||
};
|
||||
|
||||
executor
|
||||
.on_destroy_metadata(ddl_ctx, &table_route_value)
|
||||
.on_destroy_metadata(ddl_ctx, &table_route_value, ®ion_wal_options)
|
||||
.await?;
|
||||
executor.invalidate_table_cache(ddl_ctx).await?;
|
||||
executor
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
pub(crate) mod executor;
|
||||
mod metadata;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::error::{ExternalSnafu, FromJsonSnafu, ToJsonSnafu};
|
||||
@@ -24,8 +26,10 @@ use common_procedure::{
|
||||
};
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::tracing::warn;
|
||||
use common_wal::options::WalOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::RegionNumber;
|
||||
use strum::AsRefStr;
|
||||
use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
@@ -131,7 +135,11 @@ impl DropTableProcedure {
|
||||
);
|
||||
// Deletes table metadata logically.
|
||||
self.executor
|
||||
.on_delete_metadata(&self.context, table_route_value)
|
||||
.on_delete_metadata(
|
||||
&self.context,
|
||||
table_route_value,
|
||||
&self.data.region_wal_options,
|
||||
)
|
||||
.await?;
|
||||
info!("Deleted table metadata for table {table_id}");
|
||||
self.data.state = DropTableState::InvalidateTableCache;
|
||||
@@ -163,7 +171,11 @@ impl DropTableProcedure {
|
||||
self.data.physical_region_routes.clone(),
|
||||
);
|
||||
self.executor
|
||||
.on_delete_metadata_tombstone(&self.context, table_route_value)
|
||||
.on_delete_metadata_tombstone(
|
||||
&self.context,
|
||||
table_route_value,
|
||||
&self.data.region_wal_options,
|
||||
)
|
||||
.await?;
|
||||
|
||||
self.dropping_regions.clear();
|
||||
@@ -243,7 +255,11 @@ impl Procedure for DropTableProcedure {
|
||||
self.data.physical_region_routes.clone(),
|
||||
);
|
||||
self.executor
|
||||
.on_restore_metadata(&self.context, table_route_value)
|
||||
.on_restore_metadata(
|
||||
&self.context,
|
||||
table_route_value,
|
||||
&self.data.region_wal_options,
|
||||
)
|
||||
.await
|
||||
.map_err(ProcedureError::external)
|
||||
}
|
||||
@@ -257,6 +273,8 @@ pub struct DropTableData {
|
||||
pub physical_region_routes: Vec<RegionRoute>,
|
||||
pub physical_table_id: Option<TableId>,
|
||||
#[serde(default)]
|
||||
pub region_wal_options: HashMap<RegionNumber, WalOptions>,
|
||||
#[serde(default)]
|
||||
pub allow_rollback: bool,
|
||||
}
|
||||
|
||||
@@ -268,6 +286,7 @@ impl DropTableData {
|
||||
task,
|
||||
physical_region_routes: vec![],
|
||||
physical_table_id: None,
|
||||
region_wal_options: HashMap::new(),
|
||||
allow_rollback: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::region::{
|
||||
region_request, DropRequest as PbDropRegionRequest, RegionRequest, RegionRequestHeader,
|
||||
};
|
||||
@@ -19,9 +21,10 @@ use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_telemetry::debug;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_wal::options::WalOptions;
|
||||
use futures::future::join_all;
|
||||
use snafu::ensure;
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::TableId;
|
||||
use table::table_name::TableName;
|
||||
|
||||
@@ -113,9 +116,15 @@ impl DropTableExecutor {
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.delete_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.delete_table_metadata(
|
||||
self.table_id,
|
||||
&self.table,
|
||||
table_route_value,
|
||||
region_wal_options,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -124,9 +133,15 @@ impl DropTableExecutor {
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<u32, WalOptions>,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.delete_table_metadata_tombstone(self.table_id, &self.table, table_route_value)
|
||||
.delete_table_metadata_tombstone(
|
||||
self.table_id,
|
||||
&self.table,
|
||||
table_route_value,
|
||||
region_wal_options,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -135,9 +150,15 @@ impl DropTableExecutor {
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<u32, WalOptions>,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.destroy_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.destroy_table_metadata(
|
||||
self.table_id,
|
||||
&self.table,
|
||||
table_route_value,
|
||||
region_wal_options,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let detecting_regions = if table_route_value.is_physical() {
|
||||
@@ -156,9 +177,15 @@ impl DropTableExecutor {
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<u32, WalOptions>,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.restore_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.restore_table_metadata(
|
||||
self.table_id,
|
||||
&self.table,
|
||||
table_route_value,
|
||||
region_wal_options,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ use snafu::OptionExt;
|
||||
use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
|
||||
|
||||
use crate::ddl::drop_table::DropTableProcedure;
|
||||
use crate::ddl::utils::extract_region_wal_options;
|
||||
use crate::error::{self, Result};
|
||||
|
||||
impl DropTableProcedure {
|
||||
@@ -30,9 +31,6 @@ impl DropTableProcedure {
|
||||
.get_physical_table_route(task.table_id)
|
||||
.await?;
|
||||
|
||||
self.data.physical_region_routes = physical_table_route_value.region_routes;
|
||||
self.data.physical_table_id = Some(physical_table_id);
|
||||
|
||||
if physical_table_id == self.data.table_id() {
|
||||
let table_info_value = self
|
||||
.context
|
||||
@@ -47,9 +45,21 @@ impl DropTableProcedure {
|
||||
|
||||
let engine = table_info_value.table_info.meta.engine;
|
||||
// rollback only if dropping the metric physical table fails
|
||||
self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME
|
||||
self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME;
|
||||
|
||||
// Deletes topic-region mapping if dropping physical table
|
||||
let datanode_table_values = self
|
||||
.context
|
||||
.table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
.regions(physical_table_id, &physical_table_route_value)
|
||||
.await?;
|
||||
self.data.region_wal_options = extract_region_wal_options(&datanode_table_values)?;
|
||||
}
|
||||
|
||||
self.data.physical_region_routes = physical_table_route_value.region_routes;
|
||||
self.data.physical_table_id = Some(physical_table_id);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,16 +12,23 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_catalog::consts::METRIC_ENGINE;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::error::Error as ProcedureError;
|
||||
use common_wal::options::WalOptions;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
|
||||
use store_api::storage::RegionNumber;
|
||||
use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::ddl::DetectingRegion;
|
||||
use crate::error::{Error, OperateDatanodeSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu};
|
||||
use crate::error::{
|
||||
Error, OperateDatanodeSnafu, ParseWalOptionsSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu,
|
||||
};
|
||||
use crate::key::datanode_table::DatanodeTableValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::TableMetadataManagerRef;
|
||||
use crate::peer::Peer;
|
||||
@@ -151,6 +158,32 @@ pub fn convert_region_routes_to_detecting_regions(
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// Parses [WalOptions] from serialized strings in a hashmap.
|
||||
pub fn parse_region_wal_options(
|
||||
serialized_options: &HashMap<RegionNumber, String>,
|
||||
) -> Result<HashMap<RegionNumber, WalOptions>> {
|
||||
let mut region_wal_options = HashMap::with_capacity(serialized_options.len());
|
||||
for (region_number, wal_options) in serialized_options {
|
||||
let wal_option = serde_json::from_str::<WalOptions>(wal_options)
|
||||
.context(ParseWalOptionsSnafu { wal_options })?;
|
||||
region_wal_options.insert(*region_number, wal_option);
|
||||
}
|
||||
Ok(region_wal_options)
|
||||
}
|
||||
|
||||
/// Extracts region wal options from [DatanodeTableValue]s.
|
||||
pub fn extract_region_wal_options(
|
||||
datanode_table_values: &Vec<DatanodeTableValue>,
|
||||
) -> Result<HashMap<RegionNumber, WalOptions>> {
|
||||
let mut region_wal_options = HashMap::new();
|
||||
for value in datanode_table_values {
|
||||
let serialized_options = &value.region_info.region_wal_options;
|
||||
let parsed_options = parse_region_wal_options(serialized_options)?;
|
||||
region_wal_options.extend(parsed_options);
|
||||
}
|
||||
Ok(region_wal_options)
|
||||
}
|
||||
|
||||
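A hedged round-trip sketch of `parse_region_wal_options` (standalone illustration, not part of the diff; the topic name is made up, and the serde_json encoding follows what the tests in this change use):

use std::collections::HashMap;
use common_wal::options::{KafkaWalOptions, WalOptions};

fn parse_example() -> Result<()> {
    let serialized: HashMap<RegionNumber, String> = HashMap::from([(
        0,
        serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
            topic: "greptimedb_topic0".to_string(),
        }))
        .unwrap(),
    )]);
    let parsed = parse_region_wal_options(&serialized)?;
    // The serialized Kafka option comes back as a typed WalOptions value.
    assert!(matches!(parsed.get(&0), Some(WalOptions::Kafka(_))));
    Ok(())
}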
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -710,6 +710,15 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse wal options: {}", wal_options))]
|
||||
ParseWalOptions {
|
||||
wal_options: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: serde_json::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -762,7 +771,8 @@ impl ErrorExt for Error {
|
||||
| UnexpectedLogicalRouteTable { .. }
|
||||
| ProcedureOutput { .. }
|
||||
| FromUtf8 { .. }
|
||||
| MetadataCorruption { .. } => StatusCode::Unexpected,
|
||||
| MetadataCorruption { .. }
|
||||
| ParseWalOptions { .. } => StatusCode::Unexpected,
|
||||
|
||||
SendMessage { .. } | GetKvCache { .. } | CacheNotGet { .. } => StatusCode::Internal,
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
//! - This key is mainly used in constructing the view in Datanode and Frontend.
|
||||
//!
|
||||
//! 12. Kafka topic key: `__topic_name/kafka/{topic_name}`
|
||||
//! - The key is used to mark existing topics in kafka for WAL.
|
||||
//! - The key is used to mark existing topics in kafka for WAL.
|
||||
//!
|
||||
//! 13. Topic name to region map key `__topic_region/{topic_name}/{region_id}`
|
||||
//! - Mapping {topic_name} to {region_id}
|
||||
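Worked illustration for entry 13 (values are made up; the prefix is the `TOPIC_REGION_PREFIX` constant):

// Region 0 of table 42 writing to Kafka topic "greptimedb_topic0" is recorded under a
// key of the shape `__topic_region/greptimedb_topic0/{RegionId::new(42, 0).as_u64()}`,
// i.e. the region id is stored in its packed u64 form.
let key = TopicRegionKey::new(RegionId::new(42, 0), "greptimedb_topic0");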
@@ -122,6 +122,7 @@ use common_catalog::consts::{
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME,
|
||||
};
|
||||
use common_telemetry::warn;
|
||||
use common_wal::options::WalOptions;
|
||||
use datanode_table::{DatanodeTableKey, DatanodeTableManager, DatanodeTableValue};
|
||||
use flow::flow_route::FlowRouteValue;
|
||||
use flow::table_flow::TableFlowValue;
|
||||
@@ -136,6 +137,7 @@ use table::metadata::{RawTableInfo, TableId};
|
||||
use table::table_name::TableName;
|
||||
use table_info::{TableInfoKey, TableInfoManager, TableInfoValue};
|
||||
use table_name::{TableNameKey, TableNameManager, TableNameValue};
|
||||
use topic_region::{TopicRegionKey, TopicRegionManager};
|
||||
use view_info::{ViewInfoKey, ViewInfoManager, ViewInfoValue};
|
||||
|
||||
use self::catalog_name::{CatalogManager, CatalogNameKey, CatalogNameValue};
|
||||
@@ -306,6 +308,7 @@ pub struct TableMetadataManager {
|
||||
schema_manager: SchemaManager,
|
||||
table_route_manager: TableRouteManager,
|
||||
tombstone_manager: TombstoneManager,
|
||||
topic_region_manager: TopicRegionManager,
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
@@ -456,6 +459,7 @@ impl TableMetadataManager {
|
||||
schema_manager: SchemaManager::new(kv_backend.clone()),
|
||||
table_route_manager: TableRouteManager::new(kv_backend.clone()),
|
||||
tombstone_manager: TombstoneManager::new(kv_backend.clone()),
|
||||
topic_region_manager: TopicRegionManager::new(kv_backend.clone()),
|
||||
kv_backend,
|
||||
}
|
||||
}
|
||||
@@ -648,10 +652,15 @@ impl TableMetadataManager {
|
||||
.table_route_storage()
|
||||
.build_create_txn(table_id, &table_route_value)?;
|
||||
|
||||
let create_topic_region_txn = self
|
||||
.topic_region_manager
|
||||
.build_create_txn(table_id, ®ion_wal_options)?;
|
||||
|
||||
let mut txn = Txn::merge_all(vec![
|
||||
create_table_name_txn,
|
||||
create_table_info_txn,
|
||||
create_table_route_txn,
|
||||
create_topic_region_txn,
|
||||
]);
|
||||
|
||||
if let TableRouteValue::Physical(x) = &table_route_value {
|
||||
@@ -785,6 +794,7 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<Vec<Vec<u8>>> {
|
||||
// Builds keys
|
||||
let datanode_ids = if table_route_value.is_physical() {
|
||||
@@ -806,13 +816,22 @@ impl TableMetadataManager {
|
||||
.into_iter()
|
||||
.map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let topic_region_map = self
|
||||
.topic_region_manager
|
||||
.get_topic_region_mapping(table_id, region_wal_options);
|
||||
let topic_region_keys = topic_region_map
|
||||
.iter()
|
||||
.map(|(region_id, topic)| TopicRegionKey::new(*region_id, topic))
|
||||
.collect::<Vec<_>>();
|
||||
keys.push(table_name.to_bytes());
|
||||
keys.push(table_info_key.to_bytes());
|
||||
keys.push(table_route_key.to_bytes());
|
||||
for key in &datanode_table_keys {
|
||||
keys.push(key.to_bytes());
|
||||
}
|
||||
for key in topic_region_keys {
|
||||
keys.push(key.to_bytes());
|
||||
}
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
@@ -823,8 +842,10 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
let keys =
|
||||
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
|
||||
self.tombstone_manager.create(keys).await
|
||||
}
|
||||
|
||||
@@ -835,9 +856,11 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
self.tombstone_manager.delete(keys).await
|
||||
let table_metadata_keys =
|
||||
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
|
||||
self.tombstone_manager.delete(table_metadata_keys).await
|
||||
}
|
||||
|
||||
/// Restores metadata for table.
|
||||
@@ -847,8 +870,10 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
let keys =
|
||||
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
|
||||
self.tombstone_manager.restore(keys).await
|
||||
}
|
||||
|
||||
@@ -859,8 +884,10 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
let keys =
|
||||
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
|
||||
let _ = self
|
||||
.kv_backend
|
||||
.batch_delete(BatchDeleteRequest::new().with_keys(keys))
|
||||
@@ -1309,8 +1336,9 @@ mod tests {
|
||||
use bytes::Bytes;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_time::util::current_time_millis;
|
||||
use common_wal::options::{KafkaWalOptions, WalOptions};
|
||||
use futures::TryStreamExt;
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::{RawTableInfo, TableInfo};
|
||||
use table::table_name::TableName;
|
||||
|
||||
@@ -1323,10 +1351,15 @@ mod tests {
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::{DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue};
|
||||
use crate::key::{
|
||||
DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue, TOPIC_REGION_PREFIX,
|
||||
};
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::KvBackend;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::router::{region_distribution, LeaderState, Region, RegionRoute};
|
||||
use crate::rpc::store::RangeRequest;
|
||||
use crate::wal_options_allocator::{allocate_region_wal_options, WalOptionsAllocator};
|
||||
|
||||
#[test]
|
||||
fn test_deserialized_value_with_bytes() {
|
||||
@@ -1398,16 +1431,63 @@ mod tests {
|
||||
table_metadata_manager: &TableMetadataManager,
|
||||
table_info: RawTableInfo,
|
||||
region_routes: Vec<RegionRoute>,
|
||||
region_wal_options: HashMap<RegionNumber, String>,
|
||||
) -> Result<()> {
|
||||
table_metadata_manager
|
||||
.create_table_metadata(
|
||||
table_info,
|
||||
TableRouteValue::physical(region_routes),
|
||||
HashMap::default(),
|
||||
region_wal_options,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
fn create_mock_region_wal_options() -> HashMap<RegionNumber, WalOptions> {
|
||||
let topics = (0..2)
|
||||
.map(|i| format!("greptimedb_topic{}", i))
|
||||
.collect::<Vec<_>>();
|
||||
let wal_options = topics
|
||||
.iter()
|
||||
.map(|topic| {
|
||||
WalOptions::Kafka(KafkaWalOptions {
|
||||
topic: topic.clone(),
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
(0..16)
|
||||
.enumerate()
|
||||
.map(|(i, region_number)| (region_number, wal_options[i % wal_options.len()].clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_raft_engine_topic_region_map() {
|
||||
let mem_kv = Arc::new(MemoryKvBackend::default());
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = &vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let wal_allocator = WalOptionsAllocator::RaftEngine;
|
||||
let regions = (0..16).collect();
|
||||
let region_wal_options = allocate_region_wal_options(regions, &wal_allocator).unwrap();
|
||||
create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
region_wal_options.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let topic_region_key = TOPIC_REGION_PREFIX.to_string();
|
||||
let range_req = RangeRequest::new().with_prefix(topic_region_key);
|
||||
let resp = mem_kv.range(range_req).await.unwrap();
|
||||
// Should be empty because the topic region map is empty for raft engine.
|
||||
assert!(resp.kvs.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_metadata() {
|
||||
let mem_kv = Arc::new(MemoryKvBackend::default());
|
||||
@@ -1416,11 +1496,17 @@ mod tests {
|
||||
let region_routes = &vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let region_wal_options = create_mock_region_wal_options()
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k, serde_json::to_string(&v).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
// creates metadata.
|
||||
create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
region_wal_options.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1430,6 +1516,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
region_wal_options.clone(),
|
||||
)
|
||||
.await
|
||||
.is_ok());
|
||||
@@ -1440,7 +1527,8 @@ mod tests {
|
||||
assert!(create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
modified_region_routes
|
||||
modified_region_routes,
|
||||
region_wal_options.clone(),
|
||||
)
|
||||
.await
|
||||
.is_err());
|
||||
@@ -1462,6 +1550,19 @@ mod tests {
|
||||
.unwrap(),
|
||||
region_routes
|
||||
);
|
||||
|
||||
for i in 0..2 {
|
||||
let region_number = i as u32;
|
||||
let region_id = RegionId::new(table_info.ident.table_id, region_number);
|
||||
let topic = format!("greptimedb_topic{}", i);
|
||||
let regions = table_metadata_manager
|
||||
.topic_region_manager
|
||||
.regions(&topic)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(regions.len(), 8);
|
||||
assert_eq!(regions[0], region_id);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1557,12 +1658,18 @@ mod tests {
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_id = table_info.ident.table_id;
|
||||
let datanode_id = 2;
|
||||
let region_wal_options = create_mock_region_wal_options();
|
||||
let serialized_region_wal_options = region_wal_options
|
||||
.iter()
|
||||
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
// creates metadata.
|
||||
create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
serialized_region_wal_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1575,12 +1682,22 @@ mod tests {
|
||||
let table_route_value = &TableRouteValue::physical(region_routes.clone());
|
||||
// deletes metadata.
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, table_route_value)
|
||||
.delete_table_metadata(
|
||||
table_id,
|
||||
&table_name,
|
||||
table_route_value,
|
||||
®ion_wal_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// Should be ignored.
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, table_route_value)
|
||||
.delete_table_metadata(
|
||||
table_id,
|
||||
&table_name,
|
||||
table_route_value,
|
||||
®ion_wal_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(table_metadata_manager
|
||||
@@ -1617,6 +1734,19 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(table_route.is_none());
|
||||
// Logical delete removes the topic region mapping as well.
|
||||
let regions = table_metadata_manager
|
||||
.topic_region_manager
|
||||
.regions("greptimedb_topic0")
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(regions.len(), 0);
|
||||
let regions = table_metadata_manager
|
||||
.topic_region_manager
|
||||
.regions("greptimedb_topic1")
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(regions.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1633,6 +1763,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1705,6 +1836,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1790,6 +1922,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1870,6 +2003,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1980,7 +2114,11 @@ mod tests {
|
||||
let table_id = 1025;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
let options = [(0, "test".to_string())].into();
|
||||
let options = create_mock_region_wal_options();
|
||||
let serialized_options = options
|
||||
.iter()
|
||||
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info,
|
||||
@@ -2007,7 +2145,7 @@ mod tests {
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
options,
|
||||
serialized_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -2020,7 +2158,7 @@ mod tests {
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
table_metadata_manager
|
||||
.destroy_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.destroy_table_metadata(table_id, &table_name, &table_route_value, &options)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(mem_kv.is_empty());
|
||||
@@ -2033,7 +2171,11 @@ mod tests {
|
||||
let table_id = 1025;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
let options = [(0, "test".to_string())].into();
|
||||
let options = create_mock_region_wal_options();
|
||||
let serialized_options = options
|
||||
.iter()
|
||||
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info,
|
||||
@@ -2060,7 +2202,7 @@ mod tests {
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
options,
|
||||
serialized_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -2076,18 +2218,18 @@ mod tests {
|
||||
let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
|
||||
let table_route_value = TableRouteValue::physical(region_routes.clone());
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.delete_table_metadata(table_id, &table_name, &table_route_value, &options)
|
||||
.await
|
||||
.unwrap();
|
||||
table_metadata_manager
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value, &options)
|
||||
.await
|
||||
.unwrap();
|
||||
let kvs = mem_kv.dump();
|
||||
assert_eq!(kvs, expected_result);
|
||||
// Should be ignored.
|
||||
table_metadata_manager
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value, &options)
|
||||
.await
|
||||
.unwrap();
|
||||
let kvs = mem_kv.dump();
|
||||
|
||||
@@ -21,6 +21,7 @@ use snafu::OptionExt;
|
||||
use store_api::storage::RegionNumber;
|
||||
use table::metadata::TableId;
|
||||
|
||||
use super::table_route::PhysicalTableRouteValue;
|
||||
use super::MetadataKey;
|
||||
use crate::error::{DatanodeTableInfoNotFoundSnafu, InvalidMetadataSnafu, Result};
|
||||
use crate::key::{
|
||||
@@ -29,7 +30,8 @@ use crate::key::{
|
||||
use crate::kv_backend::txn::{Txn, TxnOp};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE};
|
||||
use crate::rpc::store::RangeRequest;
|
||||
use crate::rpc::router::region_distribution;
|
||||
use crate::rpc::store::{BatchGetRequest, RangeRequest};
|
||||
use crate::rpc::KeyValue;
|
||||
use crate::DatanodeId;
|
||||
|
||||
@@ -172,6 +174,26 @@ impl DatanodeTableManager {
|
||||
Box::pin(stream)
|
||||
}
|
||||
|
||||
/// Find the [DatanodeTableValue]s for the given [TableId] and [PhysicalTableRouteValue].
|
||||
pub async fn regions(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_routes: &PhysicalTableRouteValue,
|
||||
) -> Result<Vec<DatanodeTableValue>> {
|
||||
let keys = region_distribution(&table_routes.region_routes)
|
||||
.into_keys()
|
||||
.map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
|
||||
.collect::<Vec<_>>();
|
||||
let req = BatchGetRequest {
|
||||
keys: keys.iter().map(|k| k.to_bytes()).collect(),
|
||||
};
|
||||
let resp = self.kv_backend.batch_get(req).await?;
|
||||
resp.kvs
|
||||
.into_iter()
|
||||
.map(datanode_table_value_decoder)
|
||||
.collect()
|
||||
}
|
||||
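A hedged sketch of the intended call site for the new `regions` helper (variable names are illustrative; it mirrors the drop-table metadata step earlier in this diff):

let datanode_table_values = table_metadata_manager
    .datanode_table_manager()
    .regions(physical_table_id, &physical_table_route_value)
    .await?;
let region_wal_options = extract_region_wal_options(&datanode_table_values)?;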
|
||||
/// Builds the create datanode table transactions. It only executes when the primary key comparison succeeds.
|
||||
pub fn build_create_txn(
|
||||
&self,
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::warn;
|
||||
use futures::stream::BoxStream;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
@@ -37,6 +38,12 @@ lazy_static! {
|
||||
"^{FLOW_NAME_KEY_PREFIX}/({NAME_PATTERN})/({NAME_PATTERN})$"
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
/// For compatibility with older flow names that used a less strict name pattern.
|
||||
static ref COMPAT_FLOW_NAME_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{FLOW_NAME_KEY_PREFIX}/({NAME_PATTERN})/(.*)$"
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// The key of mapping {flow_name} to [FlowId].
|
||||
@@ -114,12 +121,18 @@ impl<'a> MetadataKey<'a, FlowNameKeyInner<'a>> for FlowNameKeyInner<'_> {
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
let captures =
|
||||
FLOW_NAME_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(error::InvalidMetadataSnafu {
|
||||
err_msg: format!("Invalid FlowNameKeyInner '{key}'"),
|
||||
})?;
|
||||
let captures = FLOW_NAME_KEY_PATTERN
|
||||
.captures(key)
|
||||
.or_else(|| {
|
||||
warn!(
|
||||
"FlowNameKeyInner '{}' is not a valid flow name in newer version.",
|
||||
key
|
||||
);
|
||||
COMPAT_FLOW_NAME_KEY_PATTERN.captures(key)
|
||||
})
|
||||
.context(error::InvalidMetadataSnafu {
|
||||
err_msg: format!("Invalid FlowNameKeyInner '{key}'"),
|
||||
})?;
|
||||
// Safety: pass the regex check above
|
||||
let catalog_name = captures.get(1).unwrap().as_str();
|
||||
let flow_name = captures.get(2).unwrap().as_str();
|
||||
@@ -284,6 +297,12 @@ mod tests {
|
||||
let key = FlowNameKey::from_bytes(&bytes).unwrap();
|
||||
assert_eq!(key.catalog(), "my_catalog");
|
||||
assert_eq!(key.flow_name(), "my_task");
|
||||
|
||||
// compatibility with older version
|
||||
let bytes = b"__flow/name/my_catalog/a/`b`".to_vec();
|
||||
let key = FlowNameKey::from_bytes(&bytes).unwrap();
|
||||
assert_eq!(key.catalog(), "my_catalog");
|
||||
assert_eq!(key.flow_name(), "a/`b`");
|
||||
}
|
||||
#[test]
|
||||
fn test_key_start_range() {
|
||||
|
||||
@@ -26,18 +26,25 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_wal::options::WalOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::ddl::utils::parse_region_wal_options;
|
||||
use crate::error::{Error, InvalidMetadataSnafu, Result};
|
||||
use crate::key::{MetadataKey, TOPIC_REGION_PATTERN, TOPIC_REGION_PREFIX};
|
||||
use crate::kv_backend::txn::{Txn, TxnOp};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::{BatchPutRequest, PutRequest, RangeRequest};
|
||||
use crate::rpc::store::{BatchDeleteRequest, BatchPutRequest, PutRequest, RangeRequest};
|
||||
use crate::rpc::KeyValue;
|
||||
|
||||
// The TopicRegionKey is a key for the topic-region mapping in the kvbackend.
|
||||
// The layout of the key is `__topic_region/{topic_name}/{region_id}`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct TopicRegionKey<'a> {
|
||||
pub region_id: RegionId,
|
||||
@@ -53,7 +60,7 @@ impl<'a> TopicRegionKey<'a> {
|
||||
}
|
||||
|
||||
pub fn range_topic_key(topic: &str) -> String {
|
||||
format!("{}/{}", TOPIC_REGION_PREFIX, topic)
|
||||
format!("{}/{}/", TOPIC_REGION_PREFIX, topic)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +87,7 @@ impl Display for TopicRegionKey<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}/{}",
|
||||
"{}{}",
|
||||
Self::range_topic_key(self.topic),
|
||||
self.region_id.as_u64()
|
||||
)
|
||||
@@ -151,6 +158,24 @@ impl TopicRegionManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn build_create_txn(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
region_wal_options: &HashMap<RegionNumber, String>,
|
||||
) -> Result<Txn> {
|
||||
let region_wal_options = parse_region_wal_options(region_wal_options)?;
|
||||
let topic_region_mapping = self.get_topic_region_mapping(table_id, ®ion_wal_options);
|
||||
let topic_region_keys = topic_region_mapping
|
||||
.iter()
|
||||
.map(|(region_id, topic)| TopicRegionKey::new(*region_id, topic))
|
||||
.collect::<Vec<_>>();
|
||||
let operations = topic_region_keys
|
||||
.into_iter()
|
||||
.map(|key| TxnOp::Put(key.to_bytes(), vec![]))
|
||||
.collect::<Vec<_>>();
|
||||
Ok(Txn::new().and_then(operations))
|
||||
}
|
||||
|
||||
/// Returns the list of region ids using specified topic.
|
||||
pub async fn regions(&self, topic: &str) -> Result<Vec<RegionId>> {
|
||||
let prefix = TopicRegionKey::range_topic_key(topic);
|
||||
@@ -169,12 +194,49 @@ impl TopicRegionManager {
|
||||
self.kv_backend.delete(&raw_key, false).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn batch_delete(&self, keys: Vec<TopicRegionKey<'_>>) -> Result<()> {
|
||||
let raw_keys = keys.iter().map(|key| key.to_bytes()).collect::<Vec<_>>();
|
||||
let req = BatchDeleteRequest {
|
||||
keys: raw_keys,
|
||||
prev_kv: false,
|
||||
};
|
||||
self.kv_backend.batch_delete(req).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Retrieves a mapping of [`RegionId`]s to their corresponding topic names
|
||||
/// based on the provided table ID and WAL options.
|
||||
///
|
||||
/// # Returns
|
||||
/// A vector of tuples, where each tuple contains a [`RegionId`] and its corresponding topic name.
|
||||
pub fn get_topic_region_mapping<'a>(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
region_wal_options: &'a HashMap<RegionNumber, WalOptions>,
|
||||
) -> Vec<(RegionId, &'a str)> {
|
||||
region_wal_options
|
||||
.keys()
|
||||
.filter_map(
|
||||
|region_number| match region_wal_options.get(region_number) {
|
||||
Some(WalOptions::Kafka(kafka)) => {
|
||||
let region_id = RegionId::new(table_id, *region_number);
|
||||
Some((region_id, kafka.topic.as_str()))
|
||||
}
|
||||
Some(WalOptions::RaftEngine) => None,
|
||||
None => None,
|
||||
},
|
||||
)
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
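A hedged sketch of how the mapping is consumed when a table is dropped (it mirrors `table_metadata_keys` earlier in this diff; `manager` is illustrative):

let mapping = manager.get_topic_region_mapping(table_id, &region_wal_options);
let keys = mapping
    .iter()
    .map(|(region_id, topic)| TopicRegionKey::new(*region_id, topic))
    .collect::<Vec<_>>();
manager.batch_delete(keys).await?;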
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_wal::options::KafkaWalOptions;
|
||||
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
|
||||
@@ -220,4 +282,45 @@ mod tests {
|
||||
key_values.sort_by_key(|id| id.as_u64());
|
||||
assert_eq!(key_values, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_topic_region_map() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let manager = TopicRegionManager::new(kv_backend.clone());
|
||||
|
||||
let table_id = 1;
|
||||
let region_wal_options = (0..64)
|
||||
.map(|i| {
|
||||
let region_number = i;
|
||||
let wal_options = if i % 2 == 0 {
|
||||
WalOptions::Kafka(KafkaWalOptions {
|
||||
topic: format!("topic_{}", i),
|
||||
})
|
||||
} else {
|
||||
WalOptions::RaftEngine
|
||||
};
|
||||
(region_number, serde_json::to_string(&wal_options).unwrap())
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
let region_wal_options = parse_region_wal_options(®ion_wal_options).unwrap();
|
||||
let mut topic_region_mapping =
|
||||
manager.get_topic_region_mapping(table_id, ®ion_wal_options);
|
||||
let mut expected = (0..64)
|
||||
.filter_map(|i| {
|
||||
if i % 2 == 0 {
|
||||
Some((RegionId::new(table_id, i), format!("topic_{}", i)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
topic_region_mapping.sort_by_key(|(region_id, _)| region_id.as_u64());
|
||||
let topic_region_map = topic_region_mapping
|
||||
.iter()
|
||||
.map(|(region_id, topic)| (*region_id, topic.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
expected.sort_by_key(|(region_id, _)| region_id.as_u64());
|
||||
assert_eq!(topic_region_map, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod selector;
|
||||
mod topic_creator;
|
||||
pub(crate) mod topic_creator;
|
||||
mod topic_manager;
|
||||
mod topic_pool;
|
||||
pub(crate) mod topic_pool;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -19,7 +19,7 @@ use std::time::Duration;
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct ProcedureConfig {
|
||||
/// Max retry times of procedure.
|
||||
|
||||
@@ -394,7 +394,7 @@ impl Default for DatanodeOptions {
|
||||
require_lease_before_startup: false,
|
||||
init_regions_in_background: false,
|
||||
init_regions_parallelism: 16,
|
||||
grpc: GrpcOptions::default().with_addr("127.0.0.1:3001"),
|
||||
grpc: GrpcOptions::default().with_bind_addr("127.0.0.1:3001"),
|
||||
http: HttpOptions::default(),
|
||||
meta_client: None,
|
||||
wal: DatanodeWalConfig::default(),
|
||||
|
||||
@@ -89,7 +89,7 @@ impl HeartbeatTask {
|
||||
node_id: opts.node_id.unwrap_or(0),
|
||||
// We use datanode's start time millis as the node's epoch.
|
||||
node_epoch: common_time::util::current_time_millis() as u64,
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.addr, Some(&opts.grpc.hostname)),
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
running: Arc::new(AtomicBool::new(false)),
|
||||
meta_client,
|
||||
region_server,
|
||||
|
||||
@@ -66,8 +66,8 @@ impl<'a> DatanodeServiceBuilder<'a> {
|
||||
let handlers = ServerHandlers::default();
|
||||
|
||||
if let Some(grpc_server) = self.grpc_server.take() {
|
||||
let addr: SocketAddr = self.opts.grpc.addr.parse().context(ParseAddrSnafu {
|
||||
addr: &self.opts.grpc.addr,
|
||||
let addr: SocketAddr = self.opts.grpc.bind_addr.parse().context(ParseAddrSnafu {
|
||||
addr: &self.opts.grpc.bind_addr,
|
||||
})?;
|
||||
let handler: ServerHandler = (Box::new(grpc_server), addr);
|
||||
handlers.insert(handler).await;
|
||||
|
||||
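For context on the `addr`/`hostname` to `bind_addr`/`server_addr` rename repeated across these hunks, a hedged sketch (the addresses are made up): `bind_addr` is the socket the gRPC server listens on, while the address advertised to metasrv is resolved from both fields.

let grpc = GrpcOptions::default().with_bind_addr("0.0.0.0:3001");
// The advertised peer address is derived from the bind address and the configured
// server address, as the heartbeat tasks in this diff do.
let peer_addr = addrs::resolve_addr(&grpc.bind_addr, Some(&grpc.server_addr));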
@@ -77,27 +77,32 @@ impl BinaryVector {
|
||||
.unwrap()
|
||||
.iter()
|
||||
{
|
||||
let v = if let Some(binary) = binary {
|
||||
let bytes_size = dim as usize * std::mem::size_of::<f32>();
|
||||
if let Ok(s) = String::from_utf8(binary.to_vec()) {
|
||||
let v = parse_string_to_vector_type_value(&s, Some(dim))?;
|
||||
Some(v)
|
||||
} else if binary.len() == dim as usize * std::mem::size_of::<f32>() {
|
||||
Some(binary.to_vec())
|
||||
} else {
|
||||
return InvalidVectorSnafu {
|
||||
msg: format!(
|
||||
"Unexpected bytes size for vector value, expected {}, got {}",
|
||||
bytes_size,
|
||||
binary.len()
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
} else {
|
||||
None
|
||||
let Some(binary) = binary else {
|
||||
vector.push(None);
|
||||
continue;
|
||||
};
|
||||
vector.push(v);
|
||||
|
||||
if let Ok(s) = String::from_utf8(binary.to_vec()) {
|
||||
if let Ok(v) = parse_string_to_vector_type_value(&s, Some(dim)) {
|
||||
vector.push(Some(v));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let expected_bytes_size = dim as usize * std::mem::size_of::<f32>();
|
||||
if binary.len() == expected_bytes_size {
|
||||
vector.push(Some(binary.to_vec()));
|
||||
continue;
|
||||
} else {
|
||||
return InvalidVectorSnafu {
|
||||
msg: format!(
|
||||
"Unexpected bytes size for vector value, expected {}, got {}",
|
||||
expected_bytes_size,
|
||||
binary.len()
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
Ok(BinaryVector::from(vector))
|
||||
}
|
||||
|
||||
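A brief hedged note on the rewritten conversion above (standalone illustration, not part of the diff): each non-null cell is accepted either as UTF-8 text that `parse_string_to_vector_type_value` understands, or as a raw payload of exactly `dim * size_of::<f32>()` bytes; anything else fails with `InvalidVectorSnafu`.

// For a dim = 2 vector column, both of these forms are accepted:
let textual: &[u8] = b"[1.0,2.0]";                        // tried as text first
let expected_bytes_size = 2 * std::mem::size_of::<f32>(); // otherwise 8 raw bytes
assert_eq!(expected_bytes_size, 8);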
@@ -387,6 +387,43 @@ impl Decimal128VectorBuilder {
|
||||
|
||||
vectors::impl_try_from_arrow_array_for_vector!(Decimal128Array, Decimal128Vector);
|
||||
|
||||
pub(crate) fn replicate_decimal128(
|
||||
vector: &Decimal128Vector,
|
||||
offsets: &[usize],
|
||||
) -> Decimal128Vector {
|
||||
assert_eq!(offsets.len(), vector.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return vector.get_slice(0, 0);
|
||||
}
|
||||
|
||||
// Safety: safe to unwrap because we the vector ensures precision and scale are valid.
|
||||
let mut builder = Decimal128VectorBuilder::with_capacity(*offsets.last().unwrap())
|
||||
.with_precision_and_scale(vector.precision(), vector.scale())
|
||||
.unwrap();
|
||||
|
||||
let mut previous_offset = 0;
|
||||
|
||||
for (offset, value) in offsets.iter().zip(vector.array.iter()) {
|
||||
let repeat_times = *offset - previous_offset;
|
||||
match value {
|
||||
Some(data) => {
|
||||
unsafe {
|
||||
// Safety: std::iter::Repeat and std::iter::Take implement TrustedLen.
|
||||
builder
|
||||
.mutable_array
|
||||
.append_trusted_len_iter(std::iter::repeat(data).take(repeat_times));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
builder.mutable_array.append_nulls(repeat_times);
|
||||
}
|
||||
}
|
||||
previous_offset = *offset;
|
||||
}
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
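The `offsets` slice holds cumulative end positions, one per source element; a hedged standalone illustration (the same behaviour the test below exercises):

// With values [100, 200] and offsets [2, 5], element 0 is repeated 2 times and
// element 1 is repeated 5 - 2 = 3 times, so the result has length 5.
let v = Decimal128Vector::from_values(vec![100, 200])
    .with_precision_and_scale(10, 2)
    .unwrap();
let replicated = replicate_decimal128(&v, &[2, 5]);
assert_eq!(replicated.len(), 5);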
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use arrow_array::Decimal128Array;
|
||||
|
||||
@@ -114,13 +114,30 @@ macro_rules! impl_scalar_vector_op {
|
||||
)+};
|
||||
}
|
||||
|
||||
impl_scalar_vector_op!(
|
||||
BinaryVector,
|
||||
BooleanVector,
|
||||
ListVector,
|
||||
StringVector,
|
||||
Decimal128Vector
|
||||
);
|
||||
impl_scalar_vector_op!(BinaryVector, BooleanVector, ListVector, StringVector);
|
||||
|
||||
impl VectorOp for Decimal128Vector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
std::sync::Arc::new(replicate::replicate_decimal128(self, offsets))
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<Decimal128Vector>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, Decimal128Vector, filter)
|
||||
}
|
||||
|
||||
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
|
||||
cast::cast_non_constant!(self, to_type)
|
||||
}
|
||||
|
||||
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
|
||||
take::take_indices!(self, Decimal128Vector, indices)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> VectorOp for PrimitiveVector<T> {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use crate::prelude::*;
|
||||
pub(crate) use crate::vectors::decimal::replicate_decimal128;
|
||||
pub(crate) use crate::vectors::null::replicate_null;
|
||||
pub(crate) use crate::vectors::primitive::replicate_primitive;
|
||||
|
||||
@@ -45,7 +46,7 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{Int32Vector, NullVector, StringVector, VectorOp};
|
||||
use crate::vectors::{Decimal128Vector, Int32Vector, NullVector, StringVector, VectorOp};
|
||||
|
||||
#[test]
|
||||
fn test_replicate_primitive() {
|
||||
@@ -167,4 +168,23 @@ mod tests {
|
||||
impl_replicate_timestamp_test!(Microsecond);
|
||||
impl_replicate_timestamp_test!(Nanosecond);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replicate_decimal() {
|
||||
let data = vec![100];
|
||||
// create a decimal vector
|
||||
let v = Decimal128Vector::from_values(data.clone())
|
||||
.with_precision_and_scale(10, 2)
|
||||
.unwrap();
|
||||
let offsets = [5];
|
||||
let v = v.replicate(&offsets);
|
||||
assert_eq!(5, v.len());
|
||||
|
||||
let expect: VectorRef = Arc::new(
|
||||
Decimal128Vector::from_values(vec![100; 5])
|
||||
.with_precision_and_scale(10, 2)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(expect, v);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,10 +80,12 @@ impl<T: LogicalPrimitiveType> PrimitiveVector<T> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_vec(array: Vec<T::Native>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(array),
|
||||
}
|
||||
pub fn from_vec(vector: Vec<T::Native>) -> Self {
|
||||
let mutable_buffer = arrow::buffer::MutableBuffer::from(vector);
|
||||
let mut primitive_builder =
|
||||
PrimitiveBuilder::<T::ArrowPrimitive>::new_from_buffer(mutable_buffer, None);
|
||||
let array = primitive_builder.finish();
|
||||
Self { array }
|
||||
}
|
||||
|
||||
pub fn from_iter_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
|
||||
|
||||
@@ -121,7 +121,7 @@ impl Default for FlownodeOptions {
|
||||
cluster_id: None,
|
||||
node_id: None,
|
||||
flow: FlowConfig::default(),
|
||||
grpc: GrpcOptions::default().with_addr("127.0.0.1:3004"),
|
||||
grpc: GrpcOptions::default().with_bind_addr("127.0.0.1:3004"),
|
||||
http: HttpOptions::default(),
|
||||
meta_client: None,
|
||||
logging: LoggingOptions::default(),
|
||||
|
||||
@@ -83,7 +83,7 @@ impl HeartbeatTask {
|
||||
) -> Self {
|
||||
Self {
|
||||
node_id: opts.node_id.unwrap_or(0),
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.addr, Some(&opts.grpc.hostname)),
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
meta_client,
|
||||
report_interval: heartbeat_opts.interval,
|
||||
retry_interval: heartbeat_opts.retry_interval,
|
||||
|
||||
@@ -334,7 +334,7 @@ impl FlownodeBuilder {
|
||||
|
||||
let heartbeat_task = self.heartbeat_task;
|
||||
|
||||
let addr = self.opts.grpc.addr;
|
||||
let addr = self.opts.grpc.bind_addr;
|
||||
let instance = FlownodeInstance {
|
||||
server,
|
||||
addr: addr.parse().context(ParseAddrSnafu { addr })?,
|
||||
|
||||
@@ -51,7 +51,6 @@ prometheus.workspace = true
|
||||
promql-parser.workspace = true
|
||||
prost.workspace = true
|
||||
query.workspace = true
|
||||
raft-engine.workspace = true
|
||||
serde.workspace = true
|
||||
servers.workspace = true
|
||||
session.workspace = true
|
||||
|
||||
@@ -56,7 +56,7 @@ impl HeartbeatTask {
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
) -> Self {
|
||||
HeartbeatTask {
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.addr, Some(&opts.grpc.hostname)),
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
meta_client,
|
||||
report_interval: heartbeat_opts.interval.as_millis() as u64,
|
||||
retry_interval: heartbeat_opts.retry_interval.as_millis() as u64,
|
||||
|
||||
@@ -40,7 +40,7 @@ use common_procedure::local::{LocalManager, ManagerConfig};
|
||||
use common_procedure::options::ProcedureConfig;
|
||||
use common_procedure::ProcedureManagerRef;
|
||||
use common_query::Output;
|
||||
use common_telemetry::{debug, error, tracing};
|
||||
use common_telemetry::{debug, error, info, tracing};
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use log_store::raft_engine::RaftEngineBackend;
|
||||
use operator::delete::DeleterRef;
|
||||
@@ -55,7 +55,6 @@ use query::query_engine::options::{validate_catalog_and_schema, QueryOptions};
|
||||
use query::query_engine::DescribeResult;
|
||||
use query::stats::StatementStatistics;
|
||||
use query::QueryEngineRef;
|
||||
use raft_engine::{Config, ReadableSize, RecoveryMode};
|
||||
use servers::error as server_error;
|
||||
use servers::error::{AuthSnafu, ExecuteQuerySnafu, ParsePromQLSnafu};
|
||||
use servers::export_metrics::ExportMetricsTask;
|
||||
@@ -134,19 +133,15 @@ impl Instance {
|
||||
kv_backend_config: KvBackendConfig,
|
||||
procedure_config: ProcedureConfig,
|
||||
) -> Result<(KvBackendRef, ProcedureManagerRef)> {
|
||||
let kv_backend = Arc::new(
|
||||
RaftEngineBackend::try_open_with_cfg(Config {
|
||||
dir,
|
||||
purge_threshold: ReadableSize(kv_backend_config.purge_threshold.0),
|
||||
recovery_mode: RecoveryMode::TolerateTailCorruption,
|
||||
batch_compression_threshold: ReadableSize::kb(8),
|
||||
target_file_size: ReadableSize(kv_backend_config.file_size.0),
|
||||
..Default::default()
|
||||
})
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::OpenRaftEngineBackendSnafu)?,
|
||||
info!(
|
||||
"Creating metadata kvbackend with config: {:?}",
|
||||
kv_backend_config
|
||||
);
|
||||
let kv_backend = RaftEngineBackend::try_open_with_cfg(dir, &kv_backend_config)
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::OpenRaftEngineBackendSnafu)?;
|
||||
|
||||
let kv_backend = Arc::new(kv_backend);
|
||||
let state_store = Arc::new(KvStateStore::new(kv_backend.clone()));
|
||||
|
||||
let manager_config = ManagerConfig {
|
||||
|
||||
@@ -20,11 +20,11 @@ use common_telemetry::tracing;
|
||||
use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use pipeline::PipelineWay;
|
||||
use pipeline::{GreptimePipelineParams, PipelineWay};
|
||||
use servers::error::{self, AuthSnafu, InFlightWriteBytesExceededSnafu, Result as ServerResult};
|
||||
use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
|
||||
use servers::otlp;
|
||||
use servers::query_handler::OpenTelemetryProtocolHandler;
|
||||
use servers::query_handler::{OpenTelemetryProtocolHandler, PipelineHandlerRef};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -112,8 +112,10 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn logs(
|
||||
&self,
|
||||
pipeline_handler: PipelineHandlerRef,
|
||||
request: ExportLogsServiceRequest,
|
||||
pipeline: PipelineWay,
|
||||
pipeline_params: GreptimePipelineParams,
|
||||
table_name: String,
|
||||
ctx: QueryContextRef,
|
||||
) -> ServerResult<Output> {
|
||||
@@ -128,7 +130,15 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
.get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
|
||||
interceptor_ref.pre_execute(ctx.clone())?;
|
||||
|
||||
let (requests, rows) = otlp::logs::to_grpc_insert_requests(request, pipeline, table_name)?;
|
||||
let (requests, rows) = otlp::logs::to_grpc_insert_requests(
|
||||
request,
|
||||
pipeline,
|
||||
pipeline_params,
|
||||
table_name,
|
||||
&ctx,
|
||||
pipeline_handler,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
let result = limiter.limit_row_inserts(&requests);
|
||||
|
||||
@@ -193,7 +193,7 @@ where
|
||||
|
||||
{
|
||||
// Always init GRPC server
|
||||
let grpc_addr = parse_addr(&opts.grpc.addr)?;
|
||||
let grpc_addr = parse_addr(&opts.grpc.bind_addr)?;
|
||||
let grpc_server = self.build_grpc_server(&opts)?;
|
||||
handlers.insert((Box::new(grpc_server), grpc_addr)).await;
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ async-trait.workspace = true
bytes.workspace = true
chrono.workspace = true
common-base.workspace = true
common-config.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-meta.workspace = true

@@ -17,8 +17,9 @@
use std::any::Any;
use std::ops::Bound::{Excluded, Included, Unbounded};
use std::path::Path;
use std::sync::RwLock;
use std::sync::{Arc, RwLock};

use common_config::KvBackendConfig;
use common_error::ext::BoxedError;
use common_meta::error as meta_error;
use common_meta::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse};
@@ -30,16 +31,19 @@ use common_meta::rpc::store::{
};
use common_meta::rpc::KeyValue;
use common_meta::util::get_next_prefix_key;
use raft_engine::{Config, Engine, LogBatch};
use common_runtime::RepeatedTask;
use raft_engine::{Config, Engine, LogBatch, ReadableSize, RecoveryMode};
use snafu::{IntoError, ResultExt};

use crate::error::{self, IoSnafu, RaftEngineSnafu};
use crate::error::{self, Error, IoSnafu, RaftEngineSnafu, StartGcTaskSnafu};
use crate::raft_engine::log_store::PurgeExpiredFilesFunction;

pub(crate) const SYSTEM_NAMESPACE: u64 = 0;

/// RaftEngine based [KvBackend] implementation.
pub struct RaftEngineBackend {
engine: RwLock<Engine>,
engine: RwLock<Arc<Engine>>,
_gc_task: RepeatedTask<Error>,
}

fn ensure_dir(dir: &str) -> error::Result<()> {
@@ -65,15 +69,34 @@ fn ensure_dir(dir: &str) -> error::Result<()> {
}

impl RaftEngineBackend {
pub fn try_open_with_cfg(config: Config) -> error::Result<Self> {
ensure_dir(&config.dir)?;
if let Some(spill_dir) = &config.spill_dir {
pub fn try_open_with_cfg(dir: String, config: &KvBackendConfig) -> error::Result<Self> {
let cfg = Config {
dir: dir.to_string(),
purge_threshold: ReadableSize(config.purge_threshold.0),
recovery_mode: RecoveryMode::TolerateTailCorruption,
batch_compression_threshold: ReadableSize::kb(8),
target_file_size: ReadableSize(config.file_size.0),
..Default::default()
};

ensure_dir(&dir)?;
if let Some(spill_dir) = &cfg.spill_dir {
ensure_dir(spill_dir)?;
}

let engine = Engine::open(config).context(RaftEngineSnafu)?;
let engine = Arc::new(Engine::open(cfg).context(RaftEngineSnafu)?);
let gc_task = RepeatedTask::new(
config.purge_interval,
Box::new(PurgeExpiredFilesFunction {
engine: engine.clone(),
}),
);
gc_task
.start(common_runtime::global_runtime())
.context(StartGcTaskSnafu)?;
Ok(Self {
engine: RwLock::new(engine),
_gc_task: gc_task,
})
}
}
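The new constructor above also starts a periodic purge task for expired raft-engine log files through `common_runtime::RepeatedTask`. As a rough illustration only (this is not the `RepeatedTask` API), the pattern amounts to a background loop that calls a purge function on a fixed interval; the `Engine` type and `purge_expired_files` below are simplified stand-ins, and a `tokio` dependency is assumed.

```rust
use std::sync::Arc;
use std::time::Duration;

/// Simplified stand-in for the raft-engine handle used by the real purge task.
struct Engine;

impl Engine {
    fn purge_expired_files(&self) -> Result<(), String> {
        Ok(())
    }
}

/// Spawns a background loop that purges expired log files on a fixed interval,
/// which is roughly what the repeated task started in `try_open_with_cfg` does.
fn start_gc_task(engine: Arc<Engine>, every: Duration) -> tokio::task::JoinHandle<()> {
    tokio::spawn(async move {
        let mut ticker = tokio::time::interval(every);
        loop {
            ticker.tick().await;
            if let Err(e) = engine.purge_expired_files() {
                eprintln!("purge of expired raft-engine files failed: {e}");
            }
        }
    })
}

#[tokio::main]
async fn main() {
    // The backend keeps the task handle alive for its own lifetime (`_gc_task`).
    let _gc_task = start_gc_task(Arc::new(Engine), Duration::from_secs(600));
    tokio::time::sleep(Duration::from_millis(10)).await;
}
```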
@@ -398,21 +421,11 @@ mod tests {
};
use common_meta::rpc::store::{CompareAndPutRequest, CompareAndPutResponse};
use common_test_util::temp_dir::create_temp_dir;
use raft_engine::{Config, ReadableSize, RecoveryMode};

use super::*;

fn build_kv_backend(dir: String) -> RaftEngineBackend {
let config = Config {
dir,
spill_dir: None,
recovery_mode: RecoveryMode::AbsoluteConsistency,
target_file_size: ReadableSize::mb(4),
purge_threshold: ReadableSize::mb(16),
..Default::default()
};
let engine = RwLock::new(Engine::open(config).unwrap());
RaftEngineBackend { engine }
RaftEngineBackend::try_open_with_cfg(dir, &KvBackendConfig::default()).unwrap()
}

#[tokio::test]

@@ -50,7 +50,7 @@ pub struct RaftEngineLogStore {
|
||||
}
|
||||
|
||||
pub struct PurgeExpiredFilesFunction {
|
||||
engine: Arc<Engine>,
|
||||
pub engine: Arc<Engine>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
|
||||
@@ -203,7 +203,7 @@ impl Configurable for MetasrvOptions {
|
||||
}
|
||||
|
||||
impl MetasrvOptions {
|
||||
/// Detect server address if `auto_server_addr` is true.
|
||||
/// Detect server address.
|
||||
#[cfg(not(target_os = "android"))]
|
||||
pub fn detect_server_addr(&mut self) {
|
||||
if self.server_addr.is_empty() {
|
||||
|
||||
@@ -163,8 +163,18 @@ impl RegionEngine for MetricEngine {
|
||||
}
|
||||
}
|
||||
RegionRequest::Flush(req) => self.inner.flush_region(region_id, req).await,
|
||||
RegionRequest::Delete(_) | RegionRequest::Truncate(_) => {
|
||||
UnsupportedRegionRequestSnafu { request }.fail()
|
||||
RegionRequest::Truncate(_) => UnsupportedRegionRequestSnafu { request }.fail(),
|
||||
RegionRequest::Delete(_) => {
|
||||
if self.inner.is_physical_region(region_id) {
|
||||
self.inner
|
||||
.mito
|
||||
.handle_request(region_id, request)
|
||||
.await
|
||||
.context(error::MitoDeleteOperationSnafu)
|
||||
.map(|response| response.affected_rows)
|
||||
} else {
|
||||
UnsupportedRegionRequestSnafu { request }.fail()
|
||||
}
|
||||
}
|
||||
RegionRequest::Catchup(req) => self.inner.catchup_region(region_id, req).await,
|
||||
};
|
||||
|
||||
@@ -125,6 +125,12 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Mito delete operation fails"))]
|
||||
MitoDeleteOperation {
|
||||
source: BoxedError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Mito catchup operation fails"))]
|
||||
MitoCatchupOperation {
|
||||
@@ -288,7 +294,8 @@ impl ErrorExt for Error {
|
||||
| MitoReadOperation { source, .. }
|
||||
| MitoWriteOperation { source, .. }
|
||||
| MitoCatchupOperation { source, .. }
|
||||
| MitoFlushOperation { source, .. } => source.status_code(),
|
||||
| MitoFlushOperation { source, .. }
|
||||
| MitoDeleteOperation { source, .. } => source.status_code(),
|
||||
|
||||
EncodePrimaryKey { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -17,10 +17,12 @@ use std::sync::Arc;
|
||||
use object_store::services::Fs;
|
||||
use object_store::util::{join_dir, with_instrument_layers};
|
||||
use object_store::ObjectStore;
|
||||
use smallvec::SmallVec;
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::SequenceNumber;
|
||||
use store_api::storage::{RegionId, SequenceNumber};
|
||||
|
||||
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
|
||||
use crate::cache::write_cache::SstUploadRequest;
|
||||
use crate::cache::CacheManagerRef;
|
||||
use crate::config::{BloomFilterConfig, FulltextIndexConfig, InvertedIndexConfig};
|
||||
@@ -30,13 +32,15 @@ use crate::region::options::IndexOptions;
|
||||
use crate::sst::file::{FileHandle, FileId, FileMeta};
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
use crate::sst::index::puffin_manager::PuffinManagerFactory;
|
||||
use crate::sst::index::IndexerBuilder;
|
||||
use crate::sst::index::IndexerBuilderImpl;
|
||||
use crate::sst::location;
|
||||
use crate::sst::parquet::reader::ParquetReaderBuilder;
|
||||
use crate::sst::parquet::writer::ParquetWriter;
|
||||
use crate::sst::parquet::{SstInfo, WriteOptions};
|
||||
|
||||
pub type AccessLayerRef = Arc<AccessLayer>;
|
||||
/// SST write results.
|
||||
pub type SstInfoArray = SmallVec<[SstInfo; 2]>;
|
||||
|
||||
/// A layer to access SST files under the same directory.
|
||||
pub struct AccessLayer {
|
||||
@@ -121,11 +125,8 @@ impl AccessLayer {
|
||||
&self,
|
||||
request: SstWriteRequest,
|
||||
write_opts: &WriteOptions,
|
||||
) -> Result<Option<SstInfo>> {
|
||||
let file_path = location::sst_file_path(&self.region_dir, request.file_id);
|
||||
let index_file_path = location::index_file_path(&self.region_dir, request.file_id);
|
||||
) -> Result<SstInfoArray> {
|
||||
let region_id = request.metadata.region_id;
|
||||
let file_id = request.file_id;
|
||||
let cache_manager = request.cache_manager.clone();
|
||||
|
||||
let sst_info = if let Some(write_cache) = cache_manager.write_cache() {
|
||||
@@ -134,8 +135,9 @@ impl AccessLayer {
|
||||
.write_and_upload_sst(
|
||||
request,
|
||||
SstUploadRequest {
|
||||
upload_path: file_path,
|
||||
index_upload_path: index_file_path,
|
||||
dest_path_provider: RegionFilePathFactory {
|
||||
region_dir: self.region_dir.clone(),
|
||||
},
|
||||
remote_store: self.object_store.clone(),
|
||||
},
|
||||
write_opts,
|
||||
@@ -144,11 +146,9 @@ impl AccessLayer {
|
||||
} else {
|
||||
// Write cache is disabled.
|
||||
let store = self.object_store.clone();
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer_builder = IndexerBuilderImpl {
|
||||
op_type: request.op_type,
|
||||
file_id,
|
||||
file_path: index_file_path,
|
||||
metadata: &request.metadata,
|
||||
metadata: request.metadata.clone(),
|
||||
row_group_size: write_opts.row_group_size,
|
||||
puffin_manager: self.puffin_manager_factory.build(store),
|
||||
intermediate_manager: self.intermediate_manager.clone(),
|
||||
@@ -156,24 +156,31 @@ impl AccessLayer {
|
||||
inverted_index_config: request.inverted_index_config,
|
||||
fulltext_index_config: request.fulltext_index_config,
|
||||
bloom_filter_index_config: request.bloom_filter_index_config,
|
||||
}
|
||||
.build()
|
||||
.await;
|
||||
};
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
self.object_store.clone(),
|
||||
file_path,
|
||||
request.metadata,
|
||||
indexer,
|
||||
);
|
||||
indexer_builder,
|
||||
RegionFilePathFactory {
|
||||
region_dir: self.region_dir.clone(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
writer
|
||||
.write_all(request.source, request.max_sequence, write_opts)
|
||||
.await?
|
||||
};
|
||||
|
||||
// Put parquet metadata to cache manager.
|
||||
if let Some(sst_info) = &sst_info {
|
||||
if let Some(parquet_metadata) = &sst_info.file_metadata {
|
||||
cache_manager.put_parquet_meta_data(region_id, file_id, parquet_metadata.clone())
|
||||
if !sst_info.is_empty() {
|
||||
for sst in &sst_info {
|
||||
if let Some(parquet_metadata) = &sst.file_metadata {
|
||||
cache_manager.put_parquet_meta_data(
|
||||
region_id,
|
||||
sst.file_id,
|
||||
parquet_metadata.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,7 +198,6 @@ pub(crate) enum OperationType {
|
||||
/// Contents to build a SST.
|
||||
pub(crate) struct SstWriteRequest {
|
||||
pub(crate) op_type: OperationType,
|
||||
pub(crate) file_id: FileId,
|
||||
pub(crate) metadata: RegionMetadataRef,
|
||||
pub(crate) source: Source,
|
||||
pub(crate) cache_manager: CacheManagerRef,
|
||||
@@ -229,3 +235,47 @@ async fn clean_dir(dir: &str) -> Result<()> {

Ok(())
}

/// Path provider for SST file and index file.
pub trait FilePathProvider: Send + Sync {
/// Creates index file path of given file id.
fn build_index_file_path(&self, file_id: FileId) -> String;

/// Creates SST file path of given file id.
fn build_sst_file_path(&self, file_id: FileId) -> String;
}

/// Path provider that builds paths in local write cache.
#[derive(Clone)]
pub(crate) struct WriteCachePathProvider {
pub(crate) region_id: RegionId,
pub(crate) file_cache: FileCacheRef,
}

impl FilePathProvider for WriteCachePathProvider {
fn build_index_file_path(&self, file_id: FileId) -> String {
let puffin_key = IndexKey::new(self.region_id, file_id, FileType::Puffin);
self.file_cache.cache_file_path(puffin_key)
}

fn build_sst_file_path(&self, file_id: FileId) -> String {
let parquet_file_key = IndexKey::new(self.region_id, file_id, FileType::Parquet);
self.file_cache.cache_file_path(parquet_file_key)
}
}

/// Path provider that builds paths in region storage path.
#[derive(Clone, Debug)]
pub(crate) struct RegionFilePathFactory {
pub(crate) region_dir: String,
}

impl FilePathProvider for RegionFilePathFactory {
fn build_index_file_path(&self, file_id: FileId) -> String {
location::index_file_path(&self.region_dir, file_id)
}

fn build_sst_file_path(&self, file_id: FileId) -> String {
location::sst_file_path(&self.region_dir, file_id)
}
}
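The `FilePathProvider` trait introduced above lets the SST writer stay agnostic about whether its output lands in the region directory or in the local write cache. A minimal, self-contained sketch of the same idea follows; the `FileId` alias, the `RegionPaths` provider, and the path formats are illustrative stand-ins, not the actual mito2 types or layout.

```rust
/// Illustrative stand-in for mito2's `FileId`.
type FileId = String;

/// Same shape as the trait in the diff: writers only ask a provider for destinations.
trait FilePathProvider {
    fn build_sst_file_path(&self, file_id: &FileId) -> String;
    fn build_index_file_path(&self, file_id: &FileId) -> String;
}

/// Hypothetical provider that lays files out under a region directory.
struct RegionPaths {
    region_dir: String,
}

impl FilePathProvider for RegionPaths {
    fn build_sst_file_path(&self, file_id: &FileId) -> String {
        format!("{}/{}.parquet", self.region_dir, file_id)
    }

    fn build_index_file_path(&self, file_id: &FileId) -> String {
        format!("{}/index/{}.puffin", self.region_dir, file_id)
    }
}

/// Writer-side code never builds paths itself, so the same code path can
/// target either a remote region directory or a local write cache.
fn plan_outputs(provider: &dyn FilePathProvider, file_id: &FileId) -> (String, String) {
    (
        provider.build_sst_file_path(file_id),
        provider.build_index_file_path(file_id),
    )
}

fn main() {
    let provider = RegionPaths {
        region_dir: "data/region_42".to_string(),
    };
    let (sst, index) = plan_outputs(&provider, &"0195a1b2".to_string());
    println!("sst: {sst}\nindex: {index}");
}
```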
src/mito2/src/cache/write_cache.rs
@@ -23,7 +23,10 @@ use futures::AsyncWriteExt;
|
||||
use object_store::ObjectStore;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::access_layer::{new_fs_cache_store, SstWriteRequest};
|
||||
use crate::access_layer::{
|
||||
new_fs_cache_store, FilePathProvider, RegionFilePathFactory, SstInfoArray, SstWriteRequest,
|
||||
WriteCachePathProvider,
|
||||
};
|
||||
use crate::cache::file_cache::{FileCache, FileCacheRef, FileType, IndexKey, IndexValue};
|
||||
use crate::error::{self, Result};
|
||||
use crate::metrics::{
|
||||
@@ -32,9 +35,9 @@ use crate::metrics::{
|
||||
};
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
use crate::sst::index::puffin_manager::PuffinManagerFactory;
|
||||
use crate::sst::index::IndexerBuilder;
|
||||
use crate::sst::index::IndexerBuilderImpl;
|
||||
use crate::sst::parquet::writer::ParquetWriter;
|
||||
use crate::sst::parquet::{SstInfo, WriteOptions};
|
||||
use crate::sst::parquet::WriteOptions;
|
||||
use crate::sst::{DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY};
|
||||
|
||||
/// A cache for uploading files to remote object stores.
|
||||
@@ -103,22 +106,21 @@ impl WriteCache {
|
||||
write_request: SstWriteRequest,
|
||||
upload_request: SstUploadRequest,
|
||||
write_opts: &WriteOptions,
|
||||
) -> Result<Option<SstInfo>> {
|
||||
) -> Result<SstInfoArray> {
|
||||
let timer = FLUSH_ELAPSED
|
||||
.with_label_values(&["write_sst"])
|
||||
.start_timer();
|
||||
|
||||
let region_id = write_request.metadata.region_id;
|
||||
let file_id = write_request.file_id;
|
||||
let parquet_key = IndexKey::new(region_id, file_id, FileType::Parquet);
|
||||
let puffin_key = IndexKey::new(region_id, file_id, FileType::Puffin);
|
||||
|
||||
let store = self.file_cache.local_store();
|
||||
let indexer = IndexerBuilder {
|
||||
let path_provider = WriteCachePathProvider {
|
||||
file_cache: self.file_cache.clone(),
|
||||
region_id,
|
||||
};
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: write_request.op_type,
|
||||
file_id,
|
||||
file_path: self.file_cache.cache_file_path(puffin_key),
|
||||
metadata: &write_request.metadata,
|
||||
metadata: write_request.metadata.clone(),
|
||||
row_group_size: write_opts.row_group_size,
|
||||
puffin_manager: self.puffin_manager_factory.build(store),
|
||||
intermediate_manager: self.intermediate_manager.clone(),
|
||||
@@ -126,17 +128,16 @@ impl WriteCache {
|
||||
inverted_index_config: write_request.inverted_index_config,
|
||||
fulltext_index_config: write_request.fulltext_index_config,
|
||||
bloom_filter_index_config: write_request.bloom_filter_index_config,
|
||||
}
|
||||
.build()
|
||||
.await;
|
||||
};
|
||||
|
||||
// Write to FileCache.
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
self.file_cache.local_store(),
|
||||
self.file_cache.cache_file_path(parquet_key),
|
||||
write_request.metadata,
|
||||
indexer,
|
||||
);
|
||||
path_provider,
|
||||
)
|
||||
.await;
|
||||
|
||||
let sst_info = writer
|
||||
.write_all(write_request.source, write_request.max_sequence, write_opts)
|
||||
@@ -145,22 +146,29 @@ impl WriteCache {
|
||||
timer.stop_and_record();
|
||||
|
||||
// Upload sst file to remote object store.
|
||||
let Some(sst_info) = sst_info else {
|
||||
// No data need to upload.
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let parquet_path = &upload_request.upload_path;
|
||||
let remote_store = &upload_request.remote_store;
|
||||
self.upload(parquet_key, parquet_path, remote_store).await?;
|
||||
|
||||
if sst_info.index_metadata.file_size > 0 {
|
||||
let puffin_key = IndexKey::new(region_id, file_id, FileType::Puffin);
|
||||
let puffin_path = &upload_request.index_upload_path;
|
||||
self.upload(puffin_key, puffin_path, remote_store).await?;
|
||||
if sst_info.is_empty() {
|
||||
return Ok(sst_info);
|
||||
}
|
||||
|
||||
Ok(Some(sst_info))
|
||||
let remote_store = &upload_request.remote_store;
|
||||
for sst in &sst_info {
|
||||
let parquet_key = IndexKey::new(region_id, sst.file_id, FileType::Parquet);
|
||||
let parquet_path = upload_request
|
||||
.dest_path_provider
|
||||
.build_sst_file_path(sst.file_id);
|
||||
self.upload(parquet_key, &parquet_path, remote_store)
|
||||
.await?;
|
||||
|
||||
if sst.index_metadata.file_size > 0 {
|
||||
let puffin_key = IndexKey::new(region_id, sst.file_id, FileType::Puffin);
|
||||
let puffin_path = &upload_request
|
||||
.dest_path_provider
|
||||
.build_index_file_path(sst.file_id);
|
||||
self.upload(puffin_key, puffin_path, remote_store).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(sst_info)
|
||||
}
|
||||
|
||||
/// Removes a file from the cache by `index_key`.
|
||||
@@ -319,10 +327,8 @@ impl WriteCache {
|
||||
|
||||
/// Request to write and upload a SST.
|
||||
pub struct SstUploadRequest {
|
||||
/// Path to upload the file.
|
||||
pub upload_path: String,
|
||||
/// Path to upload the index file.
|
||||
pub index_upload_path: String,
|
||||
/// Destination path provider of which SST files in write cache should be uploaded to.
|
||||
pub dest_path_provider: RegionFilePathFactory,
|
||||
/// Remote object store to upload.
|
||||
pub remote_store: ObjectStore,
|
||||
}
|
||||
@@ -336,11 +342,9 @@ mod tests {
|
||||
use crate::cache::test_util::new_fs_store;
|
||||
use crate::cache::{CacheManager, CacheStrategy};
|
||||
use crate::region::options::IndexOptions;
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::location::{index_file_path, sst_file_path};
|
||||
use crate::sst::parquet::reader::ParquetReaderBuilder;
|
||||
use crate::test_util::sst_util::{
|
||||
assert_parquet_metadata_eq, new_batch_by_range, new_source, sst_file_handle,
|
||||
assert_parquet_metadata_eq, new_batch_by_range, new_source, sst_file_handle_with_file_id,
|
||||
sst_region_metadata,
|
||||
};
|
||||
use crate::test_util::TestEnv;
|
||||
@@ -351,9 +355,9 @@ mod tests {
|
||||
// and now just use local file system to mock.
|
||||
let mut env = TestEnv::new();
|
||||
let mock_store = env.init_object_store_manager();
|
||||
let file_id = FileId::random();
|
||||
let upload_path = sst_file_path("test", file_id);
|
||||
let index_upload_path = index_file_path("test", file_id);
|
||||
let path_provider = RegionFilePathFactory {
|
||||
region_dir: "test".to_string(),
|
||||
};
|
||||
|
||||
let local_dir = create_temp_dir("");
|
||||
let local_store = new_fs_store(local_dir.path().to_str().unwrap());
|
||||
@@ -373,7 +377,6 @@ mod tests {
|
||||
|
||||
let write_request = SstWriteRequest {
|
||||
op_type: OperationType::Flush,
|
||||
file_id,
|
||||
metadata,
|
||||
source,
|
||||
storage: None,
|
||||
@@ -386,8 +389,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let upload_request = SstUploadRequest {
|
||||
upload_path: upload_path.clone(),
|
||||
index_upload_path: index_upload_path.clone(),
|
||||
dest_path_provider: path_provider.clone(),
|
||||
remote_store: mock_store.clone(),
|
||||
};
|
||||
|
||||
@@ -397,18 +399,22 @@ mod tests {
|
||||
};
|
||||
|
||||
// Write to cache and upload sst to mock remote store
|
||||
write_cache
|
||||
let sst_info = write_cache
|
||||
.write_and_upload_sst(write_request, upload_request, &write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
.remove(0); //todo(hl): we assume it only creates one file.
|
||||
|
||||
let file_id = sst_info.file_id;
|
||||
let sst_upload_path = path_provider.build_sst_file_path(file_id);
|
||||
let index_upload_path = path_provider.build_index_file_path(file_id);
|
||||
|
||||
// Check write cache contains the key
|
||||
let key = IndexKey::new(region_id, file_id, FileType::Parquet);
|
||||
assert!(write_cache.file_cache.contains_key(&key));
|
||||
|
||||
// Check file data
|
||||
let remote_data = mock_store.read(&upload_path).await.unwrap();
|
||||
let remote_data = mock_store.read(&sst_upload_path).await.unwrap();
|
||||
let cache_data = local_store
|
||||
.read(&write_cache.file_cache.cache_file_path(key))
|
||||
.await
|
||||
@@ -436,6 +442,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_read_metadata_from_write_cache() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mut env = TestEnv::new();
|
||||
let data_home = env.data_home().display().to_string();
|
||||
let mock_store = env.init_object_store_manager();
|
||||
@@ -456,8 +463,7 @@ mod tests {
|
||||
|
||||
// Create source
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let handle = sst_file_handle(0, 1000);
|
||||
let file_id = handle.file_id();
|
||||
|
||||
let source = new_source(&[
|
||||
new_batch_by_range(&["a", "d"], 0, 60),
|
||||
new_batch_by_range(&["b", "f"], 0, 40),
|
||||
@@ -467,7 +473,6 @@ mod tests {
|
||||
// Write to local cache and upload sst to mock remote store
|
||||
let write_request = SstWriteRequest {
|
||||
op_type: OperationType::Flush,
|
||||
file_id,
|
||||
metadata,
|
||||
source,
|
||||
storage: None,
|
||||
@@ -482,11 +487,10 @@ mod tests {
|
||||
row_group_size: 512,
|
||||
..Default::default()
|
||||
};
|
||||
let upload_path = sst_file_path(&data_home, file_id);
|
||||
let index_upload_path = index_file_path(&data_home, file_id);
|
||||
let upload_request = SstUploadRequest {
|
||||
upload_path: upload_path.clone(),
|
||||
index_upload_path: index_upload_path.clone(),
|
||||
dest_path_provider: RegionFilePathFactory {
|
||||
region_dir: data_home.clone(),
|
||||
},
|
||||
remote_store: mock_store.clone(),
|
||||
};
|
||||
|
||||
@@ -494,10 +498,11 @@ mod tests {
|
||||
.write_and_upload_sst(write_request, upload_request, &write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
.remove(0);
|
||||
let write_parquet_metadata = sst_info.file_metadata.unwrap();
|
||||
|
||||
// Read metadata from write cache
|
||||
let handle = sst_file_handle_with_file_id(sst_info.file_id, 0, 1000);
|
||||
let builder = ParquetReaderBuilder::new(data_home, handle.clone(), mock_store.clone())
|
||||
.cache(CacheStrategy::EnableAll(cache_manager.clone()));
|
||||
let reader = builder.build().await.unwrap();
|
||||
|
||||
@@ -68,7 +68,7 @@ use crate::schedule::remote_job_scheduler::{
|
||||
CompactionJob, DefaultNotifier, RemoteJob, RemoteJobSchedulerRef,
|
||||
};
|
||||
use crate::schedule::scheduler::SchedulerRef;
|
||||
use crate::sst::file::{FileHandle, FileId, FileMeta, Level};
|
||||
use crate::sst::file::{FileHandle, FileMeta, Level};
|
||||
use crate::sst::version::LevelMeta;
|
||||
use crate::worker::WorkerListener;
|
||||
|
||||
@@ -596,7 +596,6 @@ impl CompactionStatus {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompactionOutput {
|
||||
pub output_file_id: FileId,
|
||||
/// Compaction output file level.
|
||||
pub output_level: Level,
|
||||
/// Compaction input files.
|
||||
@@ -610,7 +609,6 @@ pub struct CompactionOutput {
|
||||
/// SerializedCompactionOutput is a serialized version of [CompactionOutput] by replacing [FileHandle] with [FileMeta].
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SerializedCompactionOutput {
|
||||
output_file_id: FileId,
|
||||
output_level: Level,
|
||||
inputs: Vec<FileMeta>,
|
||||
filter_deleted: bool,
|
||||
|
||||
@@ -20,6 +20,7 @@ use api::v1::region::compact_request;
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_telemetry::{info, warn};
|
||||
use common_time::TimeToLive;
|
||||
use itertools::Itertools;
|
||||
use object_store::manager::ObjectStoreManagerRef;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -278,19 +279,6 @@ impl Compactor for DefaultCompactor {
|
||||
|
||||
for output in picker_output.outputs.drain(..) {
|
||||
compacted_inputs.extend(output.inputs.iter().map(|f| f.meta_ref().clone()));
|
||||
|
||||
info!(
|
||||
"Compaction region {} output [{}]-> {}",
|
||||
compaction_region.region_id,
|
||||
output
|
||||
.inputs
|
||||
.iter()
|
||||
.map(|f| f.file_id().to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(","),
|
||||
output.output_file_id
|
||||
);
|
||||
|
||||
let write_opts = WriteOptions {
|
||||
write_buffer_size: compaction_region.engine_config.sst_write_buffer_size,
|
||||
..Default::default()
|
||||
@@ -299,7 +287,6 @@ impl Compactor for DefaultCompactor {
|
||||
let region_metadata = compaction_region.region_metadata.clone();
|
||||
let sst_layer = compaction_region.access_layer.clone();
|
||||
let region_id = compaction_region.region_id;
|
||||
let file_id = output.output_file_id;
|
||||
let cache_manager = compaction_region.cache_manager.clone();
|
||||
let storage = compaction_region.region_options.storage.clone();
|
||||
let index_options = compaction_region
|
||||
@@ -320,6 +307,11 @@ impl Compactor for DefaultCompactor {
|
||||
.max()
|
||||
.flatten();
|
||||
futs.push(async move {
|
||||
let input_file_names = output
|
||||
.inputs
|
||||
.iter()
|
||||
.map(|f| f.file_id().to_string())
|
||||
.join(",");
|
||||
let reader = CompactionSstReaderBuilder {
|
||||
metadata: region_metadata.clone(),
|
||||
sst_layer: sst_layer.clone(),
|
||||
@@ -332,11 +324,10 @@ impl Compactor for DefaultCompactor {
|
||||
}
|
||||
.build_sst_reader()
|
||||
.await?;
|
||||
let file_meta_opt = sst_layer
|
||||
let output_files = sst_layer
|
||||
.write_sst(
|
||||
SstWriteRequest {
|
||||
op_type: OperationType::Compact,
|
||||
file_id,
|
||||
metadata: region_metadata,
|
||||
source: Source::Reader(reader),
|
||||
cache_manager,
|
||||
@@ -350,9 +341,10 @@ impl Compactor for DefaultCompactor {
|
||||
&write_opts,
|
||||
)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|sst_info| FileMeta {
|
||||
region_id,
|
||||
file_id,
|
||||
file_id: sst_info.file_id,
|
||||
time_range: sst_info.time_range,
|
||||
level: output.output_level,
|
||||
file_size: sst_info.file_size,
|
||||
@@ -361,8 +353,15 @@ impl Compactor for DefaultCompactor {
|
||||
num_rows: sst_info.num_rows as u64,
|
||||
num_row_groups: sst_info.num_row_groups,
|
||||
sequence: max_sequence,
|
||||
});
|
||||
Ok(file_meta_opt)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let output_file_names =
|
||||
output_files.iter().map(|f| f.file_id.to_string()).join(",");
|
||||
info!(
|
||||
"Region {} compaction inputs: [{}], outputs: [{}]",
|
||||
region_id, input_file_names, output_file_names
|
||||
);
|
||||
Ok(output_files)
|
||||
});
|
||||
}
|
||||
let mut output_files = Vec::with_capacity(futs.len());
|
||||
@@ -377,7 +376,7 @@ impl Compactor for DefaultCompactor {
|
||||
.await
|
||||
.context(JoinSnafu)?
|
||||
.into_iter()
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
.collect::<Result<Vec<Vec<_>>>>()?;
|
||||
output_files.extend(metas.into_iter().flatten());
|
||||
}
|
||||
|
||||
|
||||
@@ -61,7 +61,6 @@ impl From<&PickerOutput> for SerializedPickerOutput {
|
||||
.outputs
|
||||
.iter()
|
||||
.map(|output| SerializedCompactionOutput {
|
||||
output_file_id: output.output_file_id,
|
||||
output_level: output.output_level,
|
||||
inputs: output.inputs.iter().map(|s| s.meta_ref().clone()).collect(),
|
||||
filter_deleted: output.filter_deleted,
|
||||
@@ -91,7 +90,6 @@ impl PickerOutput {
|
||||
.outputs
|
||||
.into_iter()
|
||||
.map(|output| CompactionOutput {
|
||||
output_file_id: output.output_file_id,
|
||||
output_level: output.output_level,
|
||||
inputs: output
|
||||
.inputs
|
||||
@@ -167,14 +165,12 @@ mod tests {
|
||||
let picker_output = PickerOutput {
|
||||
outputs: vec![
|
||||
CompactionOutput {
|
||||
output_file_id: FileId::random(),
|
||||
output_level: 0,
|
||||
inputs: inputs_file_handle.clone(),
|
||||
filter_deleted: false,
|
||||
output_time_range: None,
|
||||
},
|
||||
CompactionOutput {
|
||||
output_file_id: FileId::random(),
|
||||
output_level: 0,
|
||||
inputs: inputs_file_handle.clone(),
|
||||
filter_deleted: false,
|
||||
@@ -205,7 +201,6 @@ mod tests {
|
||||
.iter()
|
||||
.zip(picker_output_from_serialized.outputs.iter())
|
||||
.for_each(|(expected, actual)| {
|
||||
assert_eq!(expected.output_file_id, actual.output_file_id);
|
||||
assert_eq!(expected.output_level, actual.output_level);
|
||||
expected
|
||||
.inputs
|
||||
|
||||
@@ -26,7 +26,7 @@ use crate::compaction::compactor::CompactionRegion;
|
||||
use crate::compaction::picker::{Picker, PickerOutput};
|
||||
use crate::compaction::run::{find_sorted_runs, reduce_runs, Item};
|
||||
use crate::compaction::{get_expired_ssts, CompactionOutput};
|
||||
use crate::sst::file::{overlaps, FileHandle, FileId, Level};
|
||||
use crate::sst::file::{overlaps, FileHandle, Level};
|
||||
use crate::sst::version::LevelMeta;
|
||||
|
||||
const LEVEL_COMPACTED: Level = 1;
|
||||
@@ -134,7 +134,6 @@ impl TwcsPicker {
|
||||
for input in split_inputs {
|
||||
debug_assert!(input.len() > 1);
|
||||
output.push(CompactionOutput {
|
||||
output_file_id: FileId::random(),
|
||||
output_level: LEVEL_COMPACTED, // always compact to l1
|
||||
inputs: input,
|
||||
filter_deleted,
|
||||
@@ -373,7 +372,7 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::compaction::test_util::{new_file_handle, new_file_handles};
|
||||
use crate::sst::file::{FileMeta, Level};
|
||||
use crate::sst::file::{FileId, FileMeta, Level};
|
||||
use crate::test_util::NoopFilePurger;
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -26,7 +26,7 @@ use crate::compaction::buckets::infer_time_bucket;
|
||||
use crate::compaction::compactor::{CompactionRegion, CompactionVersion};
|
||||
use crate::compaction::picker::{Picker, PickerOutput};
|
||||
use crate::compaction::{get_expired_ssts, CompactionOutput};
|
||||
use crate::sst::file::{FileHandle, FileId};
|
||||
use crate::sst::file::FileHandle;
|
||||
|
||||
/// Compaction picker that splits the time range of all involved files to windows, and merges
|
||||
/// the data segments intersects with those windows of files together so that the output files
|
||||
@@ -132,7 +132,6 @@ fn build_output(windows: BTreeMap<i64, (i64, Vec<FileHandle>)>) -> Vec<Compactio
|
||||
);
|
||||
|
||||
let output = CompactionOutput {
|
||||
output_file_id: FileId::random(),
|
||||
output_level: 1,
|
||||
inputs: files,
|
||||
filter_deleted: false,
|
||||
|
||||
@@ -416,7 +416,9 @@ impl EngineInner {
|
||||
region_id: RegionId,
|
||||
request: RegionRequest,
|
||||
) -> Result<AffectedRows> {
|
||||
let (request, receiver) = WorkerRequest::try_from_region_request(region_id, request)?;
|
||||
let region_metadata = self.get_metadata(region_id).ok();
|
||||
let (request, receiver) =
|
||||
WorkerRequest::try_from_region_request(region_id, request, region_metadata)?;
|
||||
self.workers.submit_to_worker(region_id, request).await?;
|
||||
|
||||
receiver.await.context(RecvSnafu)?
|
||||
|
||||
@@ -45,7 +45,7 @@ use crate::request::{
|
||||
SenderWriteRequest, WorkerRequest,
|
||||
};
|
||||
use crate::schedule::scheduler::{Job, SchedulerRef};
|
||||
use crate::sst::file::{FileId, FileMeta};
|
||||
use crate::sst::file::FileMeta;
|
||||
use crate::sst::parquet::WriteOptions;
|
||||
use crate::worker::WorkerListener;
|
||||
|
||||
@@ -347,14 +347,12 @@ impl RegionFlushTask {
|
||||
}
|
||||
|
||||
let max_sequence = mem.stats().max_sequence();
|
||||
let file_id = FileId::random();
|
||||
let iter = mem.iter(None, None, None)?;
|
||||
let source = Source::Iter(iter);
|
||||
|
||||
// Flush to level 0.
|
||||
let write_request = SstWriteRequest {
|
||||
op_type: OperationType::Flush,
|
||||
file_id,
|
||||
metadata: version.metadata.clone(),
|
||||
source,
|
||||
cache_manager: self.cache_manager.clone(),
|
||||
@@ -365,29 +363,31 @@ impl RegionFlushTask {
|
||||
fulltext_index_config: self.engine_config.fulltext_index.clone(),
|
||||
bloom_filter_index_config: self.engine_config.bloom_filter_index.clone(),
|
||||
};
|
||||
let Some(sst_info) = self
|
||||
|
||||
let ssts_written = self
|
||||
.access_layer
|
||||
.write_sst(write_request, &write_opts)
|
||||
.await?
|
||||
else {
|
||||
.await?;
|
||||
if ssts_written.is_empty() {
|
||||
// No data written.
|
||||
continue;
|
||||
};
|
||||
}
|
||||
|
||||
flushed_bytes += sst_info.file_size;
|
||||
let file_meta = FileMeta {
|
||||
region_id: self.region_id,
|
||||
file_id,
|
||||
time_range: sst_info.time_range,
|
||||
level: 0,
|
||||
file_size: sst_info.file_size,
|
||||
available_indexes: sst_info.index_metadata.build_available_indexes(),
|
||||
index_file_size: sst_info.index_metadata.file_size,
|
||||
num_rows: sst_info.num_rows as u64,
|
||||
num_row_groups: sst_info.num_row_groups,
|
||||
sequence: NonZeroU64::new(max_sequence),
|
||||
};
|
||||
file_metas.push(file_meta);
|
||||
file_metas.extend(ssts_written.into_iter().map(|sst_info| {
|
||||
flushed_bytes += sst_info.file_size;
|
||||
FileMeta {
|
||||
region_id: self.region_id,
|
||||
file_id: sst_info.file_id,
|
||||
time_range: sst_info.time_range,
|
||||
level: 0,
|
||||
file_size: sst_info.file_size,
|
||||
available_indexes: sst_info.index_metadata.build_available_indexes(),
|
||||
index_file_size: sst_info.index_metadata.file_size,
|
||||
num_rows: sst_info.num_rows as u64,
|
||||
num_row_groups: sst_info.num_row_groups,
|
||||
sequence: NonZeroU64::new(max_sequence),
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
if !file_metas.is_empty() {
|
||||
|
||||
@@ -26,10 +26,12 @@ use common_time::Timestamp;
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::ArrayRef;
|
||||
use datatypes::data_type::{ConcreteDataType, DataType};
|
||||
use datatypes::prelude::{MutableVector, ScalarVectorBuilder, Vector, VectorRef};
|
||||
use datatypes::prelude::{MutableVector, Vector, VectorRef};
|
||||
use datatypes::types::TimestampType;
|
||||
use datatypes::value::{Value, ValueRef};
|
||||
use datatypes::vectors::{
|
||||
Helper, UInt64Vector, UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder,
|
||||
Helper, TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
|
||||
TimestampSecondVector, UInt64Vector, UInt8Vector,
|
||||
};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
@@ -54,7 +56,10 @@ use crate::region::options::MergeMode;
|
||||
use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt};
|
||||
|
||||
/// Initial vector builder capacity.
|
||||
const INITIAL_BUILDER_CAPACITY: usize = 0;
|
||||
const INITIAL_BUILDER_CAPACITY: usize = 16;
|
||||
|
||||
/// Vector builder capacity.
|
||||
const BUILDER_CAPACITY: usize = 512;
|
||||
|
||||
/// Builder to build [TimeSeriesMemtable].
|
||||
#[derive(Debug, Default)]
|
||||
@@ -154,9 +159,7 @@ impl TimeSeriesMemtable {
|
||||
);
|
||||
|
||||
let primary_key_encoded = self.row_codec.encode(kv.primary_keys())?;
|
||||
let fields = kv.fields().collect::<Vec<_>>();
|
||||
|
||||
stats.value_bytes += fields.iter().map(|v| v.data_size()).sum::<usize>();
|
||||
let (series, series_allocated) = self.series_set.get_or_add_series(primary_key_encoded);
|
||||
stats.key_bytes += series_allocated;
|
||||
|
||||
@@ -166,7 +169,8 @@ impl TimeSeriesMemtable {
|
||||
stats.max_ts = stats.max_ts.max(ts);
|
||||
|
||||
let mut guard = series.write().unwrap();
|
||||
guard.push(kv.timestamp(), kv.sequence(), kv.op_type(), fields);
|
||||
let size = guard.push(kv.timestamp(), kv.sequence(), kv.op_type(), kv.fields());
|
||||
stats.value_bytes += size;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -617,6 +621,7 @@ struct Series {
|
||||
pk_cache: Option<Vec<Value>>,
|
||||
active: ValueBuilder,
|
||||
frozen: Vec<Values>,
|
||||
region_metadata: RegionMetadataRef,
|
||||
}
|
||||
|
||||
impl Series {
|
||||
@@ -625,12 +630,24 @@ impl Series {
|
||||
pk_cache: None,
|
||||
active: ValueBuilder::new(region_metadata, INITIAL_BUILDER_CAPACITY),
|
||||
frozen: vec![],
|
||||
region_metadata: region_metadata.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Pushes a row of values into Series.
|
||||
fn push(&mut self, ts: ValueRef, sequence: u64, op_type: OpType, values: Vec<ValueRef>) {
|
||||
self.active.push(ts, sequence, op_type as u8, values);
|
||||
/// Pushes a row of values into Series. Return the size of values.
|
||||
fn push<'a>(
|
||||
&mut self,
|
||||
ts: ValueRef<'a>,
|
||||
sequence: u64,
|
||||
op_type: OpType,
|
||||
values: impl Iterator<Item = ValueRef<'a>>,
|
||||
) -> usize {
|
||||
// + 10 to avoid potential reallocation.
|
||||
if self.active.len() + 10 > BUILDER_CAPACITY {
|
||||
let region_metadata = self.region_metadata.clone();
|
||||
self.freeze(®ion_metadata);
|
||||
}
|
||||
self.active.push(ts, sequence, op_type as u8, values)
|
||||
}
|
||||
|
||||
fn update_pk_cache(&mut self, pk_values: Vec<Value>) {
|
||||
@@ -691,22 +708,23 @@ impl Series {
|
||||
|
||||
/// `ValueBuilder` holds all the vector builders for field columns.
|
||||
struct ValueBuilder {
|
||||
timestamp: Box<dyn MutableVector>,
|
||||
sequence: UInt64VectorBuilder,
|
||||
op_type: UInt8VectorBuilder,
|
||||
timestamp: Vec<i64>,
|
||||
timestamp_type: ConcreteDataType,
|
||||
sequence: Vec<u64>,
|
||||
op_type: Vec<u8>,
|
||||
fields: Vec<Option<Box<dyn MutableVector>>>,
|
||||
field_types: Vec<ConcreteDataType>,
|
||||
}
|
||||
|
||||
impl ValueBuilder {
|
||||
fn new(region_metadata: &RegionMetadataRef, capacity: usize) -> Self {
|
||||
let timestamp = region_metadata
|
||||
let timestamp_type = region_metadata
|
||||
.time_index_column()
|
||||
.column_schema
|
||||
.data_type
|
||||
.create_mutable_vector(capacity);
|
||||
let sequence = UInt64VectorBuilder::with_capacity(capacity);
|
||||
let op_type = UInt8VectorBuilder::with_capacity(capacity);
|
||||
.clone();
|
||||
let sequence = Vec::with_capacity(capacity);
|
||||
let op_type = Vec::with_capacity(capacity);
|
||||
|
||||
let field_types = region_metadata
|
||||
.field_columns()
|
||||
@@ -715,7 +733,8 @@ impl ValueBuilder {
|
||||
let fields = (0..field_types.len()).map(|_| None).collect();
|
||||
|
||||
Self {
|
||||
timestamp,
|
||||
timestamp: Vec::with_capacity(capacity),
|
||||
timestamp_type,
|
||||
sequence,
|
||||
op_type,
|
||||
fields,
|
||||
@@ -725,26 +744,45 @@ impl ValueBuilder {
|
||||
|
||||
/// Pushes a new row to `ValueBuilder`.
|
||||
/// We don't need primary keys since they've already be encoded.
|
||||
fn push(&mut self, ts: ValueRef, sequence: u64, op_type: u8, fields: Vec<ValueRef>) {
|
||||
debug_assert_eq!(fields.len(), self.fields.len());
|
||||
self.timestamp.push_value_ref(ts);
|
||||
self.sequence.push_value_ref(ValueRef::UInt64(sequence));
|
||||
self.op_type.push_value_ref(ValueRef::UInt8(op_type));
|
||||
/// Returns the size of field values.
|
||||
///
|
||||
/// In this method, we don't check the data type of the value, because it is already checked in the caller.
|
||||
fn push<'a>(
|
||||
&mut self,
|
||||
ts: ValueRef,
|
||||
sequence: u64,
|
||||
op_type: u8,
|
||||
fields: impl Iterator<Item = ValueRef<'a>>,
|
||||
) -> usize {
|
||||
#[cfg(debug_assertions)]
|
||||
let fields = {
|
||||
let field_vec = fields.collect::<Vec<_>>();
|
||||
debug_assert_eq!(field_vec.len(), self.fields.len());
|
||||
field_vec.into_iter()
|
||||
};
|
||||
|
||||
self.timestamp
|
||||
.push(ts.as_timestamp().unwrap().unwrap().value());
|
||||
self.sequence.push(sequence);
|
||||
self.op_type.push(op_type);
|
||||
let num_rows = self.timestamp.len();
|
||||
for (idx, field_value) in fields.into_iter().enumerate() {
|
||||
let mut size = 0;
|
||||
for (idx, field_value) in fields.enumerate() {
|
||||
size += field_value.data_size();
|
||||
if !field_value.is_null() || self.fields[idx].is_some() {
|
||||
self.fields[idx]
|
||||
.get_or_insert_with(|| {
|
||||
// lazy initialize on first non-null value
|
||||
let mut mutable_vector =
|
||||
self.field_types[idx].create_mutable_vector(num_rows);
|
||||
// fill previous rows with nulls
|
||||
mutable_vector.push_nulls(num_rows - 1);
|
||||
mutable_vector
|
||||
})
|
||||
.push_value_ref(field_value);
|
||||
if let Some(field) = self.fields[idx].as_mut() {
|
||||
let _ = field.try_push_value_ref(field_value);
|
||||
} else {
|
||||
let mut mutable_vector = self.field_types[idx]
|
||||
.create_mutable_vector(num_rows.max(INITIAL_BUILDER_CAPACITY));
|
||||
mutable_vector.push_nulls(num_rows - 1);
|
||||
let _ = mutable_vector.try_push_value_ref(field_value);
|
||||
self.fields[idx] = Some(mutable_vector);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size
|
||||
}
|
||||
|
||||
/// Returns the length of [ValueBuilder]
|
||||
@@ -844,9 +882,23 @@ impl From<ValueBuilder> for Values {
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let sequence = Arc::new(value.sequence.finish());
|
||||
let op_type = Arc::new(value.op_type.finish());
|
||||
let timestamp = value.timestamp.to_vector();
|
||||
let sequence = Arc::new(UInt64Vector::from_vec(value.sequence));
|
||||
let op_type = Arc::new(UInt8Vector::from_vec(value.op_type));
|
||||
let timestamp: VectorRef = match value.timestamp_type {
|
||||
ConcreteDataType::Timestamp(TimestampType::Second(_)) => {
|
||||
Arc::new(TimestampSecondVector::from_vec(value.timestamp))
|
||||
}
|
||||
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)) => {
|
||||
Arc::new(TimestampMillisecondVector::from_vec(value.timestamp))
|
||||
}
|
||||
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)) => {
|
||||
Arc::new(TimestampMicrosecondVector::from_vec(value.timestamp))
|
||||
}
|
||||
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)) => {
|
||||
Arc::new(TimestampNanosecondVector::from_vec(value.timestamp))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
if cfg!(debug_assertions) {
|
||||
debug_assert_eq!(timestamp.len(), sequence.len());
|
||||
@@ -951,8 +1003,8 @@ mod tests {
|
||||
ValueRef::Timestamp(Timestamp::new_millisecond(val))
|
||||
}
|
||||
|
||||
fn field_value_ref(v0: i64, v1: f64) -> Vec<ValueRef<'static>> {
|
||||
vec![ValueRef::Int64(v0), ValueRef::Float64(OrderedFloat(v1))]
|
||||
fn field_value_ref(v0: i64, v1: f64) -> impl Iterator<Item = ValueRef<'static>> {
|
||||
vec![ValueRef::Int64(v0), ValueRef::Float64(OrderedFloat(v1))].into_iter()
|
||||
}
|
||||
|
||||
fn check_values(values: Values, expect: &[(i64, u64, u8, i64, f64)]) {
|
||||
@@ -1014,20 +1066,20 @@ mod tests {
|
||||
ts_value_ref(1),
|
||||
0,
|
||||
OpType::Put,
|
||||
vec![ValueRef::Null, ValueRef::Null],
|
||||
vec![ValueRef::Null, ValueRef::Null].into_iter(),
|
||||
);
|
||||
series.push(
|
||||
ts_value_ref(1),
|
||||
0,
|
||||
OpType::Put,
|
||||
vec![ValueRef::Int64(1), ValueRef::Null],
|
||||
vec![ValueRef::Int64(1), ValueRef::Null].into_iter(),
|
||||
);
|
||||
series.push(ts_value_ref(1), 2, OpType::Put, field_value_ref(2, 10.2));
|
||||
series.push(
|
||||
ts_value_ref(1),
|
||||
3,
|
||||
OpType::Put,
|
||||
vec![ValueRef::Int64(2), ValueRef::Null],
|
||||
vec![ValueRef::Int64(2), ValueRef::Null].into_iter(),
|
||||
);
|
||||
assert_eq!(4, series.active.timestamp.len());
|
||||
assert_eq!(0, series.frozen.len());
|
||||
|
||||
@@ -554,7 +554,7 @@ where
|
||||
|
||||
// set next_entry_id and write to memtable.
|
||||
region_write_ctx.set_next_entry_id(last_entry_id + 1);
|
||||
region_write_ctx.write_memtable();
|
||||
region_write_ctx.write_memtable().await;
|
||||
}
|
||||
|
||||
// TODO(weny): We need to update `flushed_entry_id` in the region manifest
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::mem;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::{Mutation, OpType, Rows, WalEntry, WriteHint};
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use snafu::ResultExt;
|
||||
use store_api::logstore::provider::Provider;
|
||||
use store_api::logstore::LogStore;
|
||||
@@ -197,23 +198,43 @@ impl RegionWriteCtx {
|
||||
}
|
||||
|
||||
/// Consumes mutations and writes them into mutable memtable.
|
||||
pub(crate) fn write_memtable(&mut self) {
|
||||
pub(crate) async fn write_memtable(&mut self) {
|
||||
debug_assert_eq!(self.notifiers.len(), self.wal_entry.mutations.len());
|
||||
|
||||
if self.failed {
|
||||
return;
|
||||
}
|
||||
|
||||
let mutable = &self.version.memtables.mutable;
|
||||
// Takes mutations from the wal entry.
|
||||
let mutations = mem::take(&mut self.wal_entry.mutations);
|
||||
for (mutation, notify) in mutations.into_iter().zip(&mut self.notifiers) {
|
||||
// Write mutation to the memtable.
|
||||
let Some(kvs) = KeyValues::new(&self.version.metadata, mutation) else {
|
||||
continue;
|
||||
};
|
||||
if let Err(e) = mutable.write(&kvs) {
|
||||
notify.err = Some(Arc::new(e));
|
||||
let mutable = self.version.memtables.mutable.clone();
|
||||
let mutations = mem::take(&mut self.wal_entry.mutations)
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, mutation)| {
|
||||
let kvs = KeyValues::new(&self.version.metadata, mutation)?;
|
||||
Some((i, kvs))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if mutations.len() == 1 {
|
||||
if let Err(err) = mutable.write(&mutations[0].1) {
|
||||
self.notifiers[mutations[0].0].err = Some(Arc::new(err));
|
||||
}
|
||||
} else {
|
||||
let mut tasks = FuturesUnordered::new();
|
||||
for (i, kvs) in mutations {
|
||||
let mutable = mutable.clone();
|
||||
// use tokio runtime to schedule tasks.
|
||||
tasks.push(common_runtime::spawn_blocking_global(move || {
|
||||
(i, mutable.write(&kvs))
|
||||
}));
|
||||
}
|
||||
|
||||
while let Some(result) = tasks.next().await {
|
||||
// first unwrap the result from `spawn` above
|
||||
let (i, result) = result.unwrap();
|
||||
if let Err(err) = result {
|
||||
self.notifiers[i].err = Some(Arc::new(err));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
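The rewritten `write_memtable` above fans each mutation out to a blocking task and joins them with `FuturesUnordered`, recording any failure on the notifier at the same index. A rough, self-contained sketch of that fan-out/fan-in pattern follows; it uses plain `tokio::task::spawn_blocking` in place of `common_runtime::spawn_blocking_global`, `write_one` is a stand-in for `mutable.write(&kvs)`, and `tokio`/`futures` dependencies are assumed.

```rust
use futures::stream::{FuturesUnordered, StreamExt};

/// Stand-in for the real memtable write.
fn write_one(kvs: &[u64]) -> Result<(), String> {
    if kvs.is_empty() {
        Err("empty mutation".to_string())
    } else {
        Ok(())
    }
}

/// Writes every mutation on a blocking task and collects per-index results,
/// mirroring how the worker reports an error back per notifier.
async fn write_all(mutations: Vec<(usize, Vec<u64>)>) -> Vec<(usize, Result<(), String>)> {
    let mut tasks = FuturesUnordered::new();
    for (i, kvs) in mutations {
        tasks.push(tokio::task::spawn_blocking(move || (i, write_one(&kvs))));
    }

    let mut outcomes = Vec::new();
    while let Some(joined) = tasks.next().await {
        // Unwrap the JoinHandle result first, as the worker code does.
        outcomes.push(joined.unwrap());
    }
    outcomes
}

#[tokio::main]
async fn main() {
    for (i, result) in write_all(vec![(0, vec![1, 2, 3]), (1, vec![])]).await {
        println!("mutation {i}: {result:?}");
    }
}
```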
@@ -66,13 +66,20 @@ pub struct WriteRequest {
|
||||
has_null: Vec<bool>,
|
||||
/// Write hint.
|
||||
pub hint: Option<WriteHint>,
|
||||
/// Region metadata on the time of this request is created.
|
||||
pub(crate) region_metadata: Option<RegionMetadataRef>,
|
||||
}
|
||||
|
||||
impl WriteRequest {
|
||||
/// Creates a new request.
|
||||
///
|
||||
/// Returns `Err` if `rows` are invalid.
|
||||
pub fn new(region_id: RegionId, op_type: OpType, rows: Rows) -> Result<WriteRequest> {
|
||||
pub fn new(
|
||||
region_id: RegionId,
|
||||
op_type: OpType,
|
||||
rows: Rows,
|
||||
region_metadata: Option<RegionMetadataRef>,
|
||||
) -> Result<WriteRequest> {
|
||||
let mut name_to_index = HashMap::with_capacity(rows.schema.len());
|
||||
for (index, column) in rows.schema.iter().enumerate() {
|
||||
ensure!(
|
||||
@@ -116,6 +123,7 @@ impl WriteRequest {
|
||||
name_to_index,
|
||||
has_null,
|
||||
hint: None,
|
||||
region_metadata,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -248,46 +256,67 @@ impl WriteRequest {
|
||||
pub(crate) fn fill_missing_columns(&mut self, metadata: &RegionMetadata) -> Result<()> {
|
||||
debug_assert_eq!(self.region_id, metadata.region_id);
|
||||
|
||||
let mut columns_to_fill = vec![];
|
||||
for column in &metadata.column_metadatas {
|
||||
if !self.name_to_index.contains_key(&column.column_schema.name) {
|
||||
self.fill_column(column)?;
|
||||
columns_to_fill.push(column);
|
||||
}
|
||||
}
|
||||
self.fill_columns(columns_to_fill)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Checks the schema and fill missing columns.
|
||||
pub(crate) fn maybe_fill_missing_columns(&mut self, metadata: &RegionMetadata) -> Result<()> {
|
||||
if let Err(e) = self.check_schema(metadata) {
|
||||
if e.is_fill_default() {
|
||||
// TODO(yingwen): Add metrics for this case.
|
||||
// We need to fill default value. The write request may be a request
|
||||
// sent before changing the schema.
|
||||
self.fill_missing_columns(metadata)?;
|
||||
} else {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Fills default value for specific `column`.
|
||||
fn fill_column(&mut self, column: &ColumnMetadata) -> Result<()> {
|
||||
// Need to add a default value for this column.
|
||||
let proto_value = self.column_default_value(column)?;
|
||||
|
||||
if proto_value.value_data.is_none() {
|
||||
return Ok(());
|
||||
/// Fills default value for specific `columns`.
|
||||
fn fill_columns(&mut self, columns: Vec<&ColumnMetadata>) -> Result<()> {
|
||||
let mut default_values = Vec::with_capacity(columns.len());
|
||||
let mut columns_to_fill = Vec::with_capacity(columns.len());
|
||||
for column in columns {
|
||||
let default_value = self.column_default_value(column)?;
|
||||
if default_value.value_data.is_some() {
|
||||
default_values.push(default_value);
|
||||
columns_to_fill.push(column);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert default value to each row.
|
||||
for row in &mut self.rows.rows {
|
||||
row.values.push(proto_value.clone());
|
||||
row.values.extend(default_values.iter().cloned());
|
||||
}
|
||||
|
||||
// Insert column schema.
|
||||
let (datatype, datatype_ext) =
|
||||
ColumnDataTypeWrapper::try_from(column.column_schema.data_type.clone())
|
||||
.with_context(|_| ConvertColumnDataTypeSnafu {
|
||||
reason: format!(
|
||||
"no protobuf type for column {} ({:?})",
|
||||
column.column_schema.name, column.column_schema.data_type
|
||||
),
|
||||
})?
|
||||
.to_parts();
|
||||
self.rows.schema.push(ColumnSchema {
|
||||
column_name: column.column_schema.name.clone(),
|
||||
datatype: datatype as i32,
|
||||
semantic_type: column.semantic_type as i32,
|
||||
datatype_extension: datatype_ext,
|
||||
options: options_from_column_schema(&column.column_schema),
|
||||
});
|
||||
for column in columns_to_fill {
|
||||
let (datatype, datatype_ext) =
|
||||
ColumnDataTypeWrapper::try_from(column.column_schema.data_type.clone())
|
||||
.with_context(|_| ConvertColumnDataTypeSnafu {
|
||||
reason: format!(
|
||||
"no protobuf type for column {} ({:?})",
|
||||
column.column_schema.name, column.column_schema.data_type
|
||||
),
|
||||
})?
|
||||
.to_parts();
|
||||
self.rows.schema.push(ColumnSchema {
|
||||
column_name: column.column_schema.name.clone(),
|
||||
datatype: datatype as i32,
|
||||
semantic_type: column.semantic_type as i32,
|
||||
datatype_extension: datatype_ext,
|
||||
options: options_from_column_schema(&column.column_schema),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
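The `fill_columns` refactor above computes the default value for every missing column once, then extends each row and the schema in a single pass instead of re-walking all rows per column. A simplified sketch of that batching follows; the `Request` struct with plain strings and integers stands in for the protobuf `Rows`/`ColumnSchema` types used by the real code.

```rust
/// Illustrative row/schema shapes; the real code uses protobuf rows and column schemas.
struct Request {
    schema: Vec<String>,
    rows: Vec<Vec<i64>>,
}

impl Request {
    /// Fills defaults for all missing columns in one pass over the rows.
    fn fill_columns(&mut self, missing: &[(&str, Option<i64>)]) {
        // Keep only the columns that actually have a default value.
        let mut defaults = Vec::new();
        let mut columns_to_fill = Vec::new();
        for (name, default) in missing {
            if let Some(value) = default {
                defaults.push(*value);
                columns_to_fill.push(*name);
            }
        }

        // Extend every row once with all defaults...
        for row in &mut self.rows {
            row.extend(defaults.iter().copied());
        }
        // ...then append the corresponding schema entries.
        for name in columns_to_fill {
            self.schema.push(name.to_string());
        }
    }
}

fn main() {
    let mut req = Request {
        schema: vec!["ts".to_string()],
        rows: vec![vec![1], vec![2]],
    };
    req.fill_columns(&[("host_id", Some(0)), ("comment", None)]);
    println!("{:?} {:?}", req.schema, req.rows);
}
```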
@@ -559,19 +588,32 @@ impl WorkerRequest {
|
||||
pub(crate) fn try_from_region_request(
|
||||
region_id: RegionId,
|
||||
value: RegionRequest,
|
||||
region_metadata: Option<RegionMetadataRef>,
|
||||
) -> Result<(WorkerRequest, Receiver<Result<AffectedRows>>)> {
|
||||
let (sender, receiver) = oneshot::channel();
|
||||
let worker_request = match value {
|
||||
RegionRequest::Put(v) => {
|
||||
let write_request =
|
||||
WriteRequest::new(region_id, OpType::Put, v.rows)?.with_hint(v.hint);
|
||||
let mut write_request =
|
||||
WriteRequest::new(region_id, OpType::Put, v.rows, region_metadata.clone())?
|
||||
.with_hint(v.hint);
|
||||
if write_request.primary_key_encoding() == PrimaryKeyEncoding::Dense
|
||||
&& let Some(region_metadata) = ®ion_metadata
|
||||
{
|
||||
write_request.maybe_fill_missing_columns(region_metadata)?;
|
||||
}
|
||||
WorkerRequest::Write(SenderWriteRequest {
|
||||
sender: sender.into(),
|
||||
request: write_request,
|
||||
})
|
||||
}
|
||||
RegionRequest::Delete(v) => {
|
||||
let write_request = WriteRequest::new(region_id, OpType::Delete, v.rows)?;
|
||||
let mut write_request =
|
||||
WriteRequest::new(region_id, OpType::Delete, v.rows, region_metadata.clone())?;
|
||||
if write_request.primary_key_encoding() == PrimaryKeyEncoding::Dense
|
||||
&& let Some(region_metadata) = ®ion_metadata
|
||||
{
|
||||
write_request.maybe_fill_missing_columns(region_metadata)?;
|
||||
}
|
||||
WorkerRequest::Write(SenderWriteRequest {
|
||||
sender: sender.into(),
|
||||
request: write_request,
|
||||
@@ -875,7 +917,7 @@ mod tests {
|
||||
rows: vec![],
|
||||
};
|
||||
|
||||
let err = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap_err();
|
||||
let err = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap_err();
|
||||
check_invalid_request(&err, "duplicate column c0");
|
||||
}
|
||||
|
||||
@@ -891,7 +933,7 @@ mod tests {
|
||||
}],
|
||||
};
|
||||
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
assert_eq!(0, request.column_index_by_name("c0").unwrap());
|
||||
assert_eq!(1, request.column_index_by_name("c1").unwrap());
|
||||
assert_eq!(None, request.column_index_by_name("c2"));
|
||||
@@ -909,7 +951,7 @@ mod tests {
|
||||
}],
|
||||
};
|
||||
|
||||
let err = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap_err();
|
||||
let err = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap_err();
|
||||
check_invalid_request(&err, "row has 3 columns but schema has 2");
|
||||
}
|
||||
|
||||
@@ -955,7 +997,7 @@ mod tests {
|
||||
};
|
||||
let metadata = new_region_metadata();
|
||||
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
request.check_schema(&metadata).unwrap();
|
||||
}
|
||||
|
||||
@@ -972,7 +1014,7 @@ mod tests {
|
||||
};
|
||||
let metadata = new_region_metadata();
|
||||
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
check_invalid_request(&err, "column ts expect type Timestamp(Millisecond(TimestampMillisecondType)), given: INT64(4)");
|
||||
}
|
||||
@@ -994,7 +1036,7 @@ mod tests {
|
||||
};
|
||||
let metadata = new_region_metadata();
|
||||
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
check_invalid_request(&err, "column ts has semantic type Timestamp, given: TAG(0)");
|
||||
}
|
||||
@@ -1016,7 +1058,7 @@ mod tests {
|
||||
};
|
||||
let metadata = new_region_metadata();
|
||||
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
check_invalid_request(&err, "column ts is not null but input has null");
|
||||
}
|
||||
@@ -1035,7 +1077,7 @@ mod tests {
|
||||
};
|
||||
let metadata = new_region_metadata();
|
||||
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
check_invalid_request(&err, "missing column ts");
|
||||
}
|
||||
@@ -1058,7 +1100,7 @@ mod tests {
|
||||
};
|
||||
let metadata = new_region_metadata();
|
||||
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
check_invalid_request(&err, r#"unknown columns: ["k1"]"#);
|
||||
}
|
||||
@@ -1104,7 +1146,7 @@ mod tests {
|
||||
builder.build().unwrap()
|
||||
};
|
||||
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
assert!(err.is_fill_default());
|
||||
assert!(request
|
||||
@@ -1128,7 +1170,7 @@ mod tests {
|
||||
};
|
||||
let metadata = new_region_metadata();
|
||||
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
assert!(err.is_fill_default());
|
||||
request.fill_missing_columns(&metadata).unwrap();
|
||||
@@ -1214,7 +1256,8 @@ mod tests {
|
||||
};
|
||||
let metadata = region_metadata_two_fields();
|
||||
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows).unwrap();
|
||||
let mut request =
|
||||
WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
check_invalid_request(&err, "delete requests need column k0");
|
||||
let err = request.fill_missing_columns(&metadata).unwrap_err();
|
||||
@@ -1233,7 +1276,8 @@ mod tests {
|
||||
values: vec![i64_value(100), ts_ms_value(1)],
|
||||
}],
|
||||
};
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows).unwrap();
|
||||
let mut request =
|
||||
WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
assert!(err.is_fill_default());
|
||||
request.fill_missing_columns(&metadata).unwrap();
|
||||
@@ -1296,7 +1340,8 @@ mod tests {
|
||||
values: vec![i64_value(100), ts_ms_value(1)],
|
||||
}],
|
||||
};
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows).unwrap();
|
||||
let mut request =
|
||||
WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
assert!(err.is_fill_default());
|
||||
request.fill_missing_columns(&metadata).unwrap();
|
||||
@@ -1333,7 +1378,7 @@ mod tests {
|
||||
};
|
||||
let metadata = new_region_metadata();
|
||||
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.fill_missing_columns(&metadata).unwrap_err();
|
||||
check_invalid_request(&err, "column ts does not have default value");
|
||||
}
|
||||
@@ -1363,11 +1408,39 @@ mod tests {
|
||||
};
|
||||
let metadata = region_metadata_two_fields();
|
||||
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
|
||||
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
|
||||
let err = request.check_schema(&metadata).unwrap_err();
|
||||
check_invalid_request(
|
||||
&err,
|
||||
"column f1 expect type Int64(Int64Type), given: STRING(12)",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_write_request_metadata() {
|
||||
let rows = Rows {
|
||||
schema: vec![
|
||||
new_column_schema("c0", ColumnDataType::Int64, SemanticType::Tag),
|
||||
new_column_schema("c1", ColumnDataType::Int64, SemanticType::Tag),
|
||||
],
|
||||
rows: vec![Row {
|
||||
values: vec![i64_value(1), i64_value(2)],
|
||||
}],
|
||||
};
|
||||
|
||||
let metadata = Arc::new(new_region_metadata());
|
||||
let request = WriteRequest::new(
|
||||
RegionId::new(1, 1),
|
||||
OpType::Put,
|
||||
rows,
|
||||
Some(metadata.clone()),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(request.region_metadata.is_some());
|
||||
assert_eq!(
|
||||
request.region_metadata.unwrap().region_id,
|
||||
RegionId::new(1, 1)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -348,8 +348,8 @@ impl DensePrimaryKeyCodec {
|
||||
I: Iterator<Item = ValueRef<'a>>,
|
||||
{
|
||||
let mut serializer = Serializer::new(buffer);
|
||||
for (value, (_, field)) in row.zip(self.ordered_primary_key_columns.iter()) {
|
||||
field.serialize(&mut serializer, &value)?;
|
||||
for (idx, value) in row.enumerate() {
|
||||
self.field_at(idx).serialize(&mut serializer, &value)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -105,7 +105,6 @@ pub struct Indexer {
|
||||
file_id: FileId,
|
||||
file_path: String,
|
||||
region_id: RegionId,
|
||||
|
||||
puffin_manager: Option<SstPuffinManager>,
|
||||
inverted_indexer: Option<InvertedIndexer>,
|
||||
last_mem_inverted_index: usize,
|
||||
@@ -168,11 +167,15 @@ impl Indexer {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct IndexerBuilder<'a> {
|
||||
#[async_trait::async_trait]
|
||||
pub trait IndexerBuilder {
|
||||
/// Builds an indexer for the file with the given id, writing the index to `index_file_path`.
|
||||
async fn build(&self, file_id: FileId, index_file_path: String) -> Indexer;
|
||||
}
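// Illustrative sketch, not part of the diff: a minimal implementation of the new
// `IndexerBuilder` trait. The name `DisabledIndexBuilder` is hypothetical; the parquet
// writer tests further below define a similar `NoopIndexBuilder`. Returning the default
// `Indexer` simply disables inverted, full-text, and bloom filter index creation.
struct DisabledIndexBuilder;

#[async_trait::async_trait]
impl IndexerBuilder for DisabledIndexBuilder {
    async fn build(&self, _file_id: FileId, _index_file_path: String) -> Indexer {
        Indexer::default()
    }
}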
|
||||
|
||||
pub(crate) struct IndexerBuilderImpl {
|
||||
pub(crate) op_type: OperationType,
|
||||
pub(crate) file_id: FileId,
|
||||
pub(crate) file_path: String,
|
||||
pub(crate) metadata: &'a RegionMetadataRef,
|
||||
pub(crate) metadata: RegionMetadataRef,
|
||||
pub(crate) row_group_size: usize,
|
||||
pub(crate) puffin_manager: SstPuffinManager,
|
||||
pub(crate) intermediate_manager: IntermediateManager,
|
||||
@@ -182,20 +185,20 @@ pub(crate) struct IndexerBuilder<'a> {
|
||||
pub(crate) bloom_filter_index_config: BloomFilterConfig,
|
||||
}
|
||||
|
||||
impl IndexerBuilder<'_> {
|
||||
#[async_trait::async_trait]
|
||||
impl IndexerBuilder for IndexerBuilderImpl {
|
||||
/// Sanity checks the arguments and creates a new [Indexer] if they are valid.
|
||||
pub(crate) async fn build(self) -> Indexer {
|
||||
async fn build(&self, file_id: FileId, index_file_path: String) -> Indexer {
|
||||
let mut indexer = Indexer {
|
||||
file_id: self.file_id,
|
||||
file_path: self.file_path.clone(),
|
||||
file_id,
|
||||
file_path: index_file_path,
|
||||
region_id: self.metadata.region_id,
|
||||
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
indexer.inverted_indexer = self.build_inverted_indexer();
|
||||
indexer.fulltext_indexer = self.build_fulltext_indexer().await;
|
||||
indexer.bloom_filter_indexer = self.build_bloom_filter_indexer();
|
||||
indexer.inverted_indexer = self.build_inverted_indexer(file_id);
|
||||
indexer.fulltext_indexer = self.build_fulltext_indexer(file_id).await;
|
||||
indexer.bloom_filter_indexer = self.build_bloom_filter_indexer(file_id);
|
||||
if indexer.inverted_indexer.is_none()
|
||||
&& indexer.fulltext_indexer.is_none()
|
||||
&& indexer.bloom_filter_indexer.is_none()
|
||||
@@ -204,11 +207,13 @@ impl IndexerBuilder<'_> {
|
||||
return Indexer::default();
|
||||
}
|
||||
|
||||
indexer.puffin_manager = Some(self.puffin_manager);
|
||||
indexer.puffin_manager = Some(self.puffin_manager.clone());
|
||||
indexer
|
||||
}
|
||||
}
|
||||
|
||||
fn build_inverted_indexer(&self) -> Option<InvertedIndexer> {
|
||||
impl IndexerBuilderImpl {
|
||||
fn build_inverted_indexer(&self, file_id: FileId) -> Option<InvertedIndexer> {
|
||||
let create = match self.op_type {
|
||||
OperationType::Flush => self.inverted_index_config.create_on_flush.auto(),
|
||||
OperationType::Compact => self.inverted_index_config.create_on_compaction.auto(),
|
||||
@@ -217,7 +222,7 @@ impl IndexerBuilder<'_> {
|
||||
if !create {
|
||||
debug!(
|
||||
"Skip creating inverted index due to config, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
return None;
|
||||
}
|
||||
@@ -228,7 +233,7 @@ impl IndexerBuilder<'_> {
|
||||
if indexed_column_ids.is_empty() {
|
||||
debug!(
|
||||
"No columns to be indexed, skip creating inverted index, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
return None;
|
||||
}
|
||||
@@ -238,7 +243,7 @@ impl IndexerBuilder<'_> {
|
||||
else {
|
||||
warn!(
|
||||
"Segment row count is 0, skip creating index, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
return None;
|
||||
};
|
||||
@@ -246,7 +251,7 @@ impl IndexerBuilder<'_> {
|
||||
let Some(row_group_size) = NonZeroUsize::new(self.row_group_size) else {
|
||||
warn!(
|
||||
"Row group size is 0, skip creating index, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
return None;
|
||||
};
|
||||
@@ -257,8 +262,8 @@ impl IndexerBuilder<'_> {
|
||||
}
|
||||
|
||||
let indexer = InvertedIndexer::new(
|
||||
self.file_id,
|
||||
self.metadata,
|
||||
file_id,
|
||||
&self.metadata,
|
||||
self.intermediate_manager.clone(),
|
||||
self.inverted_index_config.mem_threshold_on_create(),
|
||||
segment_row_count,
|
||||
@@ -268,7 +273,7 @@ impl IndexerBuilder<'_> {
|
||||
Some(indexer)
|
||||
}
|
||||
|
||||
async fn build_fulltext_indexer(&self) -> Option<FulltextIndexer> {
|
||||
async fn build_fulltext_indexer(&self, file_id: FileId) -> Option<FulltextIndexer> {
|
||||
let create = match self.op_type {
|
||||
OperationType::Flush => self.fulltext_index_config.create_on_flush.auto(),
|
||||
OperationType::Compact => self.fulltext_index_config.create_on_compaction.auto(),
|
||||
@@ -277,7 +282,7 @@ impl IndexerBuilder<'_> {
|
||||
if !create {
|
||||
debug!(
|
||||
"Skip creating full-text index due to config, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
return None;
|
||||
}
|
||||
@@ -285,9 +290,9 @@ impl IndexerBuilder<'_> {
|
||||
let mem_limit = self.fulltext_index_config.mem_threshold_on_create();
|
||||
let creator = FulltextIndexer::new(
|
||||
&self.metadata.region_id,
|
||||
&self.file_id,
|
||||
&file_id,
|
||||
&self.intermediate_manager,
|
||||
self.metadata,
|
||||
&self.metadata,
|
||||
self.fulltext_index_config.compress,
|
||||
mem_limit,
|
||||
)
|
||||
@@ -298,7 +303,7 @@ impl IndexerBuilder<'_> {
|
||||
if creator.is_none() {
|
||||
debug!(
|
||||
"Skip creating full-text index due to no columns require indexing, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
}
|
||||
return creator;
|
||||
@@ -309,19 +314,19 @@ impl IndexerBuilder<'_> {
|
||||
if cfg!(any(test, feature = "test")) {
|
||||
panic!(
|
||||
"Failed to create full-text indexer, region_id: {}, file_id: {}, err: {:?}",
|
||||
self.metadata.region_id, self.file_id, err
|
||||
self.metadata.region_id, file_id, err
|
||||
);
|
||||
} else {
|
||||
warn!(
|
||||
err; "Failed to create full-text indexer, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn build_bloom_filter_indexer(&self) -> Option<BloomFilterIndexer> {
|
||||
fn build_bloom_filter_indexer(&self, file_id: FileId) -> Option<BloomFilterIndexer> {
|
||||
let create = match self.op_type {
|
||||
OperationType::Flush => self.bloom_filter_index_config.create_on_flush.auto(),
|
||||
OperationType::Compact => self.bloom_filter_index_config.create_on_compaction.auto(),
|
||||
@@ -330,15 +335,15 @@ impl IndexerBuilder<'_> {
|
||||
if !create {
|
||||
debug!(
|
||||
"Skip creating bloom filter due to config, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
let mem_limit = self.bloom_filter_index_config.mem_threshold_on_create();
|
||||
let indexer = BloomFilterIndexer::new(
|
||||
self.file_id,
|
||||
self.metadata,
|
||||
file_id,
|
||||
&self.metadata,
|
||||
self.intermediate_manager.clone(),
|
||||
mem_limit,
|
||||
);
|
||||
@@ -348,7 +353,7 @@ impl IndexerBuilder<'_> {
|
||||
if indexer.is_none() {
|
||||
debug!(
|
||||
"Skip creating bloom filter due to no columns require indexing, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
}
|
||||
return indexer;
|
||||
@@ -359,12 +364,12 @@ impl IndexerBuilder<'_> {
|
||||
if cfg!(any(test, feature = "test")) {
|
||||
panic!(
|
||||
"Failed to create bloom filter, region_id: {}, file_id: {}, err: {:?}",
|
||||
self.metadata.region_id, self.file_id, err
|
||||
self.metadata.region_id, file_id, err
|
||||
);
|
||||
} else {
|
||||
warn!(
|
||||
err; "Failed to create bloom filter, region_id: {}, file_id: {}",
|
||||
self.metadata.region_id, self.file_id,
|
||||
self.metadata.region_id, file_id,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -490,11 +495,9 @@ mod tests {
|
||||
with_fulltext: true,
|
||||
with_skipping_bloom: true,
|
||||
});
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: OperationType::Flush,
|
||||
file_id: FileId::random(),
|
||||
file_path: "test".to_string(),
|
||||
metadata: &metadata,
|
||||
metadata,
|
||||
row_group_size: 1024,
|
||||
puffin_manager: factory.build(mock_object_store()),
|
||||
intermediate_manager: intm_manager,
|
||||
@@ -503,7 +506,7 @@ mod tests {
|
||||
fulltext_index_config: FulltextIndexConfig::default(),
|
||||
bloom_filter_index_config: BloomFilterConfig::default(),
|
||||
}
|
||||
.build()
|
||||
.build(FileId::random(), "test".to_string())
|
||||
.await;
|
||||
|
||||
assert!(indexer.inverted_indexer.is_some());
|
||||
@@ -522,11 +525,9 @@ mod tests {
|
||||
with_fulltext: true,
|
||||
with_skipping_bloom: true,
|
||||
});
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: OperationType::Flush,
|
||||
file_id: FileId::random(),
|
||||
file_path: "test".to_string(),
|
||||
metadata: &metadata,
|
||||
metadata: metadata.clone(),
|
||||
row_group_size: 1024,
|
||||
puffin_manager: factory.build(mock_object_store()),
|
||||
intermediate_manager: intm_manager.clone(),
|
||||
@@ -538,18 +539,16 @@ mod tests {
|
||||
fulltext_index_config: FulltextIndexConfig::default(),
|
||||
bloom_filter_index_config: BloomFilterConfig::default(),
|
||||
}
|
||||
.build()
|
||||
.build(FileId::random(), "test".to_string())
|
||||
.await;
|
||||
|
||||
assert!(indexer.inverted_indexer.is_none());
|
||||
assert!(indexer.fulltext_indexer.is_some());
|
||||
assert!(indexer.bloom_filter_indexer.is_some());
|
||||
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: OperationType::Compact,
|
||||
file_id: FileId::random(),
|
||||
file_path: "test".to_string(),
|
||||
metadata: &metadata,
|
||||
metadata: metadata.clone(),
|
||||
row_group_size: 1024,
|
||||
puffin_manager: factory.build(mock_object_store()),
|
||||
intermediate_manager: intm_manager.clone(),
|
||||
@@ -561,18 +560,16 @@ mod tests {
|
||||
},
|
||||
bloom_filter_index_config: BloomFilterConfig::default(),
|
||||
}
|
||||
.build()
|
||||
.build(FileId::random(), "test".to_string())
|
||||
.await;
|
||||
|
||||
assert!(indexer.inverted_indexer.is_some());
|
||||
assert!(indexer.fulltext_indexer.is_none());
|
||||
assert!(indexer.bloom_filter_indexer.is_some());
|
||||
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: OperationType::Compact,
|
||||
file_id: FileId::random(),
|
||||
file_path: "test".to_string(),
|
||||
metadata: &metadata,
|
||||
metadata,
|
||||
row_group_size: 1024,
|
||||
puffin_manager: factory.build(mock_object_store()),
|
||||
intermediate_manager: intm_manager,
|
||||
@@ -584,7 +581,7 @@ mod tests {
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
.build()
|
||||
.build(FileId::random(), "test".to_string())
|
||||
.await;
|
||||
|
||||
assert!(indexer.inverted_indexer.is_some());
|
||||
@@ -603,11 +600,9 @@ mod tests {
|
||||
with_fulltext: true,
|
||||
with_skipping_bloom: true,
|
||||
});
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: OperationType::Flush,
|
||||
file_id: FileId::random(),
|
||||
file_path: "test".to_string(),
|
||||
metadata: &metadata,
|
||||
metadata: metadata.clone(),
|
||||
row_group_size: 1024,
|
||||
puffin_manager: factory.build(mock_object_store()),
|
||||
intermediate_manager: intm_manager.clone(),
|
||||
@@ -616,7 +611,7 @@ mod tests {
|
||||
fulltext_index_config: FulltextIndexConfig::default(),
|
||||
bloom_filter_index_config: BloomFilterConfig::default(),
|
||||
}
|
||||
.build()
|
||||
.build(FileId::random(), "test".to_string())
|
||||
.await;
|
||||
|
||||
assert!(indexer.inverted_indexer.is_none());
|
||||
@@ -628,11 +623,9 @@ mod tests {
|
||||
with_fulltext: false,
|
||||
with_skipping_bloom: true,
|
||||
});
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: OperationType::Flush,
|
||||
file_id: FileId::random(),
|
||||
file_path: "test".to_string(),
|
||||
metadata: &metadata,
|
||||
metadata: metadata.clone(),
|
||||
row_group_size: 1024,
|
||||
puffin_manager: factory.build(mock_object_store()),
|
||||
intermediate_manager: intm_manager.clone(),
|
||||
@@ -641,7 +634,7 @@ mod tests {
|
||||
fulltext_index_config: FulltextIndexConfig::default(),
|
||||
bloom_filter_index_config: BloomFilterConfig::default(),
|
||||
}
|
||||
.build()
|
||||
.build(FileId::random(), "test".to_string())
|
||||
.await;
|
||||
|
||||
assert!(indexer.inverted_indexer.is_some());
|
||||
@@ -653,11 +646,9 @@ mod tests {
|
||||
with_fulltext: true,
|
||||
with_skipping_bloom: false,
|
||||
});
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: OperationType::Flush,
|
||||
file_id: FileId::random(),
|
||||
file_path: "test".to_string(),
|
||||
metadata: &metadata,
|
||||
metadata: metadata.clone(),
|
||||
row_group_size: 1024,
|
||||
puffin_manager: factory.build(mock_object_store()),
|
||||
intermediate_manager: intm_manager,
|
||||
@@ -666,7 +657,7 @@ mod tests {
|
||||
fulltext_index_config: FulltextIndexConfig::default(),
|
||||
bloom_filter_index_config: BloomFilterConfig::default(),
|
||||
}
|
||||
.build()
|
||||
.build(FileId::random(), "test".to_string())
|
||||
.await;
|
||||
|
||||
assert!(indexer.inverted_indexer.is_some());
|
||||
@@ -685,11 +676,9 @@ mod tests {
|
||||
with_fulltext: true,
|
||||
with_skipping_bloom: true,
|
||||
});
|
||||
let indexer = IndexerBuilder {
|
||||
let indexer = IndexerBuilderImpl {
|
||||
op_type: OperationType::Flush,
|
||||
file_id: FileId::random(),
|
||||
file_path: "test".to_string(),
|
||||
metadata: &metadata,
|
||||
metadata,
|
||||
row_group_size: 0,
|
||||
puffin_manager: factory.build(mock_object_store()),
|
||||
intermediate_manager: intm_manager,
|
||||
@@ -698,7 +687,7 @@ mod tests {
|
||||
fulltext_index_config: FulltextIndexConfig::default(),
|
||||
bloom_filter_index_config: BloomFilterConfig::default(),
|
||||
}
|
||||
.build()
|
||||
.build(FileId::random(), "test".to_string())
|
||||
.await;
|
||||
|
||||
assert!(indexer.inverted_indexer.is_none());
|
||||
|
||||
@@ -19,7 +19,7 @@ use std::sync::Arc;
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
|
||||
use crate::sst::file::FileTimeRange;
|
||||
use crate::sst::file::{FileId, FileTimeRange};
|
||||
use crate::sst::index::IndexOutput;
|
||||
use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
|
||||
|
||||
@@ -62,6 +62,8 @@ impl Default for WriteOptions {
|
||||
|
||||
/// Parquet SST info returned by the writer.
|
||||
pub struct SstInfo {
|
||||
/// SST file id.
|
||||
pub file_id: FileId,
|
||||
/// Time range of the SST. The timestamps have the same time unit as the
|
||||
/// data in the SST.
|
||||
pub time_range: FileTimeRange,
|
||||
@@ -95,12 +97,13 @@ mod tests {
|
||||
use tokio_util::compat::FuturesAsyncWriteCompatExt;
|
||||
|
||||
use super::*;
|
||||
use crate::access_layer::FilePathProvider;
|
||||
use crate::cache::{CacheManager, CacheStrategy, PageKey};
|
||||
use crate::sst::index::Indexer;
|
||||
use crate::sst::index::{Indexer, IndexerBuilder};
|
||||
use crate::sst::parquet::format::WriteFormat;
|
||||
use crate::sst::parquet::reader::ParquetReaderBuilder;
|
||||
use crate::sst::parquet::writer::ParquetWriter;
|
||||
use crate::sst::DEFAULT_WRITE_CONCURRENCY;
|
||||
use crate::sst::{location, DEFAULT_WRITE_CONCURRENCY};
|
||||
use crate::test_util::sst_util::{
|
||||
assert_parquet_metadata_eq, build_test_binary_test_region_metadata, new_batch_by_range,
|
||||
new_batch_with_binary, new_source, sst_file_handle, sst_region_metadata,
|
||||
@@ -109,12 +112,38 @@ mod tests {
|
||||
|
||||
const FILE_DIR: &str = "/";
|
||||
|
||||
#[derive(Clone)]
|
||||
struct FixedPathProvider {
|
||||
file_id: FileId,
|
||||
}
|
||||
|
||||
impl FilePathProvider for FixedPathProvider {
|
||||
fn build_index_file_path(&self, _file_id: FileId) -> String {
|
||||
location::index_file_path(FILE_DIR, self.file_id)
|
||||
}
|
||||
|
||||
fn build_sst_file_path(&self, _file_id: FileId) -> String {
|
||||
location::sst_file_path(FILE_DIR, self.file_id)
|
||||
}
|
||||
}
|
||||
|
||||
struct NoopIndexBuilder;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl IndexerBuilder for NoopIndexBuilder {
|
||||
async fn build(&self, _file_id: FileId, _path: String) -> Indexer {
|
||||
Indexer::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write_read() {
|
||||
let mut env = TestEnv::new();
|
||||
let object_store = env.init_object_store_manager();
|
||||
let handle = sst_file_handle(0, 1000);
|
||||
let file_path = handle.file_path(FILE_DIR);
|
||||
let file_path = FixedPathProvider {
|
||||
file_id: handle.file_id(),
|
||||
};
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let source = new_source(&[
|
||||
new_batch_by_range(&["a", "d"], 0, 60),
|
||||
@@ -126,18 +155,20 @@ mod tests {
|
||||
row_group_size: 50,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
object_store.clone(),
|
||||
metadata.clone(),
|
||||
NoopIndexBuilder,
|
||||
file_path,
|
||||
metadata,
|
||||
Indexer::default(),
|
||||
);
|
||||
)
|
||||
.await;
|
||||
|
||||
let info = writer
|
||||
.write_all(source, None, &write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
.remove(0);
|
||||
assert_eq!(200, info.num_rows);
|
||||
assert!(info.file_size > 0);
|
||||
assert_eq!(
|
||||
@@ -168,7 +199,6 @@ mod tests {
|
||||
let mut env = TestEnv::new();
|
||||
let object_store = env.init_object_store_manager();
|
||||
let handle = sst_file_handle(0, 1000);
|
||||
let file_path = handle.file_path(FILE_DIR);
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let source = new_source(&[
|
||||
new_batch_by_range(&["a", "d"], 0, 60),
|
||||
@@ -183,16 +213,19 @@ mod tests {
|
||||
// Prepare data.
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
object_store.clone(),
|
||||
file_path,
|
||||
metadata.clone(),
|
||||
Indexer::default(),
|
||||
);
|
||||
NoopIndexBuilder,
|
||||
FixedPathProvider {
|
||||
file_id: handle.file_id(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
writer
|
||||
.write_all(source, None, &write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
.remove(0);
|
||||
|
||||
// Enable page cache.
|
||||
let cache = CacheStrategy::EnableAll(Arc::new(
|
||||
@@ -236,7 +269,6 @@ mod tests {
|
||||
let mut env = crate::test_util::TestEnv::new();
|
||||
let object_store = env.init_object_store_manager();
|
||||
let handle = sst_file_handle(0, 1000);
|
||||
let file_path = handle.file_path(FILE_DIR);
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let source = new_source(&[
|
||||
new_batch_by_range(&["a", "d"], 0, 60),
|
||||
@@ -252,16 +284,19 @@ mod tests {
|
||||
// sst info contains the parquet metadata, which is converted from FileMetaData
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
object_store.clone(),
|
||||
file_path,
|
||||
metadata.clone(),
|
||||
Indexer::default(),
|
||||
);
|
||||
NoopIndexBuilder,
|
||||
FixedPathProvider {
|
||||
file_id: handle.file_id(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let sst_info = writer
|
||||
.write_all(source, None, &write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.expect("write_all should return sst info");
|
||||
.remove(0);
|
||||
let writer_metadata = sst_info.file_metadata.unwrap();
|
||||
|
||||
// read the sst file metadata
|
||||
@@ -277,7 +312,6 @@ mod tests {
|
||||
let mut env = TestEnv::new();
|
||||
let object_store = env.init_object_store_manager();
|
||||
let handle = sst_file_handle(0, 1000);
|
||||
let file_path = handle.file_path(FILE_DIR);
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let source = new_source(&[
|
||||
new_batch_by_range(&["a", "d"], 0, 60),
|
||||
@@ -292,15 +326,18 @@ mod tests {
|
||||
// Prepare data.
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
object_store.clone(),
|
||||
file_path,
|
||||
metadata.clone(),
|
||||
Indexer::default(),
|
||||
);
|
||||
NoopIndexBuilder,
|
||||
FixedPathProvider {
|
||||
file_id: handle.file_id(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
writer
|
||||
.write_all(source, None, &write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
.remove(0);
|
||||
|
||||
// Predicate
|
||||
let predicate = Some(Predicate::new(vec![Expr::BinaryExpr(BinaryExpr {
|
||||
@@ -330,7 +367,6 @@ mod tests {
|
||||
let mut env = TestEnv::new();
|
||||
let object_store = env.init_object_store_manager();
|
||||
let handle = sst_file_handle(0, 1000);
|
||||
let file_path = handle.file_path(FILE_DIR);
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let source = new_source(&[
|
||||
new_batch_by_range(&["a", "z"], 0, 0),
|
||||
@@ -345,15 +381,18 @@ mod tests {
|
||||
// Prepare data.
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
object_store.clone(),
|
||||
file_path,
|
||||
metadata.clone(),
|
||||
Indexer::default(),
|
||||
);
|
||||
NoopIndexBuilder,
|
||||
FixedPathProvider {
|
||||
file_id: handle.file_id(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
writer
|
||||
.write_all(source, None, &write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
.remove(0);
|
||||
|
||||
let builder = ParquetReaderBuilder::new(FILE_DIR.to_string(), handle.clone(), object_store);
|
||||
let mut reader = builder.build().await.unwrap();
|
||||
@@ -365,7 +404,6 @@ mod tests {
|
||||
let mut env = TestEnv::new();
|
||||
let object_store = env.init_object_store_manager();
|
||||
let handle = sst_file_handle(0, 1000);
|
||||
let file_path = handle.file_path(FILE_DIR);
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let source = new_source(&[
|
||||
new_batch_by_range(&["a", "d"], 0, 60),
|
||||
@@ -380,16 +418,19 @@ mod tests {
|
||||
// Prepare data.
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
object_store.clone(),
|
||||
file_path,
|
||||
metadata.clone(),
|
||||
Indexer::default(),
|
||||
);
|
||||
NoopIndexBuilder,
|
||||
FixedPathProvider {
|
||||
file_id: handle.file_id(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
writer
|
||||
.write_all(source, None, &write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
.remove(0);
|
||||
|
||||
// Predicate
|
||||
let predicate = Some(Predicate::new(vec![Expr::BinaryExpr(BinaryExpr {
|
||||
|
||||
@@ -28,6 +28,7 @@ use parquet::basic::{Compression, Encoding, ZstdLevel};
|
||||
use parquet::file::metadata::KeyValue;
|
||||
use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder};
|
||||
use parquet::schema::types::ColumnPath;
|
||||
use smallvec::smallvec;
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::consts::SEQUENCE_COLUMN_NAME;
|
||||
@@ -35,40 +36,48 @@ use store_api::storage::SequenceNumber;
|
||||
use tokio::io::AsyncWrite;
|
||||
use tokio_util::compat::{Compat, FuturesAsyncWriteCompatExt};
|
||||
|
||||
use crate::access_layer::{FilePathProvider, SstInfoArray};
|
||||
use crate::error::{InvalidMetadataSnafu, OpenDalSnafu, Result, WriteParquetSnafu};
|
||||
use crate::read::{Batch, Source};
|
||||
use crate::sst::index::Indexer;
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::index::{Indexer, IndexerBuilder};
|
||||
use crate::sst::parquet::format::WriteFormat;
|
||||
use crate::sst::parquet::helper::parse_parquet_metadata;
|
||||
use crate::sst::parquet::{SstInfo, WriteOptions, PARQUET_METADATA_KEY};
|
||||
use crate::sst::{DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY};
|
||||
|
||||
/// Parquet SST writer.
|
||||
pub struct ParquetWriter<F: WriterFactory> {
|
||||
pub struct ParquetWriter<F: WriterFactory, I: IndexerBuilder, P: FilePathProvider> {
|
||||
/// Path provider that creates SST and index file paths according to file id.
|
||||
path_provider: P,
|
||||
writer: Option<AsyncArrowWriter<SizeAwareWriter<F::Writer>>>,
|
||||
/// Current active file id.
|
||||
current_file: FileId,
|
||||
writer_factory: F,
|
||||
/// Region metadata of the source and the target SST.
|
||||
metadata: RegionMetadataRef,
|
||||
indexer: Indexer,
|
||||
/// Indexer builder that can create indexers for multiple files.
|
||||
indexer_builder: I,
|
||||
/// Current active indexer.
|
||||
current_indexer: Option<Indexer>,
|
||||
bytes_written: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
pub trait WriterFactory {
|
||||
type Writer: AsyncWrite + Send + Unpin;
|
||||
fn create(&mut self) -> impl Future<Output = Result<Self::Writer>>;
|
||||
fn create(&mut self, file_path: &str) -> impl Future<Output = Result<Self::Writer>>;
|
||||
}
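// Illustrative sketch, not part of the diff: a `WriterFactory` that writes into an
// in-memory buffer instead of an object store. `Vec<u8>` implements tokio's `AsyncWrite`,
// so it satisfies the `Writer` bound; the bytes are dropped after writing, which is enough
// for smoke tests that only inspect the returned `SstInfo`. `InMemoryWriterFactory` is a
// hypothetical name.
struct InMemoryWriterFactory;

impl WriterFactory for InMemoryWriterFactory {
    type Writer = Vec<u8>;

    async fn create(&mut self, _file_path: &str) -> Result<Self::Writer> {
        Ok(Vec::new())
    }
}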
|
||||
|
||||
pub struct ObjectStoreWriterFactory {
|
||||
path: String,
|
||||
object_store: ObjectStore,
|
||||
}
|
||||
|
||||
impl WriterFactory for ObjectStoreWriterFactory {
|
||||
type Writer = Compat<FuturesAsyncWriter>;
|
||||
|
||||
async fn create(&mut self) -> Result<Self::Writer> {
|
||||
async fn create(&mut self, file_path: &str) -> Result<Self::Writer> {
|
||||
self.object_store
|
||||
.writer_with(&self.path)
|
||||
.writer_with(file_path)
|
||||
.chunk(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
|
||||
.concurrent(DEFAULT_WRITE_CONCURRENCY)
|
||||
.await
|
||||
@@ -77,36 +86,73 @@ impl WriterFactory for ObjectStoreWriterFactory {
|
||||
}
|
||||
}
|
||||
|
||||
impl ParquetWriter<ObjectStoreWriterFactory> {
|
||||
pub fn new_with_object_store(
|
||||
impl<I, P> ParquetWriter<ObjectStoreWriterFactory, I, P>
|
||||
where
|
||||
P: FilePathProvider,
|
||||
I: IndexerBuilder,
|
||||
{
|
||||
pub async fn new_with_object_store(
|
||||
object_store: ObjectStore,
|
||||
path: String,
|
||||
metadata: RegionMetadataRef,
|
||||
indexer: Indexer,
|
||||
) -> ParquetWriter<ObjectStoreWriterFactory> {
|
||||
indexer_builder: I,
|
||||
path_provider: P,
|
||||
) -> ParquetWriter<ObjectStoreWriterFactory, I, P> {
|
||||
ParquetWriter::new(
|
||||
ObjectStoreWriterFactory { path, object_store },
|
||||
ObjectStoreWriterFactory { object_store },
|
||||
metadata,
|
||||
indexer,
|
||||
indexer_builder,
|
||||
path_provider,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> ParquetWriter<F>
|
||||
impl<F, I, P> ParquetWriter<F, I, P>
|
||||
where
|
||||
F: WriterFactory,
|
||||
I: IndexerBuilder,
|
||||
P: FilePathProvider,
|
||||
{
|
||||
/// Creates a new parquet SST writer.
|
||||
pub fn new(factory: F, metadata: RegionMetadataRef, indexer: Indexer) -> ParquetWriter<F> {
|
||||
pub async fn new(
|
||||
factory: F,
|
||||
metadata: RegionMetadataRef,
|
||||
indexer_builder: I,
|
||||
path_provider: P,
|
||||
) -> ParquetWriter<F, I, P> {
|
||||
let init_file = FileId::random();
|
||||
let index_file_path = path_provider.build_index_file_path(init_file);
|
||||
let indexer = indexer_builder.build(init_file, index_file_path).await;
|
||||
|
||||
ParquetWriter {
|
||||
path_provider,
|
||||
writer: None,
|
||||
current_file: init_file,
|
||||
writer_factory: factory,
|
||||
metadata,
|
||||
indexer,
|
||||
indexer_builder,
|
||||
current_indexer: Some(indexer),
|
||||
bytes_written: Arc::new(AtomicUsize::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_or_create_indexer(&mut self) -> &mut Indexer {
|
||||
match self.current_indexer {
|
||||
None => {
|
||||
self.current_file = FileId::random();
|
||||
let index_file_path = self.path_provider.build_index_file_path(self.current_file);
|
||||
let indexer = self
|
||||
.indexer_builder
|
||||
.build(self.current_file, index_file_path)
|
||||
.await;
|
||||
self.current_indexer = Some(indexer);
|
||||
// safety: self.current_indexer already set above.
|
||||
self.current_indexer.as_mut().unwrap()
|
||||
}
|
||||
Some(ref mut indexer) => indexer,
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterates source and writes all rows to Parquet file.
|
||||
///
|
||||
/// Returns the [SstInfo] if the SST is written.
|
||||
@@ -115,7 +161,7 @@ where
|
||||
mut source: Source,
|
||||
override_sequence: Option<SequenceNumber>, // override the `sequence` field from `Source`
|
||||
opts: &WriteOptions,
|
||||
) -> Result<Option<SstInfo>> {
|
||||
) -> Result<SstInfoArray> {
|
||||
let write_format =
|
||||
WriteFormat::new(self.metadata.clone()).with_override_sequence(override_sequence);
|
||||
let mut stats = SourceStats::default();
|
||||
@@ -128,24 +174,24 @@ where
|
||||
match res {
|
||||
Ok(mut batch) => {
|
||||
stats.update(&batch);
|
||||
self.indexer.update(&mut batch).await;
|
||||
self.get_or_create_indexer().await.update(&mut batch).await;
|
||||
}
|
||||
Err(e) => {
|
||||
self.indexer.abort().await;
|
||||
self.get_or_create_indexer().await.abort().await;
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let index_output = self.indexer.finish().await;
|
||||
let index_output = self.get_or_create_indexer().await.finish().await;
|
||||
|
||||
if stats.num_rows == 0 {
|
||||
return Ok(None);
|
||||
return Ok(smallvec![]);
|
||||
}
|
||||
|
||||
let Some(mut arrow_writer) = self.writer.take() else {
|
||||
// No batch actually written.
|
||||
return Ok(None);
|
||||
return Ok(smallvec![]);
|
||||
};
|
||||
|
||||
arrow_writer.flush().await.context(WriteParquetSnafu)?;
|
||||
@@ -159,15 +205,18 @@ where
|
||||
// convert FileMetaData to ParquetMetaData
|
||||
let parquet_metadata = parse_parquet_metadata(file_meta)?;
|
||||
|
||||
let file_id = self.current_file;
|
||||
|
||||
// object_store.write will make sure all bytes are written or an error is raised.
|
||||
Ok(Some(SstInfo {
|
||||
Ok(smallvec![SstInfo {
|
||||
file_id,
|
||||
time_range,
|
||||
file_size,
|
||||
num_rows: stats.num_rows,
|
||||
num_row_groups: parquet_metadata.num_row_groups() as u64,
|
||||
file_metadata: Some(Arc::new(parquet_metadata)),
|
||||
index_metadata: index_output,
|
||||
}))
|
||||
}])
|
||||
}
|
||||
|
||||
/// Customizes per-column config according to schema and maybe column cardinality.
|
||||
@@ -229,8 +278,9 @@ where
|
||||
let props_builder = Self::customize_column_config(props_builder, &self.metadata);
|
||||
let writer_props = props_builder.build();
|
||||
|
||||
let sst_file_path = self.path_provider.build_sst_file_path(self.current_file);
|
||||
let writer = SizeAwareWriter::new(
|
||||
self.writer_factory.create().await?,
|
||||
self.writer_factory.create(&sst_file_path).await?,
|
||||
self.bytes_written.clone(),
|
||||
);
|
||||
let arrow_writer =
|
||||
|
||||
@@ -104,13 +104,13 @@ pub fn new_source(batches: &[Batch]) -> Source {
|
||||
Source::Reader(Box::new(reader))
|
||||
}
|
||||
|
||||
/// Creates a new [FileHandle] for a SST.
|
||||
pub fn sst_file_handle(start_ms: i64, end_ms: i64) -> FileHandle {
|
||||
/// Creates a SST file handle with provided file id
|
||||
pub fn sst_file_handle_with_file_id(file_id: FileId, start_ms: i64, end_ms: i64) -> FileHandle {
|
||||
let file_purger = new_noop_file_purger();
|
||||
FileHandle::new(
|
||||
FileMeta {
|
||||
region_id: REGION_ID,
|
||||
file_id: FileId::random(),
|
||||
file_id,
|
||||
time_range: (
|
||||
Timestamp::new_millisecond(start_ms),
|
||||
Timestamp::new_millisecond(end_ms),
|
||||
@@ -127,6 +127,11 @@ pub fn sst_file_handle(start_ms: i64, end_ms: i64) -> FileHandle {
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a new [FileHandle] for a SST.
|
||||
pub fn sst_file_handle(start_ms: i64, end_ms: i64) -> FileHandle {
|
||||
sst_file_handle_with_file_id(FileId::random(), start_ms, end_ms)
|
||||
}
|
||||
|
||||
pub fn new_batch_by_range(tags: &[&str], start: usize, end: usize) -> Batch {
|
||||
assert!(end >= start);
|
||||
let pk = new_primary_key(tags);
|
||||
|
||||
@@ -688,11 +688,18 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
self.last_periodical_check_millis += init_check_delay.as_millis() as i64;
|
||||
|
||||
// Buffer to retrieve requests from receiver.
|
||||
let mut buffer = RequestBuffer::with_capacity(self.config.worker_request_batch_size);
|
||||
let mut write_req_buffer: Vec<SenderWriteRequest> =
|
||||
Vec::with_capacity(self.config.worker_request_batch_size);
|
||||
let mut ddl_req_buffer: Vec<SenderDdlRequest> =
|
||||
Vec::with_capacity(self.config.worker_request_batch_size);
|
||||
let mut general_req_buffer: Vec<WorkerRequest> =
|
||||
RequestBuffer::with_capacity(self.config.worker_request_batch_size);
|
||||
|
||||
while self.running.load(Ordering::Relaxed) {
|
||||
// Clear the buffer before handling next batch of requests.
|
||||
buffer.clear();
|
||||
write_req_buffer.clear();
|
||||
ddl_req_buffer.clear();
|
||||
general_req_buffer.clear();
|
||||
|
||||
let max_wait_time = self.time_provider.wait_duration(CHECK_REGION_INTERVAL);
|
||||
let sleep = tokio::time::sleep(max_wait_time);
|
||||
@@ -701,7 +708,11 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
tokio::select! {
|
||||
request_opt = self.receiver.recv() => {
|
||||
match request_opt {
|
||||
Some(request) => buffer.push(request),
|
||||
Some(request) => match request {
|
||||
WorkerRequest::Write(sender_req) => write_req_buffer.push(sender_req),
|
||||
WorkerRequest::Ddl(sender_req) => ddl_req_buffer.push(sender_req),
|
||||
_ => general_req_buffer.push(request),
|
||||
},
|
||||
// The channel is disconnected.
|
||||
None => break,
|
||||
}
|
||||
@@ -736,18 +747,29 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
}
|
||||
|
||||
// Try to recv more requests from the channel.
|
||||
for _ in 1..buffer.capacity() {
|
||||
for _ in 1..self.config.worker_request_batch_size {
|
||||
// We have received one request so we start from 1.
|
||||
match self.receiver.try_recv() {
|
||||
Ok(req) => buffer.push(req),
|
||||
Ok(req) => match req {
|
||||
WorkerRequest::Write(sender_req) => write_req_buffer.push(sender_req),
|
||||
WorkerRequest::Ddl(sender_req) => ddl_req_buffer.push(sender_req),
|
||||
_ => general_req_buffer.push(req),
|
||||
},
|
||||
// We still need to handle remaining requests.
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
|
||||
self.listener.on_recv_requests(buffer.len());
|
||||
self.listener.on_recv_requests(
|
||||
write_req_buffer.len() + ddl_req_buffer.len() + general_req_buffer.len(),
|
||||
);
|
||||
|
||||
self.handle_requests(&mut buffer).await;
|
||||
self.handle_requests(
|
||||
&mut write_req_buffer,
|
||||
&mut ddl_req_buffer,
|
||||
&mut general_req_buffer,
|
||||
)
|
||||
.await;
|
||||
|
||||
self.handle_periodical_tasks();
|
||||
}
|
||||
@@ -760,16 +782,17 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
/// Dispatches and processes requests.
|
||||
///
|
||||
/// `buffer` should be empty.
|
||||
async fn handle_requests(&mut self, buffer: &mut RequestBuffer) {
|
||||
let mut write_requests = Vec::with_capacity(buffer.len());
|
||||
let mut ddl_requests = Vec::with_capacity(buffer.len());
|
||||
for worker_req in buffer.drain(..) {
|
||||
async fn handle_requests(
|
||||
&mut self,
|
||||
write_requests: &mut Vec<SenderWriteRequest>,
|
||||
ddl_requests: &mut Vec<SenderDdlRequest>,
|
||||
general_requests: &mut Vec<WorkerRequest>,
|
||||
) {
|
||||
for worker_req in general_requests.drain(..) {
|
||||
match worker_req {
|
||||
WorkerRequest::Write(sender_req) => {
|
||||
write_requests.push(sender_req);
|
||||
}
|
||||
WorkerRequest::Ddl(sender_req) => {
|
||||
ddl_requests.push(sender_req);
|
||||
WorkerRequest::Write(_) | WorkerRequest::Ddl(_) => {
|
||||
// These requests are categorized into write_requests and ddl_requests.
|
||||
continue;
|
||||
}
|
||||
WorkerRequest::Background { region_id, notify } => {
|
||||
// For background notify, we handle it directly.
|
||||
@@ -803,12 +826,12 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
}
|
||||
|
||||
/// Takes and handles all ddl requests.
|
||||
async fn handle_ddl_requests(&mut self, ddl_requests: Vec<SenderDdlRequest>) {
|
||||
async fn handle_ddl_requests(&mut self, ddl_requests: &mut Vec<SenderDdlRequest>) {
|
||||
if ddl_requests.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
for ddl in ddl_requests {
|
||||
for ddl in ddl_requests.drain(..) {
|
||||
let res = match ddl.request {
|
||||
DdlRequest::Create(req) => self.handle_create_request(ddl.region_id, req).await,
|
||||
DdlRequest::Drop(_) => self.handle_drop_request(ddl.region_id).await,
|
||||
|
||||
@@ -32,7 +32,7 @@ use crate::region::{RegionLeaderState, RegionMapRef};
|
||||
use crate::worker::{RegionWorkerLoop, DROPPING_MARKER_FILE};
|
||||
|
||||
const GC_TASK_INTERVAL_SEC: u64 = 5 * 60; // 5 minutes
|
||||
const MAX_RETRY_TIMES: u64 = 288; // 24 hours (5m * 288)
|
||||
const MAX_RETRY_TIMES: u64 = 12; // 1 hour (5m * 12)
|
||||
|
||||
impl<S> RegionWorkerLoop<S>
|
||||
where
|
||||
@@ -118,12 +118,16 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Background GC task to remove the entire region path once it find there is no
|
||||
/// parquet file left. Returns whether the path is removed.
|
||||
/// Background GC task to remove the entire region path once one of the following
/// conditions is true:
/// - There is no parquet file left.
/// - The `gc_duration` has elapsed.
|
||||
///
|
||||
/// This task will keep running until finished. Any resource captured by it will
|
||||
/// not be released before then. Be sure to only pass weak reference if something
|
||||
/// is depended on ref-count mechanism.
|
||||
/// Returns whether the path is removed.
|
||||
///
|
||||
/// This task retries on failure and keeps running until it finishes. Any resource
/// captured by it will not be released before then, so pass only a weak reference
/// if something relies on the ref-count mechanism.
|
||||
async fn later_drop_task(
|
||||
region_id: RegionId,
|
||||
region_path: String,
|
||||
@@ -131,9 +135,9 @@ async fn later_drop_task(
|
||||
dropping_regions: RegionMapRef,
|
||||
gc_duration: Duration,
|
||||
) -> bool {
|
||||
let mut force = false;
|
||||
for _ in 0..MAX_RETRY_TIMES {
|
||||
sleep(gc_duration).await;
|
||||
let result = remove_region_dir_once(®ion_path, &object_store).await;
|
||||
let result = remove_region_dir_once(®ion_path, &object_store, force).await;
|
||||
match result {
|
||||
Err(err) => {
|
||||
warn!(
|
||||
@@ -143,11 +147,14 @@ async fn later_drop_task(
|
||||
}
|
||||
Ok(true) => {
|
||||
dropping_regions.remove_region(region_id);
|
||||
info!("Region {} is dropped", region_path);
|
||||
info!("Region {} is dropped, force: {}", region_path, force);
|
||||
return true;
|
||||
}
|
||||
Ok(false) => (),
|
||||
}
|
||||
sleep(gc_duration).await;
|
||||
// Force recycle after gc duration.
|
||||
force = true;
|
||||
}
|
||||
|
||||
warn!(
|
||||
@@ -160,9 +167,11 @@ async fn later_drop_task(
|
||||
|
||||
// TODO(ruihang): place the marker in a separate dir
|
||||
/// Removes region dir if there is no parquet files, returns whether the directory is removed.
|
||||
/// If `force = true`, always removes the dir.
|
||||
pub(crate) async fn remove_region_dir_once(
|
||||
region_path: &str,
|
||||
object_store: &ObjectStore,
|
||||
force: bool,
|
||||
) -> Result<bool> {
|
||||
// list all files under the given region path to check if there are un-deleted parquet files
|
||||
let mut has_parquet_file = false;
|
||||
@@ -173,7 +182,8 @@ pub(crate) async fn remove_region_dir_once(
|
||||
.await
|
||||
.context(OpenDalSnafu)?;
|
||||
while let Some(file) = files.try_next().await.context(OpenDalSnafu)? {
|
||||
if file.path().ends_with(".parquet") {
|
||||
if !force && file.path().ends_with(".parquet") {
|
||||
// If not in force mode, we only remove the region dir if there is no parquet file
|
||||
has_parquet_file = true;
|
||||
break;
|
||||
} else if !file.path().ends_with(DROPPING_MARKER_FILE) {
|
||||
|
||||
@@ -230,13 +230,13 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
request.on_success();
|
||||
|
||||
// Handle pending requests for the region.
|
||||
if let Some((ddl_requests, write_requests)) =
|
||||
if let Some((mut ddl_requests, mut write_requests)) =
|
||||
self.flush_scheduler.on_flush_success(region_id)
|
||||
{
|
||||
// Perform DDLs first because they require empty memtables.
|
||||
self.handle_ddl_requests(ddl_requests).await;
|
||||
self.handle_ddl_requests(&mut ddl_requests).await;
|
||||
// Handle pending write requests, we don't stall these requests.
|
||||
self.handle_write_requests(write_requests, false).await;
|
||||
self.handle_write_requests(&mut write_requests, false).await;
|
||||
}
|
||||
|
||||
// Handle stalled requests.
|
||||
|
||||
@@ -55,7 +55,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
.await
|
||||
.context(OpenDalSnafu)?
|
||||
{
|
||||
let result = remove_region_dir_once(&request.region_dir, object_store).await;
|
||||
let result = remove_region_dir_once(&request.region_dir, object_store, true).await;
|
||||
info!(
|
||||
"Region {} is dropped, worker: {}, result: {:?}",
|
||||
region_id, self.id, result
|
||||
|
||||
@@ -18,11 +18,10 @@ use std::collections::{hash_map, HashMap};
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::OpType;
|
||||
use common_telemetry::debug;
|
||||
use common_telemetry::{debug, error};
|
||||
use snafu::ensure;
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::metadata::RegionMetadata;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::{InvalidRequestSnafu, RegionLeaderStateSnafu, RejectWriteSnafu, Result};
|
||||
@@ -36,7 +35,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
/// Takes and handles all write requests.
|
||||
pub(crate) async fn handle_write_requests(
|
||||
&mut self,
|
||||
mut write_requests: Vec<SenderWriteRequest>,
|
||||
write_requests: &mut Vec<SenderWriteRequest>,
|
||||
allow_stall: bool,
|
||||
) {
|
||||
if write_requests.is_empty() {
|
||||
@@ -56,7 +55,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
|
||||
if self.write_buffer_manager.should_stall() && allow_stall {
|
||||
self.stalled_count.add(write_requests.len() as i64);
|
||||
self.stalled_requests.append(&mut write_requests);
|
||||
self.stalled_requests.append(write_requests);
|
||||
self.listener.on_write_stall();
|
||||
return;
|
||||
}
|
||||
@@ -105,10 +104,35 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
let _timer = WRITE_STAGE_ELAPSED
|
||||
.with_label_values(&["write_memtable"])
|
||||
.start_timer();
|
||||
for mut region_ctx in region_ctxs.into_values() {
|
||||
region_ctx.write_memtable();
|
||||
if region_ctxs.len() == 1 {
|
||||
// fast path for single region.
|
||||
let mut region_ctx = region_ctxs.into_values().next().unwrap();
|
||||
region_ctx.write_memtable().await;
|
||||
put_rows += region_ctx.put_num;
|
||||
delete_rows += region_ctx.delete_num;
|
||||
} else {
|
||||
let region_write_task = region_ctxs
|
||||
.into_values()
|
||||
.map(|mut region_ctx| {
|
||||
// use tokio runtime to schedule tasks.
|
||||
common_runtime::spawn_global(async move {
|
||||
region_ctx.write_memtable().await;
|
||||
(region_ctx.put_num, region_ctx.delete_num)
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for result in futures::future::join_all(region_write_task).await {
|
||||
match result {
|
||||
Ok((put, delete)) => {
|
||||
put_rows += put;
|
||||
delete_rows += delete;
|
||||
}
|
||||
Err(e) => {
|
||||
error!(e; "unexpected error when joining region write tasks");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
WRITE_ROWS_TOTAL
|
||||
@@ -125,8 +149,8 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
let stalled = std::mem::take(&mut self.stalled_requests);
|
||||
self.stalled_count.sub(stalled.requests.len() as i64);
|
||||
// We already stalled these requests, don't stall them again.
|
||||
for (_, (_, requests)) in stalled.requests {
|
||||
self.handle_write_requests(requests, false).await;
|
||||
for (_, (_, mut requests)) in stalled.requests {
|
||||
self.handle_write_requests(&mut requests, false).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,25 +158,25 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
pub(crate) fn reject_stalled_requests(&mut self) {
|
||||
let stalled = std::mem::take(&mut self.stalled_requests);
|
||||
self.stalled_count.sub(stalled.requests.len() as i64);
|
||||
for (_, (_, requests)) in stalled.requests {
|
||||
reject_write_requests(requests);
|
||||
for (_, (_, mut requests)) in stalled.requests {
|
||||
reject_write_requests(&mut requests);
|
||||
}
|
||||
}
|
||||
|
||||
/// Rejects a specific region's stalled requests.
|
||||
pub(crate) fn reject_region_stalled_requests(&mut self, region_id: &RegionId) {
|
||||
debug!("Rejects stalled requests for region {}", region_id);
|
||||
let requests = self.stalled_requests.remove(region_id);
|
||||
let mut requests = self.stalled_requests.remove(region_id);
|
||||
self.stalled_count.sub(requests.len() as i64);
|
||||
reject_write_requests(requests);
|
||||
reject_write_requests(&mut requests);
|
||||
}
|
||||
|
||||
/// Handles a specific region's stalled requests.
|
||||
pub(crate) async fn handle_region_stalled_requests(&mut self, region_id: &RegionId) {
|
||||
debug!("Handles stalled requests for region {}", region_id);
|
||||
let requests = self.stalled_requests.remove(region_id);
|
||||
let mut requests = self.stalled_requests.remove(region_id);
|
||||
self.stalled_count.sub(requests.len() as i64);
|
||||
self.handle_write_requests(requests, true).await;
|
||||
self.handle_write_requests(&mut requests, true).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,11 +184,11 @@ impl<S> RegionWorkerLoop<S> {
|
||||
/// Validates and groups requests by region.
|
||||
fn prepare_region_write_ctx(
|
||||
&mut self,
|
||||
write_requests: Vec<SenderWriteRequest>,
|
||||
write_requests: &mut Vec<SenderWriteRequest>,
|
||||
) -> HashMap<RegionId, RegionWriteCtx> {
|
||||
// Initialize region write context map.
|
||||
let mut region_ctxs = HashMap::new();
|
||||
for mut sender_req in write_requests {
|
||||
for mut sender_req in write_requests.drain(..) {
|
||||
let region_id = sender_req.request.region_id;
|
||||
|
||||
// If region is waiting for alteration, add requests to pending writes.
|
||||
@@ -232,13 +256,21 @@ impl<S> RegionWorkerLoop<S> {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the primary key is dense, we need to fill missing columns.
|
||||
if sender_req.request.primary_key_encoding() == PrimaryKeyEncoding::Dense {
|
||||
// Checks whether request schema is compatible with region schema.
|
||||
if let Err(e) = maybe_fill_missing_columns(
|
||||
&mut sender_req.request,
|
||||
®ion_ctx.version().metadata,
|
||||
) {
|
||||
// Double check the request schema
|
||||
let need_fill_missing_columns =
|
||||
if let Some(ref region_metadata) = sender_req.request.region_metadata {
|
||||
region_ctx.version().metadata.schema_version != region_metadata.schema_version
|
||||
} else {
|
||||
true
|
||||
};
|
||||
// Only fill missing columns if primary key is dense encoded.
|
||||
if need_fill_missing_columns
|
||||
&& sender_req.request.primary_key_encoding() == PrimaryKeyEncoding::Dense
|
||||
{
|
||||
if let Err(e) = sender_req
|
||||
.request
|
||||
.maybe_fill_missing_columns(®ion_ctx.version().metadata)
|
||||
{
|
||||
sender_req.sender.send(Err(e));
|
||||
|
||||
continue;
|
||||
@@ -266,10 +298,10 @@ impl<S> RegionWorkerLoop<S> {
|
||||
}
|
||||
|
||||
/// Send rejected error to all `write_requests`.
|
||||
fn reject_write_requests(write_requests: Vec<SenderWriteRequest>) {
|
||||
fn reject_write_requests(write_requests: &mut Vec<SenderWriteRequest>) {
|
||||
WRITE_REJECT_TOTAL.inc_by(write_requests.len() as u64);
|
||||
|
||||
for req in write_requests {
|
||||
for req in write_requests.drain(..) {
|
||||
req.sender.send(
|
||||
RejectWriteSnafu {
|
||||
region_id: req.request.region_id,
|
||||
@@ -279,22 +311,6 @@ fn reject_write_requests(write_requests: Vec<SenderWriteRequest>) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks the schema and fill missing columns.
|
||||
fn maybe_fill_missing_columns(request: &mut WriteRequest, metadata: &RegionMetadata) -> Result<()> {
|
||||
if let Err(e) = request.check_schema(metadata) {
|
||||
if e.is_fill_default() {
|
||||
// TODO(yingwen): Add metrics for this case.
|
||||
// We need to fill default value. The write request may be a request
|
||||
// sent before changing the schema.
|
||||
request.fill_missing_columns(metadata)?;
|
||||
} else {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Rejects delete request under append mode.
|
||||
fn check_op_type(append_mode: bool, request: &WriteRequest) -> Result<()> {
|
||||
if append_mode {
|
||||
|
||||
@@ -704,6 +704,13 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid flow name: {name}"))]
|
||||
InvalidFlowName {
|
||||
name: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Empty {} expr", name))]
|
||||
EmptyDdlExpr {
|
||||
name: String,
|
||||
@@ -821,6 +828,7 @@ impl ErrorExt for Error {
|
||||
| Error::UnsupportedRegionRequest { .. }
|
||||
| Error::InvalidTableName { .. }
|
||||
| Error::InvalidViewName { .. }
|
||||
| Error::InvalidFlowName { .. }
|
||||
| Error::InvalidView { .. }
|
||||
| Error::InvalidExpr { .. }
|
||||
| Error::AdminFunctionNotFound { .. }
|
||||
|
||||
@@ -38,7 +38,7 @@ use query::sql::{
|
||||
use session::context::QueryContextRef;
|
||||
use session::table_name::table_idents_to_full_name;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::ast::ColumnOption;
|
||||
use sql::ast::{ColumnOption, ObjectName};
|
||||
use sql::statements::alter::{
|
||||
AlterDatabase, AlterDatabaseOperation, AlterTable, AlterTableOperation,
|
||||
};
|
||||
@@ -55,8 +55,9 @@ use table::table_reference::TableReference;
|
||||
use crate::error::{
|
||||
BuildCreateExprOnInsertionSnafu, ColumnDataTypeSnafu, ConvertColumnDefaultConstraintSnafu,
|
||||
ConvertIdentifierSnafu, EncodeJsonSnafu, ExternalSnafu, IllegalPrimaryKeysDefSnafu,
|
||||
InferFileTableSchemaSnafu, InvalidSqlSnafu, NotSupportedSnafu, ParseSqlSnafu,
|
||||
PrepareFileTableSnafu, Result, SchemaIncompatibleSnafu, UnrecognizedTableOptionSnafu,
|
||||
InferFileTableSchemaSnafu, InvalidFlowNameSnafu, InvalidSqlSnafu, NotSupportedSnafu,
|
||||
ParseSqlSnafu, PrepareFileTableSnafu, Result, SchemaIncompatibleSnafu,
|
||||
UnrecognizedTableOptionSnafu,
|
||||
};
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
@@ -731,7 +732,7 @@ pub fn to_create_flow_task_expr(
|
||||
|
||||
Ok(CreateFlowExpr {
|
||||
catalog_name: query_ctx.current_catalog().to_string(),
|
||||
flow_name: create_flow.flow_name.to_string(),
|
||||
flow_name: sanitize_flow_name(create_flow.flow_name)?,
|
||||
source_table_names,
|
||||
sink_table_name: Some(sink_table_name),
|
||||
or_replace: create_flow.or_replace,
|
||||
@@ -743,6 +744,18 @@ pub fn to_create_flow_task_expr(
|
||||
})
|
/// Sanitizes the flow name, removing possible quotes.
fn sanitize_flow_name(mut flow_name: ObjectName) -> Result<String> {
    ensure!(
        flow_name.0.len() == 1,
        InvalidFlowNameSnafu {
            name: flow_name.to_string(),
        }
    );
    // safety: we've checked flow_name.0 has exactly one element.
    Ok(flow_name.0.swap_remove(0).value)
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use api::v1::{SetDatabaseOptions, UnsetDatabaseOptions};
|
||||
@@ -755,6 +768,62 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_create_flow_expr() {
|
||||
let sql = r"
|
||||
CREATE FLOW `task_2`
|
||||
SINK TO schema_1.table_1
|
||||
AS
|
||||
SELECT max(c1), min(c2) FROM schema_2.table_2;";
|
||||
let stmt =
|
||||
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
|
||||
.unwrap()
|
||||
.pop()
|
||||
.unwrap();
|
||||
|
||||
let Statement::CreateFlow(create_flow) = stmt else {
|
||||
unreachable!()
|
||||
};
|
||||
let expr = to_create_flow_task_expr(create_flow, &QueryContext::arc()).unwrap();
|
||||
|
||||
let to_dot_sep =
|
||||
|c: TableName| format!("{}.{}.{}", c.catalog_name, c.schema_name, c.table_name);
|
||||
assert_eq!("task_2", expr.flow_name);
|
||||
assert_eq!("greptime", expr.catalog_name);
|
||||
assert_eq!(
|
||||
"greptime.schema_1.table_1",
|
||||
expr.sink_table_name.map(to_dot_sep).unwrap()
|
||||
);
|
||||
assert_eq!(1, expr.source_table_names.len());
|
||||
assert_eq!(
|
||||
"greptime.schema_2.table_2",
|
||||
to_dot_sep(expr.source_table_names[0].clone())
|
||||
);
|
||||
assert_eq!("SELECT max(c1), min(c2) FROM schema_2.table_2", expr.sql);
|
||||
|
||||
let sql = r"
|
||||
CREATE FLOW abc.`task_2`
|
||||
SINK TO schema_1.table_1
|
||||
AS
|
||||
SELECT max(c1), min(c2) FROM schema_2.table_2;";
|
||||
let stmt =
|
||||
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
|
||||
.unwrap()
|
||||
.pop()
|
||||
.unwrap();
|
||||
|
||||
let Statement::CreateFlow(create_flow) = stmt else {
|
||||
unreachable!()
|
||||
};
|
||||
let res = to_create_flow_task_expr(create_flow, &QueryContext::arc());
|
||||
|
||||
assert!(res.is_err());
|
||||
assert!(res
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("Invalid flow name: abc.`task_2`"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_to_expr() {
|
||||
let sql = "CREATE TABLE monitor (host STRING,ts TIMESTAMP,TIME INDEX (ts),PRIMARY KEY(host)) ENGINE=mito WITH(ttl='3days', write_buffer_size='1024KB');";
|
||||
|
||||
@@ -13,21 +13,22 @@
// limitations under the License.

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pipeline::{parse, Content, GreptimeTransformer, Pipeline, Result};
use pipeline::{json_to_intermediate_state, parse, Content, GreptimeTransformer, Pipeline, Result};
use serde_json::{Deserializer, Value};

fn processor_mut(
pipeline: &Pipeline<GreptimeTransformer>,
input_values: Vec<Value>,
) -> Result<Vec<greptime_proto::v1::Row>> {
let mut payload = pipeline.init_intermediate_state();
let mut result = Vec::with_capacity(input_values.len());

for v in input_values {
pipeline.prepare(v, &mut payload)?;
let r = pipeline.exec_mut(&mut payload)?;
let mut payload = json_to_intermediate_state(v).unwrap();
let r = pipeline
.exec_mut(&mut payload)?
.into_transformed()
.expect("expect transformed result ");
result.push(r);
pipeline.reset_intermediate_state(&mut payload);
}

Ok(result)
@@ -12,18 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::BTreeMap;

use common_telemetry::debug;
use snafu::OptionExt;
use yaml_rust::Yaml;

use crate::etl::error::{Error, Result};
use crate::etl_error::{
FieldRequiredForDispatcherSnafu, TablePartRequiredForDispatcherRuleSnafu,
use crate::etl::error::{
Error, FieldRequiredForDispatcherSnafu, Result, TableSuffixRequiredForDispatcherRuleSnafu,
ValueRequiredForDispatcherRuleSnafu,
};
use crate::Value;

const FIELD: &str = "field";
const TABLE_PARTIAL: &str = "table_part";
const TABLE_SUFFIX: &str = "table_suffix";
const PIPELINE: &str = "pipeline";
const VALUE: &str = "value";
const RULES: &str = "rules";
@@ -39,10 +41,10 @@ const RULES: &str = "rules";
/// rules:
/// - value: http
/// pipeline: http_pipeline
/// table_part: http_log
/// table_suffix: http_log
/// - value: db
/// pipeline: db_pipeline
/// table_part: db_log
/// table_suffix: db_log
/// ```
///
/// If none of the rules match the value, this pipeline will continue to process
@@ -58,12 +60,12 @@ pub(crate) struct Dispatcher {
/// - `value`: for pattern matching
/// - `pipeline`: the pipeline to call, if it's unspecified, we use default
/// `greptime_identity`
/// - `table_part`: the table name segment that we use to construct full table
/// - `table_suffix`: the table name segment that we use to construct full table
/// name
#[derive(Debug, PartialEq)]
pub(crate) struct Rule {
pub value: Value,
pub table_part: String,
pub table_suffix: String,
pub pipeline: Option<String>,
}

@@ -80,10 +82,11 @@ impl TryFrom<&Yaml> for Dispatcher {
rules
.iter()
.map(|rule| {
let table_part = rule[TABLE_PARTIAL]
let table_part = rule[TABLE_SUFFIX]
.as_str()
.map(|s| s.to_string())
.context(TablePartRequiredForDispatcherRuleSnafu)?;
.context(TableSuffixRequiredForDispatcherRuleSnafu)?;

let pipeline = rule[PIPELINE].as_str().map(|s| s.to_string());

if rule[VALUE].is_badvalue() {
@@ -93,7 +96,7 @@ impl TryFrom<&Yaml> for Dispatcher {

Ok(Rule {
value,
table_part,
table_suffix: table_part,
pipeline,
})
})
@@ -105,3 +108,21 @@ impl TryFrom<&Yaml> for Dispatcher {
Ok(Dispatcher { field, rules })
}
}

impl Dispatcher {
/// execute dispatcher and returns matched rule if any
pub(crate) fn exec(&self, data: &BTreeMap<String, Value>) -> Option<&Rule> {
if let Some(value) = data.get(&self.field) {
for rule in &self.rules {
if rule.value == *value {
return Some(rule);
}
}

None
} else {
debug!("field {} not found in keys {:?}", &self.field, data.keys());
None
}
}
}
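A minimal sketch of how the new `Dispatcher::exec` routing behaves, assuming a dispatcher already parsed from YAML like the doc comment above (the field and values here are illustrative, not taken from the diff):

// Hypothetical setup: the dispatcher was built with `field: typename` and a rule
// `value: http` / `table_suffix: http_log`.
let mut data = BTreeMap::new();
data.insert("typename".to_string(), Value::String("http".to_string()));

// `exec` returns the first rule whose `value` equals the field's value;
// a missing field or an unmatched value yields `None` and the default transform path.
match dispatcher.exec(&data) {
    Some(rule) => assert_eq!(rule.table_suffix, "http_log"),
    None => unreachable!("the http rule should match"),
}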
@@ -20,18 +20,22 @@ pub mod processor;
pub mod transform;
pub mod value;

use ahash::HashSet;
use common_telemetry::debug;
use error::{IntermediateKeyIndexSnafu, PrepareValueMustBeObjectSnafu, YamlLoadSnafu};
use std::collections::BTreeMap;
use std::sync::Arc;

use error::{
IntermediateKeyIndexSnafu, PrepareValueMustBeObjectSnafu, YamlLoadSnafu, YamlParseSnafu,
};
use itertools::Itertools;
use processor::{Processor, ProcessorBuilder, Processors};
use snafu::{OptionExt, ResultExt};
use transform::{TransformBuilders, Transformer, Transforms};
use processor::{IntermediateStatus, Processor, Processors};
use snafu::{ensure, OptionExt, ResultExt};
use transform::{Transformer, Transforms};
use value::Value;
use yaml_rust::YamlLoader;

use crate::dispatcher::Dispatcher;
use crate::dispatcher::{Dispatcher, Rule};
use crate::etl::error::Result;
use crate::{GreptimeTransformer, PipelineVersion};

const DESCRIPTION: &str = "description";
const PROCESSORS: &str = "processors";
@@ -52,103 +56,23 @@ where
Content::Yaml(str) => {
let docs = YamlLoader::load_from_str(str).context(YamlLoadSnafu)?;

ensure!(docs.len() == 1, YamlParseSnafu);

let doc = &docs[0];

let description = doc[DESCRIPTION].as_str().map(|s| s.to_string());

let processor_builder_list = if let Some(v) = doc[PROCESSORS].as_vec() {
let processors = if let Some(v) = doc[PROCESSORS].as_vec() {
v.try_into()?
} else {
processor::ProcessorBuilderList::default()
Processors::default()
};

let transform_builders =
if let Some(v) = doc[TRANSFORMS].as_vec().or(doc[TRANSFORM].as_vec()) {
v.try_into()?
} else {
TransformBuilders::default()
};

let processors_required_keys = &processor_builder_list.input_keys;
let processors_output_keys = &processor_builder_list.output_keys;
let processors_required_original_keys = &processor_builder_list.original_input_keys;

debug!(
"processors_required_original_keys: {:?}",
processors_required_original_keys
);
debug!("processors_required_keys: {:?}", processors_required_keys);
debug!("processors_output_keys: {:?}", processors_output_keys);

let transforms_required_keys = &transform_builders.required_keys;
let mut tr_keys = Vec::with_capacity(50);
for key in transforms_required_keys.iter() {
if !processors_output_keys.contains(key)
&& !processors_required_original_keys.contains(key)
{
tr_keys.push(key.clone());
}
}

let mut required_keys = processors_required_original_keys.clone();

required_keys.append(&mut tr_keys);
required_keys.sort();

debug!("required_keys: {:?}", required_keys);

// intermediate keys are the keys that all processor and transformer required
let ordered_intermediate_keys: Vec<String> = [
processors_required_keys,
transforms_required_keys,
processors_output_keys,
]
.iter()
.flat_map(|l| l.iter())
.collect::<HashSet<&String>>()
.into_iter()
.sorted()
.cloned()
.collect_vec();

let mut final_intermediate_keys = Vec::with_capacity(ordered_intermediate_keys.len());
let mut intermediate_keys_exclude_original =
Vec::with_capacity(ordered_intermediate_keys.len());

for key_name in ordered_intermediate_keys.iter() {
if required_keys.contains(key_name) {
final_intermediate_keys.push(key_name.clone());
} else {
intermediate_keys_exclude_original.push(key_name.clone());
}
}

final_intermediate_keys.extend(intermediate_keys_exclude_original);

let output_keys = transform_builders.output_keys.clone();

let processors_kind_list = processor_builder_list
.processor_builders
.into_iter()
.map(|builder| builder.build(&final_intermediate_keys))
.collect::<Result<Vec<_>>>()?;
let processors = Processors {
processors: processors_kind_list,
required_keys: processors_required_keys.clone(),
output_keys: processors_output_keys.clone(),
required_original_keys: processors_required_original_keys.clone(),
};

let transfor_list = transform_builders
.builders
.into_iter()
.map(|builder| builder.build(&final_intermediate_keys, &output_keys))
.collect::<Result<Vec<_>>>()?;

let transformers = Transforms {
transforms: transfor_list,
required_keys: transforms_required_keys.clone(),
output_keys: output_keys.clone(),
let transformers = if let Some(v) = doc[TRANSFORMS].as_vec().or(doc[TRANSFORM].as_vec())
{
v.try_into()?
} else {
Transforms::default()
};

let transformer = T::new(transformers)?;
@@ -164,9 +88,6 @@ where
processors,
transformer,
dispatcher,
required_keys,
output_keys,
intermediate_keys: final_intermediate_keys,
})
}
Content::Json(_) => unimplemented!(),
@@ -182,97 +103,98 @@ where
processors: processor::Processors,
dispatcher: Option<Dispatcher>,
transformer: T,
/// required keys for the preprocessing from map data from user
/// include all processor required and transformer required keys
required_keys: Vec<String>,
/// all output keys from the transformer
output_keys: Vec<String>,
/// intermediate keys from the processors
intermediate_keys: Vec<String>,
// pub on_failure: processor::Processors,
}

/// Where the pipeline executed is dispatched to, with context information
#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct DispatchedTo {
pub table_suffix: String,
pub pipeline: Option<String>,
}

impl From<&Rule> for DispatchedTo {
fn from(value: &Rule) -> Self {
DispatchedTo {
table_suffix: value.table_suffix.clone(),
pipeline: value.pipeline.clone(),
}
}
}

impl DispatchedTo {
/// Generate destination table name from input
pub fn dispatched_to_table_name(&self, original: &str) -> String {
format!("{}_{}", &original, self.table_suffix)
}
}

/// The result of pipeline execution
#[derive(Debug)]
pub enum PipelineExecOutput<O> {
Transformed(O),
DispatchedTo(DispatchedTo),
}

impl<O> PipelineExecOutput<O> {
pub fn into_transformed(self) -> Option<O> {
if let Self::Transformed(o) = self {
Some(o)
} else {
None
}
}

pub fn into_dispatched(self) -> Option<DispatchedTo> {
if let Self::DispatchedTo(d) = self {
Some(d)
} else {
None
}
}
}

pub fn json_to_intermediate_state(val: serde_json::Value) -> Result<IntermediateStatus> {
match val {
serde_json::Value::Object(map) => {
let mut intermediate_state = BTreeMap::new();
for (k, v) in map {
intermediate_state.insert(k, Value::try_from(v)?);
}
Ok(intermediate_state)
}
_ => PrepareValueMustBeObjectSnafu.fail(),
}
}

pub fn json_array_to_intermediate_state(
val: Vec<serde_json::Value>,
) -> Result<Vec<IntermediateStatus>> {
val.into_iter().map(json_to_intermediate_state).collect()
}
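A brief, hedged sketch of how a caller can consume the new `PipelineExecOutput`, as exercised by the benchmark above; names such as `pipeline`, `payload`, `rows`, `input_json`, and the base table "app_logs" are illustrative assumptions, not part of the diff:

let mut payload = json_to_intermediate_state(input_json)?;
match pipeline.exec_mut(&mut payload)? {
    // the usual path: the transformer produced a row for the target table
    PipelineExecOutput::Transformed(row) => rows.push(row),
    // a dispatcher rule matched: re-route to another pipeline and a suffixed table
    PipelineExecOutput::DispatchedTo(to) => {
        let table = to.dispatched_to_table_name("app_logs"); // e.g. "app_logs_http_log"
        // look up `to.pipeline` (falling back to `greptime_identity` when unset)
        // and run it against `table`
    }
}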
impl<T> Pipeline<T>
|
||||
where
|
||||
T: Transformer,
|
||||
{
|
||||
pub fn exec_mut(&self, val: &mut Vec<Value>) -> Result<T::VecOutput> {
|
||||
pub fn exec_mut(
|
||||
&self,
|
||||
val: &mut BTreeMap<String, Value>,
|
||||
) -> Result<PipelineExecOutput<T::VecOutput>> {
|
||||
for processor in self.processors.iter() {
|
||||
processor.exec_mut(val)?;
|
||||
}
|
||||
|
||||
self.transformer.transform_mut(val)
|
||||
}
|
||||
let matched_rule = self
|
||||
.dispatcher
|
||||
.as_ref()
|
||||
.and_then(|dispatcher| dispatcher.exec(val));
|
||||
|
||||
pub fn prepare_pipeline_value(&self, val: Value, result: &mut [Value]) -> Result<()> {
|
||||
match val {
|
||||
Value::Map(map) => {
|
||||
let mut search_from = 0;
|
||||
// because of the key in the json map is ordered
|
||||
for (payload_key, payload_value) in map.values.into_iter() {
|
||||
if search_from >= self.required_keys.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
// because of map key is ordered, required_keys is ordered too
|
||||
if let Some(pos) = self.required_keys[search_from..]
|
||||
.iter()
|
||||
.position(|k| k == &payload_key)
|
||||
{
|
||||
result[search_from + pos] = payload_value;
|
||||
// next search from is always after the current key
|
||||
search_from += pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
Value::String(_) => {
|
||||
result[0] = val;
|
||||
}
|
||||
_ => {
|
||||
return PrepareValueMustBeObjectSnafu.fail();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn prepare(&self, val: serde_json::Value, result: &mut [Value]) -> Result<()> {
|
||||
match val {
|
||||
serde_json::Value::Object(map) => {
|
||||
let mut search_from = 0;
|
||||
// because of the key in the json map is ordered
|
||||
for (payload_key, payload_value) in map.into_iter() {
|
||||
if search_from >= self.required_keys.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
// because of map key is ordered, required_keys is ordered too
|
||||
if let Some(pos) = self.required_keys[search_from..]
|
||||
.iter()
|
||||
.position(|k| k == &payload_key)
|
||||
{
|
||||
result[search_from + pos] = payload_value.try_into()?;
|
||||
// next search from is always after the current key
|
||||
search_from += pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
serde_json::Value::String(_) => {
|
||||
result[0] = val.try_into()?;
|
||||
}
|
||||
_ => {
|
||||
return PrepareValueMustBeObjectSnafu.fail();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn init_intermediate_state(&self) -> Vec<Value> {
|
||||
vec![Value::Null; self.intermediate_keys.len()]
|
||||
}
|
||||
|
||||
pub fn reset_intermediate_state(&self, result: &mut [Value]) {
|
||||
for i in result {
|
||||
*i = Value::Null;
|
||||
match matched_rule {
|
||||
None => self
|
||||
.transformer
|
||||
.transform_mut(val)
|
||||
.map(PipelineExecOutput::Transformed),
|
||||
Some(rule) => Ok(PipelineExecOutput::DispatchedTo(rule.into())),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -284,21 +206,6 @@ where
|
||||
&self.transformer
|
||||
}
|
||||
|
||||
/// Required fields in user-supplied data
|
||||
pub fn required_keys(&self) -> &Vec<String> {
|
||||
&self.required_keys
|
||||
}
|
||||
|
||||
/// All output keys from the pipeline
|
||||
pub fn output_keys(&self) -> &Vec<String> {
|
||||
&self.output_keys
|
||||
}
|
||||
|
||||
/// intermediate keys from the processors
|
||||
pub fn intermediate_keys(&self) -> &Vec<String> {
|
||||
&self.intermediate_keys
|
||||
}
|
||||
|
||||
pub fn schemas(&self) -> &Vec<greptime_proto::v1::ColumnSchema> {
|
||||
self.transformer.schemas()
|
||||
}
|
||||
@@ -312,6 +219,7 @@ pub(crate) fn find_key_index(intermediate_keys: &[String], key: &str, kind: &str
|
||||
}
|
||||
|
||||
/// SelectInfo is used to store the selected keys from OpenTelemetry record attrs
|
||||
/// The key is used to uplift value from the attributes and serve as column name in the table
|
||||
#[derive(Default)]
|
||||
pub struct SelectInfo {
|
||||
pub keys: Vec<String>,
|
||||
@@ -336,9 +244,29 @@ impl SelectInfo {
|
||||
}
|
||||
}
|
||||
|
||||
pub const GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME: &str = "greptime_identity";
|
||||
|
||||
/// Enum for holding information of a pipeline, which is either pipeline itself,
|
||||
/// or information that be used to retrieve a pipeline from `PipelineHandler`
|
||||
pub enum PipelineDefinition {
|
||||
Resolved(Arc<Pipeline<GreptimeTransformer>>),
|
||||
ByNameAndValue((String, PipelineVersion)),
|
||||
GreptimeIdentityPipeline,
|
||||
}
|
||||
|
||||
impl PipelineDefinition {
|
||||
pub fn from_name(name: &str, version: PipelineVersion) -> Self {
|
||||
if name == GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME {
|
||||
Self::GreptimeIdentityPipeline
|
||||
} else {
|
||||
Self::ByNameAndValue((name.to_owned(), version))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub enum PipelineWay {
|
||||
OtlpLog(Box<SelectInfo>),
|
||||
Custom(std::sync::Arc<Pipeline<crate::GreptimeTransformer>>),
|
||||
OtlpLogDirect(Box<SelectInfo>),
|
||||
Pipeline(PipelineDefinition),
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -353,33 +281,31 @@ mod tests {
|
||||
#[test]
|
||||
fn test_pipeline_prepare() {
|
||||
let input_value_str = r#"
|
||||
{
|
||||
"my_field": "1,2",
|
||||
"foo": "bar"
|
||||
}
|
||||
"#;
|
||||
{
|
||||
"my_field": "1,2",
|
||||
"foo": "bar"
|
||||
}
|
||||
"#;
|
||||
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
|
||||
|
||||
let pipeline_yaml = r#"description: 'Pipeline for Apache Tomcat'
|
||||
processors:
|
||||
- csv:
|
||||
field: my_field
|
||||
target_fields: field1, field2
|
||||
- csv:
|
||||
field: my_field
|
||||
target_fields: field1, field2
|
||||
transform:
|
||||
- field: field1
|
||||
type: uint32
|
||||
- field: field2
|
||||
type: uint32
|
||||
"#;
|
||||
- field: field1
|
||||
type: uint32
|
||||
- field: field2
|
||||
type: uint32
|
||||
"#;
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
|
||||
let mut payload = pipeline.init_intermediate_state();
|
||||
pipeline.prepare(input_value, &mut payload).unwrap();
|
||||
assert_eq!(&["my_field"].to_vec(), pipeline.required_keys());
|
||||
assert_eq!(
|
||||
payload,
|
||||
vec![Value::String("1,2".to_string()), Value::Null, Value::Null]
|
||||
);
|
||||
let result = pipeline.exec_mut(&mut payload).unwrap();
|
||||
let mut payload = json_to_intermediate_state(input_value).unwrap();
|
||||
let result = pipeline
|
||||
.exec_mut(&mut payload)
|
||||
.unwrap()
|
||||
.into_transformed()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
|
||||
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
|
||||
@@ -395,40 +321,42 @@ transform:
|
||||
fn test_dissect_pipeline() {
|
||||
let message = r#"129.37.245.88 - meln1ks [01/Aug/2024:14:22:47 +0800] "PATCH /observability/metrics/production HTTP/1.0" 501 33085"#.to_string();
|
||||
let pipeline_str = r#"processors:
|
||||
- dissect:
|
||||
fields:
|
||||
- message
|
||||
patterns:
|
||||
- "%{ip} %{?ignored} %{username} [%{ts}] \"%{method} %{path} %{proto}\" %{status} %{bytes}"
|
||||
- timestamp:
|
||||
fields:
|
||||
- ts
|
||||
formats:
|
||||
- "%d/%b/%Y:%H:%M:%S %z"
|
||||
- dissect:
|
||||
fields:
|
||||
- message
|
||||
patterns:
|
||||
- "%{ip} %{?ignored} %{username} [%{ts}] \"%{method} %{path} %{proto}\" %{status} %{bytes}"
|
||||
- timestamp:
|
||||
fields:
|
||||
- ts
|
||||
formats:
|
||||
- "%d/%b/%Y:%H:%M:%S %z"
|
||||
|
||||
transform:
|
||||
- fields:
|
||||
- ip
|
||||
- username
|
||||
- method
|
||||
- path
|
||||
- proto
|
||||
type: string
|
||||
- fields:
|
||||
- status
|
||||
type: uint16
|
||||
- fields:
|
||||
- bytes
|
||||
type: uint32
|
||||
- field: ts
|
||||
type: timestamp, ns
|
||||
index: time"#;
|
||||
- fields:
|
||||
- ip
|
||||
- username
|
||||
- method
|
||||
- path
|
||||
- proto
|
||||
type: string
|
||||
- fields:
|
||||
- status
|
||||
type: uint16
|
||||
- fields:
|
||||
- bytes
|
||||
type: uint32
|
||||
- field: ts
|
||||
type: timestamp, ns
|
||||
index: time"#;
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_str)).unwrap();
|
||||
let mut payload = pipeline.init_intermediate_state();
|
||||
pipeline
|
||||
.prepare(serde_json::Value::String(message), &mut payload)
|
||||
let mut payload = BTreeMap::new();
|
||||
payload.insert("message".to_string(), Value::String(message));
|
||||
let result = pipeline
|
||||
.exec_mut(&mut payload)
|
||||
.unwrap()
|
||||
.into_transformed()
|
||||
.unwrap();
|
||||
let result = pipeline.exec_mut(&mut payload).unwrap();
|
||||
let sechema = pipeline.schemas();
|
||||
|
||||
assert_eq!(sechema.len(), result.values.len());
|
||||
@@ -479,35 +407,33 @@ transform:
|
||||
#[test]
|
||||
fn test_csv_pipeline() {
|
||||
let input_value_str = r#"
|
||||
{
|
||||
"my_field": "1,2",
|
||||
"foo": "bar"
|
||||
}
|
||||
"#;
|
||||
{
|
||||
"my_field": "1,2",
|
||||
"foo": "bar"
|
||||
}
|
||||
"#;
|
||||
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
|
||||
|
||||
let pipeline_yaml = r#"
|
||||
description: Pipeline for Apache Tomcat
|
||||
processors:
|
||||
- csv:
|
||||
field: my_field
|
||||
target_fields: field1, field2
|
||||
transform:
|
||||
- field: field1
|
||||
type: uint32
|
||||
- field: field2
|
||||
type: uint32
|
||||
"#;
|
||||
description: Pipeline for Apache Tomcat
|
||||
processors:
|
||||
- csv:
|
||||
field: my_field
|
||||
target_fields: field1, field2
|
||||
transform:
|
||||
- field: field1
|
||||
type: uint32
|
||||
- field: field2
|
||||
type: uint32
|
||||
"#;
|
||||
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
|
||||
let mut payload = pipeline.init_intermediate_state();
|
||||
pipeline.prepare(input_value, &mut payload).unwrap();
|
||||
assert_eq!(&["my_field"].to_vec(), pipeline.required_keys());
|
||||
assert_eq!(
|
||||
payload,
|
||||
vec![Value::String("1,2".to_string()), Value::Null, Value::Null]
|
||||
);
|
||||
let result = pipeline.exec_mut(&mut payload).unwrap();
|
||||
let mut payload = json_to_intermediate_state(input_value).unwrap();
|
||||
let result = pipeline
|
||||
.exec_mut(&mut payload)
|
||||
.unwrap()
|
||||
.into_transformed()
|
||||
.unwrap();
|
||||
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
|
||||
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
|
||||
match &result.values[2].value_data {
|
||||
@@ -521,33 +447,36 @@ transform:
|
||||
#[test]
|
||||
fn test_date_pipeline() {
|
||||
let input_value_str = r#"
|
||||
{
|
||||
"my_field": "1,2",
|
||||
"foo": "bar",
|
||||
"test_time": "2014-5-17T04:34:56+00:00"
|
||||
}
|
||||
"#;
|
||||
{
|
||||
"my_field": "1,2",
|
||||
"foo": "bar",
|
||||
"test_time": "2014-5-17T04:34:56+00:00"
|
||||
}
|
||||
"#;
|
||||
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
|
||||
|
||||
let pipeline_yaml = r#"
|
||||
---
|
||||
let pipeline_yaml = r#"---
|
||||
description: Pipeline for Apache Tomcat
|
||||
|
||||
processors:
|
||||
- timestamp:
|
||||
field: test_time
|
||||
- timestamp:
|
||||
field: test_time
|
||||
|
||||
transform:
|
||||
- field: test_time
|
||||
type: timestamp, ns
|
||||
index: time
|
||||
"#;
|
||||
- field: test_time
|
||||
type: timestamp, ns
|
||||
index: time
|
||||
"#;
|
||||
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
|
||||
let schema = pipeline.schemas().clone();
|
||||
let mut result = pipeline.init_intermediate_state();
|
||||
pipeline.prepare(input_value, &mut result).unwrap();
|
||||
let row = pipeline.exec_mut(&mut result).unwrap();
|
||||
let mut result = json_to_intermediate_state(input_value).unwrap();
|
||||
|
||||
let row = pipeline
|
||||
.exec_mut(&mut result)
|
||||
.unwrap()
|
||||
.into_transformed()
|
||||
.unwrap();
|
||||
let output = Rows {
|
||||
schema,
|
||||
rows: vec![row],
|
||||
@@ -583,9 +512,9 @@ dispatcher:
|
||||
field: typename
|
||||
rules:
|
||||
- value: http
|
||||
table_part: http_events
|
||||
table_suffix: http_events
|
||||
- value: database
|
||||
table_part: db_events
|
||||
table_suffix: db_events
|
||||
pipeline: database_pipeline
|
||||
|
||||
transform:
|
||||
@@ -603,7 +532,7 @@ transform:
|
||||
dispatcher.rules[0],
|
||||
crate::dispatcher::Rule {
|
||||
value: Value::String("http".to_string()),
|
||||
table_part: "http_events".to_string(),
|
||||
table_suffix: "http_events".to_string(),
|
||||
pipeline: None
|
||||
}
|
||||
);
|
||||
@@ -612,7 +541,7 @@ transform:
|
||||
dispatcher.rules[1],
|
||||
crate::dispatcher::Rule {
|
||||
value: Value::String("database".to_string()),
|
||||
table_part: "db_events".to_string(),
|
||||
table_suffix: "db_events".to_string(),
|
||||
pipeline: Some("database_pipeline".to_string()),
|
||||
}
|
||||
);
|
||||
@@ -627,9 +556,9 @@ dispatcher:
|
||||
_field: typename
|
||||
rules:
|
||||
- value: http
|
||||
table_part: http_events
|
||||
table_suffix: http_events
|
||||
- value: database
|
||||
table_part: db_events
|
||||
table_suffix: db_events
|
||||
pipeline: database_pipeline
|
||||
|
||||
transform:
|
||||
@@ -647,9 +576,9 @@ dispatcher:
|
||||
field: typename
|
||||
rules:
|
||||
- value: http
|
||||
_table_part: http_events
|
||||
_table_suffix: http_events
|
||||
- value: database
|
||||
_table_part: db_events
|
||||
_table_suffix: db_events
|
||||
pipeline: database_pipeline
|
||||
|
||||
transform:
|
||||
@@ -667,9 +596,9 @@ dispatcher:
|
||||
field: typename
|
||||
rules:
|
||||
- _value: http
|
||||
table_part: http_events
|
||||
table_suffix: http_events
|
||||
- _value: database
|
||||
table_part: db_events
|
||||
table_suffix: db_events
|
||||
pipeline: database_pipeline
|
||||
|
||||
transform:
|
||||
|
||||
@@ -543,6 +543,11 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Yaml parse error."))]
YamlParse {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Prepare value must be an object"))]
PrepareValueMustBeObject {
#[snafu(implicit)]
@@ -590,9 +595,9 @@ pub enum Error {
},
#[snafu(display("Field is required for dispatcher"))]
FieldRequiredForDispatcher,
#[snafu(display("table_part is required for dispatcher rule"))]
TablePartRequiredForDispatcherRule,
#[snafu(display("value is required for dispatcher rule"))]
#[snafu(display("Table_suffix is required for dispatcher rule"))]
TableSuffixRequiredForDispatcherRule,
#[snafu(display("Value is required for dispatcher rule"))]
ValueRequiredForDispatcherRule,
#[snafu(display(
"Reached max nested levels when flattening JSON object: {max_nested_levels}"
@@ -19,133 +19,12 @@ use snafu::OptionExt;
|
||||
|
||||
use super::error::{EmptyInputFieldSnafu, MissingInputFieldSnafu};
|
||||
use crate::etl::error::{Error, Result};
|
||||
use crate::etl::find_key_index;
|
||||
|
||||
/// Information about the input field including the name and index in intermediate keys.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct InputFieldInfo {
|
||||
pub(crate) name: String,
|
||||
pub(crate) index: usize,
|
||||
}
|
||||
|
||||
impl InputFieldInfo {
|
||||
/// Create a new input field info with the given field name and index.
|
||||
pub(crate) fn new(field: impl Into<String>, index: usize) -> Self {
|
||||
InputFieldInfo {
|
||||
name: field.into(),
|
||||
index,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about a field that has one input and one output.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct OneInputOneOutputField {
|
||||
input: InputFieldInfo,
|
||||
output: Option<(String, usize)>,
|
||||
}
|
||||
|
||||
impl OneInputOneOutputField {
|
||||
/// Create a new field with the given input and output.
|
||||
pub(crate) fn new(input: InputFieldInfo, output: (String, usize)) -> Self {
|
||||
OneInputOneOutputField {
|
||||
input,
|
||||
output: Some(output),
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a new field with the given processor kind, intermediate keys, input field, and target field.
|
||||
pub(crate) fn build(
|
||||
processor_kind: &str,
|
||||
intermediate_keys: &[String],
|
||||
input_field: &str,
|
||||
target_field: &str,
|
||||
) -> Result<Self> {
|
||||
let input_index = find_key_index(intermediate_keys, input_field, processor_kind)?;
|
||||
|
||||
let input_field_info = InputFieldInfo::new(input_field, input_index);
|
||||
let output_index = find_key_index(intermediate_keys, target_field, processor_kind)?;
|
||||
Ok(OneInputOneOutputField::new(
|
||||
input_field_info,
|
||||
(target_field.to_string(), output_index),
|
||||
))
|
||||
}
|
||||
|
||||
/// Get the input field information.
|
||||
pub(crate) fn input(&self) -> &InputFieldInfo {
|
||||
&self.input
|
||||
}
|
||||
|
||||
/// Get the index of the input field.
|
||||
pub(crate) fn input_index(&self) -> usize {
|
||||
self.input.index
|
||||
}
|
||||
|
||||
/// Get the name of the input field.
|
||||
pub(crate) fn input_name(&self) -> &str {
|
||||
&self.input.name
|
||||
}
|
||||
|
||||
/// Get the index of the output field.
|
||||
pub(crate) fn output_index(&self) -> usize {
|
||||
*self.output().1
|
||||
}
|
||||
|
||||
/// Get the name of the output field.
|
||||
pub(crate) fn output_name(&self) -> &str {
|
||||
self.output().0
|
||||
}
|
||||
|
||||
/// Get the output field information.
|
||||
pub(crate) fn output(&self) -> (&String, &usize) {
|
||||
if let Some((name, index)) = &self.output {
|
||||
(name, index)
|
||||
} else {
|
||||
(&self.input.name, &self.input.index)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about a field that has one input and multiple outputs.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct OneInputMultiOutputField {
|
||||
input: InputFieldInfo,
|
||||
/// Typically, processors that output multiple keys need to be distinguished by splicing the keys together.
|
||||
prefix: Option<String>,
|
||||
}
|
||||
|
||||
impl OneInputMultiOutputField {
|
||||
/// Create a new field with the given input and prefix.
|
||||
pub(crate) fn new(input: InputFieldInfo, prefix: Option<String>) -> Self {
|
||||
OneInputMultiOutputField { input, prefix }
|
||||
}
|
||||
|
||||
/// Get the input field information.
|
||||
pub(crate) fn input(&self) -> &InputFieldInfo {
|
||||
&self.input
|
||||
}
|
||||
|
||||
/// Get the index of the input field.
|
||||
pub(crate) fn input_index(&self) -> usize {
|
||||
self.input.index
|
||||
}
|
||||
|
||||
/// Get the name of the input field.
|
||||
pub(crate) fn input_name(&self) -> &str {
|
||||
&self.input.name
|
||||
}
|
||||
|
||||
/// Get the prefix for the output fields.
|
||||
pub(crate) fn target_prefix(&self) -> &str {
|
||||
self.prefix.as_deref().unwrap_or(&self.input.name)
|
||||
}
|
||||
}
|
||||
|
||||
/// Raw processor-defined inputs and outputs
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct Field {
|
||||
pub(crate) input_field: String,
|
||||
pub(crate) target_field: Option<String>,
|
||||
input_field: String,
|
||||
target_field: Option<String>,
|
||||
}
|
||||
|
||||
impl FromStr for Field {
|
||||
@@ -194,6 +73,10 @@ impl Field {
|
||||
pub(crate) fn target_or_input_field(&self) -> &str {
|
||||
self.target_field.as_deref().unwrap_or(&self.input_field)
|
||||
}
|
||||
|
||||
pub(crate) fn set_target_field(&mut self, target_field: Option<String>) {
|
||||
self.target_field = target_field;
|
||||
}
|
||||
}
|
||||
|
||||
/// A collection of fields.
|
||||
|
||||
@@ -27,32 +27,33 @@ pub mod regex;
|
||||
pub mod timestamp;
|
||||
pub mod urlencoding;
|
||||
|
||||
use ahash::{HashSet, HashSetExt};
|
||||
use cmcd::{CmcdProcessor, CmcdProcessorBuilder};
|
||||
use csv::{CsvProcessor, CsvProcessorBuilder};
|
||||
use date::{DateProcessor, DateProcessorBuilder};
|
||||
use decolorize::{DecolorizeProcessor, DecolorizeProcessorBuilder};
|
||||
use digest::{DigestProcessor, DigestProcessorBuilder};
|
||||
use dissect::{DissectProcessor, DissectProcessorBuilder};
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use cmcd::CmcdProcessor;
|
||||
use csv::CsvProcessor;
|
||||
use date::DateProcessor;
|
||||
use decolorize::DecolorizeProcessor;
|
||||
use digest::DigestProcessor;
|
||||
use dissect::DissectProcessor;
|
||||
use enum_dispatch::enum_dispatch;
|
||||
use epoch::{EpochProcessor, EpochProcessorBuilder};
|
||||
use gsub::{GsubProcessor, GsubProcessorBuilder};
|
||||
use itertools::Itertools;
|
||||
use join::{JoinProcessor, JoinProcessorBuilder};
|
||||
use json_path::{JsonPathProcessor, JsonPathProcessorBuilder};
|
||||
use letter::{LetterProcessor, LetterProcessorBuilder};
|
||||
use regex::{RegexProcessor, RegexProcessorBuilder};
|
||||
use epoch::EpochProcessor;
|
||||
use gsub::GsubProcessor;
|
||||
use join::JoinProcessor;
|
||||
use json_path::JsonPathProcessor;
|
||||
use letter::LetterProcessor;
|
||||
use regex::RegexProcessor;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use timestamp::{TimestampProcessor, TimestampProcessorBuilder};
|
||||
use urlencoding::{UrlEncodingProcessor, UrlEncodingProcessorBuilder};
|
||||
use timestamp::TimestampProcessor;
|
||||
use urlencoding::UrlEncodingProcessor;
|
||||
|
||||
use super::error::{
|
||||
FailedParseFieldFromStringSnafu, FieldMustBeTypeSnafu, ProcessorKeyMustBeStringSnafu,
|
||||
ProcessorMustBeMapSnafu, ProcessorMustHaveStringKeySnafu, UnsupportedProcessorSnafu,
|
||||
ProcessorMustBeMapSnafu, ProcessorMustHaveStringKeySnafu,
|
||||
};
|
||||
use super::field::{Field, Fields};
|
||||
use crate::etl::error::{Error, Result};
|
||||
use crate::etl::value::Value;
|
||||
use crate::etl_error::UnsupportedProcessorSnafu;
|
||||
|
||||
const FIELD_NAME: &str = "field";
|
||||
const FIELDS_NAME: &str = "fields";
|
||||
@@ -65,6 +66,8 @@ const TARGET_FIELDS_NAME: &str = "target_fields";
|
||||
const JSON_PATH_NAME: &str = "json_path";
|
||||
const JSON_PATH_RESULT_INDEX_NAME: &str = "result_index";
|
||||
|
||||
pub type IntermediateStatus = BTreeMap<String, Value>;
|
||||
|
||||
/// Processor trait defines the interface for all processors.
|
||||
///
|
||||
/// A processor is a transformation that can be applied to a field in a document
|
||||
@@ -80,7 +83,7 @@ pub trait Processor: std::fmt::Debug + Send + Sync + 'static {
|
||||
fn ignore_missing(&self) -> bool;
|
||||
|
||||
/// Execute the processor on a vector which be preprocessed by the pipeline
|
||||
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<()>;
|
||||
fn exec_mut(&self, val: &mut IntermediateStatus) -> Result<()>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -102,57 +105,12 @@ pub enum ProcessorKind {
|
||||
Digest(DigestProcessor),
|
||||
}
|
||||
|
||||
/// ProcessorBuilder trait defines the interface for all processor builders
|
||||
/// A processor builder is used to create a processor
|
||||
#[enum_dispatch(ProcessorBuilders)]
|
||||
pub trait ProcessorBuilder: std::fmt::Debug + Send + Sync + 'static {
|
||||
/// Get the processor's output keys
|
||||
fn output_keys(&self) -> HashSet<&str>;
|
||||
/// Get the processor's input keys
|
||||
fn input_keys(&self) -> HashSet<&str>;
|
||||
/// Build the processor
|
||||
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[enum_dispatch]
|
||||
pub enum ProcessorBuilders {
|
||||
Cmcd(CmcdProcessorBuilder),
|
||||
Csv(CsvProcessorBuilder),
|
||||
Dissect(DissectProcessorBuilder),
|
||||
Gsub(GsubProcessorBuilder),
|
||||
Join(JoinProcessorBuilder),
|
||||
Letter(LetterProcessorBuilder),
|
||||
Regex(RegexProcessorBuilder),
|
||||
Timestamp(TimestampProcessorBuilder),
|
||||
UrlEncoding(UrlEncodingProcessorBuilder),
|
||||
Epoch(EpochProcessorBuilder),
|
||||
Date(DateProcessorBuilder),
|
||||
JsonPath(JsonPathProcessorBuilder),
|
||||
Decolorize(DecolorizeProcessorBuilder),
|
||||
Digest(DigestProcessorBuilder),
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ProcessorBuilderList {
|
||||
pub(crate) processor_builders: Vec<ProcessorBuilders>,
|
||||
pub(crate) input_keys: Vec<String>,
|
||||
pub(crate) output_keys: Vec<String>,
|
||||
pub(crate) original_input_keys: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Processors {
|
||||
/// A ordered list of processors
|
||||
/// The order of processors is important
|
||||
/// The output of the first processor will be the input of the second processor
|
||||
pub processors: Vec<ProcessorKind>,
|
||||
/// all required keys in all processors
|
||||
pub required_keys: Vec<String>,
|
||||
/// all required keys in user-supplied data, not pipeline output fields
|
||||
pub required_original_keys: Vec<String>,
|
||||
/// all output keys in all processors
|
||||
pub output_keys: Vec<String>,
|
||||
}
|
||||
|
||||
impl std::ops::Deref for Processors {
|
||||
@@ -169,80 +127,22 @@ impl std::ops::DerefMut for Processors {
|
||||
}
|
||||
}
|
||||
|
||||
impl Processors {
|
||||
/// A collection of all the processor's required input fields
|
||||
pub fn required_keys(&self) -> &Vec<String> {
|
||||
&self.required_keys
|
||||
}
|
||||
|
||||
/// A collection of all the processor's output fields
|
||||
pub fn output_keys(&self) -> &Vec<String> {
|
||||
&self.output_keys
|
||||
}
|
||||
|
||||
/// Required fields in user-supplied data, not pipeline output fields.
|
||||
pub fn required_original_keys(&self) -> &Vec<String> {
|
||||
&self.required_original_keys
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&Vec<yaml_rust::Yaml>> for ProcessorBuilderList {
|
||||
impl TryFrom<&Vec<yaml_rust::Yaml>> for Processors {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(vec: &Vec<yaml_rust::Yaml>) -> Result<Self> {
|
||||
let mut processors_builders = vec![];
|
||||
let mut all_output_keys = HashSet::with_capacity(50);
|
||||
let mut all_required_keys = HashSet::with_capacity(50);
|
||||
let mut all_required_original_keys = HashSet::with_capacity(50);
|
||||
for doc in vec {
|
||||
let processor = parse_processor(doc)?;
|
||||
processors_builders.push(processor);
|
||||
}
|
||||
|
||||
for processor in processors_builders.iter() {
|
||||
{
|
||||
// get all required keys
|
||||
let processor_required_keys = processor.input_keys();
|
||||
|
||||
for key in &processor_required_keys {
|
||||
if !all_output_keys.contains(key) {
|
||||
all_required_original_keys.insert(*key);
|
||||
}
|
||||
}
|
||||
|
||||
all_required_keys.extend(processor_required_keys);
|
||||
|
||||
let processor_output_keys = processor.output_keys().into_iter();
|
||||
all_output_keys.extend(processor_output_keys);
|
||||
}
|
||||
}
|
||||
|
||||
let all_required_keys = all_required_keys
|
||||
.into_iter()
|
||||
.map(|x| x.to_string())
|
||||
.sorted()
|
||||
.collect();
|
||||
let all_output_keys = all_output_keys
|
||||
.into_iter()
|
||||
.map(|x| x.to_string())
|
||||
.sorted()
|
||||
.collect();
|
||||
let all_required_original_keys = all_required_original_keys
|
||||
.into_iter()
|
||||
.map(|x| x.to_string())
|
||||
.sorted()
|
||||
.collect();
|
||||
|
||||
Ok(ProcessorBuilderList {
|
||||
processor_builders: processors_builders,
|
||||
input_keys: all_required_keys,
|
||||
output_keys: all_output_keys,
|
||||
original_input_keys: all_required_original_keys,
|
||||
Ok(Processors {
|
||||
processors: processors_builders,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorBuilders> {
|
||||
fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind> {
|
||||
let map = doc.as_hash().context(ProcessorMustBeMapSnafu)?;
|
||||
|
||||
let key = map.keys().next().context(ProcessorMustHaveStringKeySnafu)?;
|
||||
@@ -256,34 +156,28 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorBuilders> {
|
||||
let str_key = key.as_str().context(ProcessorKeyMustBeStringSnafu)?;
|
||||
|
||||
let processor = match str_key {
|
||||
cmcd::PROCESSOR_CMCD => ProcessorBuilders::Cmcd(CmcdProcessorBuilder::try_from(value)?),
|
||||
csv::PROCESSOR_CSV => ProcessorBuilders::Csv(CsvProcessorBuilder::try_from(value)?),
|
||||
dissect::PROCESSOR_DISSECT => {
|
||||
ProcessorBuilders::Dissect(DissectProcessorBuilder::try_from(value)?)
|
||||
}
|
||||
epoch::PROCESSOR_EPOCH => ProcessorBuilders::Epoch(EpochProcessorBuilder::try_from(value)?),
|
||||
date::PROCESSOR_DATE => ProcessorBuilders::Date(DateProcessorBuilder::try_from(value)?),
|
||||
gsub::PROCESSOR_GSUB => ProcessorBuilders::Gsub(GsubProcessorBuilder::try_from(value)?),
|
||||
join::PROCESSOR_JOIN => ProcessorBuilders::Join(JoinProcessorBuilder::try_from(value)?),
|
||||
letter::PROCESSOR_LETTER => {
|
||||
ProcessorBuilders::Letter(LetterProcessorBuilder::try_from(value)?)
|
||||
}
|
||||
regex::PROCESSOR_REGEX => ProcessorBuilders::Regex(RegexProcessorBuilder::try_from(value)?),
|
||||
cmcd::PROCESSOR_CMCD => ProcessorKind::Cmcd(CmcdProcessor::try_from(value)?),
|
||||
csv::PROCESSOR_CSV => ProcessorKind::Csv(CsvProcessor::try_from(value)?),
|
||||
dissect::PROCESSOR_DISSECT => ProcessorKind::Dissect(DissectProcessor::try_from(value)?),
|
||||
epoch::PROCESSOR_EPOCH => ProcessorKind::Epoch(EpochProcessor::try_from(value)?),
|
||||
date::PROCESSOR_DATE => ProcessorKind::Date(DateProcessor::try_from(value)?),
|
||||
gsub::PROCESSOR_GSUB => ProcessorKind::Gsub(GsubProcessor::try_from(value)?),
|
||||
join::PROCESSOR_JOIN => ProcessorKind::Join(JoinProcessor::try_from(value)?),
|
||||
letter::PROCESSOR_LETTER => ProcessorKind::Letter(LetterProcessor::try_from(value)?),
|
||||
regex::PROCESSOR_REGEX => ProcessorKind::Regex(RegexProcessor::try_from(value)?),
|
||||
timestamp::PROCESSOR_TIMESTAMP => {
|
||||
ProcessorBuilders::Timestamp(TimestampProcessorBuilder::try_from(value)?)
|
||||
ProcessorKind::Timestamp(TimestampProcessor::try_from(value)?)
|
||||
}
|
||||
urlencoding::PROCESSOR_URL_ENCODING => {
|
||||
ProcessorBuilders::UrlEncoding(UrlEncodingProcessorBuilder::try_from(value)?)
|
||||
ProcessorKind::UrlEncoding(UrlEncodingProcessor::try_from(value)?)
|
||||
}
|
||||
json_path::PROCESSOR_JSON_PATH => {
|
||||
ProcessorBuilders::JsonPath(json_path::JsonPathProcessorBuilder::try_from(value)?)
|
||||
ProcessorKind::JsonPath(json_path::JsonPathProcessor::try_from(value)?)
|
||||
}
|
||||
decolorize::PROCESSOR_DECOLORIZE => {
|
||||
ProcessorBuilders::Decolorize(DecolorizeProcessorBuilder::try_from(value)?)
|
||||
}
|
||||
digest::PROCESSOR_DIGEST => {
|
||||
ProcessorBuilders::Digest(DigestProcessorBuilder::try_from(value)?)
|
||||
ProcessorKind::Decolorize(DecolorizeProcessor::try_from(value)?)
|
||||
}
|
||||
digest::PROCESSOR_DIGEST => ProcessorKind::Digest(DigestProcessor::try_from(value)?),
|
||||
_ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
|
||||
};
|
||||
|
||||
|
||||
@@ -18,20 +18,19 @@
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use ahash::HashSet;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use urlencoding::decode;
|
||||
|
||||
use super::IntermediateStatus;
|
||||
use crate::etl::error::{
|
||||
CmcdMissingKeySnafu, CmcdMissingValueSnafu, Error, FailedToParseFloatKeySnafu,
|
||||
FailedToParseIntKeySnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
};
|
||||
use crate::etl::field::{Field, Fields, InputFieldInfo, OneInputMultiOutputField};
|
||||
use crate::etl::find_key_index;
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, Processor, ProcessorBuilder, ProcessorKind,
|
||||
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
@@ -77,139 +76,6 @@ const CMCD_KEYS: [&str; 18] = [
|
||||
CMCD_KEY_V,
|
||||
];
|
||||
|
||||
/// CmcdProcessorBuilder is a builder for CmcdProcessor
|
||||
/// parse from raw yaml
|
||||
#[derive(Debug, Default)]
|
||||
pub struct CmcdProcessorBuilder {
|
||||
fields: Fields,
|
||||
output_keys: HashSet<String>,
|
||||
ignore_missing: bool,
|
||||
}
|
||||
|
||||
impl CmcdProcessorBuilder {
|
||||
/// build_cmcd_outputs build cmcd output info
|
||||
/// generate index and function for each output
|
||||
pub(super) fn build_cmcd_outputs(
|
||||
field: &Field,
|
||||
intermediate_keys: &[String],
|
||||
) -> Result<(BTreeMap<String, usize>, Vec<CmcdOutputInfo>)> {
|
||||
let mut output_index = BTreeMap::new();
|
||||
let mut cmcd_field_outputs = Vec::with_capacity(CMCD_KEYS.len());
|
||||
for cmcd in CMCD_KEYS {
|
||||
let final_key = generate_key(field.target_or_input_field(), cmcd);
|
||||
let index = find_key_index(intermediate_keys, &final_key, "cmcd")?;
|
||||
output_index.insert(final_key.clone(), index);
|
||||
match cmcd {
|
||||
CMCD_KEY_BS | CMCD_KEY_SU => {
|
||||
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, bs_su);
|
||||
cmcd_field_outputs.push(output_info);
|
||||
}
|
||||
CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP
|
||||
| CMCD_KEY_RTP | CMCD_KEY_TB => {
|
||||
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, br_tb);
|
||||
cmcd_field_outputs.push(output_info);
|
||||
}
|
||||
CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID
|
||||
| CMCD_KEY_ST | CMCD_KEY_V => {
|
||||
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, cid_v);
|
||||
cmcd_field_outputs.push(output_info);
|
||||
}
|
||||
CMCD_KEY_NOR => {
|
||||
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, nor);
|
||||
cmcd_field_outputs.push(output_info);
|
||||
}
|
||||
CMCD_KEY_PR => {
|
||||
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, pr);
|
||||
cmcd_field_outputs.push(output_info);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok((output_index, cmcd_field_outputs))
|
||||
}
|
||||
|
||||
/// build CmcdProcessor from CmcdProcessorBuilder
|
||||
pub fn build(self, intermediate_keys: &[String]) -> Result<CmcdProcessor> {
|
||||
let mut real_fields = vec![];
|
||||
let mut cmcd_outputs = Vec::with_capacity(CMCD_KEYS.len());
|
||||
for field in self.fields.into_iter() {
|
||||
let input_index = find_key_index(intermediate_keys, field.input_field(), "cmcd")?;
|
||||
|
||||
let input_field_info = InputFieldInfo::new(field.input_field(), input_index);
|
||||
|
||||
let (_, cmcd_field_outputs) = Self::build_cmcd_outputs(&field, intermediate_keys)?;
|
||||
|
||||
cmcd_outputs.push(cmcd_field_outputs);
|
||||
|
||||
let real_field = OneInputMultiOutputField::new(input_field_info, field.target_field);
|
||||
real_fields.push(real_field);
|
||||
}
|
||||
Ok(CmcdProcessor {
|
||||
fields: real_fields,
|
||||
cmcd_outputs,
|
||||
ignore_missing: self.ignore_missing,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ProcessorBuilder for CmcdProcessorBuilder {
|
||||
fn output_keys(&self) -> HashSet<&str> {
|
||||
self.output_keys.iter().map(|s| s.as_str()).collect()
|
||||
}
|
||||
|
||||
fn input_keys(&self) -> HashSet<&str> {
|
||||
self.fields.iter().map(|f| f.input_field()).collect()
|
||||
}
|
||||
|
||||
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind> {
|
||||
self.build(intermediate_keys).map(ProcessorKind::Cmcd)
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_key(prefix: &str, key: &str) -> String {
|
||||
format!("{}_{}", prefix, key)
|
||||
}
|
||||
|
||||
/// CmcdOutputInfo is a struct to store output info
|
||||
#[derive(Debug)]
|
||||
pub(super) struct CmcdOutputInfo {
|
||||
/// {input_field}_{cmcd_key}
|
||||
final_key: String,
|
||||
/// cmcd key
|
||||
key: &'static str,
|
||||
/// index in intermediate_keys
|
||||
index: usize,
|
||||
/// function to resolve value
|
||||
f: fn(&str, &str, Option<&str>) -> Result<Value>,
|
||||
}
|
||||
|
||||
impl CmcdOutputInfo {
|
||||
fn new(
|
||||
final_key: String,
|
||||
key: &'static str,
|
||||
index: usize,
|
||||
f: fn(&str, &str, Option<&str>) -> Result<Value>,
|
||||
) -> Self {
|
||||
Self {
|
||||
final_key,
|
||||
key,
|
||||
index,
|
||||
f,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for CmcdOutputInfo {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
final_key: String::default(),
|
||||
key: "",
|
||||
index: 0,
|
||||
f: |_, _, _| Ok(Value::Null),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// function to resolve CMCD_KEY_BS | CMCD_KEY_SU
|
||||
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<Value> {
|
||||
Ok(Value::Boolean(true))
|
||||
@@ -286,9 +152,7 @@ fn pr(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
|
||||
/// 12. Transport Layer Security SHOULD be used to protect all transmission of CMCD data.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct CmcdProcessor {
|
||||
fields: Vec<OneInputMultiOutputField>,
|
||||
cmcd_outputs: Vec<Vec<CmcdOutputInfo>>,
|
||||
|
||||
fields: Fields,
|
||||
ignore_missing: bool,
|
||||
}
|
||||
|
||||
@@ -297,27 +161,52 @@ impl CmcdProcessor {
|
||||
format!("{}_{}", prefix, key)
|
||||
}
|
||||
|
||||
fn parse(&self, field_index: usize, s: &str) -> Result<Vec<(usize, Value)>> {
|
||||
let parts = s.split(',');
|
||||
let mut result = Vec::new();
|
||||
fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<String, Value>> {
|
||||
let mut working_set = BTreeMap::new();
|
||||
|
||||
let parts = value.split(',');
|
||||
|
||||
for part in parts {
|
||||
let mut kv = part.split('=');
|
||||
let k = kv.next().context(CmcdMissingKeySnafu { part, s })?;
|
||||
let k = kv.next().context(CmcdMissingKeySnafu { part, s: value })?;
|
||||
let v = kv.next();
|
||||
|
||||
for cmcd_key in self.cmcd_outputs[field_index].iter() {
|
||||
if cmcd_key.key == k {
|
||||
let val = (cmcd_key.f)(s, k, v)?;
|
||||
result.push((cmcd_key.index, val));
|
||||
for cmcd_key in CMCD_KEYS {
|
||||
if cmcd_key == k {
|
||||
match cmcd_key {
|
||||
CMCD_KEY_BS | CMCD_KEY_SU => {
|
||||
working_set
|
||||
.insert(Self::generate_key(name, cmcd_key), bs_su(value, k, v)?);
|
||||
}
|
||||
CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP
|
||||
| CMCD_KEY_RTP | CMCD_KEY_TB => {
|
||||
working_set
|
||||
.insert(Self::generate_key(name, cmcd_key), br_tb(value, k, v)?);
|
||||
}
|
||||
CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID
|
||||
| CMCD_KEY_ST | CMCD_KEY_V => {
|
||||
working_set
|
||||
.insert(Self::generate_key(name, cmcd_key), cid_v(value, k, v)?);
|
||||
}
|
||||
CMCD_KEY_NOR => {
|
||||
working_set
|
||||
.insert(Self::generate_key(name, cmcd_key), nor(value, k, v)?);
|
||||
}
|
||||
CMCD_KEY_PR => {
|
||||
working_set
|
||||
.insert(Self::generate_key(name, cmcd_key), pr(value, k, v)?);
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
Ok(working_set)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessorBuilder {
|
||||
impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessor {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self> {
|
||||
@@ -344,22 +233,12 @@ impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessorBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
let output_keys = fields
|
||||
.iter()
|
||||
.flat_map(|f| {
|
||||
CMCD_KEYS
|
||||
.iter()
|
||||
.map(|cmcd_key| generate_key(f.target_or_input_field(), cmcd_key))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let builder = CmcdProcessorBuilder {
|
||||
let proc = CmcdProcessor {
|
||||
fields,
|
||||
output_keys,
|
||||
ignore_missing,
|
||||
};
|
||||
|
||||
Ok(builder)
|
||||
Ok(proc)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -372,21 +251,20 @@ impl Processor for CmcdProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<()> {
|
||||
for (field_index, field) in self.fields.iter().enumerate() {
|
||||
let field_value_index = field.input_index();
|
||||
match val.get(field_value_index) {
|
||||
Some(Value::String(v)) => {
|
||||
let result_list = self.parse(field_index, v)?;
|
||||
for (output_index, v) in result_list {
|
||||
val[output_index] = v;
|
||||
}
|
||||
fn exec_mut(&self, val: &mut IntermediateStatus) -> Result<()> {
|
||||
for field in self.fields.iter() {
|
||||
let name = field.input_field();
|
||||
|
||||
match val.get(name) {
|
||||
Some(Value::String(s)) => {
|
||||
let results = self.parse(field.target_or_input_field(), s)?;
|
||||
val.extend(results);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind().to_string(),
|
||||
field: field.input_name().to_string(),
|
||||
field: name.to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
@@ -400,6 +278,7 @@ impl Processor for CmcdProcessor {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -410,9 +289,9 @@ mod tests {
|
||||
|
||||
use urlencoding::decode;
|
||||
|
||||
use super::{CmcdProcessorBuilder, CMCD_KEYS};
|
||||
use super::CmcdProcessor;
|
||||
use crate::etl::field::{Field, Fields};
|
||||
use crate::etl::value::{Map, Value};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_cmcd() {
|
||||
@@ -546,37 +425,20 @@ mod tests {
|
||||
|
||||
let field = Field::new("prefix", None);
|
||||
|
||||
let output_keys = CMCD_KEYS
|
||||
.iter()
|
||||
.map(|k| format!("prefix_{}", k))
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let mut intermediate_keys = vec!["prefix".to_string()];
|
||||
intermediate_keys.append(&mut (output_keys.clone()));
|
||||
|
||||
let builder = CmcdProcessorBuilder {
|
||||
let processor = CmcdProcessor {
|
||||
fields: Fields::new(vec![field]),
|
||||
output_keys: output_keys.iter().map(|s| s.to_string()).collect(),
|
||||
ignore_missing: false,
|
||||
};
|
||||
|
||||
let processor = builder.build(&intermediate_keys).unwrap();
|
||||
|
||||
for (s, vec) in ss.into_iter() {
|
||||
let decoded = decode(s).unwrap().to_string();
|
||||
|
||||
let values = vec
|
||||
let expected = vec
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect::<BTreeMap<String, Value>>();
|
||||
let expected = Map { values };
|
||||
|
||||
let actual = processor.parse(0, &decoded).unwrap();
|
||||
let actual = actual
|
||||
.into_iter()
|
||||
.map(|(index, value)| (intermediate_keys[index].clone(), value))
|
||||
.collect::<BTreeMap<String, Value>>();
|
||||
let actual = Map { values: actual };
|
||||
let actual = processor.parse("prefix", &decoded).unwrap();
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,8 @@

// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/csv-processor.html

use ahash::HashSet;
use std::collections::BTreeMap;

use csv::{ReaderBuilder, Trim};
use itertools::EitherOrBoth::{Both, Left, Right};
use itertools::Itertools;
@@ -24,11 +25,10 @@ use crate::etl::error::{
CsvNoRecordSnafu, CsvQuoteNameSnafu, CsvReadSnafu, CsvSeparatorNameSnafu, Error,
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
};
use crate::etl::field::{Fields, InputFieldInfo, OneInputMultiOutputField};
use crate::etl::find_key_index;
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder,
ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;

@@ -40,11 +40,12 @@ const TRIM_NAME: &str = "trim";
const EMPTY_VALUE_NAME: &str = "empty_value";
const TARGET_FIELDS: &str = "target_fields";

/// only support string value
#[derive(Debug, Default)]
pub struct CsvProcessorBuilder {
pub struct CsvProcessor {
reader: ReaderBuilder,

fields: Fields,

ignore_missing: bool,

// Value used to fill empty fields, empty fields will be skipped if this is not provided.
@@ -57,80 +58,22 @@ pub struct CsvProcessorBuilder {
// tag
}

impl CsvProcessorBuilder {
fn build(self, intermediate_keys: &[String]) -> Result<CsvProcessor> {
let mut real_fields = vec![];

for field in self.fields {
let input_index = find_key_index(intermediate_keys, field.input_field(), "csv")?;

let input_field_info = InputFieldInfo::new(field.input_field(), input_index);
let real_field = OneInputMultiOutputField::new(input_field_info, None);
real_fields.push(real_field);
}

let output_index_info = self
.target_fields
.iter()
.map(|f| find_key_index(intermediate_keys, f, "csv"))
.collect::<Result<Vec<_>>>()?;
Ok(CsvProcessor {
reader: self.reader,
fields: real_fields,
ignore_missing: self.ignore_missing,
empty_value: self.empty_value,
output_index_info,
})
}
}

impl ProcessorBuilder for CsvProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.target_fields.iter().map(|s| s.as_str()).collect()
}

fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}

fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind> {
self.build(intermediate_keys).map(ProcessorKind::Csv)
}
}

/// only support string value
#[derive(Debug)]
pub struct CsvProcessor {
reader: ReaderBuilder,

fields: Vec<OneInputMultiOutputField>,

ignore_missing: bool,

// Value used to fill empty fields, empty fields will be skipped if this is not provided.
empty_value: Option<String>,
output_index_info: Vec<usize>,
// description
// if
// ignore_failure
// on_failure
// tag
}

impl CsvProcessor {
// process the csv format string to a map with target_fields as keys
fn process(&self, val: &str) -> Result<Vec<(usize, Value)>> {
fn process(&self, val: &str) -> Result<BTreeMap<String, Value>> {
let mut reader = self.reader.from_reader(val.as_bytes());

if let Some(result) = reader.records().next() {
let record: csv::StringRecord = result.context(CsvReadSnafu)?;

let values: Vec<(usize, Value)> = self
.output_index_info
let values = self
.target_fields
.iter()
.zip_longest(record.iter())
.filter_map(|zipped| match zipped {
Both(target_field, val) => Some((*target_field, Value::String(val.into()))),
Both(target_field, val) => {
Some((target_field.clone(), Value::String(val.into())))
}
// if target fields are more than extracted fields, fill the rest with empty value
Left(target_field) => {
let value = self
@@ -138,7 +81,7 @@ impl CsvProcessor {
.as_ref()
.map(|s| Value::String(s.clone()))
.unwrap_or(Value::Null);
Some((*target_field, value))
Some((target_field.clone(), value))
}
// if extracted fields are more than target fields, ignore the rest
Right(_) => None,
@@ -152,7 +95,7 @@ impl CsvProcessor {
}
}
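The rewritten process above maps one CSV record onto target_fields by name: target fields and record columns are paired with zip_longest, missing trailing columns are filled from empty_value, and surplus columns are dropped. Below is a self-contained approximation of that shape using the csv and itertools crates, with plain String values and csv::Error standing in for the crate's own Value and error types; it is a sketch, not the actual method.

use std::collections::BTreeMap;

use csv::ReaderBuilder;
use itertools::{
    EitherOrBoth::{Both, Left, Right},
    Itertools,
};

/// Map one CSV line onto `target_fields`, mirroring the processor's new
/// key-based output. Missing trailing columns fall back to `empty_value`;
/// extra columns beyond the target fields are ignored.
fn process_csv_line(
    target_fields: &[String],
    empty_value: Option<&str>,
    line: &str,
) -> Result<BTreeMap<String, String>, csv::Error> {
    let mut reader = ReaderBuilder::new()
        .has_headers(false)
        .from_reader(line.as_bytes());
    let record = match reader.records().next() {
        Some(record) => record?,
        None => return Ok(BTreeMap::new()),
    };

    let map = target_fields
        .iter()
        .zip_longest(record.iter())
        .filter_map(|zipped| match zipped {
            Both(field, value) => Some((field.clone(), value.to_string())),
            // The real processor inserts Value::Null when `empty_value` is unset;
            // this sketch simply skips the key in that case.
            Left(field) => empty_value.map(|v| (field.clone(), v.to_string())),
            Right(_) => None,
        })
        .collect();
    Ok(map)
}

fn main() -> Result<(), csv::Error> {
    let fields = vec!["a".to_string(), "b".to_string(), "c".to_string()];
    let map = process_csv_line(&fields, Some("default"), "1,2")?;
    assert_eq!(map["c"], "default");
    println!("{map:?}");
    Ok(())
}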
impl TryFrom<&yaml_rust::yaml::Hash> for CsvProcessorBuilder {
impl TryFrom<&yaml_rust::yaml::Hash> for CsvProcessor {
type Error = Error;

fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self> {
@@ -224,8 +167,8 @@ impl TryFrom<&yaml_rust::yaml::Hash> for CsvProcessorBuilder {
_ => {}
}
}
let builder = {
CsvProcessorBuilder {
let proc = {
CsvProcessor {
reader,
fields,
ignore_missing,
@@ -234,7 +177,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for CsvProcessorBuilder {
}
};

Ok(builder)
Ok(proc)
}
}

@@ -247,21 +190,20 @@ impl Processor for CsvProcessor {
self.ignore_missing
}

fn exec_mut(&self, val: &mut Vec<Value>) -> Result<()> {
fn exec_mut(&self, val: &mut BTreeMap<String, Value>) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_index();
match val.get(index) {
let name = field.input_field();

match val.get(name) {
Some(Value::String(v)) => {
let resule_list = self.process(v)?;
for (k, v) in resule_list {
val[k] = v;
}
let results = self.process(v)?;
val.extend(results);
}
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
field: field.input_name().to_string(),
field: name.to_string(),
}
.fail();
}
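The exec_mut change above is the core of the refactor: processors no longer receive a flat Vec<Value> addressed through pre-resolved indices, they receive the name-keyed intermediate map and use get to read and extend/insert to write. The toy processor below illustrates that contract only; the Value enum, the String error type, and UppercaseProcessor itself are invented for the example and are not part of the pipeline crate.

use std::collections::BTreeMap;

#[derive(Debug, Clone, PartialEq)]
enum Value {
    String(String),
    Null,
}

/// Hypothetical processor: reads `field` from the shared map and writes an
/// upper-cased copy under `target`, mimicking the new key-based contract.
struct UppercaseProcessor {
    field: String,
    target: String,
    ignore_missing: bool,
}

impl UppercaseProcessor {
    fn exec_mut(&self, val: &mut BTreeMap<String, Value>) -> Result<(), String> {
        // Clone the current value first so the read borrow ends before we write.
        match val.get(&self.field).cloned() {
            Some(Value::String(s)) => {
                val.insert(self.target.clone(), Value::String(s.to_uppercase()));
                Ok(())
            }
            // Missing or null input is tolerated only when `ignore_missing` is set.
            Some(Value::Null) | None if self.ignore_missing => Ok(()),
            _ => Err(format!("field `{}` is missing", self.field)),
        }
    }
}

fn main() {
    let processor = UppercaseProcessor {
        field: "name".to_string(),
        target: "name_upper".to_string(),
        ignore_missing: false,
    };

    let mut state = BTreeMap::from([("name".to_string(), Value::String("greptime".into()))]);
    processor.exec_mut(&mut state).unwrap();

    assert_eq!(
        state.get("name_upper"),
        Some(&Value::String("GREPTIME".to_string()))
    );
}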
@@ -282,37 +224,28 @@ impl Processor for CsvProcessor {
#[cfg(test)]
mod tests {

use ahash::HashMap;

use super::Value;
use crate::etl::processor::csv::CsvProcessorBuilder;
use super::*;
use crate::etl::field::Field;

#[test]
fn test_equal_length() {
let mut reader = csv::ReaderBuilder::new();
reader.has_headers(false);
let builder = CsvProcessorBuilder {
let processor = CsvProcessor {
reader,
fields: Fields::new(vec![Field::new("data", None)]),
target_fields: vec!["a".into(), "b".into()],
..Default::default()
};

let intermediate_keys = vec!["data".into(), "a".into(), "b".into()];

let processor = builder.build(&intermediate_keys).unwrap();
let result = processor
.process("1,2")
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
let result = processor.process("1,2").unwrap();

let values = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
]
.into_iter()
.collect::<HashMap<_, _>>();
.collect();

assert_eq!(result, values);
}
@@ -324,21 +257,14 @@ mod tests {
{
let mut reader = csv::ReaderBuilder::new();
reader.has_headers(false);
let builder = CsvProcessorBuilder {
let processor = CsvProcessor {
reader,
fields: Fields::new(vec![Field::new("data", None)]),
target_fields: vec!["a".into(), "b".into(), "c".into()],
..Default::default()
};

let intermediate_keys = vec!["data".into(), "a".into(), "b".into(), "c".into()];

let processor = builder.build(&intermediate_keys).unwrap();
let result = processor
.process("1,2")
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
let result = processor.process("1,2").unwrap();

let values = [
("a".into(), Value::String("1".into())),
@@ -346,7 +272,7 @@ mod tests {
("c".into(), Value::Null),
]
.into_iter()
.collect::<HashMap<_, _>>();
.collect();

assert_eq!(result, values);
}
@@ -355,22 +281,15 @@ mod tests {
{
let mut reader = csv::ReaderBuilder::new();
reader.has_headers(false);
let builder = CsvProcessorBuilder {
let processor = CsvProcessor {
reader,
fields: Fields::new(vec![Field::new("data", None)]),
target_fields: vec!["a".into(), "b".into(), "c".into()],
empty_value: Some("default".into()),
..Default::default()
};

let intermediate_keys = vec!["data".into(), "a".into(), "b".into(), "c".into()];

let processor = builder.build(&intermediate_keys).unwrap();
let result = processor
.process("1,2")
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
let result = processor.process("1,2").unwrap();

let values = [
("a".into(), Value::String("1".into())),
@@ -389,22 +308,14 @@ mod tests {
fn test_target_fields_has_less_length() {
let mut reader = csv::ReaderBuilder::new();
reader.has_headers(false);
let builder = CsvProcessorBuilder {
let processor = CsvProcessor {
reader,
target_fields: vec!["a".into(), "b".into()],
empty_value: Some("default".into()),
..Default::default()
};

let intermediate_keys = vec!["data".into(), "a".into(), "b".into()];

let processor = builder.build(&intermediate_keys).unwrap();
let result = processor
.process("1,2")
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
let result = processor.process("1,2").unwrap();

let values = [
("a".into(), Value::String("1".into())),
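The test updates above follow directly from the map-based process: each test now constructs the processor with a struct literal plus ..Default::default() and compares the returned map against an expected HashMap, with no intermediate_keys round-trip. The snippet below mirrors only that construction and assertion style with an invented LineSplitter type; it is not a pipeline processor.

use std::collections::HashMap;

/// Invented stand-in with the same test ergonomics as the refactored
/// processors: optional knobs default, output is a name-keyed map.
#[derive(Debug, Default)]
struct LineSplitter {
    target_fields: Vec<String>,
    empty_value: Option<String>,
}

impl LineSplitter {
    /// Split a comma-separated line and key the pieces by `target_fields`,
    /// filling missing trailing columns from `empty_value` (or "" if unset).
    fn process(&self, line: &str) -> HashMap<String, String> {
        let mut parts = line.split(',');
        self.target_fields
            .iter()
            .map(|field| {
                let value = parts
                    .next()
                    .map(|s| s.to_string())
                    .or_else(|| self.empty_value.clone())
                    .unwrap_or_default();
                (field.clone(), value)
            })
            .collect()
    }
}

fn main() {
    // Struct literal + `..Default::default()`, as in the updated tests.
    let splitter = LineSplitter {
        target_fields: vec!["a".into(), "b".into(), "c".into()],
        ..Default::default()
    };

    let result = splitter.process("1,2");
    let expected: HashMap<_, _> = [
        ("a".to_string(), "1".to_string()),
        ("b".to_string(), "2".to_string()),
        ("c".to_string(), String::new()),
    ]
    .into_iter()
    .collect();

    assert_eq!(result, expected);
    println!("{result:?}");
}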
@@ -14,21 +14,21 @@

use std::sync::Arc;

use ahash::HashSet;
use chrono::{DateTime, NaiveDateTime};
use chrono_tz::Tz;
use lazy_static::lazy_static;
use snafu::{OptionExt, ResultExt};

use super::IntermediateStatus;
use crate::etl::error::{
DateFailedToGetLocalTimezoneSnafu, DateFailedToGetTimestampSnafu, DateParseSnafu,
DateParseTimezoneSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorFailedToParseStringSnafu, ProcessorMissingFieldSnafu, Result,
};
use crate::etl::field::{Fields, OneInputOneOutputField};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor,
ProcessorBuilder, ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::{Timestamp, Value};

@@ -88,55 +88,7 @@ impl std::ops::Deref for Formats {
}
}

#[derive(Debug, Default)]
pub struct DateProcessorBuilder {
fields: Fields,
formats: Formats,
timezone: Option<Arc<String>>,
locale: Option<Arc<String>>,
ignore_missing: bool,
}

impl ProcessorBuilder for DateProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.fields
.iter()
.map(|f| f.target_or_input_field())
.collect()
}

fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}

fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind> {
self.build(intermediate_keys).map(ProcessorKind::Date)
}
}

impl DateProcessorBuilder {
pub fn build(self, intermediate_keys: &[String]) -> Result<DateProcessor> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input = OneInputOneOutputField::build(
"date",
intermediate_keys,
field.input_field(),
field.target_or_input_field(),
)?;
real_fields.push(input);
}
Ok(DateProcessor {
fields: real_fields,
formats: self.formats,
timezone: self.timezone,
locale: self.locale,
ignore_missing: self.ignore_missing,
})
}
}

impl TryFrom<&yaml_rust::yaml::Hash> for DateProcessorBuilder {
impl TryFrom<&yaml_rust::yaml::Hash> for DateProcessor {
type Error = Error;

fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self> {
@@ -181,7 +133,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for DateProcessorBuilder {
}
}

let builder = DateProcessorBuilder {
let builder = DateProcessor {
fields,
formats,
timezone,
@@ -197,7 +149,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for DateProcessorBuilder {
/// Reserved for compatibility only
#[derive(Debug, Default)]
pub struct DateProcessor {
fields: Vec<OneInputOneOutputField>,
fields: Fields,
formats: Formats,
timezone: Option<Arc<String>>,
locale: Option<Arc<String>>, // to support locale
@@ -242,20 +194,20 @@ impl Processor for DateProcessor {
self.ignore_missing
}

fn exec_mut(&self, val: &mut Vec<Value>) -> Result<()> {
fn exec_mut(&self, val: &mut IntermediateStatus) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_index();
let index = field.input_field();
match val.get(index) {
Some(Value::String(s)) => {
let timestamp = self.parse(s)?;
let output_index = field.output_index();
val[output_index] = Value::Timestamp(timestamp);
let output_key = field.target_or_input_field();
val.insert(output_key.to_string(), Value::Timestamp(timestamp));
}
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
field: field.input_name().to_string(),
field: field.input_field().to_string(),
}
.fail();
}
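For the date processor the same pattern applies: the parsed timestamp is inserted under the field's target key (falling back to the input key) rather than written into a positional slot. The sketch below keeps only the timezone-aware parsing and the map insert, using chrono and chrono-tz directly, with a single format string and an i64 epoch value in place of the crate's Formats list and Value::Timestamp; treat it as an outline of the flow under those assumptions, not the crate's API.

use std::collections::BTreeMap;

use chrono::NaiveDateTime;
use chrono_tz::Tz;

/// Parse `input` with one fixed format in the given timezone and store the
/// resulting epoch milliseconds under `output_key`, mirroring how the new
/// `exec_mut` inserts the parsed timestamp into the name-keyed map.
fn parse_date_into(
    state: &mut BTreeMap<String, i64>,
    output_key: &str,
    input: &str,
    timezone: Tz,
) -> Result<(), chrono::ParseError> {
    let naive = NaiveDateTime::parse_from_str(input, "%Y-%m-%d %H:%M:%S")?;
    // Interpret the naive timestamp in the configured timezone, taking the
    // earliest candidate when the local time is ambiguous; a nonexistent
    // local time falls back to 0 here, where the real processor errors out.
    let ts_millis = naive
        .and_local_timezone(timezone)
        .earliest()
        .map(|dt| dt.timestamp_millis())
        .unwrap_or_default();
    state.insert(output_key.to_string(), ts_millis);
    Ok(())
}

fn main() -> Result<(), chrono::ParseError> {
    let mut state = BTreeMap::new();
    parse_date_into(&mut state, "ts", "2024-05-25 20:16:37", chrono_tz::Asia::Shanghai)?;
    println!("{state:?}");
    Ok(())
}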
Some files were not shown because too many files have changed in this diff.