Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2026-01-05 21:02:58 +00:00

Compare commits (20 commits): poc-write-... → v0.12.0-ni...
| Author | SHA1 | Date |
|---|---|---|
| | 480b05c590 | |
| | 0de0fd80b0 | |
| | 059cb6fdc3 | |
| | 29218b5fe7 | |
| | 59e6ec0395 | |
| | 79ee230f2a | |
| | 0e4bd59fac | |
| | 6eccadbf73 | |
| | f29a1c56e9 | |
| | 88c3d331a1 | |
| | 79acc9911e | |
| | 0a169980b7 | |
| | c80d2a3222 | |
| | 116bdaf690 | |
| | 6341fb86c7 | |
| | fa09e181be | |
| | ab4663ec2b | |
| | fac22575aa | |
| | 0e249f69cd | |
| | 5d1761f3e5 | |
Cargo.lock (generated, 671 changed lines): file diff suppressed because it is too large.
Cargo.toml (11 changed lines)
@@ -127,8 +127,7 @@ etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-# branch: poc-write-path
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1915576b113a494f5352fd61f211d899b7f87aab" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "683e9d10ae7f3dfb8aaabd89082fc600c17e3795" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -139,8 +138,8 @@ itertools = "0.10"
 jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "8c8d2fc294a39f3ff08909d60f718639cfba3875", default-features = false }
 lazy_static = "1.4"
 local-ip-address = "0.6"
-loki-api = { git = "https://github.com/shuiyisong/tracing-loki", branch = "chore/prost_version" }
-meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
+loki-proto = { git = "https://github.com/GreptimeTeam/loki-proto.git", rev = "1434ecf23a2654025d86188fb5205e7a74b225d3" }
+meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "5618e779cf2bb4755b499c630fba4c35e91898cb" }
 mockall = "0.11.4"
 moka = "0.12"
 nalgebra = "0.33"
@@ -279,12 +278,10 @@ tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls", rev = "46
 # This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
 # see https://github.com/aws/aws-lc-rs/pull/526
 # aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
 # Apply a fix for pprof for unaligned pointer access
 pprof = { git = "https://github.com/GreptimeTeam/pprof-rs", rev = "1bd1e21" }

 [workspace.dependencies.meter-macros]
 git = "https://github.com/GreptimeTeam/greptime-meter.git"
-rev = "a10facb353b41460eeb98578868ebf19c2084fac"
+rev = "5618e779cf2bb4755b499c630fba4c35e91898cb"

 [profile.release]
 debug = 1
@@ -1,3 +1,6 @@
+[target.aarch64-unknown-linux-gnu]
+image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:0.2.5"
+
 [build]
 pre-build = [
 "dpkg --add-architecture $CROSS_DEB_ARCH",
@@ -5,3 +8,8 @@ pre-build = [
 "curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/",
 "chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google",
 ]
+
+[build.env]
+passthrough = [
+"JEMALLOC_SYS_WITH_LG_PAGE",
+]
@@ -116,7 +116,7 @@ docker run -p 127.0.0.1:4000-4003:4000-4003 \
 --name greptime --rm \
 greptime/greptimedb:latest standalone start \
 --http-addr 0.0.0.0:4000 \
---rpc-addr 0.0.0.0:4001 \
+--rpc-bind-addr 0.0.0.0:4001 \
 --mysql-addr 0.0.0.0:4002 \
 --postgres-addr 0.0.0.0:4003
```
@@ -29,7 +29,7 @@
 | `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
 | `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
 | `grpc` | -- | -- | The gRPC server options. |
-| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
+| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
 | `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
 | `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
 | `grpc.tls.mode` | String | `disable` | TLS mode. |
@@ -65,8 +65,8 @@
 | `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
 | `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
-| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
-| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_interval` | String | `1m` | The interval to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -88,8 +88,9 @@
 | `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
 | `metadata_store` | -- | -- | Metadata storage options. |
-| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
-| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
+| `metadata_store.file_size` | String | `64MB` | The size of the metadata store log file. |
+| `metadata_store.purge_threshold` | String | `256MB` | The threshold of the metadata store size to trigger a purge. |
+| `metadata_store.purge_interval` | String | `1m` | The interval of the metadata store to trigger a purge. |
 | `procedure` | -- | -- | Procedure storage options. |
 | `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
 | `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
@@ -221,8 +222,8 @@
 | `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
 | `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
 | `grpc` | -- | -- | The gRPC server options. |
-| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
-| `grpc.hostname` | String | `127.0.0.1:4001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
+| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
+| `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
 | `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
 | `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
 | `grpc.tls.mode` | String | `disable` | TLS mode. |
@@ -300,7 +301,7 @@
 | --- | -----| ------- | ----------- |
 | `data_home` | String | `/tmp/metasrv/` | The working home directory. |
 | `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
-| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
+| `server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
 | `store_addrs` | Array | -- | Store server address default to etcd store.<br/>For postgres store, the format is:<br/>"password=password dbname=postgres user=postgres host=localhost port=5432"<br/>For etcd store, the format is:<br/>"127.0.0.1:2379" |
 | `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
 | `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store` |
@@ -376,19 +377,14 @@
 | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
 | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
 | `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
-| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
-| `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
-| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
-| `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
-| `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
 | `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
 | `http` | -- | -- | The HTTP server options. |
 | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
 | `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
 | `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
 | `grpc` | -- | -- | The gRPC server options. |
-| `grpc.addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
-| `grpc.hostname` | String | `127.0.0.1:3001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
+| `grpc.bind_addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
+| `grpc.server_addr` | String | `127.0.0.1:3001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
 | `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
 | `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
 | `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
@@ -549,8 +545,8 @@
 | `flow` | -- | -- | flow engine options. |
 | `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
 | `grpc` | -- | -- | The gRPC server options. |
-| `grpc.addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
-| `grpc.hostname` | String | `127.0.0.1` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
+| `grpc.bind_addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
+| `grpc.server_addr` | String | `127.0.0.1:6800` | The address advertised to the metasrv,<br/>and used for connections from outside the host |
 | `grpc.runtime_size` | Integer | `2` | The number of server worker threads. |
 | `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
 | `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
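Several of the renamed options above (`grpc.server_addr`, metasrv `server_addr`) document the same fallback: when the advertised address is left empty, the server derives it from the first network interface's IP plus the port taken from the bind address. A minimal sketch of that fallback, assuming the `local-ip-address` crate already listed in the workspace dependencies; the helper name and logic here are illustrative, not GreptimeDB's actual `detect_server_addr` implementation:

```rust
use std::net::IpAddr;

use local_ip_address::local_ip;

/// Hypothetical helper mirroring the documented fallback: if no server address
/// is configured, advertise the first interface's IP with the bind port.
fn detect_server_addr(bind_addr: &str, server_addr: &str) -> String {
    if !server_addr.is_empty() {
        return server_addr.to_string();
    }
    // Reuse the port from the bind address, e.g. "0.0.0.0:4001" -> "4001".
    let port = bind_addr.rsplit(':').next().unwrap_or("4001");
    // `local_ip()` returns the IP address of the first non-loopback interface.
    let ip: IpAddr = local_ip().unwrap_or_else(|_| IpAddr::from([127, 0, 0, 1]));
    format!("{ip}:{port}")
}

fn main() {
    // With an empty server_addr, the advertised address is derived automatically.
    println!("{}", detect_server_addr("0.0.0.0:4001", ""));
}
```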
@@ -19,26 +19,6 @@ init_regions_parallelism = 16
 ## The maximum current queries allowed to be executed. Zero means unlimited.
 max_concurrent_queries = 0

-## Deprecated, use `grpc.addr` instead.
-## @toml2docs:none-default
-rpc_addr = "127.0.0.1:3001"
-
-## Deprecated, use `grpc.hostname` instead.
-## @toml2docs:none-default
-rpc_hostname = "127.0.0.1"
-
-## Deprecated, use `grpc.runtime_size` instead.
-## @toml2docs:none-default
-rpc_runtime_size = 8
-
-## Deprecated, use `grpc.rpc_max_recv_message_size` instead.
-## @toml2docs:none-default
-rpc_max_recv_message_size = "512MB"
-
-## Deprecated, use `grpc.rpc_max_send_message_size` instead.
-## @toml2docs:none-default
-rpc_max_send_message_size = "512MB"
-
 ## Enable telemetry to collect anonymous usage data. Enabled by default.
 #+ enable_telemetry = true
@@ -56,10 +36,11 @@ body_limit = "64MB"
 ## The gRPC server options.
 [grpc]
 ## The address to bind the gRPC server.
-addr = "127.0.0.1:3001"
-## The hostname advertised to the metasrv,
-## and used for connections from outside the host
-hostname = "127.0.0.1:3001"
+bind_addr = "127.0.0.1:3001"
+## The address advertised to the metasrv, and used for connections from outside the host.
+## If left empty or unset, the server will automatically use the IP address of the first network interface
+## on the host, with the same port number as the one specified in `grpc.bind_addr`.
+server_addr = "127.0.0.1:3001"
 ## The number of server worker threads.
 runtime_size = 8
 ## The maximum receive message size for gRPC server.
@@ -14,10 +14,10 @@ node_id = 14
 ## The gRPC server options.
 [grpc]
 ## The address to bind the gRPC server.
-addr = "127.0.0.1:6800"
-## The hostname advertised to the metasrv,
+bind_addr = "127.0.0.1:6800"
+## The address advertised to the metasrv,
 ## and used for connections from outside the host
-hostname = "127.0.0.1"
+server_addr = "127.0.0.1:6800"
 ## The number of server worker threads.
 runtime_size = 2
 ## The maximum receive message size for gRPC server.
@@ -41,10 +41,11 @@ cors_allowed_origins = ["https://example.com"]
 ## The gRPC server options.
 [grpc]
 ## The address to bind the gRPC server.
-addr = "127.0.0.1:4001"
-## The hostname advertised to the metasrv,
-## and used for connections from outside the host
-hostname = "127.0.0.1:4001"
+bind_addr = "127.0.0.1:4001"
+## The address advertised to the metasrv, and used for connections from outside the host.
+## If left empty or unset, the server will automatically use the IP address of the first network interface
+## on the host, with the same port number as the one specified in `grpc.bind_addr`.
+server_addr = "127.0.0.1:4001"
 ## The number of server worker threads.
 runtime_size = 8
@@ -4,7 +4,9 @@ data_home = "/tmp/metasrv/"
 ## The bind address of metasrv.
 bind_addr = "127.0.0.1:3002"

-## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
+## The communication server address for the frontend and datanode to connect to metasrv.
+## If left empty or unset, the server will automatically use the IP address of the first network interface
+## on the host, with the same port number as the one specified in `bind_addr`.
 server_addr = "127.0.0.1:3002"

 ## Store server address default to etcd store.
@@ -49,7 +49,7 @@ cors_allowed_origins = ["https://example.com"]
 ## The gRPC server options.
 [grpc]
 ## The address to bind the gRPC server.
-addr = "127.0.0.1:4001"
+bind_addr = "127.0.0.1:4001"
 ## The number of server worker threads.
 runtime_size = 8
@@ -159,11 +159,11 @@ dir = "/tmp/greptimedb/wal"
 ## **It's only used when the provider is `raft_engine`**.
 file_size = "128MB"

-## The threshold of the WAL size to trigger a flush.
+## The threshold of the WAL size to trigger a purge.
 ## **It's only used when the provider is `raft_engine`**.
 purge_threshold = "1GB"

-## The interval to trigger a flush.
+## The interval to trigger a purge.
 ## **It's only used when the provider is `raft_engine`**.
 purge_interval = "1m"
@@ -278,10 +278,12 @@ overwrite_entry_start_id = false

 ## Metadata storage options.
 [metadata_store]
-## Kv file size in bytes.
-file_size = "256MB"
-## Kv purge threshold.
-purge_threshold = "4GB"
+## The size of the metadata store log file.
+file_size = "64MB"
+## The threshold of the metadata store size to trigger a purge.
+purge_threshold = "256MB"
+## The interval of the metadata store to trigger a purge.
+purge_interval = "1m"

 ## Procedure storage options.
 [procedure]
@@ -43,8 +43,8 @@ services:
 command:
 - metasrv
 - start
-- --bind-addr=0.0.0.0:3002
-- --server-addr=metasrv:3002
+- --rpc-bind-addr=0.0.0.0:3002
+- --rpc-server-addr=metasrv:3002
 - --store-addrs=etcd0:2379
 - --http-addr=0.0.0.0:3000
 healthcheck:
@@ -68,8 +68,8 @@ services:
 - datanode
 - start
 - --node-id=0
-- --rpc-addr=0.0.0.0:3001
-- --rpc-hostname=datanode0:3001
+- --rpc-bind-addr=0.0.0.0:3001
+- --rpc-server-addr=datanode0:3001
 - --metasrv-addrs=metasrv:3002
 - --http-addr=0.0.0.0:5000
 volumes:
@@ -98,7 +98,7 @@ services:
 - start
 - --metasrv-addrs=metasrv:3002
 - --http-addr=0.0.0.0:4000
-- --rpc-addr=0.0.0.0:4001
+- --rpc-bind-addr=0.0.0.0:4001
 - --mysql-addr=0.0.0.0:4002
 - --postgres-addr=0.0.0.0:4003
 healthcheck:
@@ -123,8 +123,8 @@ services:
 - start
 - --node-id=0
 - --metasrv-addrs=metasrv:3002
-- --rpc-addr=0.0.0.0:4004
-- --rpc-hostname=flownode0:4004
+- --rpc-bind-addr=0.0.0.0:4004
+- --rpc-server-addr=flownode0:4004
 - --http-addr=0.0.0.0:4005
 depends_on:
 frontend0:
@@ -4,6 +4,16 @@ This crate provides an easy approach to dump memory profiling info.

 ## Prerequisites
 ### jemalloc
+jeprof is already compiled in the target directory of GreptimeDB. You can find the binary and use it.
+```
+# find jeprof binary
+find . -name 'jeprof'
+# add executable permission
+chmod +x <path_to_jeprof>
+```
+The path is usually under `./target/${PROFILE}/build/tikv-jemalloc-sys-${HASH}/out/build/bin/jeprof`.
+The default version of jemalloc installed from the package manager may not have the `--collapsed` option.
+You may need to check whether the `jeprof` version is >= `5.3.0` if you want to install it from the package manager.
 ```bash
 # for macOS
 brew install jemalloc
@@ -23,7 +33,11 @@ curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph
 Start GreptimeDB instance with environment variables:

 ```bash
+# for Linux
 MALLOC_CONF=prof:true ./target/debug/greptime standalone start
+
+# for macOS
+_RJEM_MALLOC_CONF=prof:true ./target/debug/greptime standalone start
 ```

 Dump memory profiling data through HTTP API:
@@ -28,6 +28,7 @@ use common_meta::kv_backend::postgres::PgStore;
 use common_meta::peer::Peer;
 use common_meta::rpc::router::{Region, RegionRoute};
 use common_telemetry::info;
+use common_wal::options::WalOptions;
 use datatypes::data_type::ConcreteDataType;
 use datatypes::schema::{ColumnSchema, RawSchema};
 use rand::Rng;
@@ -184,7 +185,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
 region_routes
 }

-fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, String> {
+fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, WalOptions> {
 // TODO(niebayes): construct region wal options for benchmark.
 let _ = regions;
 HashMap::default()
@@ -49,7 +49,12 @@ impl TableMetadataBencher {

 let regions: Vec<_> = (0..64).collect();
 let region_routes = create_region_routes(regions.clone());
-let region_wal_options = create_region_wal_options(regions);
+let region_wal_options = create_region_wal_options(regions)
+    .into_iter()
+    .map(|(region_id, wal_options)| {
+        (region_id, serde_json::to_string(&wal_options).unwrap())
+    })
+    .collect();

 let start = Instant::now();
@@ -109,9 +114,17 @@ impl TableMetadataBencher {
 let table_info = table_info.unwrap();
 let table_route = table_route.unwrap();
 let table_id = table_info.table_info.ident.table_id;
+
+let regions: Vec<_> = (0..64).collect();
+let region_wal_options = create_region_wal_options(regions);
 let _ = self
 .table_metadata_manager
-.delete_table_metadata(table_id, &table_info.table_name(), &table_route)
+.delete_table_metadata(
+    table_id,
+    &table_info.table_name(),
+    &table_route,
+    &region_wal_options,
+)
 .await;
 start.elapsed()
 },
@@ -126,10 +126,14 @@ impl SubCommand {
 struct StartCommand {
 #[clap(long)]
 node_id: Option<u64>,
-#[clap(long)]
-rpc_addr: Option<String>,
-#[clap(long)]
-rpc_hostname: Option<String>,
+/// The address to bind the gRPC server.
+#[clap(long, alias = "rpc-addr")]
+rpc_bind_addr: Option<String>,
+/// The address advertised to the metasrv, and used for connections from outside the host.
+/// If left empty or unset, the server will automatically use the IP address of the first network interface
+/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
+#[clap(long, alias = "rpc-hostname")]
+rpc_server_addr: Option<String>,
 #[clap(long, value_delimiter = ',', num_args = 1..)]
 metasrv_addrs: Option<Vec<String>>,
 #[clap(short, long)]
@@ -181,18 +185,18 @@ impl StartCommand {
 tokio_console_addr: global_options.tokio_console_addr.clone(),
 };

-if let Some(addr) = &self.rpc_addr {
-opts.grpc.addr.clone_from(addr);
+if let Some(addr) = &self.rpc_bind_addr {
+opts.grpc.bind_addr.clone_from(addr);
 } else if let Some(addr) = &opts.rpc_addr {
 warn!("Use the deprecated attribute `DatanodeOptions.rpc_addr`, please use `grpc.addr` instead.");
-opts.grpc.addr.clone_from(addr);
+opts.grpc.bind_addr.clone_from(addr);
 }

-if let Some(hostname) = &self.rpc_hostname {
-opts.grpc.hostname.clone_from(hostname);
-} else if let Some(hostname) = &opts.rpc_hostname {
+if let Some(server_addr) = &self.rpc_server_addr {
+opts.grpc.server_addr.clone_from(server_addr);
+} else if let Some(server_addr) = &opts.rpc_hostname {
 warn!("Use the deprecated attribute `DatanodeOptions.rpc_hostname`, please use `grpc.hostname` instead.");
-opts.grpc.hostname.clone_from(hostname);
+opts.grpc.server_addr.clone_from(server_addr);
 }

 if let Some(runtime_size) = opts.rpc_runtime_size {
@@ -277,7 +281,7 @@ impl StartCommand {

 let plugin_opts = opts.plugins;
 let mut opts = opts.component;
-opts.grpc.detect_hostname();
+opts.grpc.detect_server_addr();
 let mut plugins = Plugins::new();
 plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
 .await
@@ -357,8 +361,8 @@ mod tests {
 rpc_addr = "127.0.0.1:4001"
 rpc_hostname = "192.168.0.1"
 [grpc]
-addr = "127.0.0.1:3001"
-hostname = "127.0.0.1"
+bind_addr = "127.0.0.1:3001"
+server_addr = "127.0.0.1"
 runtime_size = 8
 "#;
 write!(file, "{}", toml_str).unwrap();
@@ -369,8 +373,8 @@ mod tests {
 };

 let options = cmd.load_options(&Default::default()).unwrap().component;
-assert_eq!("127.0.0.1:4001".to_string(), options.grpc.addr);
-assert_eq!("192.168.0.1".to_string(), options.grpc.hostname);
+assert_eq!("127.0.0.1:4001".to_string(), options.grpc.bind_addr);
+assert_eq!("192.168.0.1".to_string(), options.grpc.server_addr);
 }

 #[test]
@@ -431,7 +435,7 @@ mod tests {

 let options = cmd.load_options(&Default::default()).unwrap().component;

-assert_eq!("127.0.0.1:3001".to_string(), options.grpc.addr);
+assert_eq!("127.0.0.1:3001".to_string(), options.grpc.bind_addr);
 assert_eq!(Some(42), options.node_id);

 let DatanodeWalConfig::RaftEngine(raft_engine_config) = options.wal else {
@@ -645,7 +649,7 @@ mod tests {
 opts.http.addr,
 DatanodeOptions::default().component.http.addr
 );
-assert_eq!(opts.grpc.hostname, "10.103.174.219");
+assert_eq!(opts.grpc.server_addr, "10.103.174.219");
 },
 );
 }
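The CLI changes above rename `--rpc-addr`/`--rpc-hostname` to `--rpc-bind-addr`/`--rpc-server-addr` while keeping the old spellings working through clap aliases. A minimal, self-contained sketch of that pattern (a hypothetical struct, not the actual GreptimeDB command definition):

```rust
use clap::Parser;

/// Hypothetical command showing the alias pattern used in the diff above:
/// the deprecated flag names still parse, but their values land in the new fields.
#[derive(Debug, Parser)]
struct StartCommand {
    /// The address to bind the gRPC server (old flag: --rpc-addr).
    #[clap(long, alias = "rpc-addr")]
    rpc_bind_addr: Option<String>,

    /// The address advertised to the metasrv (old flag: --rpc-hostname).
    #[clap(long, alias = "rpc-hostname")]
    rpc_server_addr: Option<String>,
}

fn main() {
    // Both the new and the deprecated spellings are accepted.
    let new_style = StartCommand::parse_from(["app", "--rpc-bind-addr", "0.0.0.0:3001"]);
    let old_style = StartCommand::parse_from(["app", "--rpc-addr", "0.0.0.0:3001"]);
    assert_eq!(new_style.rpc_bind_addr, old_style.rpc_bind_addr);
}
```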
@@ -129,11 +129,13 @@ struct StartCommand {
 #[clap(long)]
 node_id: Option<u64>,
-/// Bind address for the gRPC server.
-#[clap(long)]
-rpc_addr: Option<String>,
-/// Hostname for the gRPC server.
-#[clap(long)]
-rpc_hostname: Option<String>,
+/// The address to bind the gRPC server.
+#[clap(long, alias = "rpc-addr")]
+rpc_bind_addr: Option<String>,
+/// The address advertised to the metasrv, and used for connections from outside the host.
+/// If left empty or unset, the server will automatically use the IP address of the first network interface
+/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
+#[clap(long, alias = "rpc-hostname")]
+rpc_server_addr: Option<String>,
 /// Metasrv address list;
 #[clap(long, value_delimiter = ',', num_args = 1..)]
 metasrv_addrs: Option<Vec<String>>,
@@ -184,12 +186,12 @@ impl StartCommand {
 tokio_console_addr: global_options.tokio_console_addr.clone(),
 };

-if let Some(addr) = &self.rpc_addr {
-opts.grpc.addr.clone_from(addr);
+if let Some(addr) = &self.rpc_bind_addr {
+opts.grpc.bind_addr.clone_from(addr);
 }

-if let Some(hostname) = &self.rpc_hostname {
-opts.grpc.hostname.clone_from(hostname);
+if let Some(server_addr) = &self.rpc_server_addr {
+opts.grpc.server_addr.clone_from(server_addr);
 }

 if let Some(node_id) = self.node_id {
@@ -237,7 +239,7 @@ impl StartCommand {
 info!("Flownode options: {:#?}", opts);

 let mut opts = opts.component;
-opts.grpc.detect_hostname();
+opts.grpc.detect_server_addr();

 // TODO(discord9): make it not optional after cluster id is required
 let cluster_id = opts.cluster_id.unwrap_or(0);
@@ -136,13 +136,19 @@ impl SubCommand {

 #[derive(Debug, Default, Parser)]
 pub struct StartCommand {
+/// The address to bind the gRPC server.
+#[clap(long, alias = "rpc-addr")]
+rpc_bind_addr: Option<String>,
+/// The address advertised to the metasrv, and used for connections from outside the host.
+/// If left empty or unset, the server will automatically use the IP address of the first network interface
+/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
+#[clap(long, alias = "rpc-hostname")]
+rpc_server_addr: Option<String>,
 #[clap(long)]
 http_addr: Option<String>,
 #[clap(long)]
 http_timeout: Option<u64>,
-#[clap(long)]
-rpc_addr: Option<String>,
 #[clap(long)]
 mysql_addr: Option<String>,
 #[clap(long)]
 postgres_addr: Option<String>,
@@ -218,11 +224,15 @@ impl StartCommand {
 opts.http.disable_dashboard = disable_dashboard;
 }

-if let Some(addr) = &self.rpc_addr {
-opts.grpc.addr.clone_from(addr);
+if let Some(addr) = &self.rpc_bind_addr {
+opts.grpc.bind_addr.clone_from(addr);
 opts.grpc.tls = tls_opts.clone();
 }

+if let Some(addr) = &self.rpc_server_addr {
+opts.grpc.server_addr.clone_from(addr);
+}
+
 if let Some(addr) = &self.mysql_addr {
 opts.mysql.enable = true;
 opts.mysql.addr.clone_from(addr);
@@ -269,7 +279,7 @@ impl StartCommand {

 let plugin_opts = opts.plugins;
 let mut opts = opts.component;
-opts.grpc.detect_hostname();
+opts.grpc.detect_server_addr();
 let mut plugins = Plugins::new();
 plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
 .await
@@ -413,7 +423,7 @@ mod tests {

 let default_opts = FrontendOptions::default().component;

-assert_eq!(opts.grpc.addr, default_opts.grpc.addr);
+assert_eq!(opts.grpc.bind_addr, default_opts.grpc.bind_addr);
 assert!(opts.mysql.enable);
 assert_eq!(opts.mysql.runtime_size, default_opts.mysql.runtime_size);
 assert!(opts.postgres.enable);
@@ -604,7 +614,7 @@ mod tests {
 assert_eq!(fe_opts.http.addr, "127.0.0.1:14000");

 // Should be default value.
-assert_eq!(fe_opts.grpc.addr, GrpcOptions::default().addr);
+assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
 },
 );
 }
@@ -133,11 +133,15 @@ impl SubCommand {

 #[derive(Debug, Default, Parser)]
 struct StartCommand {
-#[clap(long)]
-bind_addr: Option<String>,
-#[clap(long)]
-server_addr: Option<String>,
-#[clap(long, aliases = ["store-addr"], value_delimiter = ',', num_args = 1..)]
+/// The address to bind the gRPC server.
+#[clap(long, alias = "bind-addr")]
+rpc_bind_addr: Option<String>,
+/// The communication server address for the frontend and datanode to connect to metasrv.
+/// If left empty or unset, the server will automatically use the IP address of the first network interface
+/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
+#[clap(long, alias = "server-addr")]
+rpc_server_addr: Option<String>,
+#[clap(long, alias = "store-addr", value_delimiter = ',', num_args = 1..)]
 store_addrs: Option<Vec<String>>,
 #[clap(short, long)]
 config_file: Option<String>,
@@ -201,11 +205,11 @@ impl StartCommand {
 tokio_console_addr: global_options.tokio_console_addr.clone(),
 };

-if let Some(addr) = &self.bind_addr {
+if let Some(addr) = &self.rpc_bind_addr {
 opts.bind_addr.clone_from(addr);
 }

-if let Some(addr) = &self.server_addr {
+if let Some(addr) = &self.rpc_server_addr {
 opts.server_addr.clone_from(addr);
 }
@@ -269,11 +273,13 @@ impl StartCommand {
 log_versions(version(), short_version(), APP_NAME);

 info!("Metasrv start command: {:#?}", self);
-info!("Metasrv options: {:#?}", opts);

 let plugin_opts = opts.plugins;
 let mut opts = opts.component;
+opts.detect_server_addr();
+
+info!("Metasrv options: {:#?}", opts);

 let mut plugins = Plugins::new();
 plugins::setup_metasrv_plugins(&mut plugins, &plugin_opts, &opts)
 .await
@@ -306,8 +312,8 @@ mod tests {
 #[test]
 fn test_read_from_cmd() {
 let cmd = StartCommand {
-bind_addr: Some("127.0.0.1:3002".to_string()),
-server_addr: Some("127.0.0.1:3002".to_string()),
+rpc_bind_addr: Some("127.0.0.1:3002".to_string()),
+rpc_server_addr: Some("127.0.0.1:3002".to_string()),
 store_addrs: Some(vec!["127.0.0.1:2380".to_string()]),
 selector: Some("LoadBased".to_string()),
 ..Default::default()
@@ -381,8 +387,8 @@ mod tests {
 #[test]
 fn test_load_log_options_from_cli() {
 let cmd = StartCommand {
-bind_addr: Some("127.0.0.1:3002".to_string()),
-server_addr: Some("127.0.0.1:3002".to_string()),
+rpc_bind_addr: Some("127.0.0.1:3002".to_string()),
+rpc_server_addr: Some("127.0.0.1:3002".to_string()),
 store_addrs: Some(vec!["127.0.0.1:2380".to_string()]),
 selector: Some("LoadBased".to_string()),
 ..Default::default()
@@ -329,8 +329,8 @@ impl App for Instance {
 pub struct StartCommand {
 #[clap(long)]
 http_addr: Option<String>,
-#[clap(long)]
-rpc_addr: Option<String>,
+#[clap(long, alias = "rpc-addr")]
+rpc_bind_addr: Option<String>,
 #[clap(long)]
 mysql_addr: Option<String>,
 #[clap(long)]
@@ -407,9 +407,9 @@ impl StartCommand {
 opts.storage.data_home.clone_from(data_home);
 }

-if let Some(addr) = &self.rpc_addr {
+if let Some(addr) = &self.rpc_bind_addr {
 // frontend grpc addr conflict with datanode default grpc addr
-let datanode_grpc_addr = DatanodeOptions::default().grpc.addr;
+let datanode_grpc_addr = DatanodeOptions::default().grpc.bind_addr;
 if addr.eq(&datanode_grpc_addr) {
 return IllegalConfigSnafu {
 msg: format!(
@@ -417,7 +417,7 @@ impl StartCommand {
 ),
 }.fail();
 }
-opts.grpc.addr.clone_from(addr)
+opts.grpc.bind_addr.clone_from(addr)
 }

 if let Some(addr) = &self.mysql_addr {
@@ -464,7 +464,7 @@ impl StartCommand {
 let mut plugins = Plugins::new();
 let plugin_opts = opts.plugins;
 let mut opts = opts.component;
-opts.grpc.detect_hostname();
+opts.grpc.detect_server_addr();
 let fe_opts = opts.frontend_options();
 let dn_opts = opts.datanode_options();
@@ -486,8 +486,8 @@ impl StartCommand {
 let metadata_dir = metadata_store_dir(data_home);
 let (kv_backend, procedure_manager) = FeInstance::try_build_standalone_components(
 metadata_dir,
-opts.metadata_store.clone(),
-opts.procedure.clone(),
+opts.metadata_store,
+opts.procedure,
 )
 .await
 .context(StartFrontendSnafu)?;
@@ -907,7 +907,7 @@ mod tests {
 assert_eq!("127.0.0.1:4000".to_string(), fe_opts.http.addr);
 assert_eq!(Duration::from_secs(33), fe_opts.http.timeout);
 assert_eq!(ReadableSize::mb(128), fe_opts.http.body_limit);
-assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.addr);
+assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.bind_addr);
 assert!(fe_opts.mysql.enable);
 assert_eq!("127.0.0.1:4002", fe_opts.mysql.addr);
 assert_eq!(2, fe_opts.mysql.runtime_size);
@@ -1037,7 +1037,7 @@ mod tests {
 assert_eq!(ReadableSize::mb(64), fe_opts.http.body_limit);

 // Should be default value.
-assert_eq!(fe_opts.grpc.addr, GrpcOptions::default().addr);
+assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
 },
 );
 }
@@ -63,7 +63,7 @@ mod tests {
 .args([
 "datanode",
 "start",
-"--rpc-addr=0.0.0.0:4321",
+"--rpc-bind-addr=0.0.0.0:4321",
 "--node-id=1",
 &format!("--data-home={}", data_home.path().display()),
 &format!("--wal-dir={}", wal_dir.path().display()),
@@ -80,7 +80,7 @@ mod tests {
 "--log-level=off",
 "cli",
 "attach",
-"--grpc-addr=0.0.0.0:4321",
+"--grpc-bind-addr=0.0.0.0:4321",
 // history commands can sneak into stdout and mess up our tests, so disable it
 "--disable-helper",
 ]);
@@ -17,9 +17,6 @@ use std::time::Duration;
 use cmd::options::GreptimeOptions;
 use cmd::standalone::StandaloneOptions;
 use common_config::Configurable;
-use common_grpc::channel_manager::{
-DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
-};
 use common_options::datanode::{ClientOptions, DatanodeClientOptions};
 use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_ENDPOINT};
 use common_wal::config::raft_engine::RaftEngineConfig;
@@ -91,13 +88,8 @@ fn test_load_datanode_example_config() {
 ..Default::default()
 },
 grpc: GrpcOptions::default()
-.with_addr("127.0.0.1:3001")
-.with_hostname("127.0.0.1:3001"),
-rpc_addr: Some("127.0.0.1:3001".to_string()),
-rpc_hostname: Some("127.0.0.1".to_string()),
-rpc_runtime_size: Some(8),
-rpc_max_recv_message_size: Some(DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE),
-rpc_max_send_message_size: Some(DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE),
+.with_bind_addr("127.0.0.1:3001")
+.with_server_addr("127.0.0.1:3001"),
 ..Default::default()
 },
 ..Default::default()
@@ -144,7 +136,9 @@ fn test_load_frontend_example_config() {
 remote_write: Some(Default::default()),
 ..Default::default()
 },
-grpc: GrpcOptions::default().with_hostname("127.0.0.1:4001"),
+grpc: GrpcOptions::default()
+.with_bind_addr("127.0.0.1:4001")
+.with_server_addr("127.0.0.1:4001"),
 http: HttpOptions {
 cors_allowed_origins: vec!["https://example.com".to_string()],
 ..Default::default()
@@ -12,9 +12,11 @@ common-base.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
 config.workspace = true
+humantime-serde.workspace = true
 num_cpus.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde_with.workspace = true
 snafu.workspace = true
 sysinfo.workspace = true
 toml.workspace = true
@@ -16,6 +16,8 @@ pub mod config;
 pub mod error;
 pub mod utils;

+use std::time::Duration;
+
 use common_base::readable_size::ReadableSize;
 pub use config::*;
 use serde::{Deserialize, Serialize};
@@ -34,22 +36,27 @@ pub enum Mode {
 Distributed,
 }

-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(default)]
 pub struct KvBackendConfig {
-// Kv file size in bytes
+/// The size of the metadata store backend log file.
 pub file_size: ReadableSize,
-// Kv purge threshold in bytes
+/// The threshold of the metadata store size to trigger a purge.
 pub purge_threshold: ReadableSize,
+/// The interval of the metadata store to trigger a purge.
+#[serde(with = "humantime_serde")]
+pub purge_interval: Duration,
 }

 impl Default for KvBackendConfig {
 fn default() -> Self {
 Self {
-// log file size 256MB
-file_size: ReadableSize::mb(256),
-// purge threshold 4GB
-purge_threshold: ReadableSize::gb(4),
+// The log file size 64MB
+file_size: ReadableSize::mb(64),
+// The log purge threshold 256MB
+purge_threshold: ReadableSize::mb(256),
+// The log purge interval 1m
+purge_interval: Duration::from_secs(60),
 }
 }
 }
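The new `purge_interval` field relies on `humantime_serde` (added to the crate's dependencies above) so a value like `"1m"` in the `[metadata_store]` section deserializes into a `Duration`. A standalone sketch of that mechanism; the struct below is a simplified stand-in with sizes as plain strings instead of `ReadableSize`, not the real `KvBackendConfig`:

```rust
use std::time::Duration;

use serde::Deserialize;

/// Hypothetical mirror of the config fields above, showing how the
/// `humantime_serde` attribute turns human-readable durations into `Duration`.
#[derive(Debug, Deserialize)]
#[serde(default)]
struct MetadataStoreConfig {
    file_size: String,
    purge_threshold: String,
    #[serde(with = "humantime_serde")]
    purge_interval: Duration,
}

impl Default for MetadataStoreConfig {
    fn default() -> Self {
        Self {
            // Mirrors the new defaults: 64MB log file, 256MB purge threshold, 1m interval.
            file_size: "64MB".to_string(),
            purge_threshold: "256MB".to_string(),
            purge_interval: Duration::from_secs(60),
        }
    }
}

fn main() {
    // Only the interval is overridden; the other fields fall back to the defaults.
    let cfg: MetadataStoreConfig = toml::from_str(r#"purge_interval = "5m""#).unwrap();
    assert_eq!(cfg.purge_interval, Duration::from_secs(300));
    println!("{cfg:?}");
}
```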
@@ -20,11 +20,12 @@ pub mod impl_conv;
 pub(crate) mod product;
 mod scalar_add;
 mod scalar_mul;
-mod sub;
 pub(crate) mod sum;
+mod vector_add;
 mod vector_div;
 mod vector_mul;
 mod vector_norm;
+mod vector_sub;

 use std::sync::Arc;
@@ -48,10 +49,11 @@ impl VectorFunction {
 registry.register(Arc::new(scalar_mul::ScalarMulFunction));

 // vector calculation
+registry.register(Arc::new(vector_add::VectorAddFunction));
+registry.register(Arc::new(vector_sub::VectorSubFunction));
 registry.register(Arc::new(vector_mul::VectorMulFunction));
-registry.register(Arc::new(vector_norm::VectorNormFunction));
 registry.register(Arc::new(vector_div::VectorDivFunction));
-registry.register(Arc::new(sub::SubFunction));
+registry.register(Arc::new(vector_norm::VectorNormFunction));
 registry.register(Arc::new(elem_sum::ElemSumFunction));
 registry.register(Arc::new(elem_product::ElemProductFunction));
 }
src/common/function/src/scalars/vector/vector_add.rs (new file, 214 lines)
@@ -0,0 +1,214 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::borrow::Cow;
use std::fmt::Display;

use common_query::error::InvalidFuncArgsSnafu;
use common_query::prelude::Signature;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
use nalgebra::DVectorView;
use snafu::ensure;

use crate::function::{Function, FunctionContext};
use crate::helper;
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};

const NAME: &str = "vec_add";

/// Adds corresponding elements of two vectors, returns a vector.
///
/// # Example
///
/// ```sql
/// SELECT vec_to_string(vec_add("[1.0, 1.0]", "[1.0, 2.0]")) as result;
///
/// +---------------------------------------------------------------+
/// | vec_to_string(vec_add(Utf8("[1.0, 1.0]"),Utf8("[1.0, 2.0]"))) |
/// +---------------------------------------------------------------+
/// | [2,3] |
/// +---------------------------------------------------------------+
///
#[derive(Debug, Clone, Default)]
pub struct VectorAddFunction;

impl Function for VectorAddFunction {
    fn name(&self) -> &str {
        NAME
    }

    fn return_type(
        &self,
        _input_types: &[ConcreteDataType],
    ) -> common_query::error::Result<ConcreteDataType> {
        Ok(ConcreteDataType::binary_datatype())
    }

    fn signature(&self) -> Signature {
        helper::one_of_sigs2(
            vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::binary_datatype(),
            ],
            vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::binary_datatype(),
            ],
        )
    }

    fn eval(
        &self,
        _func_ctx: FunctionContext,
        columns: &[VectorRef],
    ) -> common_query::error::Result<VectorRef> {
        ensure!(
            columns.len() == 2,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not correct, expect exactly two, have: {}",
                    columns.len()
                )
            }
        );
        let arg0 = &columns[0];
        let arg1 = &columns[1];

        ensure!(
            arg0.len() == arg1.len(),
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The lengths of the vector are not aligned, args 0: {}, args 1: {}",
                    arg0.len(),
                    arg1.len(),
                )
            }
        );

        let len = arg0.len();
        let mut result = BinaryVectorBuilder::with_capacity(len);
        if len == 0 {
            return Ok(result.to_vector());
        }

        let arg0_const = as_veclit_if_const(arg0)?;
        let arg1_const = as_veclit_if_const(arg1)?;

        for i in 0..len {
            let arg0 = match arg0_const.as_ref() {
                Some(arg0) => Some(Cow::Borrowed(arg0.as_ref())),
                None => as_veclit(arg0.get_ref(i))?,
            };
            let arg1 = match arg1_const.as_ref() {
                Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
                None => as_veclit(arg1.get_ref(i))?,
            };
            let (Some(arg0), Some(arg1)) = (arg0, arg1) else {
                result.push_null();
                continue;
            };
            let vec0 = DVectorView::from_slice(&arg0, arg0.len());
            let vec1 = DVectorView::from_slice(&arg1, arg1.len());

            let vec_res = vec0 + vec1;
            let veclit = vec_res.as_slice();
            let binlit = veclit_to_binlit(veclit);
            result.push(Some(&binlit));
        }

        Ok(result.to_vector())
    }
}

impl Display for VectorAddFunction {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", NAME.to_ascii_uppercase())
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::error::Error;
    use datatypes::vectors::StringVector;

    use super::*;

    #[test]
    fn test_sub() {
        let func = VectorAddFunction;

        let input0 = Arc::new(StringVector::from(vec![
            Some("[1.0,2.0,3.0]".to_string()),
            Some("[4.0,5.0,6.0]".to_string()),
            None,
            Some("[2.0,3.0,3.0]".to_string()),
        ]));
        let input1 = Arc::new(StringVector::from(vec![
            Some("[1.0,1.0,1.0]".to_string()),
            Some("[6.0,5.0,4.0]".to_string()),
            Some("[3.0,2.0,2.0]".to_string()),
            None,
        ]));

        let result = func
            .eval(FunctionContext::default(), &[input0, input1])
            .unwrap();

        let result = result.as_ref();
        assert_eq!(result.len(), 4);
        assert_eq!(
            result.get_ref(0).as_binary().unwrap(),
            Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice())
        );
        assert_eq!(
            result.get_ref(1).as_binary().unwrap(),
            Some(veclit_to_binlit(&[10.0, 10.0, 10.0]).as_slice())
        );
        assert!(result.get_ref(2).is_null());
        assert!(result.get_ref(3).is_null());
    }

    #[test]
    fn test_sub_error() {
        let func = VectorAddFunction;

        let input0 = Arc::new(StringVector::from(vec![
            Some("[1.0,2.0,3.0]".to_string()),
            Some("[4.0,5.0,6.0]".to_string()),
            None,
            Some("[2.0,3.0,3.0]".to_string()),
        ]));
        let input1 = Arc::new(StringVector::from(vec![
            Some("[1.0,1.0,1.0]".to_string()),
            Some("[6.0,5.0,4.0]".to_string()),
            Some("[3.0,2.0,2.0]".to_string()),
        ]));

        let result = func.eval(FunctionContext::default(), &[input0, input1]);

        match result {
            Err(Error::InvalidFuncArgs { err_msg, .. }) => {
                assert_eq!(
                    err_msg,
                    "The lengths of the vector are not aligned, args 0: 4, args 1: 3"
                )
            }
            _ => unreachable!(),
        }
    }
}
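The heart of `vec_add` above is the nalgebra view arithmetic: both operands are wrapped in `DVectorView` and added element-wise. A stripped-down sketch of just that step, with the length check assumed to have passed already:

```rust
use nalgebra::DVectorView;

// Element-wise addition of two float slices via nalgebra vector views,
// mirroring the core of `vec_add`. Assumes equal lengths, as the function
// enforces before reaching this point.
fn add_veclits(a: &[f32], b: &[f32]) -> Vec<f32> {
    let va = DVectorView::from_slice(a, a.len());
    let vb = DVectorView::from_slice(b, b.len());
    (va + vb).as_slice().to_vec()
}

fn main() {
    assert_eq!(add_veclits(&[1.0, 2.0, 3.0], &[1.0, 1.0, 1.0]), vec![2.0, 3.0, 4.0]);
    println!("ok");
}
```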
@@ -42,19 +42,10 @@ const NAME: &str = "vec_sub";
 /// | [0,-1] |
 /// +---------------------------------------------------------------+
 ///
-/// -- Negative scalar to simulate subtraction
-/// SELECT vec_to_string(vec_sub('[-1.0, -1.0]', '[1.0, 2.0]'));
-///
-/// +-----------------------------------------------------------------+
-/// | vec_to_string(vec_sub(Utf8("[-1.0, -1.0]"),Utf8("[1.0, 2.0]"))) |
-/// +-----------------------------------------------------------------+
-/// | [-2,-3] |
-/// +-----------------------------------------------------------------+
-///
 #[derive(Debug, Clone, Default)]
-pub struct SubFunction;
+pub struct VectorSubFunction;

-impl Function for SubFunction {
+impl Function for VectorSubFunction {
 fn name(&self) -> &str {
 NAME
 }
@@ -142,7 +133,7 @@ impl Function for SubFunction {
 }
 }

-impl Display for SubFunction {
+impl Display for VectorSubFunction {
 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 write!(f, "{}", NAME.to_ascii_uppercase())
 }
@@ -159,7 +150,7 @@ mod tests {

 #[test]
 fn test_sub() {
-let func = SubFunction;
+let func = VectorSubFunction;

 let input0 = Arc::new(StringVector::from(vec![
 Some("[1.0,2.0,3.0]".to_string()),
@@ -194,7 +185,7 @@ mod tests {

 #[test]
 fn test_sub_error() {
-let func = SubFunction;
+let func = VectorSubFunction;

 let input0 = Arc::new(StringVector::from(vec![
 Some("[1.0,2.0,3.0]".to_string()),
@@ -13,6 +13,7 @@
 // limitations under the License.

 use std::any::Any;
+use std::collections::HashMap;

 use common_procedure::Status;
 use common_telemetry::info;
@@ -25,6 +26,7 @@ use super::cursor::DropDatabaseCursor;
 use super::{DropDatabaseContext, DropTableTarget};
 use crate::ddl::drop_database::State;
 use crate::ddl::drop_table::executor::DropTableExecutor;
+use crate::ddl::utils::extract_region_wal_options;
 use crate::ddl::DdlContext;
 use crate::error::{self, Result};
 use crate::key::table_route::TableRouteValue;
@@ -107,8 +109,22 @@ impl State for DropDatabaseExecutor {
 self.physical_table_id,
 self.physical_region_routes.clone(),
 );

+// Deletes topic-region mapping if dropping physical table
+let region_wal_options =
+    if let TableRouteValue::Physical(table_route_value) = &table_route_value {
+        let datanode_table_values = ddl_ctx
+            .table_metadata_manager
+            .datanode_table_manager()
+            .regions(self.physical_table_id, table_route_value)
+            .await?;
+        extract_region_wal_options(&datanode_table_values)?
+    } else {
+        HashMap::new()
+    };
+
 executor
-.on_destroy_metadata(ddl_ctx, &table_route_value)
+.on_destroy_metadata(ddl_ctx, &table_route_value, &region_wal_options)
 .await?;
 executor.invalidate_table_cache(ddl_ctx).await?;
 executor
@@ -15,6 +15,8 @@
 pub(crate) mod executor;
 mod metadata;

+use std::collections::HashMap;
+
 use async_trait::async_trait;
 use common_error::ext::BoxedError;
 use common_procedure::error::{ExternalSnafu, FromJsonSnafu, ToJsonSnafu};
@@ -24,8 +26,10 @@ use common_procedure::{
 };
 use common_telemetry::info;
 use common_telemetry::tracing::warn;
+use common_wal::options::WalOptions;
 use serde::{Deserialize, Serialize};
 use snafu::{OptionExt, ResultExt};
+use store_api::storage::RegionNumber;
 use strum::AsRefStr;
 use table::metadata::TableId;
 use table::table_reference::TableReference;
@@ -131,7 +135,11 @@ impl DropTableProcedure {
 );
 // Deletes table metadata logically.
 self.executor
-.on_delete_metadata(&self.context, table_route_value)
+.on_delete_metadata(
+    &self.context,
+    table_route_value,
+    &self.data.region_wal_options,
+)
 .await?;
 info!("Deleted table metadata for table {table_id}");
 self.data.state = DropTableState::InvalidateTableCache;
@@ -163,7 +171,11 @@ impl DropTableProcedure {
 self.data.physical_region_routes.clone(),
 );
 self.executor
-.on_delete_metadata_tombstone(&self.context, table_route_value)
+.on_delete_metadata_tombstone(
+    &self.context,
+    table_route_value,
+    &self.data.region_wal_options,
+)
 .await?;

 self.dropping_regions.clear();
@@ -243,7 +255,11 @@ impl Procedure for DropTableProcedure {
 self.data.physical_region_routes.clone(),
 );
 self.executor
-.on_restore_metadata(&self.context, table_route_value)
+.on_restore_metadata(
+    &self.context,
+    table_route_value,
+    &self.data.region_wal_options,
+)
 .await
 .map_err(ProcedureError::external)
 }
@@ -257,6 +273,8 @@ pub struct DropTableData {
 pub physical_region_routes: Vec<RegionRoute>,
 pub physical_table_id: Option<TableId>,
+#[serde(default)]
+pub region_wal_options: HashMap<RegionNumber, WalOptions>,
 #[serde(default)]
 pub allow_rollback: bool,
 }
@@ -268,6 +286,7 @@ impl DropTableData {
 task,
 physical_region_routes: vec![],
 physical_table_id: None,
+region_wal_options: HashMap::new(),
 allow_rollback: false,
 }
 }
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashMap;
+
 use api::v1::region::{
 region_request, DropRequest as PbDropRegionRequest, RegionRequest, RegionRequestHeader,
 };
@@ -19,9 +21,10 @@ use common_error::ext::ErrorExt;
 use common_error::status_code::StatusCode;
 use common_telemetry::debug;
 use common_telemetry::tracing_context::TracingContext;
+use common_wal::options::WalOptions;
 use futures::future::join_all;
 use snafu::ensure;
-use store_api::storage::RegionId;
+use store_api::storage::{RegionId, RegionNumber};
 use table::metadata::TableId;
 use table::table_name::TableName;
@@ -113,9 +116,15 @@ impl DropTableExecutor {
 &self,
 ctx: &DdlContext,
 table_route_value: &TableRouteValue,
+region_wal_options: &HashMap<RegionNumber, WalOptions>,
 ) -> Result<()> {
 ctx.table_metadata_manager
-.delete_table_metadata(self.table_id, &self.table, table_route_value)
+.delete_table_metadata(
+    self.table_id,
+    &self.table,
+    table_route_value,
+    region_wal_options,
+)
 .await
 }
@@ -124,9 +133,15 @@ impl DropTableExecutor {
 &self,
 ctx: &DdlContext,
 table_route_value: &TableRouteValue,
+region_wal_options: &HashMap<u32, WalOptions>,
 ) -> Result<()> {
 ctx.table_metadata_manager
-.delete_table_metadata_tombstone(self.table_id, &self.table, table_route_value)
+.delete_table_metadata_tombstone(
+    self.table_id,
+    &self.table,
+    table_route_value,
+    region_wal_options,
+)
 .await
 }
@@ -135,9 +150,15 @@ impl DropTableExecutor {
 &self,
 ctx: &DdlContext,
 table_route_value: &TableRouteValue,
+region_wal_options: &HashMap<u32, WalOptions>,
 ) -> Result<()> {
 ctx.table_metadata_manager
-.destroy_table_metadata(self.table_id, &self.table, table_route_value)
+.destroy_table_metadata(
+    self.table_id,
+    &self.table,
+    table_route_value,
+    region_wal_options,
+)
 .await?;

 let detecting_regions = if table_route_value.is_physical() {
@@ -156,9 +177,15 @@ impl DropTableExecutor {
 &self,
 ctx: &DdlContext,
 table_route_value: &TableRouteValue,
+region_wal_options: &HashMap<u32, WalOptions>,
 ) -> Result<()> {
 ctx.table_metadata_manager
-.restore_table_metadata(self.table_id, &self.table, table_route_value)
+.restore_table_metadata(
+    self.table_id,
+    &self.table,
+    table_route_value,
+    region_wal_options,
+)
 .await
 }
|
||||
@@ -17,6 +17,7 @@ use snafu::OptionExt;
|
||||
use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
|
||||
|
||||
use crate::ddl::drop_table::DropTableProcedure;
|
||||
use crate::ddl::utils::extract_region_wal_options;
|
||||
use crate::error::{self, Result};
|
||||
|
||||
impl DropTableProcedure {
|
||||
@@ -30,9 +31,6 @@ impl DropTableProcedure {
|
||||
.get_physical_table_route(task.table_id)
|
||||
.await?;
|
||||
|
||||
self.data.physical_region_routes = physical_table_route_value.region_routes;
|
||||
self.data.physical_table_id = Some(physical_table_id);
|
||||
|
||||
if physical_table_id == self.data.table_id() {
|
||||
let table_info_value = self
|
||||
.context
|
||||
@@ -47,9 +45,21 @@ impl DropTableProcedure {
|
||||
|
||||
let engine = table_info_value.table_info.meta.engine;
|
||||
// rollback only if dropping the metric physical table fails
|
||||
self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME
|
||||
self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME;
|
||||
|
||||
// Deletes topic-region mapping if dropping physical table
|
||||
let datanode_table_values = self
|
||||
.context
|
||||
.table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
.regions(physical_table_id, &physical_table_route_value)
|
||||
.await?;
|
||||
self.data.region_wal_options = extract_region_wal_options(&datanode_table_values)?;
|
||||
}
|
||||
|
||||
self.data.physical_region_routes = physical_table_route_value.region_routes;
|
||||
self.data.physical_table_id = Some(physical_table_id);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,16 +12,23 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_catalog::consts::METRIC_ENGINE;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::error::Error as ProcedureError;
|
||||
use common_wal::options::WalOptions;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
|
||||
use store_api::storage::RegionNumber;
|
||||
use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::ddl::DetectingRegion;
|
||||
use crate::error::{Error, OperateDatanodeSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu};
|
||||
use crate::error::{
|
||||
Error, OperateDatanodeSnafu, ParseWalOptionsSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu,
|
||||
};
|
||||
use crate::key::datanode_table::DatanodeTableValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::TableMetadataManagerRef;
|
||||
use crate::peer::Peer;
|
||||
@@ -151,6 +158,32 @@ pub fn convert_region_routes_to_detecting_regions(
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// Parses [WalOptions] from serialized strings in hashmap.
pub fn parse_region_wal_options(
    serialized_options: &HashMap<RegionNumber, String>,
) -> Result<HashMap<RegionNumber, WalOptions>> {
    let mut region_wal_options = HashMap::with_capacity(serialized_options.len());
    for (region_number, wal_options) in serialized_options {
        let wal_option = serde_json::from_str::<WalOptions>(wal_options)
            .context(ParseWalOptionsSnafu { wal_options })?;
        region_wal_options.insert(*region_number, wal_option);
    }
    Ok(region_wal_options)
}

/// Extracts region wal options from [DatanodeTableValue]s.
pub fn extract_region_wal_options(
    datanode_table_values: &Vec<DatanodeTableValue>,
) -> Result<HashMap<RegionNumber, WalOptions>> {
    let mut region_wal_options = HashMap::new();
    for value in datanode_table_values {
        let serialized_options = &value.region_info.region_wal_options;
        let parsed_options = parse_region_wal_options(serialized_options)?;
        region_wal_options.extend(parsed_options);
    }
    Ok(region_wal_options)
}
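A minimal usage sketch for the two helpers above, assuming the crate's `Result` alias and the imports already used in this file; the topic name is made up for illustration:

use std::collections::HashMap;

use common_wal::options::{KafkaWalOptions, WalOptions};
use store_api::storage::RegionNumber;

fn wal_options_roundtrip() -> Result<()> {
    // Region 0 is Kafka-backed, region 1 keeps using the raft-engine WAL.
    let serialized: HashMap<RegionNumber, String> = HashMap::from([
        (
            0,
            serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
                topic: "greptimedb_topic0".to_string(),
            }))
            .unwrap(),
        ),
        (1, serde_json::to_string(&WalOptions::RaftEngine).unwrap()),
    ]);

    // Invalid JSON would surface through the new ParseWalOptions error variant.
    let parsed = parse_region_wal_options(&serialized)?;
    assert!(matches!(parsed.get(&0), Some(WalOptions::Kafka(_))));
    assert!(matches!(parsed.get(&1), Some(WalOptions::RaftEngine)));
    Ok(())
}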
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -710,6 +710,15 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse wal options: {}", wal_options))]
|
||||
ParseWalOptions {
|
||||
wal_options: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: serde_json::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -762,7 +771,8 @@ impl ErrorExt for Error {
|
||||
| UnexpectedLogicalRouteTable { .. }
|
||||
| ProcedureOutput { .. }
|
||||
| FromUtf8 { .. }
|
||||
| MetadataCorruption { .. } => StatusCode::Unexpected,
|
||||
| MetadataCorruption { .. }
|
||||
| ParseWalOptions { .. } => StatusCode::Unexpected,
|
||||
|
||||
SendMessage { .. } | GetKvCache { .. } | CacheNotGet { .. } => StatusCode::Internal,
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
//! - This key is mainly used in constructing the view in Datanode and Frontend.
|
||||
//!
|
||||
//! 12. Kafka topic key: `__topic_name/kafka/{topic_name}`
|
||||
//! - The key is used to mark existing topics in kafka for WAL.
|
||||
//! - The key is used to mark existing topics in kafka for WAL.
|
||||
//!
|
||||
//! 13. Topic name to region map key `__topic_region/{topic_name}/{region_id}`
|
||||
//! - Mapping {topic_name} to {region_id}
|
||||
@@ -122,6 +122,7 @@ use common_catalog::consts::{
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME,
|
||||
};
|
||||
use common_telemetry::warn;
|
||||
use common_wal::options::WalOptions;
|
||||
use datanode_table::{DatanodeTableKey, DatanodeTableManager, DatanodeTableValue};
|
||||
use flow::flow_route::FlowRouteValue;
|
||||
use flow::table_flow::TableFlowValue;
|
||||
@@ -136,6 +137,7 @@ use table::metadata::{RawTableInfo, TableId};
|
||||
use table::table_name::TableName;
|
||||
use table_info::{TableInfoKey, TableInfoManager, TableInfoValue};
|
||||
use table_name::{TableNameKey, TableNameManager, TableNameValue};
|
||||
use topic_region::{TopicRegionKey, TopicRegionManager};
|
||||
use view_info::{ViewInfoKey, ViewInfoManager, ViewInfoValue};
|
||||
|
||||
use self::catalog_name::{CatalogManager, CatalogNameKey, CatalogNameValue};
|
||||
@@ -306,6 +308,7 @@ pub struct TableMetadataManager {
|
||||
schema_manager: SchemaManager,
|
||||
table_route_manager: TableRouteManager,
|
||||
tombstone_manager: TombstoneManager,
|
||||
topic_region_manager: TopicRegionManager,
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
@@ -456,6 +459,7 @@ impl TableMetadataManager {
|
||||
schema_manager: SchemaManager::new(kv_backend.clone()),
|
||||
table_route_manager: TableRouteManager::new(kv_backend.clone()),
|
||||
tombstone_manager: TombstoneManager::new(kv_backend.clone()),
|
||||
topic_region_manager: TopicRegionManager::new(kv_backend.clone()),
|
||||
kv_backend,
|
||||
}
|
||||
}
|
||||
@@ -648,10 +652,15 @@ impl TableMetadataManager {
|
||||
.table_route_storage()
|
||||
.build_create_txn(table_id, &table_route_value)?;
|
||||
|
||||
let create_topic_region_txn = self
|
||||
.topic_region_manager
|
||||
.build_create_txn(table_id, &region_wal_options)?;
|
||||
|
||||
let mut txn = Txn::merge_all(vec![
|
||||
create_table_name_txn,
|
||||
create_table_info_txn,
|
||||
create_table_route_txn,
|
||||
create_topic_region_txn,
|
||||
]);
|
||||
|
||||
if let TableRouteValue::Physical(x) = &table_route_value {
|
||||
@@ -785,6 +794,7 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<Vec<Vec<u8>>> {
|
||||
// Builds keys
|
||||
let datanode_ids = if table_route_value.is_physical() {
|
||||
@@ -806,13 +816,22 @@ impl TableMetadataManager {
|
||||
.into_iter()
|
||||
.map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let topic_region_map = self
|
||||
.topic_region_manager
|
||||
.get_topic_region_mapping(table_id, region_wal_options);
|
||||
let topic_region_keys = topic_region_map
|
||||
.iter()
|
||||
.map(|(region_id, topic)| TopicRegionKey::new(*region_id, topic))
|
||||
.collect::<Vec<_>>();
|
||||
keys.push(table_name.to_bytes());
|
||||
keys.push(table_info_key.to_bytes());
|
||||
keys.push(table_route_key.to_bytes());
|
||||
for key in &datanode_table_keys {
|
||||
keys.push(key.to_bytes());
|
||||
}
|
||||
for key in topic_region_keys {
|
||||
keys.push(key.to_bytes());
|
||||
}
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
@@ -823,8 +842,10 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
let keys =
|
||||
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
|
||||
self.tombstone_manager.create(keys).await
|
||||
}
|
||||
|
||||
@@ -835,9 +856,11 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
self.tombstone_manager.delete(keys).await
|
||||
let table_metadata_keys =
|
||||
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
|
||||
self.tombstone_manager.delete(table_metadata_keys).await
|
||||
}
|
||||
|
||||
/// Restores metadata for table.
|
||||
@@ -847,8 +870,10 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
let keys =
|
||||
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
|
||||
self.tombstone_manager.restore(keys).await
|
||||
}
|
||||
|
||||
@@ -859,8 +884,10 @@ impl TableMetadataManager {
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
region_wal_options: &HashMap<RegionNumber, WalOptions>,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
let keys =
|
||||
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
|
||||
let _ = self
|
||||
.kv_backend
|
||||
.batch_delete(BatchDeleteRequest::new().with_keys(keys))
|
||||
@@ -1309,8 +1336,9 @@ mod tests {
|
||||
use bytes::Bytes;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_time::util::current_time_millis;
|
||||
use common_wal::options::{KafkaWalOptions, WalOptions};
|
||||
use futures::TryStreamExt;
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::{RawTableInfo, TableInfo};
|
||||
use table::table_name::TableName;
|
||||
|
||||
@@ -1323,10 +1351,15 @@ mod tests {
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::{DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue};
|
||||
use crate::key::{
|
||||
DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue, TOPIC_REGION_PREFIX,
|
||||
};
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::KvBackend;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::router::{region_distribution, LeaderState, Region, RegionRoute};
|
||||
use crate::rpc::store::RangeRequest;
|
||||
use crate::wal_options_allocator::{allocate_region_wal_options, WalOptionsAllocator};
|
||||
|
||||
#[test]
|
||||
fn test_deserialized_value_with_bytes() {
|
||||
@@ -1398,16 +1431,63 @@ mod tests {
|
||||
table_metadata_manager: &TableMetadataManager,
|
||||
table_info: RawTableInfo,
|
||||
region_routes: Vec<RegionRoute>,
|
||||
region_wal_options: HashMap<RegionNumber, String>,
|
||||
) -> Result<()> {
|
||||
table_metadata_manager
|
||||
.create_table_metadata(
|
||||
table_info,
|
||||
TableRouteValue::physical(region_routes),
|
||||
HashMap::default(),
|
||||
region_wal_options,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
fn create_mock_region_wal_options() -> HashMap<RegionNumber, WalOptions> {
|
||||
let topics = (0..2)
|
||||
.map(|i| format!("greptimedb_topic{}", i))
|
||||
.collect::<Vec<_>>();
|
||||
let wal_options = topics
|
||||
.iter()
|
||||
.map(|topic| {
|
||||
WalOptions::Kafka(KafkaWalOptions {
|
||||
topic: topic.clone(),
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
(0..16)
|
||||
.enumerate()
|
||||
.map(|(i, region_number)| (region_number, wal_options[i % wal_options.len()].clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_raft_engine_topic_region_map() {
|
||||
let mem_kv = Arc::new(MemoryKvBackend::default());
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = &vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let wal_allocator = WalOptionsAllocator::RaftEngine;
|
||||
let regions = (0..16).collect();
|
||||
let region_wal_options = allocate_region_wal_options(regions, &wal_allocator).unwrap();
|
||||
create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
region_wal_options.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let topic_region_key = TOPIC_REGION_PREFIX.to_string();
|
||||
let range_req = RangeRequest::new().with_prefix(topic_region_key);
|
||||
let resp = mem_kv.range(range_req).await.unwrap();
|
||||
// Should be empty because the topic region map is empty for raft engine.
|
||||
assert!(resp.kvs.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_metadata() {
|
||||
let mem_kv = Arc::new(MemoryKvBackend::default());
|
||||
@@ -1416,11 +1496,17 @@ mod tests {
|
||||
let region_routes = &vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let region_wal_options = create_mock_region_wal_options()
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k, serde_json::to_string(&v).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
// creates metadata.
|
||||
create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
region_wal_options.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1430,6 +1516,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
region_wal_options.clone(),
|
||||
)
|
||||
.await
|
||||
.is_ok());
|
||||
@@ -1440,7 +1527,8 @@ mod tests {
|
||||
assert!(create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
modified_region_routes
|
||||
modified_region_routes,
|
||||
region_wal_options.clone(),
|
||||
)
|
||||
.await
|
||||
.is_err());
|
||||
@@ -1462,6 +1550,19 @@ mod tests {
|
||||
.unwrap(),
|
||||
region_routes
|
||||
);
|
||||
|
||||
for i in 0..2 {
|
||||
let region_number = i as u32;
|
||||
let region_id = RegionId::new(table_info.ident.table_id, region_number);
|
||||
let topic = format!("greptimedb_topic{}", i);
|
||||
let regions = table_metadata_manager
|
||||
.topic_region_manager
|
||||
.regions(&topic)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(regions.len(), 8);
|
||||
assert_eq!(regions[0], region_id);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1557,12 +1658,18 @@ mod tests {
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_id = table_info.ident.table_id;
|
||||
let datanode_id = 2;
|
||||
let region_wal_options = create_mock_region_wal_options();
|
||||
let serialized_region_wal_options = region_wal_options
|
||||
.iter()
|
||||
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
// creates metadata.
|
||||
create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
serialized_region_wal_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1575,12 +1682,22 @@ mod tests {
|
||||
let table_route_value = &TableRouteValue::physical(region_routes.clone());
|
||||
// deletes metadata.
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, table_route_value)
|
||||
.delete_table_metadata(
|
||||
table_id,
|
||||
&table_name,
|
||||
table_route_value,
|
||||
&region_wal_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// Should be ignored.
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, table_route_value)
|
||||
.delete_table_metadata(
|
||||
table_id,
|
||||
&table_name,
|
||||
table_route_value,
|
||||
&region_wal_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(table_metadata_manager
|
||||
@@ -1617,6 +1734,19 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(table_route.is_none());
|
||||
// Logical delete removes the topic region mapping as well.
|
||||
let regions = table_metadata_manager
|
||||
.topic_region_manager
|
||||
.regions("greptimedb_topic0")
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(regions.len(), 0);
|
||||
let regions = table_metadata_manager
|
||||
.topic_region_manager
|
||||
.regions("greptimedb_topic1")
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(regions.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1633,6 +1763,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1705,6 +1836,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1790,6 +1922,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1870,6 +2003,7 @@ mod tests {
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
region_routes.clone(),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1980,7 +2114,11 @@ mod tests {
|
||||
let table_id = 1025;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
let options = [(0, "test".to_string())].into();
|
||||
let options = create_mock_region_wal_options();
|
||||
let serialized_options = options
|
||||
.iter()
|
||||
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info,
|
||||
@@ -2007,7 +2145,7 @@ mod tests {
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
options,
|
||||
serialized_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -2020,7 +2158,7 @@ mod tests {
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
table_metadata_manager
|
||||
.destroy_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.destroy_table_metadata(table_id, &table_name, &table_route_value, &options)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(mem_kv.is_empty());
|
||||
@@ -2033,7 +2171,11 @@ mod tests {
|
||||
let table_id = 1025;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
let options = [(0, "test".to_string())].into();
|
||||
let options = create_mock_region_wal_options();
|
||||
let serialized_options = options
|
||||
.iter()
|
||||
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info,
|
||||
@@ -2060,7 +2202,7 @@ mod tests {
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
options,
|
||||
serialized_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -2076,18 +2218,18 @@ mod tests {
|
||||
let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
|
||||
let table_route_value = TableRouteValue::physical(region_routes.clone());
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.delete_table_metadata(table_id, &table_name, &table_route_value, &options)
|
||||
.await
|
||||
.unwrap();
|
||||
table_metadata_manager
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value, &options)
|
||||
.await
|
||||
.unwrap();
|
||||
let kvs = mem_kv.dump();
|
||||
assert_eq!(kvs, expected_result);
|
||||
// Should be ignored.
|
||||
table_metadata_manager
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value, &options)
|
||||
.await
|
||||
.unwrap();
|
||||
let kvs = mem_kv.dump();
|
||||
|
||||
@@ -21,6 +21,7 @@ use snafu::OptionExt;
|
||||
use store_api::storage::RegionNumber;
|
||||
use table::metadata::TableId;
|
||||
|
||||
use super::table_route::PhysicalTableRouteValue;
|
||||
use super::MetadataKey;
|
||||
use crate::error::{DatanodeTableInfoNotFoundSnafu, InvalidMetadataSnafu, Result};
|
||||
use crate::key::{
|
||||
@@ -29,7 +30,8 @@ use crate::key::{
|
||||
use crate::kv_backend::txn::{Txn, TxnOp};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE};
|
||||
use crate::rpc::store::RangeRequest;
|
||||
use crate::rpc::router::region_distribution;
|
||||
use crate::rpc::store::{BatchGetRequest, RangeRequest};
|
||||
use crate::rpc::KeyValue;
|
||||
use crate::DatanodeId;
|
||||
|
||||
@@ -172,6 +174,26 @@ impl DatanodeTableManager {
|
||||
Box::pin(stream)
|
||||
}
|
||||
|
||||
    /// Finds the [DatanodeTableValue]s for the given [TableId] and [PhysicalTableRouteValue].
    pub async fn regions(
        &self,
        table_id: TableId,
        table_routes: &PhysicalTableRouteValue,
    ) -> Result<Vec<DatanodeTableValue>> {
        let keys = region_distribution(&table_routes.region_routes)
            .into_keys()
            .map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
            .collect::<Vec<_>>();
        let req = BatchGetRequest {
            keys: keys.iter().map(|k| k.to_bytes()).collect(),
        };
        let resp = self.kv_backend.batch_get(req).await?;
        resp.kvs
            .into_iter()
            .map(datanode_table_value_decoder)
            .collect()
    }
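A hedged sketch of a call site for the new accessor, pairing it with `extract_region_wal_options` the way the drop-table check step in this diff does; module paths are taken from the hunks above and are assumptions outside that context:

use crate::ddl::utils::extract_region_wal_options;
use crate::key::datanode_table::DatanodeTableManager;
use crate::key::table_route::PhysicalTableRouteValue;
use table::metadata::TableId;

// Sketch of a call site similar to DropTableProcedure::on_prepare in this diff.
async fn collect_wal_options(
    manager: &DatanodeTableManager,
    table_id: TableId,
    route: &PhysicalTableRouteValue,
) -> crate::error::Result<()> {
    // One DatanodeTableValue per datanode hosting the physical table.
    let values = manager.regions(table_id, route).await?;
    // Merge their serialized WAL options into typed WalOptions, keyed by region number.
    let _region_wal_options = extract_region_wal_options(&values)?;
    Ok(())
}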
    /// Builds the create datanode table transactions. It only executes when the primary-key comparisons succeed.
|
||||
pub fn build_create_txn(
|
||||
&self,
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::warn;
|
||||
use futures::stream::BoxStream;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
@@ -37,6 +38,12 @@ lazy_static! {
|
||||
"^{FLOW_NAME_KEY_PREFIX}/({NAME_PATTERN})/({NAME_PATTERN})$"
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
/// for compatibility with older flow name with less strict name pattern
|
||||
static ref COMPAT_FLOW_NAME_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{FLOW_NAME_KEY_PREFIX}/({NAME_PATTERN})/(.*)$"
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// The key of mapping {flow_name} to [FlowId].
|
||||
@@ -114,12 +121,18 @@ impl<'a> MetadataKey<'a, FlowNameKeyInner<'a>> for FlowNameKeyInner<'_> {
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
let captures =
|
||||
FLOW_NAME_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(error::InvalidMetadataSnafu {
|
||||
err_msg: format!("Invalid FlowNameKeyInner '{key}'"),
|
||||
})?;
|
||||
let captures = FLOW_NAME_KEY_PATTERN
|
||||
.captures(key)
|
||||
.or_else(|| {
|
||||
warn!(
|
||||
"FlowNameKeyInner '{}' is not a valid flow name in newer version.",
|
||||
key
|
||||
);
|
||||
COMPAT_FLOW_NAME_KEY_PATTERN.captures(key)
|
||||
})
|
||||
.context(error::InvalidMetadataSnafu {
|
||||
err_msg: format!("Invalid FlowNameKeyInner '{key}'"),
|
||||
})?;
|
||||
// Safety: pass the regex check above
|
||||
let catalog_name = captures.get(1).unwrap().as_str();
|
||||
let flow_name = captures.get(2).unwrap().as_str();
|
||||
@@ -284,6 +297,12 @@ mod tests {
|
||||
let key = FlowNameKey::from_bytes(&bytes).unwrap();
|
||||
assert_eq!(key.catalog(), "my_catalog");
|
||||
assert_eq!(key.flow_name(), "my_task");
|
||||
|
||||
// compatibility with older version
|
||||
let bytes = b"__flow/name/my_catalog/a/`b`".to_vec();
|
||||
let key = FlowNameKey::from_bytes(&bytes).unwrap();
|
||||
assert_eq!(key.catalog(), "my_catalog");
|
||||
assert_eq!(key.flow_name(), "a/`b`");
|
||||
}
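A self-contained illustration of why the compat fallback is needed; the `NAME_PATTERN` stand-in below is deliberately simplified (word characters only) and is not the crate's real definition:

use regex::Regex;

fn main() {
    // Simplified stand-in for NAME_PATTERN; the real pattern is richer but still
    // rejects '/' and '`' inside a name.
    let name = r"[A-Za-z0-9_]+";
    let strict = Regex::new(&format!(r"^__flow/name/({name})/({name})$")).unwrap();
    let compat = Regex::new(&format!(r"^__flow/name/({name})/(.*)$")).unwrap();

    let key = "__flow/name/my_catalog/a/`b`";
    // Old, looser flow names can contain '/' and '`', so the strict pattern rejects
    // them while the compat pattern still recovers catalog and flow name.
    assert!(strict.captures(key).is_none());
    let caps = compat.captures(key).unwrap();
    assert_eq!(&caps[1], "my_catalog");
    assert_eq!(&caps[2], "a/`b`");
}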
|
||||
#[test]
|
||||
fn test_key_start_range() {
|
||||
|
||||
@@ -26,18 +26,25 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_wal::options::WalOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::ddl::utils::parse_region_wal_options;
|
||||
use crate::error::{Error, InvalidMetadataSnafu, Result};
|
||||
use crate::key::{MetadataKey, TOPIC_REGION_PATTERN, TOPIC_REGION_PREFIX};
|
||||
use crate::kv_backend::txn::{Txn, TxnOp};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::{BatchPutRequest, PutRequest, RangeRequest};
|
||||
use crate::rpc::store::{BatchDeleteRequest, BatchPutRequest, PutRequest, RangeRequest};
|
||||
use crate::rpc::KeyValue;
|
||||
|
||||
// The TopicRegionKey is a key for the topic-region mapping in the kvbackend.
|
||||
// The layout of the key is `__topic_region/{topic_name}/{region_id}`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct TopicRegionKey<'a> {
|
||||
pub region_id: RegionId,
|
||||
@@ -53,7 +60,7 @@ impl<'a> TopicRegionKey<'a> {
|
||||
}
|
||||
|
||||
pub fn range_topic_key(topic: &str) -> String {
|
||||
format!("{}/{}", TOPIC_REGION_PREFIX, topic)
|
||||
format!("{}/{}/", TOPIC_REGION_PREFIX, topic)
|
||||
}
|
||||
}
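A small worked example of why the trailing slash added to `range_topic_key` matters for prefix scans; the literal keys below just follow the documented `__topic_region/{topic_name}/{region_id}` layout and are made up:

fn main() {
    // Keys for two different topics whose names share a prefix.
    let k1 = "__topic_region/greptimedb_topic1/4398046511104";
    let k10 = "__topic_region/greptimedb_topic10/4398046511105";

    // Old range key (no trailing slash): "topic1" also matches "topic10" keys.
    let old_prefix = "__topic_region/greptimedb_topic1";
    assert!(k1.starts_with(old_prefix) && k10.starts_with(old_prefix));

    // New range key (trailing slash): only the exact topic matches.
    let new_prefix = "__topic_region/greptimedb_topic1/";
    assert!(k1.starts_with(new_prefix));
    assert!(!k10.starts_with(new_prefix));
}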
|
||||
|
||||
@@ -80,7 +87,7 @@ impl Display for TopicRegionKey<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}/{}",
|
||||
"{}{}",
|
||||
Self::range_topic_key(self.topic),
|
||||
self.region_id.as_u64()
|
||||
)
|
||||
@@ -151,6 +158,24 @@ impl TopicRegionManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    pub fn build_create_txn(
        &self,
        table_id: TableId,
        region_wal_options: &HashMap<RegionNumber, String>,
    ) -> Result<Txn> {
        let region_wal_options = parse_region_wal_options(region_wal_options)?;
        let topic_region_mapping = self.get_topic_region_mapping(table_id, &region_wal_options);
        let topic_region_keys = topic_region_mapping
            .iter()
            .map(|(topic, region_id)| TopicRegionKey::new(*topic, region_id))
            .collect::<Vec<_>>();
        let operations = topic_region_keys
            .into_iter()
            .map(|key| TxnOp::Put(key.to_bytes(), vec![]))
            .collect::<Vec<_>>();
        Ok(Txn::new().and_then(operations))
    }
|
||||
|
||||
/// Returns the list of region ids using the specified topic.
|
||||
pub async fn regions(&self, topic: &str) -> Result<Vec<RegionId>> {
|
||||
let prefix = TopicRegionKey::range_topic_key(topic);
|
||||
@@ -169,12 +194,49 @@ impl TopicRegionManager {
|
||||
self.kv_backend.delete(&raw_key, false).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn batch_delete(&self, keys: Vec<TopicRegionKey<'_>>) -> Result<()> {
|
||||
let raw_keys = keys.iter().map(|key| key.to_bytes()).collect::<Vec<_>>();
|
||||
let req = BatchDeleteRequest {
|
||||
keys: raw_keys,
|
||||
prev_kv: false,
|
||||
};
|
||||
self.kv_backend.batch_delete(req).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Retrieves a mapping of [`RegionId`]s to their corresponding topic names
    /// based on the provided table ID and WAL options.
    ///
    /// # Returns
    /// A vector of tuples, where each tuple contains a [`RegionId`] and its corresponding topic name.
    pub fn get_topic_region_mapping<'a>(
        &self,
        table_id: TableId,
        region_wal_options: &'a HashMap<RegionNumber, WalOptions>,
    ) -> Vec<(RegionId, &'a str)> {
        region_wal_options
            .keys()
            .filter_map(
                |region_number| match region_wal_options.get(region_number) {
                    Some(WalOptions::Kafka(kafka)) => {
                        let region_id = RegionId::new(table_id, *region_number);
                        Some((region_id, kafka.topic.as_str()))
                    }
                    Some(WalOptions::RaftEngine) => None,
                    None => None,
                },
            )
            .collect::<Vec<_>>()
    }
}
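A compact sketch of the mapping helper above, using the option types from `common_wal` as the tests below do; only Kafka-backed regions contribute an entry, raft-engine regions are skipped:

use std::collections::HashMap;

use common_wal::options::{KafkaWalOptions, WalOptions};
use store_api::storage::{RegionId, RegionNumber};

fn mapping_example(manager: &TopicRegionManager) {
    let table_id = 42;
    let wal_options: HashMap<RegionNumber, WalOptions> = HashMap::from([
        (
            0,
            WalOptions::Kafka(KafkaWalOptions {
                topic: "greptimedb_topic0".to_string(),
            }),
        ),
        (1, WalOptions::RaftEngine),
    ]);

    // Only region 0 is Kafka-backed, so the mapping holds a single entry.
    let mapping = manager.get_topic_region_mapping(table_id, &wal_options);
    assert_eq!(mapping, vec![(RegionId::new(table_id, 0), "greptimedb_topic0")]);
}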
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_wal::options::KafkaWalOptions;
|
||||
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
|
||||
@@ -220,4 +282,45 @@ mod tests {
|
||||
key_values.sort_by_key(|id| id.as_u64());
|
||||
assert_eq!(key_values, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_topic_region_map() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let manager = TopicRegionManager::new(kv_backend.clone());
|
||||
|
||||
let table_id = 1;
|
||||
let region_wal_options = (0..64)
|
||||
.map(|i| {
|
||||
let region_number = i;
|
||||
let wal_options = if i % 2 == 0 {
|
||||
WalOptions::Kafka(KafkaWalOptions {
|
||||
topic: format!("topic_{}", i),
|
||||
})
|
||||
} else {
|
||||
WalOptions::RaftEngine
|
||||
};
|
||||
(region_number, serde_json::to_string(&wal_options).unwrap())
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
let region_wal_options = parse_region_wal_options(&region_wal_options).unwrap();
|
||||
let mut topic_region_mapping =
|
||||
manager.get_topic_region_mapping(table_id, &region_wal_options);
|
||||
let mut expected = (0..64)
|
||||
.filter_map(|i| {
|
||||
if i % 2 == 0 {
|
||||
Some((RegionId::new(table_id, i), format!("topic_{}", i)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
topic_region_mapping.sort_by_key(|(region_id, _)| region_id.as_u64());
|
||||
let topic_region_map = topic_region_mapping
|
||||
.iter()
|
||||
.map(|(region_id, topic)| (*region_id, topic.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
expected.sort_by_key(|(region_id, _)| region_id.as_u64());
|
||||
assert_eq!(topic_region_map, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod selector;
|
||||
mod topic_creator;
|
||||
pub(crate) mod topic_creator;
|
||||
mod topic_manager;
|
||||
mod topic_pool;
|
||||
pub(crate) mod topic_pool;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -19,7 +19,7 @@ use std::time::Duration;
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct ProcedureConfig {
|
||||
/// Max retry times of procedure.
|
||||
|
||||
@@ -394,7 +394,7 @@ impl Default for DatanodeOptions {
|
||||
require_lease_before_startup: false,
|
||||
init_regions_in_background: false,
|
||||
init_regions_parallelism: 16,
|
||||
grpc: GrpcOptions::default().with_addr("127.0.0.1:3001"),
|
||||
grpc: GrpcOptions::default().with_bind_addr("127.0.0.1:3001"),
|
||||
http: HttpOptions::default(),
|
||||
meta_client: None,
|
||||
wal: DatanodeWalConfig::default(),
|
||||
|
||||
@@ -260,13 +260,6 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to handle batch request"))]
|
||||
HandleBatchRequest {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("RegionId {} not found", region_id))]
|
||||
RegionNotFound {
|
||||
region_id: RegionId,
|
||||
@@ -445,8 +438,7 @@ impl ErrorExt for Error {
|
||||
UnsupportedOutput { .. } => StatusCode::Unsupported,
|
||||
HandleRegionRequest { source, .. }
|
||||
| GetRegionMetadata { source, .. }
|
||||
| HandleBatchOpenRequest { source, .. }
|
||||
| HandleBatchRequest { source, .. } => source.status_code(),
|
||||
| HandleBatchOpenRequest { source, .. } => source.status_code(),
|
||||
StopRegionEngine { source, .. } => source.status_code(),
|
||||
|
||||
FindLogicalRegions { source, .. } => source.status_code(),
|
||||
|
||||
@@ -89,7 +89,7 @@ impl HeartbeatTask {
|
||||
node_id: opts.node_id.unwrap_or(0),
|
||||
// We use datanode's start time millis as the node's epoch.
|
||||
node_epoch: common_time::util::current_time_millis() as u64,
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.addr, Some(&opts.grpc.hostname)),
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
running: Arc::new(AtomicBool::new(false)),
|
||||
meta_client,
|
||||
region_server,
|
||||
|
||||
@@ -38,7 +38,7 @@ use datafusion::datasource::{provider_as_source, TableProvider};
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
|
||||
use datafusion_expr::{LogicalPlan, TableSource};
|
||||
use futures::future::try_join_all;
|
||||
use futures_util::future::try_join_all;
|
||||
use metric_engine::engine::MetricEngine;
|
||||
use mito2::engine::MITO_ENGINE_NAME;
|
||||
use prost::Message;
|
||||
@@ -59,8 +59,7 @@ use store_api::region_engine::{
|
||||
SettableRegionRoleState,
|
||||
};
|
||||
use store_api::region_request::{
|
||||
convert_body_to_requests, AffectedRows, BatchRegionRequest, RegionCloseRequest,
|
||||
RegionOpenRequest, RegionPutRequest, RegionRequest, RegionRequestBundle,
|
||||
AffectedRows, RegionCloseRequest, RegionOpenRequest, RegionRequest,
|
||||
};
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::{Semaphore, SemaphorePermit};
|
||||
@@ -71,9 +70,8 @@ use crate::error::{
|
||||
self, BuildRegionRequestsSnafu, ConcurrentQueryLimiterClosedSnafu,
|
||||
ConcurrentQueryLimiterTimeoutSnafu, DataFusionSnafu, DecodeLogicalPlanSnafu,
|
||||
ExecuteLogicalPlanSnafu, FindLogicalRegionsSnafu, HandleBatchOpenRequestSnafu,
|
||||
HandleBatchRequestSnafu, HandleRegionRequestSnafu, NewPlanDecoderSnafu,
|
||||
RegionEngineNotFoundSnafu, RegionNotFoundSnafu, RegionNotReadySnafu, Result,
|
||||
StopRegionEngineSnafu, UnexpectedSnafu, UnsupportedOutputSnafu,
|
||||
HandleRegionRequestSnafu, NewPlanDecoderSnafu, RegionEngineNotFoundSnafu, RegionNotFoundSnafu,
|
||||
RegionNotReadySnafu, Result, StopRegionEngineSnafu, UnexpectedSnafu, UnsupportedOutputSnafu,
|
||||
};
|
||||
use crate::event_listener::RegionServerEventListenerRef;
|
||||
|
||||
@@ -160,18 +158,6 @@ impl RegionServer {
|
||||
self.inner.handle_request(region_id, request).await
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(request_type = "Put"))]
|
||||
pub async fn handle_batch_body(&self, body: region_request::Body) -> Result<RegionResponse> {
|
||||
self.inner.handle_batch_body(body).await
|
||||
}
|
||||
|
||||
pub async fn handle_batch_request(
|
||||
&self,
|
||||
batch_request: BatchRegionRequest,
|
||||
) -> Result<RegionResponse> {
|
||||
self.inner.handle_batch_request(batch_request).await
|
||||
}
|
||||
|
||||
async fn table_provider(&self, region_id: RegionId) -> Result<Arc<dyn TableProvider>> {
|
||||
let status = self
|
||||
.inner
|
||||
@@ -358,41 +344,62 @@ impl RegionServer {
|
||||
.region_map
|
||||
.insert(region_id, RegionEngineWithStatus::Ready(engine));
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_single_request(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
request: RegionRequest,
|
||||
) -> Result<RegionResponse> {
|
||||
let tracing_context = TracingContext::from_current_span();
|
||||
let span = tracing_context.attach(info_span!(
|
||||
"RegionServer::handle_region_request",
|
||||
region_id = region_id.to_string()
|
||||
));
|
||||
self.handle_request(region_id, request).trace(span).await
|
||||
}
|
||||
#[async_trait]
|
||||
impl RegionServerHandler for RegionServer {
|
||||
async fn handle(&self, request: region_request::Body) -> ServerResult<RegionResponseV1> {
|
||||
let is_parallel = matches!(
|
||||
request,
|
||||
region_request::Body::Inserts(_) | region_request::Body::Deletes(_)
|
||||
);
|
||||
let requests = RegionRequest::try_from_request_body(request)
|
||||
.context(BuildRegionRequestsSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?;
|
||||
|
||||
async fn handle_vector_request(
|
||||
&self,
|
||||
requests: Vec<(RegionId, RegionRequest)>,
|
||||
) -> Result<RegionResponse> {
|
||||
let tracing_context = TracingContext::from_current_span();
|
||||
|
||||
let join_tasks = requests.into_iter().map(|(region_id, req)| {
|
||||
let self_to_move = self.clone();
|
||||
let span = tracing_context.attach(info_span!(
|
||||
"RegionServer::handle_region_request",
|
||||
region_id = region_id.to_string()
|
||||
));
|
||||
async move {
|
||||
self_to_move
|
||||
let results = if is_parallel {
|
||||
let join_tasks = requests.into_iter().map(|(region_id, req)| {
|
||||
let self_to_move = self.clone();
|
||||
let span = tracing_context.attach(info_span!(
|
||||
"RegionServer::handle_region_request",
|
||||
region_id = region_id.to_string()
|
||||
));
|
||||
async move {
|
||||
self_to_move
|
||||
.handle_request(region_id, req)
|
||||
.trace(span)
|
||||
.await
|
||||
}
|
||||
});
|
||||
|
||||
try_join_all(join_tasks)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?
|
||||
} else {
|
||||
let mut results = Vec::with_capacity(requests.len());
|
||||
// FIXME(jeremy, ruihang): Once the engine supports merged calls, we should immediately
|
||||
// modify this part to avoid inefficient serial loop calls.
|
||||
for (region_id, req) in requests {
|
||||
let span = tracing_context.attach(info_span!(
|
||||
"RegionServer::handle_region_request",
|
||||
region_id = region_id.to_string()
|
||||
));
|
||||
let result = self
|
||||
.handle_request(region_id, req)
|
||||
.trace(span)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?;
|
||||
results.push(result);
|
||||
}
|
||||
});
|
||||
results
|
||||
};
|
||||
|
||||
let results = try_join_all(join_tasks).await?;
|
||||
// merge results by sum up affected rows and merge extensions.
|
||||
let mut affected_rows = 0;
|
||||
let mut extensions = HashMap::new();
|
||||
for result in results {
|
||||
@@ -400,57 +407,6 @@ impl RegionServer {
|
||||
extensions.extend(result.extensions);
|
||||
}
|
||||
|
||||
Ok(RegionResponse {
|
||||
affected_rows,
|
||||
extensions,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl RegionServerHandler for RegionServer {
|
||||
async fn handle(&self, request: region_request::Body) -> ServerResult<RegionResponseV1> {
|
||||
if matches!(request, region_request::Body::Inserts(_)) {
|
||||
let resp = self
|
||||
.handle_batch_body(request)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?;
|
||||
return Ok(RegionResponseV1 {
|
||||
header: Some(ResponseHeader {
|
||||
status: Some(Status {
|
||||
status_code: StatusCode::Success as _,
|
||||
..Default::default()
|
||||
}),
|
||||
}),
|
||||
affected_rows: resp.affected_rows as _,
|
||||
extensions: resp.extensions,
|
||||
});
|
||||
}
|
||||
|
||||
let bundle = convert_body_to_requests(request)
|
||||
.context(BuildRegionRequestsSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?;
|
||||
|
||||
let result = match bundle {
|
||||
RegionRequestBundle::Single((region_id, request)) => self
|
||||
.handle_single_request(region_id, request)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?,
|
||||
RegionRequestBundle::Vector(requests) => self
|
||||
.handle_vector_request(requests)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?,
|
||||
RegionRequestBundle::Batch(requests) => self
|
||||
.handle_batch_request(requests)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)?,
|
||||
};
|
||||
|
||||
Ok(RegionResponseV1 {
|
||||
header: Some(ResponseHeader {
|
||||
status: Some(Status {
|
||||
@@ -458,8 +414,8 @@ impl RegionServerHandler for RegionServer {
|
||||
..Default::default()
|
||||
}),
|
||||
}),
|
||||
affected_rows: result.affected_rows as _,
|
||||
extensions: result.extensions,
|
||||
affected_rows: affected_rows as _,
|
||||
extensions,
|
||||
})
|
||||
}
|
||||
}
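The handler above fans inserts and deletes out concurrently and keeps a serial loop for everything else. A stripped-down sketch of that dispatch pattern; `do_work` and the item type are placeholders, not part of the codebase:

use futures_util::future::try_join_all;

async fn dispatch(parallel: bool, items: Vec<u64>) -> Result<Vec<u64>, String> {
    async fn do_work(item: u64) -> Result<u64, String> {
        // Placeholder standing in for RegionServer::handle_request.
        Ok(item * 2)
    }

    if parallel {
        // Inserts/Deletes are independent per region, so run them concurrently
        // and fail fast on the first error.
        try_join_all(items.into_iter().map(do_work)).await
    } else {
        // Other requests keep the serial loop until engines support merged calls
        // (see the FIXME in the hunk above).
        let mut results = Vec::with_capacity(items.len());
        for item in items {
            results.push(do_work(item).await?);
        }
        Ok(results)
    }
}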
|
||||
@@ -771,72 +727,6 @@ impl RegionServerInner {
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
|
||||
// Handle requests in batch.
|
||||
//
|
||||
// limitation: all create requests must be in the same engine.
|
||||
pub async fn handle_batch_request(
|
||||
&self,
|
||||
batch_request: BatchRegionRequest,
|
||||
) -> Result<RegionResponse> {
|
||||
let region_changes = match &batch_request {
|
||||
BatchRegionRequest::Create(requests) => requests
|
||||
.iter()
|
||||
.map(|(region_id, create)| {
|
||||
let attribute = parse_region_attribute(&create.engine, &create.options)?;
|
||||
Ok((*region_id, RegionChange::Register(attribute)))
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
BatchRegionRequest::Drop(requests) => requests
|
||||
.iter()
|
||||
.map(|(region_id, _)| (*region_id, RegionChange::Deregisters))
|
||||
.collect::<Vec<_>>(),
|
||||
BatchRegionRequest::Alter(requests) => requests
|
||||
.iter()
|
||||
.map(|(region_id, _)| (*region_id, RegionChange::None))
|
||||
.collect::<Vec<_>>(),
|
||||
BatchRegionRequest::Put(requests) => requests
|
||||
.iter()
|
||||
.map(|(region_id, _)| (*region_id, RegionChange::None))
|
||||
.collect::<Vec<_>>(),
|
||||
};
|
||||
|
||||
let (first_region_id, first_region_change) = region_changes.first().unwrap();
|
||||
let engine = match self.get_engine(*first_region_id, first_region_change)? {
|
||||
CurrentEngine::Engine(engine) => engine,
|
||||
CurrentEngine::EarlyReturn(rows) => return Ok(RegionResponse::new(rows)),
|
||||
};
|
||||
|
||||
for (region_id, region_change) in region_changes.iter() {
|
||||
self.set_region_status_not_ready(*region_id, &engine, region_change);
|
||||
}
|
||||
|
||||
let result = engine
|
||||
.handle_batch_request(batch_request)
|
||||
.await
|
||||
.context(HandleBatchRequestSnafu {});
|
||||
|
||||
match result {
|
||||
Ok(result) => {
|
||||
for (region_id, region_change) in region_changes {
|
||||
self.set_region_status_ready(region_id, engine.clone(), region_change)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(RegionResponse {
|
||||
affected_rows: result.affected_rows,
|
||||
extensions: result.extensions,
|
||||
})
|
||||
}
|
||||
Err(err) => {
|
||||
for (region_id, region_change) in region_changes {
|
||||
self.unset_region_status(region_id, region_change);
|
||||
}
|
||||
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn handle_request(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
@@ -896,71 +786,6 @@ impl RegionServerInner {
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_batch_body(&self, body: region_request::Body) -> Result<RegionResponse> {
|
||||
let _timer = crate::metrics::HANDLE_REGION_REQUEST_ELAPSED
|
||||
.with_label_values(&["Put"])
|
||||
.start_timer();
|
||||
|
||||
// Group requests by engine.
|
||||
let mut engine_requests: HashMap<
|
||||
String,
|
||||
(RegionEngineRef, Vec<(RegionId, RegionPutRequest)>),
|
||||
> = HashMap::with_capacity(1);
|
||||
match body {
|
||||
region_request::Body::Inserts(inserts) => {
|
||||
let num_requests = inserts.requests.len();
|
||||
for request in inserts.requests {
|
||||
let region_id = RegionId::from_u64(request.region_id);
|
||||
let CurrentEngine::Engine(engine) =
|
||||
self.get_engine(region_id, &RegionChange::None)?
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let Some(rows) = request.rows else {
|
||||
continue;
|
||||
};
|
||||
|
||||
match engine_requests.get_mut(engine.name()) {
|
||||
Some((_, requests)) => {
|
||||
requests.push((region_id, RegionPutRequest { rows, hint: None }))
|
||||
}
|
||||
None => {
|
||||
let mut requests = Vec::with_capacity(num_requests);
|
||||
requests.push((region_id, RegionPutRequest { rows, hint: None }));
|
||||
engine_requests.insert(engine.name().to_string(), (engine, requests));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
for (_, (engine, request)) in engine_requests {
|
||||
engine
|
||||
.handle_batch_request(BatchRegionRequest::Put(request))
|
||||
.await
|
||||
.context(HandleBatchRequestSnafu)?;
|
||||
}
|
||||
|
||||
// match engine
|
||||
// .handle_request(region_id, request)
|
||||
// .await
|
||||
// .with_context(|_| HandleRegionRequestSnafu { region_id })
|
||||
// {
|
||||
// Ok(result) => {
|
||||
// Ok(RegionResponse {
|
||||
// affected_rows: result.affected_rows,
|
||||
// extensions: result.extensions,
|
||||
// })
|
||||
// }
|
||||
// Err(err) => {
|
||||
// Err(err)
|
||||
// }
|
||||
// }
|
||||
|
||||
Ok(RegionResponse::new(0))
|
||||
}
|
||||
|
||||
fn set_region_status_not_ready(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
|
||||
@@ -66,8 +66,8 @@ impl<'a> DatanodeServiceBuilder<'a> {
|
||||
let handlers = ServerHandlers::default();
|
||||
|
||||
if let Some(grpc_server) = self.grpc_server.take() {
|
||||
let addr: SocketAddr = self.opts.grpc.addr.parse().context(ParseAddrSnafu {
|
||||
addr: &self.opts.grpc.addr,
|
||||
let addr: SocketAddr = self.opts.grpc.bind_addr.parse().context(ParseAddrSnafu {
|
||||
addr: &self.opts.grpc.bind_addr,
|
||||
})?;
|
||||
let handler: ServerHandler = (Box::new(grpc_server), addr);
|
||||
handlers.insert(handler).await;
|
||||
|
||||
@@ -77,27 +77,32 @@ impl BinaryVector {
|
||||
.unwrap()
|
||||
.iter()
|
||||
{
|
||||
let v = if let Some(binary) = binary {
|
||||
let bytes_size = dim as usize * std::mem::size_of::<f32>();
|
||||
if let Ok(s) = String::from_utf8(binary.to_vec()) {
|
||||
let v = parse_string_to_vector_type_value(&s, Some(dim))?;
|
||||
Some(v)
|
||||
} else if binary.len() == dim as usize * std::mem::size_of::<f32>() {
|
||||
Some(binary.to_vec())
|
||||
} else {
|
||||
return InvalidVectorSnafu {
|
||||
msg: format!(
|
||||
"Unexpected bytes size for vector value, expected {}, got {}",
|
||||
bytes_size,
|
||||
binary.len()
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
} else {
|
||||
None
|
||||
let Some(binary) = binary else {
|
||||
vector.push(None);
|
||||
continue;
|
||||
};
|
||||
vector.push(v);
|
||||
|
||||
if let Ok(s) = String::from_utf8(binary.to_vec()) {
|
||||
if let Ok(v) = parse_string_to_vector_type_value(&s, Some(dim)) {
|
||||
vector.push(Some(v));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let expected_bytes_size = dim as usize * std::mem::size_of::<f32>();
|
||||
if binary.len() == expected_bytes_size {
|
||||
vector.push(Some(binary.to_vec()));
|
||||
continue;
|
||||
} else {
|
||||
return InvalidVectorSnafu {
|
||||
msg: format!(
|
||||
"Unexpected bytes size for vector value, expected {}, got {}",
|
||||
expected_bytes_size,
|
||||
binary.len()
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
Ok(BinaryVector::from(vector))
|
||||
}
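The rewritten loop above accepts either a UTF-8 string understood by `parse_string_to_vector_type_value` or exactly `dim * size_of::<f32>()` raw bytes per element. A plain-Rust illustration of the size check, independent of the vector types:

fn main() {
    let dim: u32 = 3;
    let expected_bytes_size = dim as usize * std::mem::size_of::<f32>();

    // Raw f32 payload: three little-endian floats, 12 bytes, accepted as-is.
    let raw: Vec<u8> = [1.0f32, 2.0, 3.0]
        .iter()
        .flat_map(|v| v.to_le_bytes())
        .collect();
    assert_eq!(raw.len(), expected_bytes_size);

    // A textual payload such as "[1.0, 2.0, 3.0]" is not 12 bytes, so it must be
    // valid UTF-8 and go through the string parser instead.
    let text = b"[1.0, 2.0, 3.0]".to_vec();
    assert_ne!(text.len(), expected_bytes_size);
    assert!(String::from_utf8(text).is_ok());
}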
|
||||
|
||||
@@ -387,6 +387,43 @@ impl Decimal128VectorBuilder {
|
||||
|
||||
vectors::impl_try_from_arrow_array_for_vector!(Decimal128Array, Decimal128Vector);
|
||||
|
||||
pub(crate) fn replicate_decimal128(
    vector: &Decimal128Vector,
    offsets: &[usize],
) -> Decimal128Vector {
    assert_eq!(offsets.len(), vector.len());

    if offsets.is_empty() {
        return vector.get_slice(0, 0);
    }

    // Safety: safe to unwrap because the vector ensures precision and scale are valid.
    let mut builder = Decimal128VectorBuilder::with_capacity(*offsets.last().unwrap())
        .with_precision_and_scale(vector.precision(), vector.scale())
        .unwrap();

    let mut previous_offset = 0;

    for (offset, value) in offsets.iter().zip(vector.array.iter()) {
        let repeat_times = *offset - previous_offset;
        match value {
            Some(data) => {
                unsafe {
                    // Safety: std::iter::Repeat and std::iter::Take implement TrustedLen.
                    builder
                        .mutable_array
                        .append_trusted_len_iter(std::iter::repeat(data).take(repeat_times));
                }
            }
            None => {
                builder.mutable_array.append_nulls(repeat_times);
            }
        }
        previous_offset = *offset;
    }
    builder.finish()
}
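A short worked example of the cumulative-offset convention used above, mirroring the test added in replicate.rs later in this diff (module paths assumed from that test):

use std::sync::Arc;

use crate::vectors::{Decimal128Vector, VectorOp, VectorRef};

fn replicate_example() {
    let v = Decimal128Vector::from_values(vec![10, 20, 30])
        .with_precision_and_scale(10, 2)
        .unwrap();
    // Offsets are cumulative: repeat counts are [2 - 0, 2 - 2, 5 - 2] = [2, 0, 3].
    let replicated = v.replicate(&[2, 2, 5]);
    assert_eq!(replicated.len(), 5);

    let expected: VectorRef = Arc::new(
        Decimal128Vector::from_values(vec![10, 10, 30, 30, 30])
            .with_precision_and_scale(10, 2)
            .unwrap(),
    );
    assert_eq!(expected, replicated);
}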
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use arrow_array::Decimal128Array;
|
||||
|
||||
@@ -114,13 +114,30 @@ macro_rules! impl_scalar_vector_op {
|
||||
)+};
|
||||
}
|
||||
|
||||
impl_scalar_vector_op!(
|
||||
BinaryVector,
|
||||
BooleanVector,
|
||||
ListVector,
|
||||
StringVector,
|
||||
Decimal128Vector
|
||||
);
|
||||
impl_scalar_vector_op!(BinaryVector, BooleanVector, ListVector, StringVector);
|
||||
|
||||
impl VectorOp for Decimal128Vector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
std::sync::Arc::new(replicate::replicate_decimal128(self, offsets))
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<Decimal128Vector>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, Decimal128Vector, filter)
|
||||
}
|
||||
|
||||
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
|
||||
cast::cast_non_constant!(self, to_type)
|
||||
}
|
||||
|
||||
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
|
||||
take::take_indices!(self, Decimal128Vector, indices)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> VectorOp for PrimitiveVector<T> {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use crate::prelude::*;
|
||||
pub(crate) use crate::vectors::decimal::replicate_decimal128;
|
||||
pub(crate) use crate::vectors::null::replicate_null;
|
||||
pub(crate) use crate::vectors::primitive::replicate_primitive;
|
||||
|
||||
@@ -45,7 +46,7 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{Int32Vector, NullVector, StringVector, VectorOp};
|
||||
use crate::vectors::{Decimal128Vector, Int32Vector, NullVector, StringVector, VectorOp};
|
||||
|
||||
#[test]
|
||||
fn test_replicate_primitive() {
|
||||
@@ -167,4 +168,23 @@ mod tests {
|
||||
impl_replicate_timestamp_test!(Microsecond);
|
||||
impl_replicate_timestamp_test!(Nanosecond);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replicate_decimal() {
|
||||
let data = vec![100];
|
||||
// create a decimal vector
|
||||
let v = Decimal128Vector::from_values(data.clone())
|
||||
.with_precision_and_scale(10, 2)
|
||||
.unwrap();
|
||||
let offsets = [5];
|
||||
let v = v.replicate(&offsets);
|
||||
assert_eq!(5, v.len());
|
||||
|
||||
let expect: VectorRef = Arc::new(
|
||||
Decimal128Vector::from_values(vec![100; 5])
|
||||
.with_precision_and_scale(10, 2)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(expect, v);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ impl Default for FlownodeOptions {
|
||||
cluster_id: None,
|
||||
node_id: None,
|
||||
flow: FlowConfig::default(),
|
||||
grpc: GrpcOptions::default().with_addr("127.0.0.1:3004"),
|
||||
grpc: GrpcOptions::default().with_bind_addr("127.0.0.1:3004"),
|
||||
http: HttpOptions::default(),
|
||||
meta_client: None,
|
||||
logging: LoggingOptions::default(),
|
||||
|
||||
@@ -83,7 +83,7 @@ impl HeartbeatTask {
|
||||
) -> Self {
|
||||
Self {
|
||||
node_id: opts.node_id.unwrap_or(0),
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.addr, Some(&opts.grpc.hostname)),
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
meta_client,
|
||||
report_interval: heartbeat_opts.interval,
|
||||
retry_interval: heartbeat_opts.retry_interval,
|
||||
|
||||
@@ -334,7 +334,7 @@ impl FlownodeBuilder {
|
||||
|
||||
let heartbeat_task = self.heartbeat_task;
|
||||
|
||||
let addr = self.opts.grpc.addr;
|
||||
let addr = self.opts.grpc.bind_addr;
|
||||
let instance = FlownodeInstance {
|
||||
server,
|
||||
addr: addr.parse().context(ParseAddrSnafu { addr })?,
|
||||
|
||||
@@ -51,7 +51,6 @@ prometheus.workspace = true
|
||||
promql-parser.workspace = true
|
||||
prost.workspace = true
|
||||
query.workspace = true
|
||||
raft-engine.workspace = true
|
||||
serde.workspace = true
|
||||
servers.workspace = true
|
||||
session.workspace = true
|
||||
|
||||
@@ -56,7 +56,7 @@ impl HeartbeatTask {
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
) -> Self {
|
||||
HeartbeatTask {
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.addr, Some(&opts.grpc.hostname)),
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
meta_client,
|
||||
report_interval: heartbeat_opts.interval.as_millis() as u64,
|
||||
retry_interval: heartbeat_opts.retry_interval.as_millis() as u64,
|
||||
|
||||
@@ -40,7 +40,7 @@ use common_procedure::local::{LocalManager, ManagerConfig};
|
||||
use common_procedure::options::ProcedureConfig;
|
||||
use common_procedure::ProcedureManagerRef;
|
||||
use common_query::Output;
|
||||
use common_telemetry::{debug, error, tracing};
|
||||
use common_telemetry::{debug, error, info, tracing};
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use log_store::raft_engine::RaftEngineBackend;
|
||||
use operator::delete::DeleterRef;
|
||||
@@ -55,7 +55,6 @@ use query::query_engine::options::{validate_catalog_and_schema, QueryOptions};
|
||||
use query::query_engine::DescribeResult;
|
||||
use query::stats::StatementStatistics;
|
||||
use query::QueryEngineRef;
|
||||
use raft_engine::{Config, ReadableSize, RecoveryMode};
|
||||
use servers::error as server_error;
|
||||
use servers::error::{AuthSnafu, ExecuteQuerySnafu, ParsePromQLSnafu};
|
||||
use servers::export_metrics::ExportMetricsTask;
|
||||
@@ -134,19 +133,15 @@ impl Instance {
|
||||
kv_backend_config: KvBackendConfig,
|
||||
procedure_config: ProcedureConfig,
|
||||
) -> Result<(KvBackendRef, ProcedureManagerRef)> {
|
||||
let kv_backend = Arc::new(
|
||||
RaftEngineBackend::try_open_with_cfg(Config {
|
||||
dir,
|
||||
purge_threshold: ReadableSize(kv_backend_config.purge_threshold.0),
|
||||
recovery_mode: RecoveryMode::TolerateTailCorruption,
|
||||
batch_compression_threshold: ReadableSize::kb(8),
|
||||
target_file_size: ReadableSize(kv_backend_config.file_size.0),
|
||||
..Default::default()
|
||||
})
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::OpenRaftEngineBackendSnafu)?,
|
||||
info!(
|
||||
"Creating metadata kvbackend with config: {:?}",
|
||||
kv_backend_config
|
||||
);
|
||||
let kv_backend = RaftEngineBackend::try_open_with_cfg(dir, &kv_backend_config)
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::OpenRaftEngineBackendSnafu)?;
|
||||
|
||||
let kv_backend = Arc::new(kv_backend);
|
||||
let state_store = Arc::new(KvStateStore::new(kv_backend.clone()));
|
||||
|
||||
let manager_config = ManagerConfig {
|
||||
|
||||
@@ -20,11 +20,11 @@ use common_telemetry::tracing;
|
||||
use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use pipeline::PipelineWay;
|
||||
use pipeline::{GreptimePipelineParams, PipelineWay};
|
||||
use servers::error::{self, AuthSnafu, InFlightWriteBytesExceededSnafu, Result as ServerResult};
|
||||
use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
|
||||
use servers::otlp;
|
||||
use servers::query_handler::OpenTelemetryProtocolHandler;
|
||||
use servers::query_handler::{OpenTelemetryProtocolHandler, PipelineHandlerRef};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -112,8 +112,10 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn logs(
|
||||
&self,
|
||||
pipeline_handler: PipelineHandlerRef,
|
||||
request: ExportLogsServiceRequest,
|
||||
pipeline: PipelineWay,
|
||||
pipeline_params: GreptimePipelineParams,
|
||||
table_name: String,
|
||||
ctx: QueryContextRef,
|
||||
) -> ServerResult<Output> {
|
||||
@@ -128,7 +130,15 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
.get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
|
||||
interceptor_ref.pre_execute(ctx.clone())?;
|
||||
|
||||
let (requests, rows) = otlp::logs::to_grpc_insert_requests(request, pipeline, table_name)?;
|
||||
let (requests, rows) = otlp::logs::to_grpc_insert_requests(
|
||||
request,
|
||||
pipeline,
|
||||
pipeline_params,
|
||||
table_name,
|
||||
&ctx,
|
||||
pipeline_handler,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
let result = limiter.limit_row_inserts(&requests);
|
||||
|
||||
@@ -193,7 +193,7 @@ where
|
||||
|
||||
{
|
||||
// Always init GRPC server
|
||||
let grpc_addr = parse_addr(&opts.grpc.addr)?;
|
||||
let grpc_addr = parse_addr(&opts.grpc.bind_addr)?;
|
||||
let grpc_server = self.build_grpc_server(&opts)?;
|
||||
handlers.insert((Box::new(grpc_server), grpc_addr)).await;
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ async-trait.workspace = true
|
||||
bytes.workspace = true
|
||||
chrono.workspace = true
|
||||
common-base.workspace = true
|
||||
common-config.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
|
||||
@@ -17,8 +17,9 @@
|
||||
use std::any::Any;
|
||||
use std::ops::Bound::{Excluded, Included, Unbounded};
|
||||
use std::path::Path;
|
||||
use std::sync::RwLock;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use common_config::KvBackendConfig;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::error as meta_error;
|
||||
use common_meta::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse};
|
||||
@@ -30,16 +31,19 @@ use common_meta::rpc::store::{
|
||||
};
|
||||
use common_meta::rpc::KeyValue;
|
||||
use common_meta::util::get_next_prefix_key;
|
||||
use raft_engine::{Config, Engine, LogBatch};
|
||||
use common_runtime::RepeatedTask;
|
||||
use raft_engine::{Config, Engine, LogBatch, ReadableSize, RecoveryMode};
|
||||
use snafu::{IntoError, ResultExt};
|
||||
|
||||
use crate::error::{self, IoSnafu, RaftEngineSnafu};
|
||||
use crate::error::{self, Error, IoSnafu, RaftEngineSnafu, StartGcTaskSnafu};
|
||||
use crate::raft_engine::log_store::PurgeExpiredFilesFunction;
|
||||
|
||||
pub(crate) const SYSTEM_NAMESPACE: u64 = 0;
|
||||
|
||||
/// RaftEngine based [KvBackend] implementation.
|
||||
pub struct RaftEngineBackend {
|
||||
engine: RwLock<Engine>,
|
||||
engine: RwLock<Arc<Engine>>,
|
||||
_gc_task: RepeatedTask<Error>,
|
||||
}
|
||||
|
||||
fn ensure_dir(dir: &str) -> error::Result<()> {
|
||||
@@ -65,15 +69,34 @@ fn ensure_dir(dir: &str) -> error::Result<()> {
|
||||
}
|
||||
|
||||
impl RaftEngineBackend {
|
||||
pub fn try_open_with_cfg(config: Config) -> error::Result<Self> {
|
||||
ensure_dir(&config.dir)?;
|
||||
if let Some(spill_dir) = &config.spill_dir {
|
||||
pub fn try_open_with_cfg(dir: String, config: &KvBackendConfig) -> error::Result<Self> {
|
||||
let cfg = Config {
|
||||
dir: dir.to_string(),
|
||||
purge_threshold: ReadableSize(config.purge_threshold.0),
|
||||
recovery_mode: RecoveryMode::TolerateTailCorruption,
|
||||
batch_compression_threshold: ReadableSize::kb(8),
|
||||
target_file_size: ReadableSize(config.file_size.0),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
ensure_dir(&dir)?;
|
||||
if let Some(spill_dir) = &cfg.spill_dir {
|
||||
ensure_dir(spill_dir)?;
|
||||
}
|
||||
|
||||
let engine = Engine::open(config).context(RaftEngineSnafu)?;
|
||||
let engine = Arc::new(Engine::open(cfg).context(RaftEngineSnafu)?);
|
||||
let gc_task = RepeatedTask::new(
|
||||
config.purge_interval,
|
||||
Box::new(PurgeExpiredFilesFunction {
|
||||
engine: engine.clone(),
|
||||
}),
|
||||
);
|
||||
gc_task
|
||||
.start(common_runtime::global_runtime())
|
||||
.context(StartGcTaskSnafu)?;
|
||||
Ok(Self {
|
||||
engine: RwLock::new(engine),
|
||||
_gc_task: gc_task,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -398,21 +421,11 @@ mod tests {
|
||||
};
|
||||
use common_meta::rpc::store::{CompareAndPutRequest, CompareAndPutResponse};
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use raft_engine::{Config, ReadableSize, RecoveryMode};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn build_kv_backend(dir: String) -> RaftEngineBackend {
|
||||
let config = Config {
|
||||
dir,
|
||||
spill_dir: None,
|
||||
recovery_mode: RecoveryMode::AbsoluteConsistency,
|
||||
target_file_size: ReadableSize::mb(4),
|
||||
purge_threshold: ReadableSize::mb(16),
|
||||
..Default::default()
|
||||
};
|
||||
let engine = RwLock::new(Engine::open(config).unwrap());
|
||||
RaftEngineBackend { engine }
|
||||
RaftEngineBackend::try_open_with_cfg(dir, &KvBackendConfig::default()).unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -50,7 +50,7 @@ pub struct RaftEngineLogStore {
|
||||
}
|
||||
|
||||
pub struct PurgeExpiredFilesFunction {
|
||||
engine: Arc<Engine>,
|
||||
pub engine: Arc<Engine>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
|
||||
@@ -203,7 +203,7 @@ impl Configurable for MetasrvOptions {
|
||||
}
|
||||
|
||||
impl MetasrvOptions {
|
||||
/// Detect server address if `auto_server_addr` is true.
|
||||
/// Detect server address.
|
||||
#[cfg(not(target_os = "android"))]
|
||||
pub fn detect_server_addr(&mut self) {
|
||||
if self.server_addr.is_empty() {
|
||||
|
||||
@@ -42,7 +42,7 @@ use store_api::region_engine::{
|
||||
RegionEngine, RegionRole, RegionScannerRef, RegionStatistic, SetRegionRoleStateResponse,
|
||||
SettableRegionRoleState,
|
||||
};
|
||||
use store_api::region_request::{BatchRegionRequest, RegionRequest};
|
||||
use store_api::region_request::RegionRequest;
|
||||
use store_api::storage::{RegionId, ScanRequest};
|
||||
|
||||
use self::state::MetricEngineState;
|
||||
@@ -127,60 +127,6 @@ impl RegionEngine for MetricEngine {
|
||||
METRIC_ENGINE_NAME
|
||||
}
|
||||
|
||||
async fn handle_batch_request(
|
||||
&self,
|
||||
batch_request: BatchRegionRequest,
|
||||
) -> Result<RegionResponse, BoxedError> {
|
||||
match batch_request {
|
||||
BatchRegionRequest::Put(requests) => {
|
||||
let rows = self
|
||||
.inner
|
||||
.batch_put_region(requests)
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
|
||||
Ok(RegionResponse {
|
||||
affected_rows: rows,
|
||||
extensions: HashMap::new(),
|
||||
})
|
||||
}
|
||||
BatchRegionRequest::Create(requests) => {
|
||||
let mut extension_return_value = HashMap::new();
|
||||
let rows = self
|
||||
.inner
|
||||
.create_regions(requests, &mut extension_return_value)
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
|
||||
Ok(RegionResponse {
|
||||
affected_rows: rows,
|
||||
extensions: extension_return_value,
|
||||
})
|
||||
}
|
||||
BatchRegionRequest::Alter(requests) => {
|
||||
let mut extension_return_value = HashMap::new();
|
||||
let rows = self
|
||||
.inner
|
||||
.alter_regions(requests, &mut extension_return_value)
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
|
||||
Ok(RegionResponse {
|
||||
affected_rows: rows,
|
||||
extensions: extension_return_value,
|
||||
})
|
||||
}
|
||||
BatchRegionRequest::Drop(requests) => {
|
||||
self.handle_requests(
|
||||
requests
|
||||
.into_iter()
|
||||
.map(|(region_id, req)| (region_id, RegionRequest::Drop(req))),
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles non-query request to the region. Returns the count of affected rows.
|
||||
async fn handle_request(
|
||||
&self,
|
||||
@@ -217,8 +163,18 @@ impl RegionEngine for MetricEngine {
|
||||
}
|
||||
}
|
||||
RegionRequest::Flush(req) => self.inner.flush_region(region_id, req).await,
|
||||
RegionRequest::Delete(_) | RegionRequest::Truncate(_) => {
|
||||
UnsupportedRegionRequestSnafu { request }.fail()
|
||||
RegionRequest::Truncate(_) => UnsupportedRegionRequestSnafu { request }.fail(),
|
||||
RegionRequest::Delete(_) => {
|
||||
if self.inner.is_physical_region(region_id) {
|
||||
self.inner
|
||||
.mito
|
||||
.handle_request(region_id, request)
|
||||
.await
|
||||
.context(error::MitoDeleteOperationSnafu)
|
||||
.map(|response| response.affected_rows)
|
||||
} else {
|
||||
UnsupportedRegionRequestSnafu { request }.fail()
|
||||
}
|
||||
}
|
||||
RegionRequest::Catchup(req) => self.inner.catchup_region(region_id, req).await,
|
||||
};
|
||||
@@ -312,24 +268,6 @@ impl RegionEngine for MetricEngine {
|
||||
}
|
||||
|
||||
impl MetricEngine {
|
||||
async fn handle_requests(
|
||||
&self,
|
||||
requests: impl IntoIterator<Item = (RegionId, RegionRequest)>,
|
||||
) -> Result<RegionResponse, BoxedError> {
|
||||
let mut affected_rows = 0;
|
||||
let mut extensions = HashMap::new();
|
||||
for (region_id, request) in requests {
|
||||
let response = self.handle_request(region_id, request).await?;
|
||||
affected_rows += response.affected_rows;
|
||||
extensions.extend(response.extensions);
|
||||
}
|
||||
|
||||
Ok(RegionResponse {
|
||||
affected_rows,
|
||||
extensions,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn new(mito: MitoEngine, config: EngineConfig) -> Self {
|
||||
let metadata_region = MetadataRegion::new(mito.clone());
|
||||
let data_region = DataRegion::new(mito.clone());
|
||||
|
||||
@@ -12,23 +12,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod extract_new_columns;
|
||||
mod validate;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_telemetry::error;
|
||||
use extract_new_columns::extract_new_columns;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::metric_engine_consts::ALTER_PHYSICAL_EXTENSION_KEY;
|
||||
use store_api::region_request::{AffectedRows, AlterKind, RegionAlterRequest};
|
||||
use store_api::storage::RegionId;
|
||||
use validate::validate_alter_region_requests;
|
||||
|
||||
use crate::engine::create::{
|
||||
add_columns_to_physical_data_region, add_logical_regions_to_meta_region,
|
||||
};
|
||||
use crate::engine::MetricEngineInner;
|
||||
use crate::error::{
|
||||
LogicalRegionNotFoundSnafu, PhysicalRegionNotFoundSnafu, Result, SerializeColumnMetadataSnafu,
|
||||
@@ -36,133 +28,6 @@ use crate::error::{
|
||||
use crate::utils::{to_data_region_id, to_metadata_region_id};
|
||||
|
||||
impl MetricEngineInner {
|
||||
pub async fn alter_regions(
|
||||
&self,
|
||||
requests: Vec<(RegionId, RegionAlterRequest)>,
|
||||
extension_return_value: &mut HashMap<String, Vec<u8>>,
|
||||
) -> Result<AffectedRows> {
|
||||
if requests.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let first_region_id = &requests.first().unwrap().0;
|
||||
if self.is_physical_region(*first_region_id) {
|
||||
for (region_id, request) in requests {
|
||||
self.alter_physical_region(region_id, request).await?;
|
||||
}
|
||||
} else {
|
||||
self.alter_logical_regions(requests, extension_return_value)
|
||||
.await?;
|
||||
}
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
/// Alter multiple logical regions on the same physical region.
|
||||
pub async fn alter_logical_regions(
|
||||
&self,
|
||||
requests: Vec<(RegionId, RegionAlterRequest)>,
|
||||
extension_return_value: &mut HashMap<String, Vec<u8>>,
|
||||
) -> Result<AffectedRows> {
|
||||
validate_alter_region_requests(&requests)?;
|
||||
|
||||
let first_logical_region_id = requests[0].0;
|
||||
|
||||
// Finds new columns to add
|
||||
let mut new_column_names = HashSet::new();
|
||||
let mut new_columns_to_add = vec![];
|
||||
|
||||
let (physical_region_id, index_options) = {
|
||||
let state = &self.state.read().unwrap();
|
||||
let physical_region_id = state
|
||||
.get_physical_region_id(first_logical_region_id)
|
||||
.with_context(|| {
|
||||
error!("Trying to alter an nonexistent region {first_logical_region_id}");
|
||||
LogicalRegionNotFoundSnafu {
|
||||
region_id: first_logical_region_id,
|
||||
}
|
||||
})?;
|
||||
let region_state = state
|
||||
.physical_region_states()
|
||||
.get(&physical_region_id)
|
||||
.with_context(|| PhysicalRegionNotFoundSnafu {
|
||||
region_id: physical_region_id,
|
||||
})?;
|
||||
let physical_columns = region_state.physical_columns();
|
||||
|
||||
extract_new_columns(
|
||||
&requests,
|
||||
physical_columns,
|
||||
&mut new_column_names,
|
||||
&mut new_columns_to_add,
|
||||
)?;
|
||||
|
||||
(physical_region_id, region_state.options().index)
|
||||
};
|
||||
let data_region_id = to_data_region_id(physical_region_id);
|
||||
|
||||
add_columns_to_physical_data_region(
|
||||
data_region_id,
|
||||
index_options,
|
||||
&mut new_columns_to_add,
|
||||
&self.data_region,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let physical_columns = self.data_region.physical_columns(data_region_id).await?;
|
||||
let physical_schema_map = physical_columns
|
||||
.iter()
|
||||
.map(|metadata| (metadata.column_schema.name.as_str(), metadata))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
let logical_region_columns = requests.iter().map(|(region_id, request)| {
|
||||
let AlterKind::AddColumns { columns } = &request.kind else {
|
||||
unreachable!()
|
||||
};
|
||||
(
|
||||
*region_id,
|
||||
columns
|
||||
.iter()
|
||||
.map(|col| {
|
||||
let column_name = col.column_metadata.column_schema.name.as_str();
|
||||
let column_metadata = *physical_schema_map.get(column_name).unwrap();
|
||||
(column_name, column_metadata)
|
||||
})
|
||||
.collect::<HashMap<_, _>>(),
|
||||
)
|
||||
});
|
||||
|
||||
let new_add_columns = new_columns_to_add.iter().map(|metadata| {
|
||||
// Safety: previous steps ensure the physical region exist
|
||||
let column_metadata = *physical_schema_map
|
||||
.get(metadata.column_schema.name.as_str())
|
||||
.unwrap();
|
||||
(
|
||||
metadata.column_schema.name.to_string(),
|
||||
column_metadata.column_id,
|
||||
)
|
||||
});
|
||||
|
||||
// Writes logical regions metadata to metadata region
|
||||
add_logical_regions_to_meta_region(
|
||||
&self.metadata_region,
|
||||
physical_region_id,
|
||||
false,
|
||||
logical_region_columns,
|
||||
)
|
||||
.await?;
|
||||
|
||||
extension_return_value.insert(
|
||||
ALTER_PHYSICAL_EXTENSION_KEY.to_string(),
|
||||
ColumnMetadata::encode_list(&physical_columns).context(SerializeColumnMetadataSnafu)?,
|
||||
);
|
||||
|
||||
let mut state = self.state.write().unwrap();
|
||||
state.add_physical_columns(data_region_id, new_add_columns);
|
||||
state.invalid_logical_regions_cache(requests.iter().map(|(region_id, _)| *region_id));
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
/// Dispatch region alter request
|
||||
pub async fn alter_region(
|
||||
&self,
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::region_request::{AlterKind, RegionAlterRequest};
|
||||
use store_api::storage::{ColumnId, RegionId};
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
/// Extract new columns from the create requests.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// This function will panic if the alter kind is not `AddColumns`.
|
||||
pub fn extract_new_columns<'a>(
|
||||
requests: &'a [(RegionId, RegionAlterRequest)],
|
||||
physical_columns: &HashMap<String, ColumnId>,
|
||||
new_column_names: &mut HashSet<&'a str>,
|
||||
new_columns: &mut Vec<ColumnMetadata>,
|
||||
) -> Result<()> {
|
||||
for (_, request) in requests {
|
||||
let AlterKind::AddColumns { columns } = &request.kind else {
|
||||
unreachable!()
|
||||
};
|
||||
for col in columns {
|
||||
let column_name = col.column_metadata.column_schema.name.as_str();
|
||||
if !physical_columns.contains_key(column_name)
|
||||
&& !new_column_names.contains(column_name)
|
||||
{
|
||||
new_column_names.insert(column_name);
|
||||
// TODO(weny): avoid clone
|
||||
new_columns.push(col.column_metadata.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::ensure;
|
||||
use store_api::region_request::{AlterKind, RegionAlterRequest};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::{Result, UnsupportedAlterKindSnafu};
|
||||
|
||||
/// Validate the alter region requests.
|
||||
pub fn validate_alter_region_requests(requests: &[(RegionId, RegionAlterRequest)]) -> Result<()> {
|
||||
for (_, request) in requests {
|
||||
ensure!(
|
||||
matches!(request.kind, AlterKind::AddColumns { .. }),
|
||||
UnsupportedAlterKindSnafu {
|
||||
kind: request.kind.as_ref()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -12,15 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod add_columns;
|
||||
mod add_logical_regions;
|
||||
mod extract_new_columns;
|
||||
mod validate;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
pub(crate) use add_columns::add_columns_to_physical_data_region;
|
||||
pub(crate) use add_logical_regions::add_logical_regions_to_meta_region;
|
||||
use api::v1::SemanticType;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_telemetry::{info, warn};
|
||||
@@ -46,9 +39,7 @@ use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{AffectedRows, RegionCreateRequest, RegionRequest};
|
||||
use store_api::storage::consts::ReservedColumnId;
|
||||
use store_api::storage::RegionId;
|
||||
use validate::validate_create_logical_regions;
|
||||
|
||||
use crate::engine::create::extract_new_columns::extract_new_columns;
|
||||
use crate::engine::options::{set_data_region_options, IndexOptions, PhysicalRegionOptions};
|
||||
use crate::engine::MetricEngineInner;
|
||||
use crate::error::{
|
||||
@@ -59,41 +50,9 @@ use crate::error::{
|
||||
Result, SerializeColumnMetadataSnafu,
|
||||
};
|
||||
use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_COLUMN_COUNT, PHYSICAL_REGION_COUNT};
|
||||
use crate::utils::{self, to_data_region_id, to_metadata_region_id};
|
||||
use crate::utils::{to_data_region_id, to_metadata_region_id};
|
||||
|
||||
impl MetricEngineInner {
|
||||
pub async fn create_regions(
|
||||
&self,
|
||||
requests: Vec<(RegionId, RegionCreateRequest)>,
|
||||
extension_return_value: &mut HashMap<String, Vec<u8>>,
|
||||
) -> Result<AffectedRows> {
|
||||
if requests.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
for (_, request) in requests.iter() {
|
||||
Self::verify_region_create_request(request)?;
|
||||
}
|
||||
|
||||
let first_request = &requests.first().unwrap().1;
|
||||
if first_request.is_physical_table() {
|
||||
for (region_id, request) in requests {
|
||||
self.create_physical_region(region_id, request).await?;
|
||||
}
|
||||
return Ok(0);
|
||||
} else if first_request
|
||||
.options
|
||||
.contains_key(LOGICAL_TABLE_METADATA_KEY)
|
||||
{
|
||||
self.create_logical_regions(requests, extension_return_value)
|
||||
.await?;
|
||||
} else {
|
||||
return MissingRegionOptionSnafu {}.fail();
|
||||
}
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
/// Dispatch region creation request to physical region creation or logical
|
||||
pub async fn create_region(
|
||||
&self,
|
||||
@@ -185,116 +144,6 @@ impl MetricEngineInner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create multiple logical regions on the same physical region.
|
||||
async fn create_logical_regions(
|
||||
&self,
|
||||
requests: Vec<(RegionId, RegionCreateRequest)>,
|
||||
extension_return_value: &mut HashMap<String, Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
let physical_region_id = validate_create_logical_regions(&requests)?;
|
||||
let data_region_id = utils::to_data_region_id(physical_region_id);
|
||||
|
||||
// Filters out the requests that the logical region already exists
|
||||
let requests = {
|
||||
let state = self.state.read().unwrap();
|
||||
let logical_region_exists = state.logical_region_exists_filter(data_region_id);
|
||||
// TODO(weny): log the skipped logical regions
|
||||
requests
|
||||
.into_iter()
|
||||
.filter(|(region_id, _)| !logical_region_exists(region_id))
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
// Finds new columns to add to physical region
|
||||
let mut new_column_names = HashSet::new();
|
||||
let mut new_columns = Vec::new();
|
||||
|
||||
let index_option = {
|
||||
let state = &self.state.read().unwrap();
|
||||
let region_state = state
|
||||
.physical_region_states()
|
||||
.get(&data_region_id)
|
||||
.with_context(|| PhysicalRegionNotFoundSnafu {
|
||||
region_id: data_region_id,
|
||||
})?;
|
||||
let physical_columns = region_state.physical_columns();
|
||||
|
||||
extract_new_columns(
|
||||
&requests,
|
||||
physical_columns,
|
||||
&mut new_column_names,
|
||||
&mut new_columns,
|
||||
)?;
|
||||
region_state.options().index
|
||||
};
|
||||
|
||||
// TODO(weny): we dont need to pass a mutable new_columns here.
|
||||
add_columns_to_physical_data_region(
|
||||
data_region_id,
|
||||
index_option,
|
||||
&mut new_columns,
|
||||
&self.data_region,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let physical_columns = self.data_region.physical_columns(data_region_id).await?;
|
||||
let physical_schema_map = physical_columns
|
||||
.iter()
|
||||
.map(|metadata| (metadata.column_schema.name.as_str(), metadata))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let logical_regions = requests
|
||||
.iter()
|
||||
.map(|(region_id, _)| (*region_id))
|
||||
.collect::<Vec<_>>();
|
||||
let logical_region_columns = requests.iter().map(|(region_id, request)| {
|
||||
(
|
||||
*region_id,
|
||||
request
|
||||
.column_metadatas
|
||||
.iter()
|
||||
.map(|metadata| {
|
||||
// Safety: previous steps ensure the physical region exist
|
||||
let column_metadata = *physical_schema_map
|
||||
.get(metadata.column_schema.name.as_str())
|
||||
.unwrap();
|
||||
(metadata.column_schema.name.as_str(), column_metadata)
|
||||
})
|
||||
.collect::<HashMap<_, _>>(),
|
||||
)
|
||||
});
|
||||
|
||||
let new_add_columns = new_columns.iter().map(|metadata| {
|
||||
// Safety: previous steps ensure the physical region exist
|
||||
let column_metadata = *physical_schema_map
|
||||
.get(metadata.column_schema.name.as_str())
|
||||
.unwrap();
|
||||
(
|
||||
metadata.column_schema.name.to_string(),
|
||||
column_metadata.column_id,
|
||||
)
|
||||
});
|
||||
|
||||
extension_return_value.insert(
|
||||
ALTER_PHYSICAL_EXTENSION_KEY.to_string(),
|
||||
ColumnMetadata::encode_list(&physical_columns).context(SerializeColumnMetadataSnafu)?,
|
||||
);
|
||||
|
||||
// Writes logical regions metadata to metadata region
|
||||
add_logical_regions_to_meta_region(
|
||||
&self.metadata_region,
|
||||
physical_region_id,
|
||||
true,
|
||||
logical_region_columns,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut state = self.state.write().unwrap();
|
||||
state.add_physical_columns(data_region_id, new_add_columns);
|
||||
state.add_logical_regions(physical_region_id, logical_regions);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a logical region.
|
||||
///
|
||||
/// Physical table and logical table can have multiple regions, and their
|
||||
@@ -444,16 +293,16 @@ impl MetricEngineInner {
|
||||
new_columns: &mut [ColumnMetadata],
|
||||
index_options: IndexOptions,
|
||||
) -> Result<()> {
|
||||
// Return early if no new columns are added.
|
||||
if new_columns.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// alter data region
|
||||
self.data_region
|
||||
.add_columns(data_region_id, new_columns, index_options)
|
||||
.await?;
|
||||
|
||||
// Return early if no new columns are added.
|
||||
if new_columns.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// correct the column id
|
||||
let after_alter_physical_schema = self.data_region.physical_columns(data_region_id).await?;
|
||||
let after_alter_physical_schema_map = after_alter_physical_schema
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::data_region::DataRegion;
|
||||
use crate::engine::IndexOptions;
|
||||
use crate::error::Result;
|
||||
use crate::metrics::PHYSICAL_COLUMN_COUNT;
|
||||
|
||||
/// Add new columns to the physical data region.
|
||||
pub(crate) async fn add_columns_to_physical_data_region(
|
||||
data_region_id: RegionId,
|
||||
index_options: IndexOptions,
|
||||
new_columns: &mut [ColumnMetadata],
|
||||
data_region: &DataRegion,
|
||||
) -> Result<()> {
|
||||
// Return early if no new columns are added.
|
||||
if new_columns.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
data_region
|
||||
.add_columns(data_region_id, new_columns, index_options)
|
||||
.await?;
|
||||
|
||||
PHYSICAL_COLUMN_COUNT.add(new_columns.len() as _);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::{MitoWriteOperationSnafu, Result};
|
||||
use crate::metadata_region::MetadataRegion;
|
||||
use crate::utils;
|
||||
|
||||
/// Add logical regions to the metadata region.
|
||||
pub async fn add_logical_regions_to_meta_region(
|
||||
metadata_region: &MetadataRegion,
|
||||
physical_region_id: RegionId,
|
||||
write_region_id: bool,
|
||||
logical_regions: impl Iterator<Item = (RegionId, HashMap<&str, &ColumnMetadata>)>,
|
||||
) -> Result<()> {
|
||||
let region_id = utils::to_metadata_region_id(physical_region_id);
|
||||
let iter =
|
||||
logical_regions
|
||||
.into_iter()
|
||||
.flat_map(|(logical_region_id, column_metadatas)| {
|
||||
if write_region_id {
|
||||
Some((
|
||||
MetadataRegion::concat_region_key(logical_region_id),
|
||||
String::new(),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.into_iter()
|
||||
.chain(column_metadatas.into_iter().map(
|
||||
move |(name, column_metadata)| {
|
||||
(
|
||||
MetadataRegion::concat_column_key(logical_region_id, name),
|
||||
MetadataRegion::serialize_column_metadata(column_metadata),
|
||||
)
|
||||
},
|
||||
))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let put_request = MetadataRegion::build_put_request_from_iter(iter.into_iter());
|
||||
metadata_region
|
||||
.mito
|
||||
.handle_request(
|
||||
region_id,
|
||||
store_api::region_request::RegionRequest::Put(put_request),
|
||||
)
|
||||
.await
|
||||
.context(MitoWriteOperationSnafu)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,51 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use snafu::ensure;
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::region_request::RegionCreateRequest;
|
||||
use store_api::storage::{ColumnId, RegionId};
|
||||
|
||||
use crate::error::{AddingFieldColumnSnafu, Result};
|
||||
|
||||
/// Extract new columns from the create requests.
|
||||
pub fn extract_new_columns<'a>(
|
||||
requests: &'a [(RegionId, RegionCreateRequest)],
|
||||
physical_columns: &HashMap<String, ColumnId>,
|
||||
new_column_names: &mut HashSet<&'a str>,
|
||||
new_columns: &mut Vec<ColumnMetadata>,
|
||||
) -> Result<()> {
|
||||
for (_, request) in requests {
|
||||
for col in &request.column_metadatas {
|
||||
if !physical_columns.contains_key(&col.column_schema.name)
|
||||
&& !new_column_names.contains(&col.column_schema.name.as_str())
|
||||
{
|
||||
ensure!(
|
||||
col.semantic_type != SemanticType::Field,
|
||||
AddingFieldColumnSnafu {
|
||||
name: col.column_schema.name.to_string(),
|
||||
}
|
||||
);
|
||||
new_column_names.insert(&col.column_schema.name);
|
||||
// TODO(weny): avoid clone
|
||||
new_columns.push(col.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
|
||||
use store_api::region_request::RegionCreateRequest;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::{
|
||||
ConflictRegionOptionSnafu, MissingRegionOptionSnafu, ParseRegionIdSnafu, Result,
|
||||
};
|
||||
|
||||
/// Validate the create logical regions request.
|
||||
///
|
||||
/// Returns extracted physical region id from the first request.
|
||||
pub fn validate_create_logical_regions(
|
||||
requests: &[(RegionId, RegionCreateRequest)],
|
||||
) -> Result<RegionId> {
|
||||
let (_, request) = requests.first().unwrap();
|
||||
let first_physical_region_id_raw = request
|
||||
.options
|
||||
.get(LOGICAL_TABLE_METADATA_KEY)
|
||||
.ok_or(MissingRegionOptionSnafu {}.build())?;
|
||||
|
||||
let physical_region_id: RegionId = first_physical_region_id_raw
|
||||
.parse::<u64>()
|
||||
.with_context(|_| ParseRegionIdSnafu {
|
||||
raw: first_physical_region_id_raw,
|
||||
})?
|
||||
.into();
|
||||
|
||||
// TODO(weny): Can we remove the check?
|
||||
for (_, request) in requests.iter().skip(1) {
|
||||
let physical_region_id_raw = request
|
||||
.options
|
||||
.get(LOGICAL_TABLE_METADATA_KEY)
|
||||
.ok_or(MissingRegionOptionSnafu {}.build())?;
|
||||
|
||||
ensure!(
|
||||
physical_region_id_raw == first_physical_region_id_raw,
|
||||
ConflictRegionOptionSnafu {}
|
||||
);
|
||||
}
|
||||
|
||||
Ok(physical_region_id)
|
||||
}
|
||||
@@ -12,8 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::{Rows, WriteHint};
|
||||
use common_telemetry::{error, info};
|
||||
use snafu::{ensure, OptionExt};
|
||||
@@ -52,26 +50,6 @@ impl MetricEngineInner {
|
||||
}
|
||||
}
|
||||
|
||||
/// Dispatch batch region put request
|
||||
pub async fn batch_put_region(
|
||||
&self,
|
||||
requests: Vec<(RegionId, RegionPutRequest)>,
|
||||
) -> Result<AffectedRows> {
|
||||
{
|
||||
let state = self.state.read().unwrap();
|
||||
for region_id in requests.iter().map(|(region_id, _)| region_id) {
|
||||
if state.physical_region_states().contains_key(region_id) {
|
||||
info!("Metric region received put request on physical region {region_id:?}");
|
||||
FORBIDDEN_OPERATION_COUNT.inc();
|
||||
|
||||
return ForbiddenPhysicalAlterSnafu.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.batch_put_logical_regions(requests).await
|
||||
}
|
||||
|
||||
async fn put_logical_region(
|
||||
&self,
|
||||
logical_region_id: RegionId,
|
||||
@@ -120,110 +98,6 @@ impl MetricEngineInner {
|
||||
self.data_region.write_data(data_region_id, request).await
|
||||
}
|
||||
|
||||
async fn batch_put_logical_regions(
|
||||
&self,
|
||||
requests: Vec<(RegionId, RegionPutRequest)>,
|
||||
) -> Result<AffectedRows> {
|
||||
let _timer = MITO_OPERATION_ELAPSED
|
||||
.with_label_values(&["put"])
|
||||
.start_timer();
|
||||
|
||||
let mut physical_requests = HashMap::with_capacity(1);
|
||||
// Group requests by physical region, also verify put requests.
|
||||
{
|
||||
let state = self.state.read().unwrap();
|
||||
for (logical_region_id, request) in requests {
|
||||
let physical_region_id = *state
|
||||
.logical_regions()
|
||||
.get(&logical_region_id)
|
||||
.with_context(|| LogicalRegionNotFoundSnafu {
|
||||
region_id: logical_region_id,
|
||||
})?;
|
||||
let data_region_id = to_data_region_id(physical_region_id);
|
||||
// Check if a physical column exists.
|
||||
let physical_columns = state
|
||||
.physical_region_states()
|
||||
.get(&data_region_id)
|
||||
.context(PhysicalRegionNotFoundSnafu {
|
||||
region_id: data_region_id,
|
||||
})?
|
||||
.physical_columns();
|
||||
for col in &request.rows.schema {
|
||||
ensure!(
|
||||
physical_columns.contains_key(&col.column_name),
|
||||
ColumnNotFoundSnafu {
|
||||
name: col.column_name.clone(),
|
||||
region_id: logical_region_id,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
physical_requests
|
||||
.entry(physical_region_id)
|
||||
.or_insert_with(Vec::new)
|
||||
.push((logical_region_id, request));
|
||||
}
|
||||
}
|
||||
|
||||
let mut affected_rows = 0;
|
||||
for (physical_region_id, mut requests) in physical_requests {
|
||||
if requests.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let data_region_id = to_data_region_id(physical_region_id);
|
||||
let primary_key_encoding = {
|
||||
let state = self.state.read().unwrap();
|
||||
state.get_primary_key_encoding(data_region_id).context(
|
||||
PhysicalRegionNotFoundSnafu {
|
||||
region_id: data_region_id,
|
||||
},
|
||||
)?
|
||||
};
|
||||
|
||||
for (logical_region_id, request) in &mut requests {
|
||||
self.modify_rows(
|
||||
physical_region_id,
|
||||
logical_region_id.table_id(),
|
||||
&mut request.rows,
|
||||
primary_key_encoding,
|
||||
)?;
|
||||
}
|
||||
|
||||
let total_rows = requests
|
||||
.iter()
|
||||
.map(|(_, request)| request.rows.rows.len())
|
||||
.sum::<usize>();
|
||||
if primary_key_encoding == PrimaryKeyEncoding::Sparse {
|
||||
let mut merged_request = RegionPutRequest {
|
||||
rows: Rows {
|
||||
schema: requests[0].1.rows.schema.clone(),
|
||||
rows: Vec::with_capacity(total_rows),
|
||||
},
|
||||
hint: None,
|
||||
};
|
||||
merged_request.hint = Some(WriteHint {
|
||||
primary_key_encoding: api::v1::PrimaryKeyEncoding::Sparse.into(),
|
||||
});
|
||||
for (_, mut request) in requests {
|
||||
merged_request.rows.rows.append(&mut request.rows.rows);
|
||||
}
|
||||
|
||||
self.data_region
|
||||
.write_data(data_region_id, merged_request)
|
||||
.await?;
|
||||
} else {
|
||||
for (_, request) in requests {
|
||||
self.data_region.write_data(data_region_id, request).await?;
|
||||
}
|
||||
}
|
||||
|
||||
affected_rows += total_rows;
|
||||
}
|
||||
|
||||
Ok(affected_rows)
|
||||
}
|
||||
|
||||
/// Verifies a put request for a logical region against its corresponding metadata region.
|
||||
///
|
||||
/// Includes:
|
||||
|
||||
@@ -83,18 +83,6 @@ pub(crate) struct MetricEngineState {
|
||||
}
|
||||
|
||||
impl MetricEngineState {
|
||||
pub fn logical_region_exists_filter(
|
||||
&self,
|
||||
physical_region_id: RegionId,
|
||||
) -> impl for<'a> Fn(&'a RegionId) -> bool + use<'_> {
|
||||
let state = self
|
||||
.physical_region_states()
|
||||
.get(&physical_region_id)
|
||||
.unwrap();
|
||||
|
||||
move |logical_region_id| state.logical_regions().contains(logical_region_id)
|
||||
}
|
||||
|
||||
pub fn add_physical_region(
|
||||
&mut self,
|
||||
physical_region_id: RegionId,
|
||||
@@ -123,31 +111,6 @@ impl MetricEngineState {
|
||||
}
|
||||
}
|
||||
|
||||
/// # Panic
|
||||
/// if the physical region does not exist
|
||||
pub fn add_logical_regions(
|
||||
&mut self,
|
||||
physical_region_id: RegionId,
|
||||
logical_region_ids: impl IntoIterator<Item = RegionId>,
|
||||
) {
|
||||
let physical_region_id = to_data_region_id(physical_region_id);
|
||||
let state = self.physical_regions.get_mut(&physical_region_id).unwrap();
|
||||
for logical_region_id in logical_region_ids {
|
||||
state.logical_regions.insert(logical_region_id);
|
||||
self.logical_regions
|
||||
.insert(logical_region_id, physical_region_id);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn invalid_logical_regions_cache(
|
||||
&mut self,
|
||||
logical_region_ids: impl IntoIterator<Item = RegionId>,
|
||||
) {
|
||||
for logical_region_id in logical_region_ids {
|
||||
self.logical_columns.remove(&logical_region_id);
|
||||
}
|
||||
}
|
||||
|
||||
/// # Panic
|
||||
/// if the physical region does not exist
|
||||
pub fn add_logical_region(
|
||||
|
||||
@@ -125,6 +125,12 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Mito delete operation fails"))]
|
||||
MitoDeleteOperation {
|
||||
source: BoxedError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Mito catchup operation fails"))]
|
||||
MitoCatchupOperation {
|
||||
@@ -219,13 +225,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported alter kind: {}", kind))]
|
||||
UnsupportedAlterKind {
|
||||
kind: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Multiple field column found: {} and {}", previous, current))]
|
||||
MultipleFieldColumn {
|
||||
previous: String,
|
||||
@@ -270,8 +269,7 @@ impl ErrorExt for Error {
|
||||
| MultipleFieldColumn { .. }
|
||||
| NoFieldColumn { .. }
|
||||
| AddingFieldColumn { .. }
|
||||
| ParseRegionOptions { .. }
|
||||
| UnsupportedAlterKind { .. } => StatusCode::InvalidArguments,
|
||||
| ParseRegionOptions { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
ForbiddenPhysicalAlter { .. } | UnsupportedRegionRequest { .. } => {
|
||||
StatusCode::Unsupported
|
||||
@@ -296,7 +294,8 @@ impl ErrorExt for Error {
|
||||
| MitoReadOperation { source, .. }
|
||||
| MitoWriteOperation { source, .. }
|
||||
| MitoCatchupOperation { source, .. }
|
||||
| MitoFlushOperation { source, .. } => source.status_code(),
|
||||
| MitoFlushOperation { source, .. }
|
||||
| MitoDeleteOperation { source, .. } => source.status_code(),
|
||||
|
||||
EncodePrimaryKey { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ const COLUMN_PREFIX: &str = "__column_";
|
||||
/// table id + region sequence. This handler will transform the region group by
|
||||
/// itself.
|
||||
pub struct MetadataRegion {
|
||||
pub(crate) mito: MitoEngine,
|
||||
mito: MitoEngine,
|
||||
/// Logical lock for operations that need to be serialized. Like update & read region columns.
|
||||
///
|
||||
/// Region entry will be registered on creating and opening logical region, and deregistered on
|
||||
@@ -474,52 +474,6 @@ impl MetadataRegion {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn build_put_request_from_iter(
|
||||
kv: impl Iterator<Item = (String, String)>,
|
||||
) -> RegionPutRequest {
|
||||
let cols = vec![
|
||||
ColumnSchema {
|
||||
column_name: METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::TimestampMillisecond as _,
|
||||
semantic_type: SemanticType::Timestamp as _,
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: METADATA_SCHEMA_KEY_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::String as _,
|
||||
semantic_type: SemanticType::Tag as _,
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: METADATA_SCHEMA_VALUE_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::String as _,
|
||||
semantic_type: SemanticType::Field as _,
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
let rows = Rows {
|
||||
schema: cols,
|
||||
rows: kv
|
||||
.into_iter()
|
||||
.map(|(key, value)| Row {
|
||||
values: vec![
|
||||
Value {
|
||||
value_data: Some(ValueData::TimestampMillisecondValue(0)),
|
||||
},
|
||||
Value {
|
||||
value_data: Some(ValueData::StringValue(key)),
|
||||
},
|
||||
Value {
|
||||
value_data: Some(ValueData::StringValue(value)),
|
||||
},
|
||||
],
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
|
||||
RegionPutRequest { rows, hint: None }
|
||||
}
|
||||
|
||||
fn build_put_request(key: &str, value: &str) -> RegionPutRequest {
|
||||
let cols = vec![
|
||||
ColumnSchema {
|
||||
|
||||
@@ -276,7 +276,6 @@ impl CpuDataGenerator {
|
||||
rows,
|
||||
}),
|
||||
write_hint: None,
|
||||
bulk: Vec::new(),
|
||||
};
|
||||
|
||||
KeyValues::new(&self.metadata, mutation).unwrap()
|
||||
|
||||
@@ -416,7 +416,9 @@ impl EngineInner {
|
||||
region_id: RegionId,
|
||||
request: RegionRequest,
|
||||
) -> Result<AffectedRows> {
|
||||
let (request, receiver) = WorkerRequest::try_from_region_request(region_id, request)?;
|
||||
let region_metadata = self.get_metadata(region_id).ok();
|
||||
let (request, receiver) =
|
||||
WorkerRequest::try_from_region_request(region_id, request, region_metadata)?;
|
||||
self.workers.submit_to_worker(region_id, request).await?;
|
||||
|
||||
receiver.await.context(RecvSnafu)?
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -30,7 +29,6 @@ use table::predicate::Predicate;
|
||||
use crate::config::MitoConfig;
|
||||
use crate::error::Result;
|
||||
use crate::flush::WriteBufferManagerRef;
|
||||
use crate::memtable::bulk::BulkMemtableBuilder;
|
||||
use crate::memtable::key_values::KeyValue;
|
||||
pub use crate::memtable::key_values::KeyValues;
|
||||
use crate::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtableBuilder};
|
||||
@@ -42,7 +40,6 @@ use crate::region::options::{MemtableOptions, MergeMode};
|
||||
use crate::sst::file::FileTimeRange;
|
||||
|
||||
pub mod bulk;
|
||||
mod encoder;
|
||||
pub mod key_values;
|
||||
pub mod partition_tree;
|
||||
mod stats;
|
||||
@@ -293,19 +290,6 @@ impl MemtableBuilderProvider {
|
||||
dedup: bool,
|
||||
merge_mode: MergeMode,
|
||||
) -> MemtableBuilderRef {
|
||||
// todo(hl): make it an option
|
||||
if std::env::var("enable_bulk_memtable")
|
||||
.ok()
|
||||
.and_then(|v| bool::from_str(&v).ok())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
return Arc::new(BulkMemtableBuilder::new(
|
||||
self.write_buffer_manager.clone(),
|
||||
dedup,
|
||||
merge_mode,
|
||||
));
|
||||
}
|
||||
|
||||
match options {
|
||||
Some(MemtableOptions::TimeSeries) => Arc::new(TimeSeriesMemtableBuilder::new(
|
||||
self.write_buffer_manager.clone(),
|
||||
|
||||
@@ -14,27 +14,18 @@
|
||||
|
||||
//! Memtable implementation for bulk load
|
||||
|
||||
use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::{ColumnId, SequenceNumber};
|
||||
use table::predicate::Predicate;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::flush::WriteBufferManagerRef;
|
||||
use crate::memtable::bulk::context::BulkIterContext;
|
||||
use crate::memtable::bulk::part::BulkPart;
|
||||
use crate::memtable::key_values::KeyValue;
|
||||
use crate::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtableBuilder};
|
||||
use crate::memtable::{
|
||||
AllocTracker, BoxedBatchIterator, KeyValues, Memtable, MemtableBuilder, MemtableId,
|
||||
MemtableRanges, MemtableRef, MemtableStats,
|
||||
BoxedBatchIterator, KeyValues, Memtable, MemtableId, MemtableRanges, MemtableRef, MemtableStats,
|
||||
};
|
||||
use crate::read::dedup::{LastNonNull, LastRow};
|
||||
use crate::read::sync::dedup::DedupReader;
|
||||
use crate::region::options::MergeMode;
|
||||
|
||||
#[allow(unused)]
|
||||
mod context;
|
||||
@@ -43,86 +34,10 @@ pub(crate) mod part;
|
||||
mod part_reader;
|
||||
mod row_group_reader;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BulkMemtableBuilder {
|
||||
write_buffer_manager: Option<WriteBufferManagerRef>,
|
||||
dedup: bool,
|
||||
merge_mode: MergeMode,
|
||||
fallback_builder: PartitionTreeMemtableBuilder,
|
||||
}
|
||||
|
||||
impl MemtableBuilder for BulkMemtableBuilder {
|
||||
fn build(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
|
||||
//todo(hl): create different memtables according to region type (metadata/physical)
|
||||
if metadata.primary_key_encoding == PrimaryKeyEncoding::Dense {
|
||||
self.fallback_builder.build(id, metadata)
|
||||
} else {
|
||||
Arc::new(BulkMemtable::new(
|
||||
metadata.clone(),
|
||||
id,
|
||||
self.write_buffer_manager.clone(),
|
||||
self.dedup,
|
||||
self.merge_mode,
|
||||
)) as MemtableRef
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BulkMemtableBuilder {
|
||||
pub fn new(
|
||||
write_buffer_manager: Option<WriteBufferManagerRef>,
|
||||
dedup: bool,
|
||||
merge_mode: MergeMode,
|
||||
) -> Self {
|
||||
let builder = PartitionTreeMemtableBuilder::new(
|
||||
PartitionTreeConfig::default(),
|
||||
write_buffer_manager.clone(),
|
||||
);
|
||||
|
||||
Self {
|
||||
write_buffer_manager,
|
||||
dedup,
|
||||
merge_mode,
|
||||
fallback_builder: builder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BulkMemtable {
|
||||
id: MemtableId,
|
||||
parts: RwLock<Vec<BulkPart>>,
|
||||
region_metadata: RegionMetadataRef,
|
||||
alloc_tracker: AllocTracker,
|
||||
max_timestamp: AtomicI64,
|
||||
min_timestamp: AtomicI64,
|
||||
max_sequence: AtomicU64,
|
||||
num_rows: AtomicUsize,
|
||||
dedup: bool,
|
||||
merge_mode: MergeMode,
|
||||
}
|
||||
|
||||
impl BulkMemtable {
|
||||
pub fn new(
|
||||
region_metadata: RegionMetadataRef,
|
||||
id: MemtableId,
|
||||
write_buffer_manager: Option<WriteBufferManagerRef>,
|
||||
dedup: bool,
|
||||
merge_mode: MergeMode,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
parts: RwLock::new(vec![]),
|
||||
region_metadata,
|
||||
alloc_tracker: AllocTracker::new(write_buffer_manager),
|
||||
max_timestamp: AtomicI64::new(i64::MIN),
|
||||
min_timestamp: AtomicI64::new(i64::MAX),
|
||||
max_sequence: Default::default(),
|
||||
num_rows: Default::default(),
|
||||
dedup,
|
||||
merge_mode,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Memtable for BulkMemtable {
|
||||
@@ -139,56 +54,18 @@ impl Memtable for BulkMemtable {
|
||||
}
|
||||
|
||||
fn write_bulk(&self, fragment: BulkPart) -> Result<()> {
|
||||
self.alloc_tracker.on_allocation(fragment.data.len());
|
||||
let mut parts = self.parts.write().unwrap();
|
||||
let part_metadata = fragment.metadata();
|
||||
if self.max_timestamp.load(Ordering::Relaxed) < part_metadata.max_timestamp {
|
||||
self.max_timestamp
|
||||
.store(part_metadata.max_timestamp, Ordering::Relaxed);
|
||||
}
|
||||
if self.min_timestamp.load(Ordering::Relaxed) > part_metadata.min_timestamp {
|
||||
self.min_timestamp
|
||||
.store(part_metadata.min_timestamp, Ordering::Relaxed);
|
||||
}
|
||||
if self.max_sequence.load(Ordering::Relaxed) < part_metadata.max_sequence {
|
||||
self.max_sequence
|
||||
.store(part_metadata.max_sequence, Ordering::Relaxed);
|
||||
}
|
||||
self.num_rows
|
||||
.fetch_add(part_metadata.num_rows, Ordering::Relaxed);
|
||||
parts.push(fragment);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn iter(
|
||||
&self,
|
||||
projection: Option<&[ColumnId]>,
|
||||
predicate: Option<Predicate>,
|
||||
sequence: Option<SequenceNumber>,
|
||||
_projection: Option<&[ColumnId]>,
|
||||
_predicate: Option<Predicate>,
|
||||
_sequence: Option<SequenceNumber>,
|
||||
) -> Result<BoxedBatchIterator> {
|
||||
let mut readers = Vec::new();
|
||||
let parts = self.parts.read().unwrap();
|
||||
|
||||
let ctx = Arc::new(BulkIterContext::new(
|
||||
self.region_metadata.clone(),
|
||||
&projection,
|
||||
predicate.clone(),
|
||||
));
|
||||
for part in parts.as_slice() {
|
||||
if let Some(reader) = part.read(ctx.clone(), sequence).unwrap() {
|
||||
readers.push(reader);
|
||||
}
|
||||
}
|
||||
let merge_reader = crate::read::sync::merge::MergeReader::new(readers)?;
|
||||
let reader = match self.merge_mode {
|
||||
MergeMode::LastRow => {
|
||||
Box::new(DedupReader::new(merge_reader, LastRow::new(self.dedup))) as BoxedBatchIterator
|
||||
}
|
||||
MergeMode::LastNonNull => {
|
||||
Box::new(DedupReader::new(merge_reader, LastNonNull::new(self.dedup))) as BoxedBatchIterator
|
||||
}
|
||||
};
|
||||
Ok(reader )
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn ranges(
|
||||
@@ -205,175 +82,17 @@ impl Memtable for BulkMemtable {
|
||||
}
|
||||
|
||||
fn freeze(&self) -> Result<()> {
|
||||
self.alloc_tracker.done_allocating();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn stats(&self) -> MemtableStats {
|
||||
let estimated_bytes = self.alloc_tracker.bytes_allocated();
|
||||
|
||||
if estimated_bytes == 0 {
|
||||
// no rows ever written
|
||||
return MemtableStats {
|
||||
estimated_bytes,
|
||||
time_range: None,
|
||||
num_rows: 0,
|
||||
num_ranges: 0,
|
||||
max_sequence: 0,
|
||||
};
|
||||
}
|
||||
|
||||
let ts_type = self
|
||||
.region_metadata
|
||||
.time_index_column()
|
||||
.column_schema
|
||||
.data_type
|
||||
.clone()
|
||||
.as_timestamp()
|
||||
.expect("Timestamp column must have timestamp type");
|
||||
let max_timestamp = ts_type.create_timestamp(self.max_timestamp.load(Ordering::Relaxed));
|
||||
let min_timestamp = ts_type.create_timestamp(self.min_timestamp.load(Ordering::Relaxed));
|
||||
MemtableStats {
|
||||
estimated_bytes,
|
||||
time_range: Some((min_timestamp, max_timestamp)),
|
||||
num_rows: self.num_rows.load(Ordering::Relaxed),
|
||||
num_ranges: 1, //todo(hl): we should consider bulk parts as different ranges.
|
||||
max_sequence: self.max_sequence.load(Ordering::Relaxed),
|
||||
}
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn fork(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
|
||||
Arc::new(Self::new(
|
||||
metadata.clone(),
|
||||
fn fork(&self, id: MemtableId, _metadata: &RegionMetadataRef) -> MemtableRef {
|
||||
Arc::new(Self {
|
||||
id,
|
||||
self.alloc_tracker.write_buffer_manager(),
|
||||
self.dedup,
|
||||
self.merge_mode,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{OpType, Row, Rows, SemanticType};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use std::sync::Arc;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder, RegionMetadataRef};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::memtable::bulk::part::BulkPartEncoder;
|
||||
use crate::memtable::bulk::BulkMemtable;
|
||||
use crate::memtable::{BulkPart, Memtable};
|
||||
use crate::region::options::MergeMode;
|
||||
|
||||
fn metrics_region_metadata() -> RegionMetadataRef {
|
||||
let mut builder = RegionMetadataBuilder::new(RegionId::new(123, 456));
|
||||
builder
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("k0", ConcreteDataType::binary_datatype(), false),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 0,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
semantic_type: SemanticType::Timestamp,
|
||||
column_id: 1,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("v0", ConcreteDataType::float64_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 2,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("v1", ConcreteDataType::float64_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 3,
|
||||
})
|
||||
.primary_key(vec![0]);
|
||||
let region_metadata = builder.build().unwrap();
|
||||
Arc::new(region_metadata)
|
||||
}
|
||||
|
||||
fn metrics_column_schema() -> Vec<api::v1::ColumnSchema> {
|
||||
let schema = metrics_region_metadata();
|
||||
schema
|
||||
.column_metadatas
|
||||
.iter()
|
||||
.map(|c| api::v1::ColumnSchema {
|
||||
column_name: c.column_schema.name.clone(),
|
||||
datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
|
||||
.unwrap()
|
||||
.datatype() as i32,
|
||||
semantic_type: c.semantic_type as i32,
|
||||
..Default::default()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_metrics_bulk_part(
|
||||
k: &str,
|
||||
ts: &[i64],
|
||||
v0: &[Option<f64>],
|
||||
v1: &[Option<f64>],
|
||||
seq: u64,
|
||||
) -> BulkPart {
|
||||
assert_eq!(ts.len(), v0.len());
|
||||
assert_eq!(ts.len(), v1.len());
|
||||
|
||||
let rows = ts
|
||||
.iter()
|
||||
.zip(v0.iter())
|
||||
.zip(v1.iter())
|
||||
.map(|((ts, v0), v1)| Row {
|
||||
values: vec![
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::BinaryValue(k.as_bytes().to_vec())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::TimestampMillisecondValue(*ts as i64)),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: v0.map(ValueData::F64Value),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: v1.map(ValueData::F64Value),
|
||||
},
|
||||
],
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mutation = api::v1::Mutation {
|
||||
op_type: OpType::Put as i32,
|
||||
sequence: seq,
|
||||
rows: Some(Rows {
|
||||
schema: metrics_column_schema(),
|
||||
rows,
|
||||
}),
|
||||
write_hint: None,
|
||||
bulk: Vec::new(),
|
||||
};
|
||||
let encoder = BulkPartEncoder::new(metrics_region_metadata(), true, 1024);
|
||||
encoder.encode_mutations(&[mutation]).unwrap().unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bulk_iter() {
|
||||
let schema = metrics_region_metadata();
|
||||
let memtable = BulkMemtable::new(schema, 0, None, true, MergeMode::LastRow);
|
||||
memtable.write_bulk(build_metrics_bulk_part("a", &[1], &[None], &[Some(1.0)], 0)).unwrap();
|
||||
// write duplicated rows
|
||||
memtable.write_bulk(build_metrics_bulk_part("a", &[1], &[None], &[Some(1.0)], 0)).unwrap();
|
||||
let iter = memtable.iter(None, None, None).unwrap();
|
||||
let total_rows = iter.map(|b| {
|
||||
b.unwrap().num_rows()
|
||||
}).sum::<usize>();
|
||||
assert_eq!(1, total_rows);
|
||||
parts: RwLock::new(vec![]),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ use std::sync::Arc;
|
||||
|
||||
use api::v1::Mutation;
|
||||
use bytes::Bytes;
|
||||
use common_telemetry::error;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion::arrow::array::{TimestampNanosecondArray, UInt64Builder};
|
||||
use datatypes::arrow;
|
||||
@@ -33,7 +32,6 @@ use datatypes::arrow::datatypes::SchemaRef;
|
||||
use datatypes::arrow_array::BinaryArray;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::{MutableVector, ScalarVectorBuilder, Vector};
|
||||
use datatypes::value::ValueRef;
|
||||
use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
|
||||
use parquet::arrow::ArrowWriter;
|
||||
use parquet::data_type::AsBytes;
|
||||
@@ -48,17 +46,16 @@ use crate::error;
|
||||
use crate::error::{ComputeArrowSnafu, EncodeMemtableSnafu, NewRecordBatchSnafu, Result};
|
||||
use crate::memtable::bulk::context::BulkIterContextRef;
|
||||
use crate::memtable::bulk::part_reader::BulkPartIter;
|
||||
use crate::memtable::encoder::{FieldWithId, SparseEncoder};
|
||||
use crate::memtable::key_values::KeyValuesRef;
|
||||
use crate::memtable::BoxedBatchIterator;
|
||||
use crate::row_converter::{PrimaryKeyCodec, PrimaryKeyCodecExt};
|
||||
use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodec, PrimaryKeyCodecExt};
|
||||
use crate::sst::parquet::format::{PrimaryKeyArray, ReadFormat};
|
||||
use crate::sst::parquet::helper::parse_parquet_metadata;
|
||||
use crate::sst::to_sst_arrow_schema;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug)]
|
||||
pub struct BulkPart {
|
||||
pub(crate) data: Bytes,
|
||||
data: Bytes,
|
||||
metadata: BulkPartMeta,
|
||||
}
|
||||
|
||||
@@ -95,7 +92,7 @@ impl BulkPart {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug)]
|
||||
pub struct BulkPartMeta {
|
||||
/// Total rows in part.
|
||||
pub num_rows: usize,
|
||||
@@ -103,8 +100,6 @@ pub struct BulkPartMeta {
|
||||
pub max_timestamp: i64,
|
||||
/// Min timestamp in part.
|
||||
pub min_timestamp: i64,
|
||||
/// Max sequence number in part.
|
||||
pub max_sequence: u64,
|
||||
/// Part file metadata.
|
||||
pub parquet_metadata: Arc<ParquetMetaData>,
|
||||
/// Part region schema.
|
||||
@@ -113,7 +108,7 @@ pub struct BulkPartMeta {
|
||||
|
||||
pub struct BulkPartEncoder {
|
||||
metadata: RegionMetadataRef,
|
||||
pk_encoder: SparseEncoder,
|
||||
pk_encoder: DensePrimaryKeyCodec,
|
||||
row_group_size: usize,
|
||||
dedup: bool,
|
||||
writer_props: Option<WriterProperties>,
|
||||
@@ -125,7 +120,7 @@ impl BulkPartEncoder {
|
||||
dedup: bool,
|
||||
row_group_size: usize,
|
||||
) -> BulkPartEncoder {
|
||||
let encoder = SparseEncoder::new(&metadata);
|
||||
let codec = DensePrimaryKeyCodec::new(&metadata);
|
||||
let writer_props = Some(
|
||||
WriterProperties::builder()
|
||||
.set_write_batch_size(row_group_size)
|
||||
@@ -134,7 +129,7 @@ impl BulkPartEncoder {
|
||||
);
|
||||
Self {
|
||||
metadata,
|
||||
pk_encoder: encoder,
|
||||
pk_encoder: codec,
|
||||
row_group_size,
|
||||
dedup,
|
||||
writer_props,
|
||||
@@ -144,9 +139,9 @@ impl BulkPartEncoder {
|
||||
|
||||
impl BulkPartEncoder {
|
||||
/// Encodes mutations to a [BulkPart], returns true if encoded data has been written to `dest`.
|
||||
pub(crate) fn encode_mutations(&self, mutations: &[Mutation]) -> Result<Option<BulkPart>> {
|
||||
let Some((arrow_record_batch, min_ts, max_ts, max_sequence)) =
|
||||
mutations_to_record_batch(mutations, &self.metadata, self.dedup)?
|
||||
fn encode_mutations(&self, mutations: &[Mutation]) -> Result<Option<BulkPart>> {
|
||||
let Some((arrow_record_batch, min_ts, max_ts)) =
|
||||
mutations_to_record_batch(mutations, &self.metadata, &self.pk_encoder, self.dedup)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
@@ -173,7 +168,6 @@ impl BulkPartEncoder {
|
||||
num_rows: arrow_record_batch.num_rows(),
|
||||
max_timestamp: max_ts,
|
||||
min_timestamp: min_ts,
|
||||
max_sequence,
|
||||
parquet_metadata,
|
||||
region_metadata: self.metadata.clone(),
|
||||
},
|
||||
@@ -185,8 +179,9 @@ impl BulkPartEncoder {
|
||||
fn mutations_to_record_batch(
|
||||
mutations: &[Mutation],
|
||||
metadata: &RegionMetadataRef,
|
||||
pk_encoder: &DensePrimaryKeyCodec,
|
||||
dedup: bool,
|
||||
) -> Result<Option<(RecordBatch, i64, i64, u64)>> {
|
||||
) -> Result<Option<(RecordBatch, i64, i64)>> {
|
||||
let total_rows: usize = mutations
|
||||
.iter()
|
||||
.map(|m| m.rows.as_ref().map(|r| r.rows.len()).unwrap_or(0))
|
||||
@@ -211,29 +206,18 @@ fn mutations_to_record_batch(
|
||||
.map(|f| f.column_schema.data_type.create_mutable_vector(total_rows))
|
||||
.collect();
|
||||
|
||||
let mut max_sequence = u64::MIN;
|
||||
let mut pk_buffer = vec![];
|
||||
for m in mutations {
|
||||
let Some(key_values) = KeyValuesRef::new(metadata, m) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
for row in key_values.iter() {
|
||||
assert_eq!(1, row.num_primary_keys());
|
||||
let first_primary_key_col = row.primary_keys().next().unwrap();
|
||||
|
||||
let bytes = match first_primary_key_col {
|
||||
ValueRef::Binary(b) => b,
|
||||
_ => {
|
||||
unreachable!(
|
||||
"Primary key must be encoded binary type, found: {:?}",
|
||||
first_primary_key_col
|
||||
);
|
||||
}
|
||||
};
|
||||
pk_builder.append_value(bytes);
|
||||
pk_buffer.clear();
|
||||
pk_encoder.encode_to_vec(row.primary_keys(), &mut pk_buffer)?;
|
||||
pk_builder.append_value(pk_buffer.as_bytes());
|
||||
ts_vector.push_value_ref(row.timestamp());
|
||||
sequence_builder.append_value(row.sequence());
|
||||
max_sequence = max_sequence.max(row.sequence());
|
||||
op_type_builder.append_value(row.op_type() as u8);
|
||||
for (builder, field) in field_builders.iter_mut().zip(row.fields()) {
|
||||
builder.push_value_ref(field);
|
||||
@@ -263,9 +247,7 @@ fn mutations_to_record_batch(
|
||||
arrow_schema,
|
||||
};
|
||||
|
||||
sorter.sort().map(|(batch, min, max)|{
|
||||
Some((batch, min, max, max_sequence))
|
||||
})
|
||||
sorter.sort().map(Some)
|
||||
}
|
||||
|
||||
struct ArraysSorter<I> {
|
||||
@@ -281,7 +263,7 @@ struct ArraysSorter<I> {
|
||||
|
||||
impl<I> ArraysSorter<I>
|
||||
where
|
||||
I: Iterator<Item=ArrayRef>,
|
||||
I: Iterator<Item = ArrayRef>,
|
||||
{
|
||||
/// Converts arrays to record batch.
|
||||
fn sort(self) -> Result<(RecordBatch, i64, i64)> {
|
||||
@@ -329,10 +311,10 @@ where
|
||||
check_bounds: false,
|
||||
}),
|
||||
)
|
||||
.context(ComputeArrowSnafu)?
|
||||
.as_any()
|
||||
.downcast_ref::<BinaryArray>()
|
||||
.unwrap(),
|
||||
.context(ComputeArrowSnafu)?
|
||||
.as_any()
|
||||
.downcast_ref::<BinaryArray>()
|
||||
.unwrap(),
|
||||
)?) as ArrayRef;
|
||||
|
||||
let mut arrays = Vec::with_capacity(self.arrow_schema.fields.len());
|
||||
@@ -345,7 +327,7 @@ where
|
||||
check_bounds: false,
|
||||
}),
|
||||
)
|
||||
.context(ComputeArrowSnafu)?,
|
||||
.context(ComputeArrowSnafu)?,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -356,7 +338,7 @@ where
|
||||
check_bounds: false,
|
||||
}),
|
||||
)
|
||||
.context(ComputeArrowSnafu)?;
|
||||
.context(ComputeArrowSnafu)?;
|
||||
|
||||
arrays.push(timestamp);
|
||||
arrays.push(pk_dictionary);
|
||||
@@ -368,7 +350,7 @@ where
|
||||
check_bounds: false,
|
||||
}),
|
||||
)
|
||||
.context(ComputeArrowSnafu)?,
|
||||
.context(ComputeArrowSnafu)?,
|
||||
);
|
||||
|
||||
arrays.push(
|
||||
@@ -379,7 +361,7 @@ where
|
||||
check_bounds: false,
|
||||
}),
|
||||
)
|
||||
.context(ComputeArrowSnafu)?,
|
||||
.context(ComputeArrowSnafu)?,
|
||||
);
|
||||
|
||||
let batch = RecordBatch::try_new(self.arrow_schema, arrays).context(NewRecordBatchSnafu)?;
|
||||
@@ -391,7 +373,7 @@ where
|
||||
fn timestamp_array_to_iter(
|
||||
timestamp_unit: TimeUnit,
|
||||
timestamp: &ArrayRef,
|
||||
) -> impl Iterator<Item=&i64> {
|
||||
) -> impl Iterator<Item = &i64> {
|
||||
match timestamp_unit {
|
||||
// safety: timestamp column must be valid.
|
||||
TimeUnit::Second => timestamp
|
||||
@@ -453,7 +435,7 @@ fn binary_array_to_dictionary(input: &BinaryArray) -> Result<PrimaryKeyArray> {
|
||||
}
|
||||
|
||||
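For context, `binary_array_to_dictionary` above appears to turn the per-row encoded primary keys into an Arrow dictionary array so that repeated keys are stored only once. A small sketch of the same idea with arrow-rs builders, assuming the `arrow` crate is available directly (the key and value types used by the real `PrimaryKeyArray` may differ):

```rust
use arrow::array::{Array, BinaryDictionaryBuilder};
use arrow::datatypes::UInt32Type;

fn main() {
    // Rows arrive sorted by primary key, so identical keys are adjacent.
    let keys = ["host-a", "host-a", "host-b", "host-b", "host-b"];

    // Dictionary encoding stores each distinct key once plus a small index per row.
    let mut builder = BinaryDictionaryBuilder::<UInt32Type>::new();
    for key in keys {
        builder.append_value(key.as_bytes());
    }
    let dict = builder.finish();

    assert_eq!(dict.len(), 5);          // one index per input row
    assert_eq!(dict.values().len(), 2); // only two distinct keys in the dictionary
}
```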
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
mod tests {
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use datafusion_common::ScalarValue;
|
||||
@@ -519,19 +501,19 @@ pub(crate) mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) struct MutationInput<'a> {
|
||||
pub(crate) k0: &'a str,
|
||||
pub(crate) k1: u32,
|
||||
pub(crate) timestamps: &'a [i64],
|
||||
pub(crate) v1: &'a [Option<f64>],
|
||||
pub(crate) sequence: u64,
|
||||
struct MutationInput<'a> {
|
||||
k0: &'a str,
|
||||
k1: u32,
|
||||
timestamps: &'a [i64],
|
||||
v1: &'a [Option<f64>],
|
||||
sequence: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialOrd, PartialEq)]
|
||||
pub(crate) struct BatchOutput<'a> {
|
||||
pub(crate) pk_values: &'a [Value],
|
||||
pub(crate) timestamps: &'a [i64],
|
||||
pub(crate) v1: &'a [Option<f64>],
|
||||
struct BatchOutput<'a> {
|
||||
pk_values: &'a [Value],
|
||||
timestamps: &'a [i64],
|
||||
v1: &'a [Option<f64>],
|
||||
}
|
||||
|
||||
fn check_mutations_to_record_batches(
|
||||
@@ -552,7 +534,7 @@ pub(crate) mod tests {
|
||||
m.v1.iter().copied(),
|
||||
m.sequence,
|
||||
)
|
||||
.mutation
|
||||
.mutation
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let total_rows: usize = mutations
|
||||
@@ -561,9 +543,9 @@ pub(crate) mod tests {
|
||||
.map(|r| r.rows.len())
|
||||
.sum();
|
||||
|
||||
let pk_encoder = SparseEncoder::new(&metadata);
|
||||
let pk_encoder = DensePrimaryKeyCodec::new(&metadata);
|
||||
|
||||
let (batch, _, _,_) = mutations_to_record_batch(&mutations, &metadata, dedup)
|
||||
let (batch, _, _) = mutations_to_record_batch(&mutations, &metadata, &pk_encoder, dedup)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let read_format = ReadFormat::new_with_all_columns(metadata.clone());
|
||||
@@ -580,7 +562,7 @@ pub(crate) mod tests {
|
||||
let batch_values = batches
|
||||
.into_iter()
|
||||
.map(|b| {
|
||||
let pk_values = pk_encoder.decode(b.primary_key()).unwrap();
|
||||
let pk_values = pk_encoder.decode(b.primary_key()).unwrap().into_dense();
|
||||
let timestamps = b
|
||||
.timestamps()
|
||||
.as_any()
|
||||
@@ -760,7 +742,7 @@ pub(crate) mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) fn encode(input: &[MutationInput]) -> BulkPart {
|
||||
fn encode(input: &[MutationInput]) -> BulkPart {
|
||||
let metadata = metadata_for_test();
|
||||
let mutations = input
|
||||
.iter()
|
||||
@@ -773,7 +755,7 @@ pub(crate) mod tests {
|
||||
m.v1.iter().copied(),
|
||||
m.sequence,
|
||||
)
|
||||
.mutation
|
||||
.mutation
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let encoder = BulkPartEncoder::new(metadata, true, 1024);
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Sparse primary key encoder;
|
||||
|
||||
use datatypes::prelude::ValueRef;
|
||||
use memcomparable::Serializer;
|
||||
use serde::Serialize;
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::ColumnId;
|
||||
|
||||
use crate::error::SerializeFieldSnafu;
|
||||
use crate::row_converter::SortField;
|
||||
|
||||
pub(crate) struct FieldWithId {
|
||||
pub(crate) field: SortField,
|
||||
pub(crate) column_id: ColumnId,
|
||||
}
|
||||
|
||||
pub(crate) struct SparseEncoder {
|
||||
pub(crate) columns: Vec<FieldWithId>,
|
||||
#[cfg(test)]
|
||||
pub(crate) column_id_to_field: std::collections::HashMap<ColumnId, (SortField, usize)>,
|
||||
}
|
||||
|
||||
impl SparseEncoder {
|
||||
pub(crate) fn new(metadata: &RegionMetadataRef) -> Self {
|
||||
let mut columns = Vec::with_capacity(metadata.primary_key.len());
|
||||
#[cfg(test)]
|
||||
let mut column_id_to_field =
|
||||
std::collections::HashMap::with_capacity(metadata.primary_key.len());
|
||||
for (_idx, c) in metadata.primary_key_columns().enumerate() {
|
||||
let sort_field = SortField::new(c.column_schema.data_type.clone());
|
||||
|
||||
let field = FieldWithId {
|
||||
field: sort_field.clone(),
|
||||
column_id: c.column_id,
|
||||
};
|
||||
columns.push(field);
|
||||
#[cfg(test)]
|
||||
column_id_to_field.insert(c.column_id, (sort_field, _idx));
|
||||
}
|
||||
Self {
|
||||
columns,
|
||||
#[cfg(test)]
|
||||
column_id_to_field,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> crate::error::Result<()>
|
||||
where
|
||||
I: Iterator<Item = ValueRef<'a>>,
|
||||
{
|
||||
let mut serializer = Serializer::new(buffer);
|
||||
for (value, field) in row.zip(self.columns.iter()) {
|
||||
if !value.is_null() {
|
||||
field
|
||||
.column_id
|
||||
.serialize(&mut serializer)
|
||||
.context(SerializeFieldSnafu)?;
|
||||
field.field.serialize(&mut serializer, &value)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn decode(&self, bytes: &[u8]) -> crate::error::Result<Vec<datatypes::value::Value>> {
|
||||
use serde::Deserialize;
|
||||
let mut deserializer = memcomparable::Deserializer::new(bytes);
|
||||
let mut values = vec![datatypes::value::Value::Null; self.columns.len()];
|
||||
|
||||
while deserializer.has_remaining() {
|
||||
let column_id =
|
||||
u32::deserialize(&mut deserializer).context(crate::error::DeserializeFieldSnafu)?;
|
||||
let (field, idx) = self.column_id_to_field.get(&column_id).unwrap();
|
||||
let value = field.deserialize(&mut deserializer)?;
|
||||
values[*idx] = value;
|
||||
}
|
||||
Ok(values)
|
||||
}
|
||||
}
|
||||
@@ -394,7 +394,6 @@ mod tests {
|
||||
sequence: START_SEQ,
|
||||
rows: Some(rows),
|
||||
write_hint: None,
|
||||
bulk: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -433,7 +432,6 @@ mod tests {
|
||||
sequence: 100,
|
||||
rows: None,
|
||||
write_hint: None,
|
||||
bulk: Vec::new(),
|
||||
};
|
||||
let kvs = KeyValues::new(&meta, mutation);
|
||||
assert!(kvs.is_none());
|
||||
|
||||
@@ -731,7 +731,6 @@ mod tests {
|
||||
rows,
|
||||
}),
|
||||
write_hint: None,
|
||||
bulk: Vec::new(),
|
||||
};
|
||||
KeyValues::new(metadata.as_ref(), mutation).unwrap()
|
||||
}
|
||||
|
||||
@@ -23,15 +23,18 @@ use common_recordbatch::filter::SimpleFilterEvaluator;
|
||||
use common_time::Timestamp;
|
||||
use datafusion_common::ScalarValue;
|
||||
use datatypes::prelude::ValueRef;
|
||||
use snafu::ensure;
|
||||
use memcomparable::Serializer;
|
||||
use serde::Serialize;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::{ColumnId, SequenceNumber};
|
||||
use table::predicate::Predicate;
|
||||
|
||||
use crate::error::{EncodeSparsePrimaryKeySnafu, PrimaryKeyLengthMismatchSnafu, Result};
|
||||
use crate::error::{
|
||||
EncodeSparsePrimaryKeySnafu, PrimaryKeyLengthMismatchSnafu, Result, SerializeFieldSnafu,
|
||||
};
|
||||
use crate::flush::WriteBufferManagerRef;
|
||||
use crate::memtable::encoder::SparseEncoder;
|
||||
use crate::memtable::key_values::KeyValue;
|
||||
use crate::memtable::partition_tree::partition::{
|
||||
Partition, PartitionKey, PartitionReader, PartitionRef, ReadPartitionContext,
|
||||
@@ -43,7 +46,7 @@ use crate::metrics::{PARTITION_TREE_READ_STAGE_ELAPSED, READ_ROWS_TOTAL, READ_ST
|
||||
use crate::read::dedup::LastNonNullIter;
|
||||
use crate::read::Batch;
|
||||
use crate::region::options::MergeMode;
|
||||
use crate::row_converter::PrimaryKeyCodec;
|
||||
use crate::row_converter::{PrimaryKeyCodec, SortField};
|
||||
|
||||
/// The partition tree.
|
||||
pub struct PartitionTree {
|
||||
@@ -70,7 +73,15 @@ impl PartitionTree {
|
||||
config: &PartitionTreeConfig,
|
||||
write_buffer_manager: Option<WriteBufferManagerRef>,
|
||||
) -> Self {
|
||||
let sparse_encoder = SparseEncoder::new(&metadata);
|
||||
let sparse_encoder = SparseEncoder {
|
||||
fields: metadata
|
||||
.primary_key_columns()
|
||||
.map(|c| FieldWithId {
|
||||
field: SortField::new(c.column_schema.data_type.clone()),
|
||||
column_id: c.column_id,
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
let is_partitioned = Partition::has_multi_partitions(&metadata);
|
||||
let mut config = config.clone();
|
||||
if config.merge_mode == MergeMode::LastNonNull {
|
||||
@@ -425,6 +436,34 @@ impl PartitionTree {
|
||||
}
|
||||
}
|
||||
|
||||
struct FieldWithId {
|
||||
field: SortField,
|
||||
column_id: ColumnId,
|
||||
}
|
||||
|
||||
struct SparseEncoder {
|
||||
fields: Vec<FieldWithId>,
|
||||
}
|
||||
|
||||
impl SparseEncoder {
|
||||
fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
|
||||
where
|
||||
I: Iterator<Item = ValueRef<'a>>,
|
||||
{
|
||||
let mut serializer = Serializer::new(buffer);
|
||||
for (value, field) in row.zip(self.fields.iter()) {
|
||||
if !value.is_null() {
|
||||
field
|
||||
.column_id
|
||||
.serialize(&mut serializer)
|
||||
.context(SerializeFieldSnafu)?;
|
||||
field.field.serialize(&mut serializer, &value)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
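For reference, a minimal standalone sketch of the sparse-key idea used by `SparseEncoder::encode_to_vec` above: only non-null key columns are written, each prefixed with its column id, so absent columns simply never appear in the encoded bytes. This sketch substitutes plain big-endian integers for the `memcomparable` serializer, and every name in it is invented for illustration.

```rust
/// Toy sparse key encoder: writes (column_id, value) pairs for non-null columns only.
/// Big-endian encoding keeps byte order consistent with value order for this example.
fn encode_sparse(values: &[(u32, Option<u64>)], buffer: &mut Vec<u8>) {
    for (column_id, value) in values {
        if let Some(v) = value {
            buffer.extend_from_slice(&column_id.to_be_bytes()); // column id prefix
            buffer.extend_from_slice(&v.to_be_bytes());         // encoded value
        }
    }
}

/// Decodes the pairs back; columns that were null simply never show up.
fn decode_sparse(mut bytes: &[u8]) -> Vec<(u32, u64)> {
    let mut out = Vec::new();
    while bytes.len() >= 12 {
        let id = u32::from_be_bytes(bytes[..4].try_into().unwrap());
        let v = u64::from_be_bytes(bytes[4..12].try_into().unwrap());
        out.push((id, v));
        bytes = &bytes[12..];
    }
    out
}

fn main() {
    let mut buf = Vec::new();
    // Column 2 is null, so it is skipped entirely.
    encode_sparse(&[(0, Some(42)), (2, None), (3, Some(7))], &mut buf);
    assert_eq!(decode_sparse(&buf), vec![(0, 42), (3, 7)]);
}
```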
#[derive(Default)]
|
||||
struct TreeIterMetrics {
|
||||
iter_elapsed: Duration,
|
||||
|
||||
@@ -29,7 +29,7 @@ use store_api::metadata::RegionMetadataRef;
|
||||
use crate::error::{InvalidRequestSnafu, Result};
|
||||
use crate::memtable::key_values::KeyValue;
|
||||
use crate::memtable::version::SmallMemtableVec;
|
||||
use crate::memtable::{BulkPart, KeyValues, MemtableBuilderRef, MemtableId, MemtableRef};
|
||||
use crate::memtable::{KeyValues, MemtableBuilderRef, MemtableId, MemtableRef};
|
||||
|
||||
/// A partition holds rows with timestamps between `[min, max)`.
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -141,18 +141,6 @@ impl TimePartitions {
|
||||
self.write_multi_parts(kvs, &parts)
|
||||
}
|
||||
|
||||
/// Write bulk to the memtables.
|
||||
///
|
||||
/// It creates new partitions if necessary.
|
||||
pub fn write_bulk(&self, bulk_part: BulkPart) -> Result<()> {
|
||||
// Get all parts.
|
||||
let parts = self.list_partitions();
|
||||
|
||||
// TODO(yingwen): Now we never flush so we always have a partition.
|
||||
let last_part = parts.last().unwrap();
|
||||
last_part.memtable.write_bulk(bulk_part)
|
||||
}
|
||||
|
||||
/// Append memtables in partitions to `memtables`.
|
||||
pub fn list_memtables(&self, memtables: &mut Vec<MemtableRef>) {
|
||||
let inner = self.inner.lock().unwrap();
|
||||
|
||||
@@ -56,7 +56,10 @@ use crate::region::options::MergeMode;
|
||||
use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt};
|
||||
|
||||
/// Initial vector builder capacity.
|
||||
const INITIAL_BUILDER_CAPACITY: usize = 0;
|
||||
const INITIAL_BUILDER_CAPACITY: usize = 16;
|
||||
|
||||
/// Vector builder capacity.
|
||||
const BUILDER_CAPACITY: usize = 512;
|
||||
|
||||
/// Builder to build [TimeSeriesMemtable].
|
||||
#[derive(Debug, Default)]
|
||||
@@ -156,9 +159,7 @@ impl TimeSeriesMemtable {
|
||||
);
|
||||
|
||||
let primary_key_encoded = self.row_codec.encode(kv.primary_keys())?;
|
||||
let fields = kv.fields().collect::<Vec<_>>();
|
||||
|
||||
stats.value_bytes += fields.iter().map(|v| v.data_size()).sum::<usize>();
|
||||
let (series, series_allocated) = self.series_set.get_or_add_series(primary_key_encoded);
|
||||
stats.key_bytes += series_allocated;
|
||||
|
||||
@@ -168,7 +169,8 @@ impl TimeSeriesMemtable {
|
||||
stats.max_ts = stats.max_ts.max(ts);
|
||||
|
||||
let mut guard = series.write().unwrap();
|
||||
guard.push(kv.timestamp(), kv.sequence(), kv.op_type(), fields);
|
||||
let size = guard.push(kv.timestamp(), kv.sequence(), kv.op_type(), kv.fields());
|
||||
stats.value_bytes += size;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -619,6 +621,7 @@ struct Series {
|
||||
pk_cache: Option<Vec<Value>>,
|
||||
active: ValueBuilder,
|
||||
frozen: Vec<Values>,
|
||||
region_metadata: RegionMetadataRef,
|
||||
}
|
||||
|
||||
impl Series {
|
||||
@@ -627,12 +630,24 @@ impl Series {
|
||||
pk_cache: None,
|
||||
active: ValueBuilder::new(region_metadata, INITIAL_BUILDER_CAPACITY),
|
||||
frozen: vec![],
|
||||
region_metadata: region_metadata.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Pushes a row of values into Series.
|
||||
fn push(&mut self, ts: ValueRef, sequence: u64, op_type: OpType, values: Vec<ValueRef>) {
|
||||
self.active.push(ts, sequence, op_type as u8, values);
|
||||
/// Pushes a row of values into Series. Return the size of values.
|
||||
fn push<'a>(
|
||||
&mut self,
|
||||
ts: ValueRef<'a>,
|
||||
sequence: u64,
|
||||
op_type: OpType,
|
||||
values: impl Iterator<Item = ValueRef<'a>>,
|
||||
) -> usize {
|
||||
// + 10 to avoid potential reallocation.
|
||||
if self.active.len() + 10 > BUILDER_CAPACITY {
|
||||
let region_metadata = self.region_metadata.clone();
|
||||
self.freeze(®ion_metadata);
|
||||
}
|
||||
self.active.push(ts, sequence, op_type as u8, values)
|
||||
}
|
||||
|
||||
fn update_pk_cache(&mut self, pk_values: Vec<Value>) {
|
||||
@@ -729,27 +744,45 @@ impl ValueBuilder {
|
||||
|
||||
/// Pushes a new row to `ValueBuilder`.
|
||||
/// We don't need primary keys since they've already been encoded.
|
||||
fn push(&mut self, ts: ValueRef, sequence: u64, op_type: u8, fields: Vec<ValueRef>) {
|
||||
debug_assert_eq!(fields.len(), self.fields.len());
|
||||
/// Returns the size of field values.
|
||||
///
|
||||
/// In this method, we don't check the data type of the value, because it is already checked in the caller.
|
||||
fn push<'a>(
|
||||
&mut self,
|
||||
ts: ValueRef,
|
||||
sequence: u64,
|
||||
op_type: u8,
|
||||
fields: impl Iterator<Item = ValueRef<'a>>,
|
||||
) -> usize {
|
||||
#[cfg(debug_assertions)]
|
||||
let fields = {
|
||||
let field_vec = fields.collect::<Vec<_>>();
|
||||
debug_assert_eq!(field_vec.len(), self.fields.len());
|
||||
field_vec.into_iter()
|
||||
};
|
||||
|
||||
self.timestamp
|
||||
.push(ts.as_timestamp().unwrap().unwrap().value());
|
||||
self.sequence.push(sequence);
|
||||
self.op_type.push(op_type);
|
||||
let num_rows = self.timestamp.len();
|
||||
for (idx, field_value) in fields.into_iter().enumerate() {
|
||||
let mut size = 0;
|
||||
for (idx, field_value) in fields.enumerate() {
|
||||
size += field_value.data_size();
|
||||
if !field_value.is_null() || self.fields[idx].is_some() {
|
||||
self.fields[idx]
|
||||
.get_or_insert_with(|| {
|
||||
// lazy initialize on first non-null value
|
||||
let mut mutable_vector =
|
||||
self.field_types[idx].create_mutable_vector(num_rows);
|
||||
// fill previous rows with nulls
|
||||
mutable_vector.push_nulls(num_rows - 1);
|
||||
mutable_vector
|
||||
})
|
||||
.push_value_ref(field_value);
|
||||
if let Some(field) = self.fields[idx].as_mut() {
|
||||
let _ = field.try_push_value_ref(field_value);
|
||||
} else {
|
||||
let mut mutable_vector = self.field_types[idx]
|
||||
.create_mutable_vector(num_rows.max(INITIAL_BUILDER_CAPACITY));
|
||||
mutable_vector.push_nulls(num_rows - 1);
|
||||
let _ = mutable_vector.try_push_value_ref(field_value);
|
||||
self.fields[idx] = Some(mutable_vector);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size
|
||||
}
|
||||
|
||||
/// Returns the length of [ValueBuilder]
|
||||
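A minimal sketch of the lazy-initialization pattern in the new `ValueBuilder::push` above: a per-field builder is only allocated when the first non-null value arrives, and earlier rows are backfilled with nulls so every column keeps the same length. The `SparseColumn` type below is hypothetical and stands in for the real `MutableVector` builders.

```rust
/// Toy column that is allocated lazily: `None` until the first non-null value arrives.
struct SparseColumn {
    values: Option<Vec<Option<i64>>>,
}

impl SparseColumn {
    fn new() -> Self {
        Self { values: None }
    }

    /// `row_count` is the number of rows after this push (including it).
    fn push(&mut self, value: Option<i64>, row_count: usize) {
        if let Some(col) = self.values.as_mut() {
            // Already allocated: just append, null or not.
            col.push(value);
        } else if let Some(v) = value {
            // First non-null value: allocate and backfill earlier rows with nulls.
            let mut col = vec![None; row_count - 1];
            col.push(Some(v));
            self.values = Some(col);
        }
        // Otherwise the column is still all-null and stays unallocated.
    }
}

fn main() {
    let mut col = SparseColumn::new();
    col.push(None, 1);
    col.push(None, 2);
    col.push(Some(10), 3); // triggers allocation plus backfill of two nulls
    assert_eq!(col.values, Some(vec![None, None, Some(10)]));
}
```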
@@ -912,7 +945,7 @@ impl IterBuilder for TimeSeriesIterBuilder {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
mod tests {
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
@@ -929,7 +962,7 @@ pub(crate) mod tests {
|
||||
use super::*;
|
||||
use crate::row_converter::SortField;
|
||||
|
||||
pub(crate) fn schema_for_test() -> RegionMetadataRef {
|
||||
fn schema_for_test() -> RegionMetadataRef {
|
||||
let mut builder = RegionMetadataBuilder::new(RegionId::new(123, 456));
|
||||
builder
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
@@ -970,8 +1003,8 @@ pub(crate) mod tests {
|
||||
ValueRef::Timestamp(Timestamp::new_millisecond(val))
|
||||
}
|
||||
|
||||
fn field_value_ref(v0: i64, v1: f64) -> Vec<ValueRef<'static>> {
|
||||
vec![ValueRef::Int64(v0), ValueRef::Float64(OrderedFloat(v1))]
|
||||
fn field_value_ref(v0: i64, v1: f64) -> impl Iterator<Item = ValueRef<'static>> {
|
||||
vec![ValueRef::Int64(v0), ValueRef::Float64(OrderedFloat(v1))].into_iter()
|
||||
}
|
||||
|
||||
fn check_values(values: Values, expect: &[(i64, u64, u8, i64, f64)]) {
|
||||
@@ -1033,20 +1066,20 @@ pub(crate) mod tests {
|
||||
ts_value_ref(1),
|
||||
0,
|
||||
OpType::Put,
|
||||
vec![ValueRef::Null, ValueRef::Null],
|
||||
vec![ValueRef::Null, ValueRef::Null].into_iter(),
|
||||
);
|
||||
series.push(
|
||||
ts_value_ref(1),
|
||||
0,
|
||||
OpType::Put,
|
||||
vec![ValueRef::Int64(1), ValueRef::Null],
|
||||
vec![ValueRef::Int64(1), ValueRef::Null].into_iter(),
|
||||
);
|
||||
series.push(ts_value_ref(1), 2, OpType::Put, field_value_ref(2, 10.2));
|
||||
series.push(
|
||||
ts_value_ref(1),
|
||||
3,
|
||||
OpType::Put,
|
||||
vec![ValueRef::Int64(2), ValueRef::Null],
|
||||
vec![ValueRef::Int64(2), ValueRef::Null].into_iter(),
|
||||
);
|
||||
assert_eq!(4, series.active.timestamp.len());
|
||||
assert_eq!(0, series.frozen.len());
|
||||
@@ -1143,7 +1176,7 @@ pub(crate) mod tests {
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn build_key_values(schema: &RegionMetadataRef, k0: String, k1: i64, len: usize) -> KeyValues {
|
||||
fn build_key_values(schema: &RegionMetadataRef, k0: String, k1: i64, len: usize) -> KeyValues {
|
||||
let column_schema = schema
|
||||
.column_metadatas
|
||||
.iter()
|
||||
@@ -1186,7 +1219,6 @@ pub(crate) mod tests {
|
||||
rows,
|
||||
}),
|
||||
write_hint: None,
|
||||
bulk: Vec::new(),
|
||||
};
|
||||
KeyValues::new(schema.as_ref(), mutation).unwrap()
|
||||
}
|
||||
|
||||
@@ -26,8 +26,6 @@ pub(crate) mod scan_util;
|
||||
pub(crate) mod seq_scan;
|
||||
pub(crate) mod unordered_scan;
|
||||
|
||||
pub(crate) mod sync;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -284,23 +284,23 @@ impl MergeReaderBuilder {
|
||||
|
||||
/// Metrics for the merge reader.
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct Metrics {
|
||||
struct Metrics {
|
||||
/// Total scan cost of the reader.
|
||||
pub(crate) scan_cost: Duration,
|
||||
scan_cost: Duration,
|
||||
/// Number of times to fetch batches.
|
||||
pub(crate) num_fetch_by_batches: usize,
|
||||
num_fetch_by_batches: usize,
|
||||
/// Number of times to fetch rows.
|
||||
pub(crate) num_fetch_by_rows: usize,
|
||||
num_fetch_by_rows: usize,
|
||||
/// Number of input rows.
|
||||
pub(crate) num_input_rows: usize,
|
||||
num_input_rows: usize,
|
||||
/// Number of output rows.
|
||||
pub(crate) num_output_rows: usize,
|
||||
num_output_rows: usize,
|
||||
/// Cost to fetch batches from sources.
|
||||
pub(crate) fetch_cost: Duration,
|
||||
fetch_cost: Duration,
|
||||
}
|
||||
|
||||
/// A `Node` represent an individual input data source to be merged.
|
||||
pub(crate) struct Node {
|
||||
struct Node {
|
||||
/// Data source of this `Node`.
|
||||
source: Source,
|
||||
/// Current batch to be read. The node ensures the batch is not empty.
|
||||
@@ -313,7 +313,7 @@ impl Node {
|
||||
/// Initialize a node.
|
||||
///
|
||||
/// It tries to fetch one batch from the `source`.
|
||||
pub(crate) async fn new(mut source: Source, metrics: &mut Metrics) -> Result<Node> {
|
||||
async fn new(mut source: Source, metrics: &mut Metrics) -> Result<Node> {
|
||||
// Ensures batch is not empty.
|
||||
let start = Instant::now();
|
||||
let current_batch = source.next_batch().await?.map(CompareFirst);
|
||||
@@ -432,7 +432,7 @@ impl Ord for Node {
|
||||
/// Type to compare [Batch] by first row.
|
||||
///
|
||||
/// It ignores op type as sequence is enough to distinguish different rows.
|
||||
pub(crate) struct CompareFirst(pub(crate) Batch);
|
||||
struct CompareFirst(Batch);
|
||||
|
||||
impl PartialEq for CompareFirst {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
|
||||
@@ -1,16 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod merge;
pub mod dedup;
@@ -1,84 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Sync dedup reader implementation
|
||||
|
||||
use common_telemetry::debug;
|
||||
|
||||
use crate::metrics::MERGE_FILTER_ROWS_TOTAL;
|
||||
use crate::read::dedup::{DedupMetrics, DedupStrategy};
|
||||
use crate::read::Batch;
|
||||
|
||||
/// A sync version of reader that dedup sorted batches from a source based on the
|
||||
/// dedup strategy.
|
||||
pub(crate) struct DedupReader<R, S> {
|
||||
source: R,
|
||||
strategy: S,
|
||||
metrics: DedupMetrics,
|
||||
}
|
||||
|
||||
impl<R, S> DedupReader<R, S> {
|
||||
/// Creates a new dedup reader.
|
||||
pub(crate) fn new(source: R, strategy: S) -> Self {
|
||||
Self {
|
||||
source,
|
||||
strategy,
|
||||
metrics: DedupMetrics::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Iterator<Item = crate::error::Result<Batch>>, S: DedupStrategy> DedupReader<R, S> {
|
||||
/// Returns the next deduplicated batch.
|
||||
fn fetch_next_batch(&mut self) -> Option<crate::error::Result<Batch>> {
|
||||
while let Some(res) = self.source.next() {
|
||||
match res {
|
||||
Ok(batch) => {
|
||||
if let Some(batch) = self
|
||||
.strategy
|
||||
.push_batch(batch, &mut self.metrics)
|
||||
.transpose()
|
||||
{
|
||||
return Some(batch);
|
||||
}
|
||||
}
|
||||
Err(err) => return Some(Err(err)),
|
||||
}
|
||||
}
|
||||
self.strategy.finish(&mut self.metrics).transpose()
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Iterator<Item = crate::error::Result<Batch>>, S: DedupStrategy> Iterator
|
||||
for DedupReader<R, S>
|
||||
{
|
||||
type Item = crate::error::Result<Batch>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.fetch_next_batch()
|
||||
}
|
||||
}
|
||||
|
||||
impl<R, S> Drop for DedupReader<R, S> {
|
||||
fn drop(&mut self) {
|
||||
debug!("Sync dedup reader finished, metrics: {:?}", self.metrics);
|
||||
|
||||
MERGE_FILTER_ROWS_TOTAL
|
||||
.with_label_values(&["dedup"])
|
||||
.inc_by(self.metrics.num_unselected_rows as u64);
|
||||
MERGE_FILTER_ROWS_TOTAL
|
||||
.with_label_values(&["delete"])
|
||||
.inc_by(self.metrics.num_unselected_rows as u64);
|
||||
}
|
||||
}
|
||||
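The deleted sync dedup reader above is essentially an iterator adapter: it pulls batches from an inner iterator, lets a strategy drop duplicate rows, and skips batches that dedup down to nothing. A simplified standalone sketch of that shape, using plain `Vec<i64>` batches and a keep-first-occurrence strategy (all names here are illustrative, and the real reader additionally flushes the strategy when the source ends):

```rust
/// Keeps only the first occurrence of each key within and across sorted batches,
/// mirroring "last row wins" when rows are sorted by key and sequence descending.
struct LastKeyWins {
    last_key: Option<i64>,
}

impl LastKeyWins {
    fn push_batch(&mut self, batch: Vec<i64>) -> Option<Vec<i64>> {
        let mut out = Vec::new();
        for key in batch {
            if self.last_key != Some(key) {
                out.push(key);
                self.last_key = Some(key);
            }
        }
        (!out.is_empty()).then_some(out)
    }
}

/// Iterator adapter that skips batches the strategy filtered down to nothing.
struct DedupIter<I> {
    source: I,
    strategy: LastKeyWins,
}

impl<I: Iterator<Item = Vec<i64>>> Iterator for DedupIter<I> {
    type Item = Vec<i64>;

    fn next(&mut self) -> Option<Self::Item> {
        // Keep pulling until the strategy emits a non-empty batch or the source ends.
        while let Some(batch) = self.source.next() {
            if let Some(deduped) = self.strategy.push_batch(batch) {
                return Some(deduped);
            }
        }
        None
    }
}

fn main() {
    let batches = vec![vec![1, 1, 2], vec![2, 3], vec![3]];
    let iter = DedupIter {
        source: batches.into_iter(),
        strategy: LastKeyWins { last_key: None },
    };
    assert_eq!(iter.collect::<Vec<_>>(), vec![vec![1, 2], vec![3]]);
}
```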
@@ -1,384 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Sync merge reader implementation.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::time::Instant;
|
||||
|
||||
use common_telemetry::debug;
|
||||
|
||||
use crate::error;
|
||||
use crate::memtable::BoxedBatchIterator;
|
||||
use crate::metrics::READ_STAGE_ELAPSED;
|
||||
use crate::read::{Batch};
|
||||
use crate::read::merge::{CompareFirst, Metrics};
|
||||
|
||||
/// A `Node` represent an individual input data source to be merged.
|
||||
pub(crate) struct Node {
|
||||
/// Data source of this `Node`.
|
||||
source: BoxedBatchIterator,
|
||||
/// Current batch to be read. The node ensures the batch is not empty.
|
||||
///
|
||||
/// `None` means the `source` has reached EOF.
|
||||
current_batch: Option<CompareFirst>,
|
||||
}
|
||||
|
||||
impl Node {
|
||||
/// Initialize a node.
|
||||
///
|
||||
/// It tries to fetch one batch from the `source`.
|
||||
pub(crate) fn new(
|
||||
mut source: BoxedBatchIterator,
|
||||
metrics: &mut Metrics,
|
||||
) -> error::Result<Node> {
|
||||
// Ensures batch is not empty.
|
||||
let start = Instant::now();
|
||||
let current_batch = source.next().transpose()?.map(CompareFirst);
|
||||
metrics.fetch_cost += start.elapsed();
|
||||
metrics.num_input_rows += current_batch.as_ref().map(|b| b.0.num_rows()).unwrap_or(0);
|
||||
|
||||
Ok(Node {
|
||||
source,
|
||||
current_batch,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns whether the node still has batch to read.
|
||||
fn is_eof(&self) -> bool {
|
||||
self.current_batch.is_none()
|
||||
}
|
||||
|
||||
/// Returns the primary key of current batch.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the node has reached EOF.
|
||||
fn primary_key(&self) -> &[u8] {
|
||||
self.current_batch().primary_key()
|
||||
}
|
||||
|
||||
/// Returns current batch.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the node has reached EOF.
|
||||
fn current_batch(&self) -> &Batch {
|
||||
&self.current_batch.as_ref().unwrap().0
|
||||
}
|
||||
|
||||
/// Returns current batch and fetches next batch
|
||||
/// from the source.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the node has reached EOF.
|
||||
fn fetch_batch(&mut self, metrics: &mut Metrics) -> error::Result<Batch> {
|
||||
let current = self.current_batch.take().unwrap();
|
||||
let start = Instant::now();
|
||||
// Ensures batch is not empty.
|
||||
self.current_batch = self.source.next().transpose()?.map(CompareFirst);
|
||||
metrics.fetch_cost += start.elapsed();
|
||||
metrics.num_input_rows += self
|
||||
.current_batch
|
||||
.as_ref()
|
||||
.map(|b| b.0.num_rows())
|
||||
.unwrap_or(0);
|
||||
Ok(current.0)
|
||||
}
|
||||
|
||||
/// Returns true if the key range of current batch in `self` is behind (exclusive) current
|
||||
/// batch in `other`.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if either `self` or `other` is EOF.
|
||||
fn is_behind(&self, other: &Node) -> bool {
|
||||
debug_assert!(!self.current_batch().is_empty());
|
||||
debug_assert!(!other.current_batch().is_empty());
|
||||
|
||||
// We only compare pk and timestamp so nodes in the cold
|
||||
// heap don't have overlapping timestamps with the hottest node
|
||||
// in the hot heap.
|
||||
self.primary_key().cmp(other.primary_key()).then_with(|| {
|
||||
self.current_batch()
|
||||
.first_timestamp()
|
||||
.cmp(&other.current_batch().last_timestamp())
|
||||
}) == Ordering::Greater
|
||||
}
|
||||
|
||||
/// Skips first `num_to_skip` rows from node's current batch. If current batch is empty it fetches
|
||||
/// next batch from the node.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the node is EOF.
|
||||
fn skip_rows(&mut self, num_to_skip: usize, metrics: &mut Metrics) -> error::Result<()> {
|
||||
let batch = self.current_batch();
|
||||
debug_assert!(batch.num_rows() >= num_to_skip);
|
||||
|
||||
let remaining = batch.num_rows() - num_to_skip;
|
||||
if remaining == 0 {
|
||||
// Nothing remains, we need to fetch next batch to ensure the batch is not empty.
|
||||
self.fetch_batch(metrics)?;
|
||||
} else {
|
||||
debug_assert!(!batch.is_empty());
|
||||
self.current_batch = Some(CompareFirst(batch.slice(num_to_skip, remaining)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Node {
|
||||
fn eq(&self, other: &Node) -> bool {
|
||||
self.current_batch == other.current_batch
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for Node {}
|
||||
|
||||
impl PartialOrd for Node {
|
||||
fn partial_cmp(&self, other: &Node) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Node {
|
||||
fn cmp(&self, other: &Node) -> Ordering {
|
||||
        // The std binary heap is a max heap, but we want the nodes ordered in
        // ascending order, so we compare the nodes in reverse order.
|
||||
other.current_batch.cmp(&self.current_batch)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reader to merge sorted batches.
|
||||
///
|
||||
/// The merge reader merges [Batch]es from multiple sources that yield sorted batches.
|
||||
/// 1. Batch is ordered by primary key, time index, sequence desc, op type desc (we can
|
||||
/// ignore op type as sequence is already unique).
|
||||
/// 2. Batches from sources **must** not be empty.
|
||||
///
|
||||
/// The reader won't concatenate batches. Each batch returned by the reader also doesn't
|
||||
/// contain duplicate rows. But the last (primary key, timestamp) of a batch may be the same
|
||||
/// as the first one in the next batch.
|
||||
pub struct MergeReader {
|
||||
/// Holds [Node]s whose key range of current batch **is** overlapped with the merge window.
|
||||
/// Each node yields batches from a `source`.
|
||||
///
|
||||
/// [Node] in this heap **must** not be empty. A `merge window` is the (primary key, timestamp)
|
||||
/// range of the **root node** in the `hot` heap.
|
||||
hot: BinaryHeap<Node>,
|
||||
/// Holds `Node` whose key range of current batch **isn't** overlapped with the merge window.
|
||||
///
|
||||
/// `Node` in this heap **must** not be empty.
|
||||
cold: BinaryHeap<Node>,
|
||||
/// Batch to output.
|
||||
output_batch: Option<Batch>,
|
||||
/// Local metrics.
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
impl Drop for MergeReader {
|
||||
fn drop(&mut self) {
|
||||
debug!("Merge reader(sync) finished, metrics: {:?}", self.metrics);
|
||||
|
||||
READ_STAGE_ELAPSED
|
||||
.with_label_values(&["merge"])
|
||||
.observe(self.metrics.scan_cost.as_secs_f64());
|
||||
READ_STAGE_ELAPSED
|
||||
.with_label_values(&["merge_fetch"])
|
||||
.observe(self.metrics.fetch_cost.as_secs_f64());
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for MergeReader {
|
||||
type Item = error::Result<Batch>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let start = Instant::now();
|
||||
while !self.hot.is_empty() && self.output_batch.is_none() {
|
||||
if self.hot.len() == 1 {
|
||||
// No need to do merge sort if only one batch in the hot heap.
|
||||
if let Err(e) = self.fetch_batch_from_hottest() {
|
||||
return Some(Err(e));
|
||||
}
|
||||
self.metrics.num_fetch_by_batches += 1;
|
||||
} else {
|
||||
// We could only fetch rows that less than the next node from the hottest node.
|
||||
if let Err(e) = self.fetch_rows_from_hottest() {
|
||||
return Some(Err(e));
|
||||
}
|
||||
self.metrics.num_fetch_by_rows += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(batch) = self.output_batch.take() {
|
||||
self.metrics.scan_cost += start.elapsed();
|
||||
self.metrics.num_output_rows += batch.num_rows();
|
||||
Some(Ok(batch))
|
||||
} else {
|
||||
// Nothing fetched.
|
||||
self.metrics.scan_cost += start.elapsed();
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MergeReader {
|
||||
/// Creates and initializes a new [MergeReader].
|
||||
pub fn new(sources: Vec<BoxedBatchIterator>) -> error::Result<MergeReader> {
|
||||
let start = Instant::now();
|
||||
let mut metrics = Metrics::default();
|
||||
|
||||
let mut cold = BinaryHeap::with_capacity(sources.len());
|
||||
let hot = BinaryHeap::with_capacity(sources.len());
|
||||
for source in sources {
|
||||
let node = Node::new(source, &mut metrics)?;
|
||||
if !node.is_eof() {
|
||||
// Ensure `cold` don't have eof nodes.
|
||||
cold.push(node);
|
||||
}
|
||||
}
|
||||
|
||||
let mut reader = MergeReader {
|
||||
hot,
|
||||
cold,
|
||||
output_batch: None,
|
||||
metrics,
|
||||
};
|
||||
// Initializes the reader.
|
||||
reader.refill_hot();
|
||||
|
||||
reader.metrics.scan_cost += start.elapsed();
|
||||
Ok(reader)
|
||||
}
|
||||
|
||||
/// Moves nodes in `cold` heap, whose key range is overlapped with current merge
|
||||
/// window to `hot` heap.
|
||||
fn refill_hot(&mut self) {
|
||||
while !self.cold.is_empty() {
|
||||
if let Some(merge_window) = self.hot.peek() {
|
||||
let warmest = self.cold.peek().unwrap();
|
||||
if warmest.is_behind(merge_window) {
|
||||
// if the warmest node in the `cold` heap is totally after the
|
||||
// `merge_window`, then no need to add more nodes into the `hot`
|
||||
// heap for merge sorting.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let warmest = self.cold.pop().unwrap();
|
||||
self.hot.push(warmest);
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches one batch from the hottest node.
|
||||
fn fetch_batch_from_hottest(&mut self) -> error::Result<()> {
|
||||
assert_eq!(1, self.hot.len());
|
||||
|
||||
let mut hottest = self.hot.pop().unwrap();
|
||||
let batch = hottest.fetch_batch(&mut self.metrics)?;
|
||||
Self::maybe_output_batch(batch, &mut self.output_batch)?;
|
||||
self.reheap(hottest)
|
||||
}
|
||||
|
||||
/// Fetches non-duplicated rows from the hottest node.
|
||||
fn fetch_rows_from_hottest(&mut self) -> error::Result<()> {
|
||||
// Safety: `fetch_batches_to_output()` ensures the hot heap has more than 1 element.
|
||||
// Pop hottest node.
|
||||
let mut top_node = self.hot.pop().unwrap();
|
||||
let top = top_node.current_batch();
|
||||
// Min timestamp and its sequence in the next batch.
|
||||
let next_min_ts = {
|
||||
let next_node = self.hot.peek().unwrap();
|
||||
let next = next_node.current_batch();
|
||||
// top and next have overlapping rows so they must have same primary keys.
|
||||
debug_assert_eq!(top.primary_key(), next.primary_key());
|
||||
// Safety: Batches in the heap is not empty, so we can use unwrap here.
|
||||
next.first_timestamp().unwrap()
|
||||
};
|
||||
|
||||
// Safety: Batches in the heap is not empty, so we can use unwrap here.
|
||||
let timestamps = top.timestamps_native().unwrap();
|
||||
// Binary searches the timestamp in the top batch.
|
||||
// Safety: Batches should have the same timestamp resolution so we can compare the native
|
||||
// value directly.
|
||||
let duplicate_pos = match timestamps.binary_search(&next_min_ts.value()) {
|
||||
Ok(pos) => pos,
|
||||
Err(pos) => {
|
||||
// No duplicate timestamp. Outputs timestamp before `pos`.
|
||||
Self::maybe_output_batch(top.slice(0, pos), &mut self.output_batch)?;
|
||||
top_node.skip_rows(pos, &mut self.metrics)?;
|
||||
return self.reheap(top_node);
|
||||
}
|
||||
};
|
||||
|
||||
// No need to remove duplicate timestamps.
|
||||
let output_end = if duplicate_pos == 0 {
|
||||
// If the first timestamp of the top node is duplicate. We can simply return the first row
|
||||
// as the heap ensure it is the one with largest sequence.
|
||||
1
|
||||
} else {
|
||||
// We don't know which one has the larger sequence so we use the range before
|
||||
// the duplicate pos.
|
||||
duplicate_pos
|
||||
};
|
||||
Self::maybe_output_batch(top.slice(0, output_end), &mut self.output_batch)?;
|
||||
top_node.skip_rows(output_end, &mut self.metrics)?;
|
||||
self.reheap(top_node)
|
||||
}
|
||||
|
||||
/// Push the node popped from `hot` back to a proper heap.
|
||||
fn reheap(&mut self, node: Node) -> crate::error::Result<()> {
|
||||
if node.is_eof() {
|
||||
// If the node is EOF, don't put it into the heap again.
|
||||
// The merge window would be updated, need to refill the hot heap.
|
||||
self.refill_hot();
|
||||
} else {
|
||||
// Find a proper heap for this node.
|
||||
let node_is_cold = if let Some(hottest) = self.hot.peek() {
|
||||
// If key range of this node is behind the hottest node's then we can
|
||||
// push it to the cold heap. Otherwise we should push it to the hot heap.
|
||||
node.is_behind(hottest)
|
||||
} else {
|
||||
                // The hot heap is empty, but we don't know whether the current
                // batch of this node is still the hottest.
|
||||
true
|
||||
};
|
||||
|
||||
if node_is_cold {
|
||||
self.cold.push(node);
|
||||
} else {
|
||||
self.hot.push(node);
|
||||
}
|
||||
// Anyway, the merge window has been changed, we need to refill the hot heap.
|
||||
self.refill_hot();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// If `filter_deleted` is set to true, removes deleted entries and sets the `batch` to the `output_batch`.
|
||||
///
|
||||
/// Ignores the `batch` if it is empty.
|
||||
fn maybe_output_batch(
|
||||
batch: Batch,
|
||||
output_batch: &mut Option<Batch>,
|
||||
) -> crate::error::Result<()> {
|
||||
debug_assert!(output_batch.is_none());
|
||||
if batch.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
*output_batch = Some(batch);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
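The deleted sync `MergeReader` above performs a k-way merge over sorted sources; the hot/cold heaps are an optimization that keeps sources whose key ranges cannot overlap the current merge window out of the comparisons. A stripped-down sketch of the underlying k-way merge, with a single min-heap and one element at a time instead of batches (names are illustrative):

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

/// Merges already-sorted iterators into one sorted stream.
/// The heap holds (next_value, source_index); `Reverse` turns the std max-heap
/// into a min-heap, much like the reversed `Ord` used by the reader above.
fn merge_sorted(mut sources: Vec<std::vec::IntoIter<i64>>) -> Vec<i64> {
    let mut heap = BinaryHeap::new();
    // Seed the heap with the first element of every non-empty source.
    for (idx, source) in sources.iter_mut().enumerate() {
        if let Some(value) = source.next() {
            heap.push(Reverse((value, idx)));
        }
    }

    let mut output = Vec::new();
    while let Some(Reverse((value, idx))) = heap.pop() {
        output.push(value);
        // Refill from the source we just consumed so every non-exhausted
        // source always has exactly one candidate in the heap.
        if let Some(next) = sources[idx].next() {
            heap.push(Reverse((next, idx)));
        }
    }
    output
}

fn main() {
    let merged = merge_sorted(vec![
        vec![1, 4, 7].into_iter(),
        vec![2, 5].into_iter(),
        vec![3, 6, 8].into_iter(),
    ]);
    assert_eq!(merged, vec![1, 2, 3, 4, 5, 6, 7, 8]);
}
```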
@@ -544,19 +544,17 @@ where
|
||||
.as_ref()
|
||||
.map(|rows| rows.rows.len())
|
||||
.unwrap_or(0);
|
||||
// TODO(yingwen): We need to support schema change as bulk may have different schema.
|
||||
region_write_ctx.push_mutation(
|
||||
mutation.op_type,
|
||||
mutation.rows,
|
||||
mutation.write_hint,
|
||||
mutation.bulk,
|
||||
OptionOutputTx::none(),
|
||||
);
|
||||
}
|
||||
|
||||
// set next_entry_id and write to memtable.
|
||||
region_write_ctx.set_next_entry_id(last_entry_id + 1);
|
||||
region_write_ctx.write_memtable();
|
||||
region_write_ctx.write_memtable().await;
|
||||
}
|
||||
|
||||
// TODO(weny): We need to update `flushed_entry_id` in the region manifest
|
||||
|
||||
@@ -16,19 +16,16 @@ use std::mem;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::{Mutation, OpType, Rows, WalEntry, WriteHint};
|
||||
use futures::future::try_join_all;
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use snafu::ResultExt;
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::logstore::provider::Provider;
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::storage::{RegionId, SequenceNumber};
|
||||
|
||||
use crate::error::{Error, JoinSnafu, Result, WriteGroupSnafu};
|
||||
use crate::memtable::bulk::part::BulkPartEncoder;
|
||||
use crate::memtable::{BulkPart, KeyValues};
|
||||
use crate::error::{Error, Result, WriteGroupSnafu};
|
||||
use crate::memtable::KeyValues;
|
||||
use crate::region::version::{VersionControlData, VersionControlRef, VersionRef};
|
||||
use crate::request::OptionOutputTx;
|
||||
use crate::sst::parquet::DEFAULT_ROW_GROUP_SIZE;
|
||||
use crate::wal::{EntryId, WalWriter};
|
||||
|
||||
/// Notifier to notify write result on drop.
|
||||
@@ -97,8 +94,6 @@ pub(crate) struct RegionWriteCtx {
|
||||
notifiers: Vec<WriteNotify>,
|
||||
/// The write operation is failed and we should not write to the mutable memtable.
|
||||
failed: bool,
|
||||
/// Bulk parts to write to the memtable.
|
||||
bulk_parts: Vec<Option<BulkPart>>,
|
||||
|
||||
// Metrics:
|
||||
/// Rows to put.
|
||||
@@ -131,7 +126,6 @@ impl RegionWriteCtx {
|
||||
provider,
|
||||
notifiers: Vec::new(),
|
||||
failed: false,
|
||||
bulk_parts: Vec::new(),
|
||||
put_num: 0,
|
||||
delete_num: 0,
|
||||
}
|
||||
@@ -143,7 +137,6 @@ impl RegionWriteCtx {
|
||||
op_type: i32,
|
||||
rows: Option<Rows>,
|
||||
write_hint: Option<WriteHint>,
|
||||
bulk: Vec<u8>,
|
||||
tx: OptionOutputTx,
|
||||
) {
|
||||
let num_rows = rows.as_ref().map(|rows| rows.rows.len()).unwrap_or(0);
|
||||
@@ -152,7 +145,6 @@ impl RegionWriteCtx {
|
||||
sequence: self.next_sequence,
|
||||
rows,
|
||||
write_hint,
|
||||
bulk,
|
||||
});
|
||||
|
||||
let notify = WriteNotify::new(tx, num_rows);
|
||||
@@ -206,43 +198,43 @@ impl RegionWriteCtx {
|
||||
}
|
||||
|
||||
/// Consumes mutations and writes them into mutable memtable.
|
||||
pub(crate) fn write_memtable(&mut self) {
|
||||
pub(crate) async fn write_memtable(&mut self) {
|
||||
debug_assert_eq!(self.notifiers.len(), self.wal_entry.mutations.len());
|
||||
|
||||
if self.failed {
|
||||
return;
|
||||
}
|
||||
|
||||
let mutable = &self.version.memtables.mutable;
|
||||
let mutable = self.version.memtables.mutable.clone();
|
||||
let mutations = mem::take(&mut self.wal_entry.mutations)
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, mutation)| {
|
||||
let kvs = KeyValues::new(&self.version.metadata, mutation)?;
|
||||
Some((i, kvs))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if self.version().metadata.primary_key_encoding == PrimaryKeyEncoding::Dense {
|
||||
let mutations = mem::take(&mut self.wal_entry.mutations);
|
||||
for (mutation, notify) in mutations.into_iter().zip(&mut self.notifiers) {
|
||||
// Write mutation to the memtable.
|
||||
let Some(kvs) = KeyValues::new(&self.version.metadata, mutation) else {
|
||||
continue;
|
||||
};
|
||||
if let Err(e) = mutable.write(&kvs) {
|
||||
notify.err = Some(Arc::new(e));
|
||||
}
|
||||
if mutations.len() == 1 {
|
||||
if let Err(err) = mutable.write(&mutations[0].1) {
|
||||
self.notifiers[mutations[0].0].err = Some(Arc::new(err));
|
||||
}
|
||||
} else {
|
||||
// Takes mutations from the wal entry.
|
||||
let bulk_parts = mem::take(&mut self.bulk_parts);
|
||||
for (bulk_part, notify) in bulk_parts.into_iter().zip(&mut self.notifiers) {
|
||||
// Write mutation to the memtable.
|
||||
let Some(bulk_part) = bulk_part else {
|
||||
continue;
|
||||
};
|
||||
if let Err(e) = mutable.write_bulk(bulk_part) {
|
||||
notify.err = Some(Arc::new(e));
|
||||
let mut tasks = FuturesUnordered::new();
|
||||
for (i, kvs) in mutations {
|
||||
let mutable = mutable.clone();
|
||||
// use tokio runtime to schedule tasks.
|
||||
tasks.push(common_runtime::spawn_blocking_global(move || {
|
||||
(i, mutable.write(&kvs))
|
||||
}));
|
||||
}
|
||||
|
||||
while let Some(result) = tasks.next().await {
|
||||
// first unwrap the result from `spawn` above
|
||||
let (i, result) = result.unwrap();
|
||||
if let Err(err) = result {
|
||||
self.notifiers[i].err = Some(Arc::new(err));
|
||||
}
|
||||
// let Some(kvs) = KeyValues::new(&self.version.metadata, mutation) else {
|
||||
// continue;
|
||||
// };
|
||||
// if let Err(e) = mutable.write(&kvs) {
|
||||
// notify.err = Some(Arc::new(e));
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
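A minimal sketch of the pattern used by the new async `write_memtable` above: when there are multiple mutations, each write moves onto a blocking task and any error is recorded against the notifier slot with the same index. It assumes `tokio` (with the `macros` and `rt-multi-thread` features) and the `futures` crate; `write_one` and the `String` error type are placeholders for the real memtable write and error type.

```rust
use futures::stream::{FuturesUnordered, StreamExt};

/// Placeholder for a single (possibly CPU-heavy) memtable write.
fn write_one(payload: Vec<u8>) -> Result<(), String> {
    if payload.is_empty() {
        Err("empty mutation".to_string())
    } else {
        Ok(())
    }
}

#[tokio::main]
async fn main() {
    let mutations = vec![vec![1u8, 2, 3], vec![], vec![4u8]];
    // One error slot per mutation, filled in by index as tasks finish.
    let mut errors: Vec<Option<String>> = vec![None; mutations.len()];

    let mut tasks = FuturesUnordered::new();
    for (i, payload) in mutations.into_iter().enumerate() {
        // The blocking pool keeps heavy writes off the async worker threads.
        tasks.push(tokio::task::spawn_blocking(move || (i, write_one(payload))));
    }

    while let Some(joined) = tasks.next().await {
        // First unwrap the join result, then record the write outcome by index.
        let (i, result) = joined.expect("write task panicked");
        if let Err(err) = result {
            errors[i] = Some(err);
        }
    }

    assert!(errors[0].is_none() && errors[1].is_some() && errors[2].is_none());
}
```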
@@ -251,35 +243,4 @@ impl RegionWriteCtx {
self.version_control
.set_sequence_and_entry_id(self.next_sequence - 1, self.next_entry_id - 1);
}

/// Encodes mutations into bulks and clears rows.
pub(crate) async fn encode_bulks(&mut self) -> Result<()> {
let mut tasks = Vec::with_capacity(self.wal_entry.mutations.len());
for mutation in self.wal_entry.mutations.drain(..) {
let metadata = self.version.metadata.clone();
let task = common_runtime::spawn_global(async move {
let encoder = BulkPartEncoder::new(metadata, true, DEFAULT_ROW_GROUP_SIZE);
let mutations = [mutation];
let part_opt = encoder.encode_mutations(&mutations)?;
let [mut mutation] = mutations;
// TODO(yingwen): This require clone the data, we should avoid this.
mutation.bulk = part_opt
.as_ref()
.map(|part| part.data.to_vec())
.unwrap_or_default();
mutation.rows = None;
Ok((part_opt, mutation))
});
tasks.push(task);
}

let results = try_join_all(tasks).await.context(JoinSnafu)?;
for result in results {
let (part_opt, mutation) = result?;
self.wal_entry.mutations.push(mutation);
self.bulk_parts.push(part_opt);
}

Ok(())
}
}

@@ -66,13 +66,20 @@ pub struct WriteRequest {
has_null: Vec<bool>,
/// Write hint.
pub hint: Option<WriteHint>,
/// Region metadata on the time of this request is created.
pub(crate) region_metadata: Option<RegionMetadataRef>,
}

impl WriteRequest {
/// Creates a new request.
///
/// Returns `Err` if `rows` are invalid.
pub fn new(region_id: RegionId, op_type: OpType, rows: Rows) -> Result<WriteRequest> {
pub fn new(
region_id: RegionId,
op_type: OpType,
rows: Rows,
region_metadata: Option<RegionMetadataRef>,
) -> Result<WriteRequest> {
let mut name_to_index = HashMap::with_capacity(rows.schema.len());
for (index, column) in rows.schema.iter().enumerate() {
ensure!(
@@ -116,6 +123,7 @@ impl WriteRequest {
name_to_index,
has_null,
hint: None,
region_metadata,
})
}

@@ -248,46 +256,67 @@ impl WriteRequest {
pub(crate) fn fill_missing_columns(&mut self, metadata: &RegionMetadata) -> Result<()> {
debug_assert_eq!(self.region_id, metadata.region_id);

let mut columns_to_fill = vec![];
for column in &metadata.column_metadatas {
if !self.name_to_index.contains_key(&column.column_schema.name) {
self.fill_column(column)?;
columns_to_fill.push(column);
}
}
self.fill_columns(columns_to_fill)?;

Ok(())
}

/// Checks the schema and fill missing columns.
pub(crate) fn maybe_fill_missing_columns(&mut self, metadata: &RegionMetadata) -> Result<()> {
if let Err(e) = self.check_schema(metadata) {
if e.is_fill_default() {
// TODO(yingwen): Add metrics for this case.
// We need to fill default value. The write request may be a request
// sent before changing the schema.
self.fill_missing_columns(metadata)?;
} else {
return Err(e);
}
}

Ok(())
}

/// Fills default value for specific `column`.
fn fill_column(&mut self, column: &ColumnMetadata) -> Result<()> {
// Need to add a default value for this column.
let proto_value = self.column_default_value(column)?;

if proto_value.value_data.is_none() {
return Ok(());
/// Fills default value for specific `columns`.
fn fill_columns(&mut self, columns: Vec<&ColumnMetadata>) -> Result<()> {
let mut default_values = Vec::with_capacity(columns.len());
let mut columns_to_fill = Vec::with_capacity(columns.len());
for column in columns {
let default_value = self.column_default_value(column)?;
if default_value.value_data.is_some() {
default_values.push(default_value);
columns_to_fill.push(column);
}
}

// Insert default value to each row.
for row in &mut self.rows.rows {
row.values.push(proto_value.clone());
row.values.extend(default_values.iter().cloned());
}

// Insert column schema.
let (datatype, datatype_ext) =
ColumnDataTypeWrapper::try_from(column.column_schema.data_type.clone())
.with_context(|_| ConvertColumnDataTypeSnafu {
reason: format!(
"no protobuf type for column {} ({:?})",
column.column_schema.name, column.column_schema.data_type
),
})?
.to_parts();
self.rows.schema.push(ColumnSchema {
column_name: column.column_schema.name.clone(),
datatype: datatype as i32,
semantic_type: column.semantic_type as i32,
datatype_extension: datatype_ext,
options: options_from_column_schema(&column.column_schema),
});
for column in columns_to_fill {
let (datatype, datatype_ext) =
ColumnDataTypeWrapper::try_from(column.column_schema.data_type.clone())
.with_context(|_| ConvertColumnDataTypeSnafu {
reason: format!(
"no protobuf type for column {} ({:?})",
column.column_schema.name, column.column_schema.data_type
),
})?
.to_parts();
self.rows.schema.push(ColumnSchema {
column_name: column.column_schema.name.clone(),
datatype: datatype as i32,
semantic_type: column.semantic_type as i32,
datatype_extension: datatype_ext,
options: options_from_column_schema(&column.column_schema),
});
}

Ok(())
}
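The rewrite above batches the default-value fill: missing columns are collected first, each row is extended once, and the corresponding schema entries are appended in the same pass. A rough sketch of that batching idea follows, using plain vectors in place of the crate's protobuf Row/Value types; the function and parameter names are hypothetical.

// Illustrative only: `rows` stands in for Rows::rows and `defaults` for the
// per-column default values produced by `column_default_value`.
fn fill_defaults(rows: &mut [Vec<Option<i64>>], defaults: &[Option<i64>]) {
    // Keep only defaults that actually carry a value, mirroring the
    // `value_data.is_some()` check in `fill_columns`.
    let to_append: Vec<Option<i64>> =
        defaults.iter().filter(|d| d.is_some()).cloned().collect();
    // One `extend` per row instead of one `push` per (row, column) pair.
    for row in rows.iter_mut() {
        row.extend(to_append.iter().cloned());
    }
}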
@@ -559,19 +588,32 @@ impl WorkerRequest {
pub(crate) fn try_from_region_request(
region_id: RegionId,
value: RegionRequest,
region_metadata: Option<RegionMetadataRef>,
) -> Result<(WorkerRequest, Receiver<Result<AffectedRows>>)> {
let (sender, receiver) = oneshot::channel();
let worker_request = match value {
RegionRequest::Put(v) => {
let write_request =
WriteRequest::new(region_id, OpType::Put, v.rows)?.with_hint(v.hint);
let mut write_request =
WriteRequest::new(region_id, OpType::Put, v.rows, region_metadata.clone())?
.with_hint(v.hint);
if write_request.primary_key_encoding() == PrimaryKeyEncoding::Dense
&& let Some(region_metadata) = &region_metadata
{
write_request.maybe_fill_missing_columns(region_metadata)?;
}
WorkerRequest::Write(SenderWriteRequest {
sender: sender.into(),
request: write_request,
})
}
RegionRequest::Delete(v) => {
let write_request = WriteRequest::new(region_id, OpType::Delete, v.rows)?;
let mut write_request =
WriteRequest::new(region_id, OpType::Delete, v.rows, region_metadata.clone())?;
if write_request.primary_key_encoding() == PrimaryKeyEncoding::Dense
&& let Some(region_metadata) = &region_metadata
{
write_request.maybe_fill_missing_columns(region_metadata)?;
}
WorkerRequest::Write(SenderWriteRequest {
sender: sender.into(),
request: write_request,
@@ -875,7 +917,7 @@ mod tests {
rows: vec![],
};

let err = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap_err();
let err = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap_err();
check_invalid_request(&err, "duplicate column c0");
}

@@ -891,7 +933,7 @@ mod tests {
}],
};

let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
assert_eq!(0, request.column_index_by_name("c0").unwrap());
assert_eq!(1, request.column_index_by_name("c1").unwrap());
assert_eq!(None, request.column_index_by_name("c2"));
@@ -909,7 +951,7 @@ mod tests {
}],
};

let err = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap_err();
let err = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap_err();
check_invalid_request(&err, "row has 3 columns but schema has 2");
}

@@ -955,7 +997,7 @@ mod tests {
};
let metadata = new_region_metadata();

let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
request.check_schema(&metadata).unwrap();
}

@@ -972,7 +1014,7 @@ mod tests {
};
let metadata = new_region_metadata();

let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
check_invalid_request(&err, "column ts expect type Timestamp(Millisecond(TimestampMillisecondType)), given: INT64(4)");
}
@@ -994,7 +1036,7 @@ mod tests {
};
let metadata = new_region_metadata();

let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
check_invalid_request(&err, "column ts has semantic type Timestamp, given: TAG(0)");
}
@@ -1016,7 +1058,7 @@ mod tests {
};
let metadata = new_region_metadata();

let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
check_invalid_request(&err, "column ts is not null but input has null");
}
@@ -1035,7 +1077,7 @@ mod tests {
};
let metadata = new_region_metadata();

let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
check_invalid_request(&err, "missing column ts");
}
@@ -1058,7 +1100,7 @@ mod tests {
};
let metadata = new_region_metadata();

let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
check_invalid_request(&err, r#"unknown columns: ["k1"]"#);
}
@@ -1104,7 +1146,7 @@ mod tests {
builder.build().unwrap()
};

let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
assert!(err.is_fill_default());
assert!(request
@@ -1128,7 +1170,7 @@ mod tests {
};
let metadata = new_region_metadata();

let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
assert!(err.is_fill_default());
request.fill_missing_columns(&metadata).unwrap();
@@ -1214,7 +1256,8 @@ mod tests {
};
let metadata = region_metadata_two_fields();

let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows).unwrap();
let mut request =
WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
check_invalid_request(&err, "delete requests need column k0");
let err = request.fill_missing_columns(&metadata).unwrap_err();
@@ -1233,7 +1276,8 @@ mod tests {
values: vec![i64_value(100), ts_ms_value(1)],
}],
};
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows).unwrap();
let mut request =
WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
assert!(err.is_fill_default());
request.fill_missing_columns(&metadata).unwrap();
@@ -1296,7 +1340,8 @@ mod tests {
values: vec![i64_value(100), ts_ms_value(1)],
}],
};
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows).unwrap();
let mut request =
WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
assert!(err.is_fill_default());
request.fill_missing_columns(&metadata).unwrap();
@@ -1333,7 +1378,7 @@ mod tests {
};
let metadata = new_region_metadata();

let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.fill_missing_columns(&metadata).unwrap_err();
check_invalid_request(&err, "column ts does not have default value");
}
@@ -1363,11 +1408,39 @@ mod tests {
};
let metadata = region_metadata_two_fields();

let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows, None).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
check_invalid_request(
&err,
"column f1 expect type Int64(Int64Type), given: STRING(12)",
);
}

#[test]
fn test_write_request_metadata() {
let rows = Rows {
schema: vec![
new_column_schema("c0", ColumnDataType::Int64, SemanticType::Tag),
new_column_schema("c1", ColumnDataType::Int64, SemanticType::Tag),
],
rows: vec![Row {
values: vec![i64_value(1), i64_value(2)],
}],
};

let metadata = Arc::new(new_region_metadata());
let request = WriteRequest::new(
RegionId::new(1, 1),
OpType::Put,
rows,
Some(metadata.clone()),
)
.unwrap();

assert!(request.region_metadata.is_some());
assert_eq!(
request.region_metadata.unwrap().region_id,
RegionId::new(1, 1)
);
}
}
@@ -348,8 +348,8 @@ impl DensePrimaryKeyCodec {
I: Iterator<Item = ValueRef<'a>>,
{
let mut serializer = Serializer::new(buffer);
for (value, (_, field)) in row.zip(self.ordered_primary_key_columns.iter()) {
field.serialize(&mut serializer, &value)?;
for (idx, value) in row.enumerate() {
self.field_at(idx).serialize(&mut serializer, &value)?;
}
Ok(())
}

@@ -290,7 +290,6 @@ pub(crate) fn build_key_values_with_ts_seq_values(
rows,
}),
write_hint: None,
bulk: Vec::new(),
};
KeyValues::new(metadata.as_ref(), mutation).unwrap()
}

@@ -166,7 +166,6 @@ pub(crate) fn write_rows_to_version(
sequence: start_ts as u64, // The sequence may be incorrect, but it's fine in test.
rows: Some(rows),
write_hint: None,
bulk: Vec::new(),
};
let key_values = KeyValues::new(&version.metadata, mutation).unwrap();
version.memtables.mutable.write(&key_values).unwrap();

@@ -288,7 +288,6 @@ mod tests {
sequence,
rows: Some(Rows { schema, rows }),
write_hint: None,
bulk: Vec::new(),
}
}

@@ -176,7 +176,7 @@ pub const DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE: usize = 2048;
/// |
/// // may deadlock |
/// distributor.distribute().await; |
/// |
/// |
/// |
/// receivers[0].read().await |
/// ```
@@ -280,7 +280,6 @@ mod tests {
sequence: 1u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
.encode_to_vec(),
@@ -295,7 +294,6 @@ mod tests {
sequence: 2u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
.encode_to_vec(),
@@ -310,7 +308,6 @@ mod tests {
sequence: 3u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
.encode_to_vec(),
@@ -355,7 +352,6 @@ mod tests {
sequence: 1u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
)]
@@ -376,7 +372,6 @@ mod tests {
sequence: 2u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
)]
@@ -393,7 +388,6 @@ mod tests {
sequence: 1u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
};
let region2 = RegionId::new(1, 2);
@@ -403,7 +397,6 @@ mod tests {
sequence: 3u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
};
let region3 = RegionId::new(1, 3);
@@ -413,7 +406,6 @@ mod tests {
sequence: 3u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
};
let provider = Provider::kafka_provider("my_topic".to_string());
@@ -492,7 +484,6 @@ mod tests {
sequence: 1u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
};
let region2 = RegionId::new(1, 2);
@@ -570,7 +561,6 @@ mod tests {
sequence: 1u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
.encode_to_vec(),
@@ -585,7 +575,6 @@ mod tests {
sequence: 2u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
.encode_to_vec(),
@@ -600,7 +589,6 @@ mod tests {
sequence: 3u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
.encode_to_vec(),
@@ -615,7 +603,6 @@ mod tests {
sequence: 4u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
.encode_to_vec(),
@@ -651,7 +638,6 @@ mod tests {
sequence: 4u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
}
)]

@@ -116,7 +116,6 @@ mod tests {
sequence: 1u64,
rows: None,
write_hint: None,
bulk: Vec::new(),
}],
};
let encoded_entry = wal_entry.encode_to_vec();
@@ -688,11 +688,18 @@ impl<S: LogStore> RegionWorkerLoop<S> {
self.last_periodical_check_millis += init_check_delay.as_millis() as i64;

// Buffer to retrieve requests from receiver.
let mut buffer = RequestBuffer::with_capacity(self.config.worker_request_batch_size);
let mut write_req_buffer: Vec<SenderWriteRequest> =
Vec::with_capacity(self.config.worker_request_batch_size);
let mut ddl_req_buffer: Vec<SenderDdlRequest> =
Vec::with_capacity(self.config.worker_request_batch_size);
let mut general_req_buffer: Vec<WorkerRequest> =
RequestBuffer::with_capacity(self.config.worker_request_batch_size);

while self.running.load(Ordering::Relaxed) {
// Clear the buffer before handling next batch of requests.
buffer.clear();
write_req_buffer.clear();
ddl_req_buffer.clear();
general_req_buffer.clear();

let max_wait_time = self.time_provider.wait_duration(CHECK_REGION_INTERVAL);
let sleep = tokio::time::sleep(max_wait_time);
@@ -701,7 +708,11 @@ impl<S: LogStore> RegionWorkerLoop<S> {
tokio::select! {
request_opt = self.receiver.recv() => {
match request_opt {
Some(request) => buffer.push(request),
Some(request) => match request {
WorkerRequest::Write(sender_req) => write_req_buffer.push(sender_req),
WorkerRequest::Ddl(sender_req) => ddl_req_buffer.push(sender_req),
_ => general_req_buffer.push(request),
},
// The channel is disconnected.
None => break,
}
@@ -736,18 +747,29 @@ impl<S: LogStore> RegionWorkerLoop<S> {
}

// Try to recv more requests from the channel.
for _ in 1..buffer.capacity() {
for _ in 1..self.config.worker_request_batch_size {
// We have received one request so we start from 1.
match self.receiver.try_recv() {
Ok(req) => buffer.push(req),
Ok(req) => match req {
WorkerRequest::Write(sender_req) => write_req_buffer.push(sender_req),
WorkerRequest::Ddl(sender_req) => ddl_req_buffer.push(sender_req),
_ => general_req_buffer.push(req),
},
// We still need to handle remaining requests.
Err(_) => break,
}
}

self.listener.on_recv_requests(buffer.len());
self.listener.on_recv_requests(
write_req_buffer.len() + ddl_req_buffer.len() + general_req_buffer.len(),
);

self.handle_requests(&mut buffer).await;
self.handle_requests(
&mut write_req_buffer,
&mut ddl_req_buffer,
&mut general_req_buffer,
)
.await;

self.handle_periodical_tasks();
}
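The loop above drains the worker channel into per-variant buffers so that writes and DDLs can be batched separately from other requests. A small self-contained sketch of that routing pattern follows, assuming tokio's mpsc channel; `Req`, `drain_into_buffers`, and `route` are illustrative stand-ins, not types from this crate.

use tokio::sync::mpsc;

// Stand-in for WorkerRequest; only the routing shape matters here.
enum Req {
    Write(String),
    Ddl(String),
    Other(String),
}

async fn drain_into_buffers(
    rx: &mut mpsc::Receiver<Req>,
    batch_size: usize,
) -> (Vec<String>, Vec<String>, Vec<Req>) {
    let mut writes = Vec::with_capacity(batch_size);
    let mut ddls = Vec::with_capacity(batch_size);
    let mut general = Vec::with_capacity(batch_size);
    // Wait for the first request, then opportunistically drain more without blocking.
    if let Some(first) = rx.recv().await {
        route(first, &mut writes, &mut ddls, &mut general);
    }
    for _ in 1..batch_size {
        match rx.try_recv() {
            Ok(req) => route(req, &mut writes, &mut ddls, &mut general),
            Err(_) => break,
        }
    }
    (writes, ddls, general)
}

fn route(req: Req, writes: &mut Vec<String>, ddls: &mut Vec<String>, general: &mut Vec<Req>) {
    match req {
        Req::Write(w) => writes.push(w),
        Req::Ddl(d) => ddls.push(d),
        other => general.push(other),
    }
}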
@@ -760,16 +782,17 @@ impl<S: LogStore> RegionWorkerLoop<S> {
/// Dispatches and processes requests.
///
/// `buffer` should be empty.
async fn handle_requests(&mut self, buffer: &mut RequestBuffer) {
let mut write_requests = Vec::with_capacity(buffer.len());
let mut ddl_requests = Vec::with_capacity(buffer.len());
for worker_req in buffer.drain(..) {
async fn handle_requests(
&mut self,
write_requests: &mut Vec<SenderWriteRequest>,
ddl_requests: &mut Vec<SenderDdlRequest>,
general_requests: &mut Vec<WorkerRequest>,
) {
for worker_req in general_requests.drain(..) {
match worker_req {
WorkerRequest::Write(sender_req) => {
write_requests.push(sender_req);
}
WorkerRequest::Ddl(sender_req) => {
ddl_requests.push(sender_req);
WorkerRequest::Write(_) | WorkerRequest::Ddl(_) => {
// These requests are categorized into write_requests and ddl_requests.
continue;
}
WorkerRequest::Background { region_id, notify } => {
// For background notify, we handle it directly.
@@ -803,12 +826,12 @@ impl<S: LogStore> RegionWorkerLoop<S> {
}

/// Takes and handles all ddl requests.
async fn handle_ddl_requests(&mut self, ddl_requests: Vec<SenderDdlRequest>) {
async fn handle_ddl_requests(&mut self, ddl_requests: &mut Vec<SenderDdlRequest>) {
if ddl_requests.is_empty() {
return;
}

for ddl in ddl_requests {
for ddl in ddl_requests.drain(..) {
let res = match ddl.request {
DdlRequest::Create(req) => self.handle_create_request(ddl.region_id, req).await,
DdlRequest::Drop(_) => self.handle_drop_request(ddl.region_id).await,

@@ -32,7 +32,7 @@ use crate::region::{RegionLeaderState, RegionMapRef};
use crate::worker::{RegionWorkerLoop, DROPPING_MARKER_FILE};

const GC_TASK_INTERVAL_SEC: u64 = 5 * 60; // 5 minutes
const MAX_RETRY_TIMES: u64 = 288; // 24 hours (5m * 288)
const MAX_RETRY_TIMES: u64 = 12; // 1 hours (5m * 12)

impl<S> RegionWorkerLoop<S>
where
@@ -118,12 +118,16 @@ where
}
}

/// Background GC task to remove the entire region path once it find there is no
/// parquet file left. Returns whether the path is removed.
/// Background GC task to remove the entire region path once one of the following
/// conditions is true:
/// - It finds there is no parquet file left.
/// - After `gc_duration`.
///
/// This task will keep running until finished. Any resource captured by it will
/// not be released before then. Be sure to only pass weak reference if something
/// is depended on ref-count mechanism.
/// Returns whether the path is removed.
///
/// This task will retry on failure and keep running until finished. Any resource
/// captured by it will not be released before then. Be sure to only pass weak reference
/// if something is depended on ref-count mechanism.
async fn later_drop_task(
region_id: RegionId,
region_path: String,
@@ -131,9 +135,9 @@ async fn later_drop_task(
dropping_regions: RegionMapRef,
gc_duration: Duration,
) -> bool {
let mut force = false;
for _ in 0..MAX_RETRY_TIMES {
sleep(gc_duration).await;
let result = remove_region_dir_once(&region_path, &object_store).await;
let result = remove_region_dir_once(&region_path, &object_store, force).await;
match result {
Err(err) => {
warn!(
@@ -143,11 +147,14 @@ async fn later_drop_task(
}
Ok(true) => {
dropping_regions.remove_region(region_id);
info!("Region {} is dropped", region_path);
info!("Region {} is dropped, force: {}", region_path, force);
return true;
}
Ok(false) => (),
}
sleep(gc_duration).await;
// Force recycle after gc duration.
force = true;
}

warn!(
@@ -160,9 +167,11 @@ async fn later_drop_task(

// TODO(ruihang): place the marker in a separate dir
/// Removes region dir if there is no parquet files, returns whether the directory is removed.
/// If `force = true`, always removes the dir.
pub(crate) async fn remove_region_dir_once(
region_path: &str,
object_store: &ObjectStore,
force: bool,
) -> Result<bool> {
// list all files under the given region path to check if there are un-deleted parquet files
let mut has_parquet_file = false;
@@ -173,7 +182,8 @@ pub(crate) async fn remove_region_dir_once(
.await
.context(OpenDalSnafu)?;
while let Some(file) = files.try_next().await.context(OpenDalSnafu)? {
if file.path().ends_with(".parquet") {
if !force && file.path().ends_with(".parquet") {
// If not in force mode, we only remove the region dir if there is no parquet file
has_parquet_file = true;
break;
} else if !file.path().ends_with(DROPPING_MARKER_FILE) {
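The drop-path GC above now retries a conditional removal and switches to forced removal after a full GC interval has elapsed. Below is a simplified sketch of that retry-then-force loop, with a synchronous closure standing in for the async remove_region_dir_once; the function name, closure shape, and error type here are illustrative assumptions, not the crate's API.

use std::time::Duration;
use tokio::time::sleep;

async fn gc_with_force<F>(mut remove_once: F, gc_interval: Duration, max_retries: usize) -> bool
where
    F: FnMut(bool) -> Result<bool, String>,
{
    let mut force = false;
    for _ in 0..max_retries {
        match remove_once(force) {
            // Directory removed; we are done.
            Ok(true) => return true,
            // Parquet files still present; try again after the interval.
            Ok(false) => {}
            Err(err) => eprintln!("removal failed, will retry: {err}"),
        }
        sleep(gc_interval).await;
        // After a full GC interval has passed, later attempts force the removal.
        force = true;
    }
    false
}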
@@ -230,13 +230,13 @@ impl<S: LogStore> RegionWorkerLoop<S> {
request.on_success();

// Handle pending requests for the region.
if let Some((ddl_requests, write_requests)) =
if let Some((mut ddl_requests, mut write_requests)) =
self.flush_scheduler.on_flush_success(region_id)
{
// Perform DDLs first because they require empty memtables.
self.handle_ddl_requests(ddl_requests).await;
self.handle_ddl_requests(&mut ddl_requests).await;
// Handle pending write requests, we don't stall these requests.
self.handle_write_requests(write_requests, false).await;
self.handle_write_requests(&mut write_requests, false).await;
}

// Handle stalled requests.