test: add data compatibility test (#3109)

* test: data files compatibility test

* rework compatibility test

* revert unneeded changes

* revert unneeded changes

* debug CI

* Update .github/workflows/develop.yml

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
LFC
2024-02-20 15:44:04 +08:00
committed by GitHub
parent b1f54d8a03
commit eded08897d
9 changed files with 234 additions and 8 deletions

.github/workflows/develop.yml

@@ -278,3 +278,21 @@ jobs:
          flags: rust
          fail_ci_if_error: false
          verbose: true

  compat:
    name: Compatibility Test
    needs: build
    runs-on: ubuntu-20.04
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v3
      - name: Download pre-built binaries
        uses: actions/download-artifact@v4
        with:
          name: bins
          path: .
      - name: Unzip binaries
        run: |
          mkdir -p ./bins/current
          tar -xvf ./bins.tar.gz --strip-components=1 -C ./bins/current
      - run: ./tests/compat/test-compat.sh 0.6.0

tests/compat/README.md (new file, 31 lines)

@@ -0,0 +1,31 @@
# GreptimeDB compatibility test

The compatibility test checks whether a newer version of GreptimeDB can read the data written by an older version of
GreptimeDB ("backward" compatibility), and vice versa ("forward" compatibility). It usually runs in GitHub Actions to
make sure no breaking change slips in by accident.

The test works like this: the sqlness-runner is reused twice, once for the write side and once for the read side. For
example, when testing backward compatibility, we use sqlness-runner to run the SQLs with writes against the old
GreptimeDB binary, then use the same sqlness-runner to run the SQLs with reads against the new GreptimeDB binary. If
the reads produce the expected results, backward compatibility is achieved.
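
Concretely, a backward-compatibility round boils down to something like the sketch below. This is a simplified inline
of what `test-compat.sh` and `util.sh` do, not an extra script in this directory; `0.6.0` is just an example version:

```shell
# Phase 1: sqlness-runner starts the *old* GreptimeDB and runs the write cases.
./bins/current/sqlness-runner \
    --bins-dir ./bins/0.6.0 \
    --case-dir ./tests/compat/case/write

# Phase 2: sqlness-runner starts the *new* GreptimeDB and runs the read cases
# against the data left behind by phase 1.
./bins/current/sqlness-runner \
    --bins-dir ./bins/current \
    --case-dir ./tests/compat/case/read
```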
This compatibility test is inspired by [Databend](https://github.com/datafuselabs/databend/).
## Usage
```shell
tests/compat/test-compat.sh <old_ver>
```
For example, `tests/compat/test-compat.sh 0.6.0` tests whether the data written by GreptimeDB **v0.6.0** can be read by
the **current** version of GreptimeDB, and vice versa. By "current" we mean the fresh binaries built from the current
codebase.
## Prerequisites

The current version of GreptimeDB's binaries must reside in `./bins`:

- `./bins/current/greptime`
- `./bins/current/sqlness-runner`

The GitHub Actions steps above already ensure this. When running locally, you have to copy the binaries from the
`target` directory manually.
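
For a local run, a minimal sketch of that setup could look like the following (assuming a release build; the
`target/release` path is an assumption, use `target/debug` for a debug build):

```shell
# Build the current binaries and place them where test-compat.sh expects them.
cargo build --release --bin greptime --bin sqlness-runner
mkdir -p ./bins/current
cp ./target/release/greptime ./target/release/sqlness-runner ./bins/current/

# Then run the compatibility test against, e.g., v0.6.0.
./tests/compat/test-compat.sh 0.6.0
```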


@@ -0,0 +1,13 @@
select ts, i, s, f from foo order by ts;
+---------------------+---+----------+-----+
| ts | i | s | f |
+---------------------+---+----------+-----+
| 2024-02-01T17:00:00 | 1 | my_tag_1 | |
| 2024-02-01T18:00:00 | 2 | my_tag_2 | |
| 2024-02-01T19:00:00 | 3 | my_tag_3 | |
| 2024-02-01T20:00:00 | 4 | my_tag_4 | 4.4 |
| 2024-02-01T21:00:00 | 5 | my_tag_5 | 5.5 |
| 2024-02-01T22:00:00 | 6 | my_tag_6 | 6.6 |
+---------------------+---+----------+-----+


@@ -0,0 +1 @@
select ts, i, s, f from foo order by ts;


@@ -0,0 +1,25 @@
create table foo(ts timestamp time index, s string primary key, i int);
Affected Rows: 0
insert into foo values
("2024-02-02 01:00:00+0800", "my_tag_1", 1),
("2024-02-02 02:00:00+0800", "my_tag_2", 2),
("2024-02-02 03:00:00+0800", "my_tag_3", 3);
Affected Rows: 3
-- Alter the table to trigger a flush (it will be executed before the process is terminated).
-- Otherwise the SST might not be generated (the data could remain in the WAL).
-- If we get an explicit "flush table" interface in the future, it's still good to keep this alter table in the test.
alter table foo add column f float;
Affected Rows: 0
insert into foo values
("2024-02-02 04:00:00+0800", "my_tag_4", 4, 4.4),
("2024-02-02 05:00:00+0800", "my_tag_5", 5, 5.5),
("2024-02-02 06:00:00+0800", "my_tag_6", 6, 6.6);
Affected Rows: 3


@@ -0,0 +1,16 @@
create table foo(ts timestamp time index, s string primary key, i int);
insert into foo values
("2024-02-02 01:00:00+0800", "my_tag_1", 1),
("2024-02-02 02:00:00+0800", "my_tag_2", 2),
("2024-02-02 03:00:00+0800", "my_tag_3", 3);
-- Alter the table to trigger a flush (it will be executed before the process is terminated).
-- Otherwise the SST might not be generated (the data could remain in the WAL).
-- If we get an explicit "flush table" interface in the future, it's still good to keep this alter table in the test.
alter table foo add column f float;
insert into foo values
("2024-02-02 04:00:00+0800", "my_tag_4", 4, 4.4),
("2024-02-02 05:00:00+0800", "my_tag_5", 5, 5.5),
("2024-02-02 06:00:00+0800", "my_tag_6", 6, 6.6);

tests/compat/test-compat.sh (new executable file, 37 lines)

@@ -0,0 +1,37 @@
#!/bin/bash

set -o errexit

usage() {
    echo " Tests the compatibility between different versions of GreptimeDB."
    echo " Expects the directory './bins/current' to contain the newly built binaries."
    echo " Usage: $0 <old_version>"
}

# The previous version of GreptimeDB to test compatibility with,
# e.g. old_ver="0.6.0"
old_ver="$1"
if [ -z "$old_ver" ]; then
    usage
    exit 1
fi

SCRIPT_PATH="$(cd "$(dirname "$0")" >/dev/null 2>&1 && pwd)"
echo " === SCRIPT_PATH: $SCRIPT_PATH"

source "${SCRIPT_PATH}/util.sh"

# Go to the work tree root.
cd "$SCRIPT_PATH/../../"

download_binary "$old_ver"

run_test "$old_ver" "backward"

echo " === Clear GreptimeDB data before running the forward compatibility test"
rm -rf /tmp/greptimedb-standalone

run_test "$old_ver" "forward"

echo "Compatibility test ran successfully!"

tests/compat/util.sh (new executable file, 82 lines)

@@ -0,0 +1,82 @@
#!/bin/bash

# Assemble the GreptimeDB binary download URL for a specific version.
binary_url() {
    local ver="$1"
    local bin_tar="greptime-$(uname -s | tr '[:upper:]' '[:lower:]')-amd64-v$ver.tar.gz"
    echo "https://github.com/GreptimeTeam/greptimedb/releases/download/v$ver/$bin_tar"
}
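# For illustration: on Linux, `binary_url "0.6.0"` prints
#   https://github.com/GreptimeTeam/greptimedb/releases/download/v0.6.0/greptime-linux-amd64-v0.6.0.tar.gz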
# Download a specific version of the GreptimeDB binary tar file and untar it to the folder `./bins/$ver`.
# `ver` is a semver without the `v` prefix.
download_binary() {
    local ver="$1"

    local url="$(binary_url "$ver")"
    local bin_tar="greptime-$(uname -s | tr '[:upper:]' '[:lower:]')-amd64-v$ver.tar.gz"

    if [ -f "./bins/$ver/greptime" ]; then
        echo " === binaries exist: $(ls ./bins/$ver/* | tr '\n' ' ')"
        chmod +x ./bins/$ver/*
        return
    fi

    if [ -f "$bin_tar" ]; then
        echo " === tar file exists: $bin_tar"
    else
        echo " === Download binary ver: $ver"
        echo " === Download binary url: $url"
        curl --connect-timeout 5 --retry 5 --retry-delay 1 -L "$url" -o "$bin_tar"
    fi

    mkdir -p "./bins/$ver"
    tar -xf "$bin_tar" --strip-components=1 -C "./bins/$ver"

    echo " === unpacked: ./bins/$ver:"
    ls -lh "./bins/$ver"
    chmod +x ./bins/$ver/*
}
# Test data compatibility:
# - data written by an old version of GreptimeDB can be read by the current one ("backward" compatibility)
# - data written by the current version of GreptimeDB can be read by an old one ("forward" compatibility)
run_test() {
    local old_ver="$1"
    local forward="$2"

    local write_case_dir="./tests/compat/case/write"
    local read_case_dir="./tests/compat/case/read"

    local bin_old="./bins/$old_ver/greptime"
    local bin_new="./bins/current/greptime"
    local runner="./bins/current/sqlness-runner"

    echo " === Test with:"
    echo " === old greptimedb version:"
    "$bin_old" --version
    echo " === new greptimedb version:"
    "$bin_new" --version

    # "forward" means we are testing forward compatibility: the data generated by the current
    # version of GreptimeDB can be used by the old one. So we run the new GreptimeDB binary
    # first to write, then run the old one to read. And the opposite for backward compatibility.
    if [ "$forward" == "forward" ]; then
        echo " === Running forward compat test ..."
        echo " === Run test: write with current GreptimeDB"
        "$runner" --bins-dir "$(dirname "$bin_new")" --case-dir "$write_case_dir"
        echo " === Run test: read with old GreptimeDB"
        "$runner" --bins-dir "$(dirname "$bin_old")" --case-dir "$read_case_dir"
    else
        echo " === Running backward compat test ..."
        echo " === Run test: write with old GreptimeDB"
        "$runner" --bins-dir "$(dirname "$bin_old")" --case-dir "$write_case_dir"
        echo " === Run test: read with current GreptimeDB"
        "$runner" --bins-dir "$(dirname "$bin_new")" --case-dir "$read_case_dir"
    fi
}
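# Example usage (normally invoked by test-compat.sh, which populates ./bins first):
#   run_test "0.6.0" "backward"   # write with v0.6.0, read with the current build
#   run_test "0.6.0" "forward"    # write with the current build, read with v0.6.0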


@@ -362,9 +362,7 @@ impl Env {
             kafka_wal_broker_endpoints: String,
         }

-        let data_home = self
-            .data_home
-            .join(format!("greptimedb-{subcommand}-{}", db_ctx.time));
+        let data_home = self.data_home.join(format!("greptimedb-{subcommand}"));
         std::fs::create_dir_all(data_home.as_path()).unwrap();

         let wal_dir = data_home.join("wal").display().to_string();
@@ -489,21 +487,26 @@ impl GreptimeDB {
     fn stop(&mut self) {
         if let Some(server_processes) = self.server_processes.clone() {
             let mut server_processes = server_processes.lock().unwrap();
-            for server_process in server_processes.iter_mut() {
-                Env::stop_server(server_process);
+            for mut server_process in server_processes.drain(..) {
+                Env::stop_server(&mut server_process);
+                println!(
+                    "Standalone or Datanode (pid = {}) is stopped",
+                    server_process.id()
+                );
             }
         }
         if let Some(mut metasrv) = self.metasrv_process.take() {
             Env::stop_server(&mut metasrv);
+            println!("Metasrv (pid = {}) is stopped", metasrv.id());
         }
-        if let Some(mut datanode) = self.frontend_process.take() {
-            Env::stop_server(&mut datanode);
+        if let Some(mut frontend) = self.frontend_process.take() {
+            Env::stop_server(&mut frontend);
+            println!("Frontend (pid = {}) is stopped", frontend.id());
         }
         if matches!(self.ctx.wal, WalConfig::Kafka { needs_kafka_cluster, .. } if needs_kafka_cluster)
         {
             util::teardown_wal();
         }
+        println!("Stopped DB.");
     }
 }