Skip to main content

sqlness_runner/env/
bare.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::fmt::Display;
17use std::fs::OpenOptions;
18use std::io;
19use std::io::Write;
20use std::path::{Path, PathBuf};
21use std::process::{Child, Command};
22use std::sync::atomic::{AtomicU32, Ordering};
23use std::sync::{Arc, Mutex};
24use std::time::Duration;
25
26use async_trait::async_trait;
27use common_error::ext::ErrorExt;
28use sqlness::{Database, EnvController, QueryContext};
29use tokio::sync::Mutex as TokioMutex;
30
31use crate::client::MultiProtocolClient;
32use crate::cmd::bare::ServerAddr;
33use crate::cmd::compat_case::try_infer_version;
34use crate::formatter::{ErrorFormatter, MysqlFormatter, OutputFormatter, PostgresqlFormatter};
35use crate::protocol_interceptor::{MYSQL, PROTOCOL_KEY};
36use crate::server_mode::{GrpcArgStyle, ServerMode};
37use crate::util;
38use crate::util::{PROGRAM, get_workspace_root, maybe_pull_binary};
39
// Slots under which each server's `ServerMode` is stored in (and later read
// back from) the per-instance `GreptimeDBContext`.

// standalone mode
const SERVER_MODE_STANDALONE_IDX: usize = 0;
// distributed mode: metasrv first, then the datanodes, then frontend and flownode
const SERVER_MODE_METASRV_IDX: usize = 0;
const SERVER_MODE_DATANODE_START_IDX: usize = 1;
// Derived from the datanode range so these slots can never overlap a datanode
// slot when `DISTRIBUTED_DATANODE_COUNT` changes.
const SERVER_MODE_FRONTEND_IDX: usize =
    SERVER_MODE_DATANODE_START_IDX + DISTRIBUTED_DATANODE_COUNT;
const SERVER_MODE_FLOWNODE_IDX: usize = SERVER_MODE_FRONTEND_IDX + 1;
// Number of datanodes in distributed mode
const DISTRIBUTED_DATANODE_COUNT: usize = 3;
49
/// Write-ahead-log backend selected for the GreptimeDB instances started by
/// the runner.
#[derive(Debug, Clone)]
pub enum WalConfig {
    /// Raft-engine WAL; the runner performs no extra WAL setup for it.
    RaftEngine,
    /// Kafka WAL.
    Kafka {
        /// Indicates whether the runner needs to start a kafka cluster
        /// (it might be available in the external system environment).
        needs_kafka_cluster: bool,
        /// Kafka broker endpoints.
        // NOTE(review): presumably `host:port` strings handed to the servers — confirm.
        broker_endpoints: Vec<String>,
    },
}
60
/// Where a backing service comes from.
#[derive(Debug, Clone)]
pub(crate) enum ServiceProvider {
    /// The runner creates the service itself.
    Create,
    /// The service is provided externally; carries the non-empty string it
    /// was built from.
    // NOTE(review): presumably a connection address/URL — confirm against callers.
    External(String),
}

impl From<&str> for ServiceProvider {
    /// An empty string maps to [`ServiceProvider::Create`]; any other value
    /// is kept verbatim in [`ServiceProvider::External`].
    fn from(value: &str) -> Self {
        match value {
            "" => Self::Create,
            endpoint => Self::External(endpoint.to_owned()),
        }
    }
}
76
/// Configuration of the metadata store used by distributed clusters.
#[derive(Clone)]
pub struct StoreConfig {
    /// Store addresses in `host:port` form; the port part is parsed when the
    /// runner sets up etcd, PostgreSQL or MySQL itself.
    pub store_addrs: Vec<String>,
    /// Whether the runner should set up etcd before starting a cluster.
    pub setup_etcd: bool,
    /// PostgreSQL backend: `Some(ServiceProvider::Create)` makes the runner
    /// set it up; other values trigger no setup in this file.
    pub(crate) setup_pg: Option<ServiceProvider>,
    /// MySQL backend: `Some(ServiceProvider::Create)` makes the runner set it
    /// up; other values trigger no setup in this file.
    pub(crate) setup_mysql: Option<ServiceProvider>,
    // NOTE(review): not read in this file; presumably forwarded to the server
    // arguments to enable the flat format — confirm.
    pub enable_flat_format: bool,
}
85
/// Environment controller that starts GreptimeDB from local binaries, or
/// connects to an already running server.
#[derive(Clone)]
pub struct Env {
    /// Directory that receives the server log files; also exported as the
    /// `SQLNESS_HOME` environment variable on start.
    sqlness_home: PathBuf,
    /// Addresses of an externally managed server. When `server_addr` is set,
    /// no process is spawned and the runner only connects to it.
    server_addrs: ServerAddr,
    /// WAL backend used by the started instances.
    wal: WalConfig,

    /// The path to the directory that contains the pre-built GreptimeDB binary.
    /// When running in CI, this is expected to be set.
    /// If not set, this runner will build the GreptimeDB binary itself when needed, and set this field by then.
    bins_dir: Arc<Mutex<Option<PathBuf>>>,
    /// The path to the directory that contains the old pre-built GreptimeDB binaries.
    versioned_bins_dirs: Arc<Mutex<HashMap<String, PathBuf>>>,
    /// Pull different versions of GreptimeDB on need.
    pull_version_on_need: bool,
    /// Store address for metasrv metadata
    store_config: StoreConfig,
    /// Extra command line arguments when starting GreptimeDB binaries.
    extra_args: Vec<String>,
    /// Cache for the inferred gRPC argument style per `bins_dir`.
    grpc_arg_style_cache: Arc<Mutex<HashMap<PathBuf, GrpcArgStyle>>>,
}
107
108#[async_trait]
109impl EnvController for Env {
110    type DB = GreptimeDB;
111
112    async fn start(&self, mode: &str, id: usize, _config: Option<&Path>) -> Self::DB {
113        if self.server_addrs.server_addr.is_some() && id > 0 {
114            panic!("Parallel test mode is not supported when server address is already set.");
115        }
116
117        unsafe {
118            std::env::set_var("SQLNESS_HOME", self.sqlness_home.display().to_string());
119        }
120        match mode {
121            "standalone" => self.start_standalone(id).await,
122            "distributed" => self.start_distributed(id).await,
123            _ => panic!("Unexpected mode: {mode}"),
124        }
125    }
126
127    /// Stop one [`Database`].
128    async fn stop(&self, _mode: &str, mut database: Self::DB) {
129        database.stop();
130    }
131}
132
133impl Env {
134    pub fn new(
135        data_home: PathBuf,
136        server_addrs: ServerAddr,
137        wal: WalConfig,
138        pull_version_on_need: bool,
139        bins_dir: Option<PathBuf>,
140        store_config: StoreConfig,
141        extra_args: Vec<String>,
142    ) -> Self {
143        Self {
144            sqlness_home: data_home,
145            server_addrs,
146            wal,
147            pull_version_on_need,
148            bins_dir: Arc::new(Mutex::new(bins_dir.clone())),
149            versioned_bins_dirs: Arc::new(Mutex::new(HashMap::from_iter([(
150                "latest".to_string(),
151                bins_dir.clone().unwrap_or(util::get_binary_dir("debug")),
152            )]))),
153            store_config,
154            extra_args,
155            grpc_arg_style_cache: Arc::new(Mutex::new(HashMap::new())),
156        }
157    }
158
159    async fn start_standalone(&self, id: usize) -> GreptimeDB {
160        println!("Starting standalone instance id: {id}");
161
162        if self.server_addrs.server_addr.is_some() {
163            self.connect_db(&self.server_addrs, id).await
164        } else {
165            self.build_db();
166            self.setup_wal();
167            let mut db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone());
168
169            let server_mode = ServerMode::random_standalone();
170            db_ctx.set_server_mode(server_mode.clone(), SERVER_MODE_STANDALONE_IDX);
171            let server_addr = server_mode.server_addr().unwrap();
172            let server_process = self.start_server(server_mode, &db_ctx, id, true).await;
173
174            let mut greptimedb = self.connect_db(&server_addr, id).await;
175            greptimedb.server_processes = Some(Arc::new(Mutex::new(vec![server_process])));
176            greptimedb.is_standalone = true;
177            greptimedb.ctx = db_ctx;
178
179            greptimedb
180        }
181    }
182
    /// Start a distributed cluster for instance `id`; delegates to
    /// `start_distributed_inner`.
    async fn start_distributed(&self, id: usize) -> GreptimeDB {
        self.start_distributed_inner(id).await
    }
186
187    /// Internal: start a distributed cluster with flownode.
188    async fn start_distributed_inner(&self, id: usize) -> GreptimeDB {
189        if self.server_addrs.server_addr.is_some() {
190            self.connect_db(&self.server_addrs, id).await
191        } else {
192            self.build_db();
193            self.setup_wal();
194            self.setup_etcd();
195            self.setup_pg();
196            self.setup_mysql().await;
197            let mut db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone());
198
199            // start a distributed GreptimeDB
200            let meta_server_mode = ServerMode::random_metasrv();
201            let metasrv_port = match &meta_server_mode {
202                ServerMode::Metasrv {
203                    rpc_server_addr, ..
204                } => rpc_server_addr
205                    .split(':')
206                    .nth(1)
207                    .unwrap()
208                    .parse::<u16>()
209                    .unwrap(),
210                _ => panic!(
211                    "metasrv mode not set, maybe running in remote mode which doesn't support restart?"
212                ),
213            };
214            db_ctx.set_server_mode(meta_server_mode.clone(), SERVER_MODE_METASRV_IDX);
215            let meta_server = self.start_server(meta_server_mode, &db_ctx, id, true).await;
216
217            let mut datanodes = Vec::with_capacity(DISTRIBUTED_DATANODE_COUNT);
218            for i in 0..DISTRIBUTED_DATANODE_COUNT {
219                let datanode_mode = ServerMode::random_datanode(metasrv_port, i as u32);
220                db_ctx.set_server_mode(datanode_mode.clone(), SERVER_MODE_DATANODE_START_IDX + i);
221                let datanode = self.start_server(datanode_mode, &db_ctx, id, true).await;
222                datanodes.push(datanode);
223            }
224
225            let frontend_mode = ServerMode::random_frontend(metasrv_port);
226            let server_addr = frontend_mode.server_addr().unwrap();
227            db_ctx.set_server_mode(frontend_mode.clone(), SERVER_MODE_FRONTEND_IDX);
228            let frontend = self.start_server(frontend_mode, &db_ctx, id, true).await;
229
230            let flownode_mode = ServerMode::random_flownode(metasrv_port, 0);
231            db_ctx.set_server_mode(flownode_mode.clone(), SERVER_MODE_FLOWNODE_IDX);
232            let flownode = self.start_server(flownode_mode, &db_ctx, id, true).await;
233
234            let mut greptimedb = self.connect_db(&server_addr, id).await;
235
236            greptimedb.metasrv_process = Some(meta_server).into();
237            greptimedb.server_processes = Some(Arc::new(Mutex::new(datanodes)));
238            greptimedb.frontend_process = Some(frontend).into();
239            greptimedb.flownode_process = Some(flownode).into();
240            greptimedb.is_standalone = false;
241            greptimedb.ctx = db_ctx;
242
243            greptimedb
244        }
245    }
246
247    async fn connect_db(&self, server_addr: &ServerAddr, id: usize) -> GreptimeDB {
248        let grpc_server_addr = server_addr.server_addr.as_ref().unwrap();
249        let pg_server_addr = server_addr.pg_server_addr.as_ref().unwrap();
250        let mysql_server_addr = server_addr.mysql_server_addr.as_ref().unwrap();
251
252        let client =
253            MultiProtocolClient::connect(grpc_server_addr, pg_server_addr, mysql_server_addr).await;
254        GreptimeDB {
255            client: TokioMutex::new(client),
256            server_processes: None,
257            metasrv_process: None.into(),
258            frontend_process: None.into(),
259            flownode_process: None.into(),
260            active_bins_dir: Mutex::new(self.bins_dir.lock().unwrap().clone()),
261            ctx: GreptimeDBContext {
262                time: 0,
263                datanode_id: Default::default(),
264                wal: self.wal.clone(),
265                store_config: self.store_config.clone(),
266                server_modes: Vec::new(),
267            },
268            is_standalone: false,
269            env: self.clone(),
270            id,
271        }
272    }
273
274    fn stop_server(process: &mut Child) {
275        let _ = process.kill();
276        let _ = process.wait();
277    }
278
279    /// Infers which gRPC argument style to use for the binary at `bins_dir`.
280    fn infer_grpc_arg_style(&self, bins_dir: &Path) -> GrpcArgStyle {
281        let cache_key = bins_dir.to_path_buf();
282
283        // Fast path: already cached.
284        {
285            let cache = self.grpc_arg_style_cache.lock().unwrap();
286            if let Some(style) = cache.get(&cache_key) {
287                return *style;
288            }
289        }
290
291        let version = try_infer_version(bins_dir);
292        let style = GrpcArgStyle::for_version(version.as_ref());
293
294        // Insert into cache (may race with another thread, but both detect
295        // the same value, so it's harmless).
296        {
297            let mut cache = self.grpc_arg_style_cache.lock().unwrap();
298            cache.entry(cache_key).or_insert(style);
299        }
300
301        style
302    }
303
304    async fn start_server(
305        &self,
306        mode: ServerMode,
307        db_ctx: &GreptimeDBContext,
308        id: usize,
309        truncate_log: bool,
310    ) -> Child {
311        let bins_dir = self.bins_dir.lock().unwrap().clone().expect(
312            "GreptimeDB binary is not available. Please pass in the path to the directory that contains the pre-built GreptimeDB binary. Or you may call `self.build_db()` beforehand.",
313        );
314
315        self.start_server_with_bins_dir(mode, db_ctx, id, truncate_log, bins_dir)
316            .await
317    }
318
319    async fn start_server_with_bins_dir(
320        &self,
321        mode: ServerMode,
322        db_ctx: &GreptimeDBContext,
323        id: usize,
324        truncate_log: bool,
325        bins_dir: PathBuf,
326    ) -> Child {
327        let log_file_name = match mode {
328            ServerMode::Datanode { node_id, .. } => {
329                db_ctx.incr_datanode_id();
330                format!("greptime-{}-sqlness-datanode-{}.log", id, node_id)
331            }
332            ServerMode::Flownode { .. } => format!("greptime-{}-sqlness-flownode.log", id),
333            ServerMode::Frontend { .. } => format!("greptime-{}-sqlness-frontend.log", id),
334            ServerMode::Metasrv { .. } => format!("greptime-{}-sqlness-metasrv.log", id),
335            ServerMode::Standalone { .. } => format!("greptime-{}-sqlness-standalone.log", id),
336        };
337        let stdout_file_name = self.sqlness_home.join(log_file_name).display().to_string();
338
339        println!("DB instance {id} log file at {stdout_file_name}");
340
341        let stdout_file = OpenOptions::new()
342            .create(true)
343            .write(true)
344            .truncate(truncate_log)
345            .append(!truncate_log)
346            .open(&stdout_file_name)
347            .unwrap();
348
349        let arg_style = self.infer_grpc_arg_style(&bins_dir);
350        let args = mode.get_args(&self.sqlness_home, self, db_ctx, id, arg_style);
351        let check_ip_addrs = mode.check_addrs();
352
353        for check_ip_addr in &check_ip_addrs {
354            if util::check_port(check_ip_addr.parse().unwrap(), Duration::from_secs(1)).await {
355                panic!(
356                    "Port {check_ip_addr} is already in use, please check and retry.",
357                    check_ip_addr = check_ip_addr
358                );
359            }
360        }
361
362        let program = PROGRAM;
363
364        let abs_bins_dir = bins_dir
365            .canonicalize()
366            .expect("Failed to canonicalize bins_dir");
367
368        let mut process = Command::new(abs_bins_dir.join(program))
369            .current_dir(bins_dir.clone())
370            .env("TZ", "UTC")
371            .args(args)
372            .stdout(stdout_file)
373            .spawn()
374            .unwrap_or_else(|error| {
375                panic!(
376                    "Failed to start the DB with subcommand {}, Error: {error}, path: {:?}",
377                    mode.name(),
378                    bins_dir.join(program)
379                );
380            });
381
382        for check_ip_addr in &check_ip_addrs {
383            if !util::check_port(check_ip_addr.parse().unwrap(), Duration::from_secs(30)).await {
384                Env::stop_server(&mut process);
385                panic!(
386                    "{} doesn't up in 30 seconds, check {} for more details.",
387                    mode.name(),
388                    stdout_file_name
389                )
390            }
391        }
392
393        process
394    }
395
    /// Stop and restart the server processes of `db`, reusing the server
    /// modes recorded in `db.ctx` and the binary in `db.active_bins_dir`.
    ///
    /// The standalone process or all datanodes are always restarted, and so
    /// is the flownode when one is recorded. Metasrv and frontend are only
    /// restarted when `is_full_restart` is `true`. Log files are appended to
    /// rather than truncated, and the MySQL/PostgreSQL clients are reconnected
    /// whenever the process they talk to has been replaced.
    ///
    /// # Panics
    ///
    /// Panics when `db.active_bins_dir` is unset, when a required server mode
    /// is missing from the context, or when a server fails to start.
    pub(crate) async fn restart_server(&self, db: &GreptimeDB, is_full_restart: bool) {
        let bins_dir = db.active_bins_dir.lock().unwrap().clone().expect(
            "GreptimeDB binary is not available. Please pass in the path to the directory that contains the pre-built GreptimeDB binary. Or you may call `self.build_db()` beforehand.",
        );

        // Phase 1: stop the running processes. No `.await` happens inside this
        // block, so the std mutex guards are never held across a suspension point.
        {
            if let Some(server_process) = db.server_processes.clone() {
                let mut server_processes = server_process.lock().unwrap();
                for server_process in server_processes.iter_mut() {
                    Env::stop_server(server_process);
                }
            }

            if is_full_restart {
                if let Some(mut metasrv_process) =
                    db.metasrv_process.lock().expect("poisoned lock").take()
                {
                    Env::stop_server(&mut metasrv_process);
                }
                if let Some(mut frontend_process) =
                    db.frontend_process.lock().expect("poisoned lock").take()
                {
                    Env::stop_server(&mut frontend_process);
                }
            }

            // Stop flownode if present.
            if let Some(mut flownode_process) =
                db.flownode_process.lock().expect("poisoned lock").take()
            {
                Env::stop_server(&mut flownode_process);
            }
        }

        // Phase 2: start the replacements.
        // check if the server is distributed or standalone
        let new_server_processes = if db.is_standalone {
            let server_mode = db
                .ctx
                .get_server_mode(SERVER_MODE_STANDALONE_IDX)
                .cloned()
                .unwrap();
            let server_addr = server_mode.server_addr().unwrap();
            let new_server_process = self
                .start_server_with_bins_dir(server_mode, &db.ctx, db.id, false, bins_dir.clone())
                .await;

            // The old connections pointed at the killed process.
            let mut client = db.client.lock().await;
            client
                .reconnect_mysql_client(&server_addr.mysql_server_addr.unwrap())
                .await;
            client
                .reconnect_pg_client(&server_addr.pg_server_addr.unwrap())
                .await;
            vec![new_server_process]
        } else {
            // Starting a datanode increments the counter again, so reset it first.
            db.ctx.reset_datanode_id();
            if is_full_restart {
                let metasrv_mode = db
                    .ctx
                    .get_server_mode(SERVER_MODE_METASRV_IDX)
                    .cloned()
                    .unwrap();
                let metasrv = self
                    .start_server_with_bins_dir(
                        metasrv_mode,
                        &db.ctx,
                        db.id,
                        false,
                        bins_dir.clone(),
                    )
                    .await;
                db.metasrv_process
                    .lock()
                    .expect("lock poisoned")
                    .replace(metasrv);

                // wait for metasrv to start
                // since it seems older version of db might take longer to complete election
                tokio::time::sleep(Duration::from_secs(5)).await;
            }

            let mut processes = vec![];
            for i in 0..DISTRIBUTED_DATANODE_COUNT {
                let datanode_mode = db
                    .ctx
                    .get_server_mode(SERVER_MODE_DATANODE_START_IDX + i)
                    .cloned()
                    .unwrap();
                let new_server_process = self
                    .start_server_with_bins_dir(
                        datanode_mode,
                        &db.ctx,
                        db.id,
                        false,
                        bins_dir.clone(),
                    )
                    .await;
                processes.push(new_server_process);
            }

            if is_full_restart {
                let frontend_mode = db
                    .ctx
                    .get_server_mode(SERVER_MODE_FRONTEND_IDX)
                    .cloned()
                    .unwrap();
                let server_addr = frontend_mode.server_addr().unwrap();
                let frontend = self
                    .start_server_with_bins_dir(
                        frontend_mode,
                        &db.ctx,
                        db.id,
                        false,
                        bins_dir.clone(),
                    )
                    .await;
                db.frontend_process
                    .lock()
                    .expect("lock poisoned")
                    .replace(frontend);

                // Reconnect protocol clients to the new frontend process
                // so that MySQL/Postgres queries use the restarted frontend,
                // not stale connections to the old (killed) process.
                let mut client = db.client.lock().await;
                client
                    .reconnect_mysql_client(server_addr.mysql_server_addr.as_ref().unwrap())
                    .await;
                client
                    .reconnect_pg_client(server_addr.pg_server_addr.as_ref().unwrap())
                    .await;
            }

            // Restart flownode.
            if let Some(flownode_mode) = db.ctx.get_server_mode(SERVER_MODE_FLOWNODE_IDX).cloned() {
                let flownode = self
                    .start_server_with_bins_dir(
                        flownode_mode,
                        &db.ctx,
                        db.id,
                        false,
                        bins_dir.clone(),
                    )
                    .await;
                db.flownode_process
                    .lock()
                    .expect("lock poisoned")
                    .replace(flownode);
            }

            processes
        };

        // Phase 3: publish the new standalone/datanode handles. When `db` has
        // no process list, the new handles are dropped here without being stored.
        if let Some(server_processes) = db.server_processes.clone() {
            let mut server_processes = server_processes.lock().unwrap();
            *server_processes = new_server_processes;
        }
    }
555
556    /// Setup kafka wal cluster if needed. The counterpart is in [GreptimeDB::stop].
557    fn setup_wal(&self) {
558        if matches!(self.wal, WalConfig::Kafka { needs_kafka_cluster, .. } if needs_kafka_cluster) {
559            util::setup_wal();
560        }
561    }
562
563    /// Setup etcd if needed.
564    fn setup_etcd(&self) {
565        if self.store_config.setup_etcd {
566            let client_ports = self
567                .store_config
568                .store_addrs
569                .iter()
570                .map(|s| s.split(':').nth(1).unwrap().parse::<u16>().unwrap())
571                .collect::<Vec<_>>();
572            util::setup_etcd(client_ports, None, None);
573        }
574    }
575
576    /// Setup PostgreSql if needed.
577    fn setup_pg(&self) {
578        if matches!(self.store_config.setup_pg, Some(ServiceProvider::Create)) {
579            let client_ports = self
580                .store_config
581                .store_addrs
582                .iter()
583                .map(|s| s.split(':').nth(1).unwrap().parse::<u16>().unwrap())
584                .collect::<Vec<_>>();
585            let client_port = client_ports.first().unwrap_or(&5432);
586            util::setup_pg(*client_port, None);
587        }
588    }
589
590    /// Setup MySql if needed.
591    async fn setup_mysql(&self) {
592        if matches!(self.store_config.setup_mysql, Some(ServiceProvider::Create)) {
593            let client_ports = self
594                .store_config
595                .store_addrs
596                .iter()
597                .map(|s| s.split(':').nth(1).unwrap().parse::<u16>().unwrap())
598                .collect::<Vec<_>>();
599            let client_port = client_ports.first().unwrap_or(&3306);
600            util::setup_mysql(*client_port, None);
601
602            // Docker of MySQL starts slowly, so we need to wait for a while
603            tokio::time::sleep(Duration::from_secs(10)).await;
604        }
605    }
606
607    /// Build the DB with `cargo build --bin greptime`
608    fn build_db(&self) {
609        let mut bins_dir = self.bins_dir.lock().unwrap();
610        if bins_dir.is_some() {
611            return;
612        }
613
614        println!("Going to build the DB...");
615        let output = Command::new("cargo")
616            .current_dir(util::get_workspace_root())
617            .args([
618                "build",
619                "--bin",
620                "greptime",
621                "--features",
622                "pg_kvbackend,mysql_kvbackend,vector_index",
623            ])
624            .output()
625            .expect("Failed to start GreptimeDB");
626        if !output.status.success() {
627            println!("Failed to build GreptimeDB, {}", output.status);
628            println!("Cargo build stdout:");
629            io::stdout().write_all(&output.stdout).unwrap();
630            println!("Cargo build stderr:");
631            io::stderr().write_all(&output.stderr).unwrap();
632            panic!();
633        }
634
635        bins_dir.replace(util::get_binary_dir("debug"));
636    }
637
    /// Extra command line arguments passed when starting GreptimeDB binaries.
    pub(crate) fn extra_args(&self) -> &Vec<String> {
        &self.extra_args
    }
641
    /// Start a distributed GreptimeDB cluster. Exposed for compat runner.
    ///
    /// Delegates to `start_distributed` with the same `id`.
    pub(crate) async fn compat_start_distributed(&self, id: usize) -> GreptimeDB {
        self.start_distributed(id).await
    }
646
647    /// Full restart of all distributed processes with a new binary directory,
648    /// preserving the same context and data.
649    /// After restart, waits for the frontend gRPC endpoint to become ready.
650    pub(crate) async fn compat_restart_all(&self, db: &GreptimeDB, bins_dir: PathBuf) {
651        *db.active_bins_dir.lock().unwrap() = Some(bins_dir);
652        self.restart_server(db, true).await;
653        self.wait_frontend_ready(db).await;
654    }
655
656    /// Wait for frontend gRPC readiness after restart.
657    async fn wait_frontend_ready(&self, db: &GreptimeDB) {
658        let frontend_mode = db
659            .ctx
660            .get_server_mode(SERVER_MODE_FRONTEND_IDX)
661            .cloned()
662            .unwrap();
663        if let Some(addr) = frontend_mode.check_addrs().first() {
664            println!("Waiting for frontend gRPC readiness at {addr}...");
665            crate::util::retry_with_backoff(
666                || async {
667                    let mut client = db.client.lock().await;
668                    match client.grpc_query("SELECT 1").await {
669                        Ok(_) => Ok(()),
670                        Err(e) => Err(format!("Frontend not ready: {e}")),
671                    }
672                },
673                10,
674                std::time::Duration::from_secs(1),
675            )
676            .await
677            .unwrap_or_else(|e| panic!("Frontend failed to become ready: {e}"));
678        }
679    }
680}
681
/// A running (or externally provided) GreptimeDB instance together with the
/// client used to query it.
pub struct GreptimeDB {
    /// The standalone process, or the datanode processes in distributed
    /// mode; `None` until processes are attached after connecting.
    server_processes: Option<Arc<Mutex<Vec<Child>>>>,
    /// Metasrv process (distributed mode only).
    metasrv_process: Mutex<Option<Child>>,
    /// Frontend process (distributed mode only).
    frontend_process: Mutex<Option<Child>>,
    /// Flownode process (distributed mode only).
    flownode_process: Mutex<Option<Child>>,
    /// gRPC / MySQL / PostgreSQL client; an async mutex because it is held
    /// across `.await` points while querying.
    client: TokioMutex<MultiProtocolClient>,
    /// Directory of the binary used when restarting this instance.
    active_bins_dir: Mutex<Option<PathBuf>>,
    /// Context holding the server modes recorded at startup.
    ctx: GreptimeDBContext,
    /// Whether this instance was started in standalone mode.
    is_standalone: bool,
    /// The environment that created this instance.
    env: Env,
    /// Instance id, used in log file names.
    id: usize,
}
694
695impl GreptimeDB {
696    async fn postgres_query(&self, _ctx: QueryContext, query: String) -> Box<dyn Display> {
697        let mut client = self.client.lock().await;
698
699        match client.postgres_query(&query).await {
700            Ok(rows) => Box::new(PostgresqlFormatter::from(rows)),
701            Err(e) => Box::new(e),
702        }
703    }
704
705    async fn mysql_query(&self, _ctx: QueryContext, query: String) -> Box<dyn Display> {
706        let mut client = self.client.lock().await;
707
708        match client.mysql_query(&query).await {
709            Ok(res) => Box::new(MysqlFormatter::from(res)),
710            Err(e) => Box::new(e),
711        }
712    }
713
714    async fn grpc_query(&self, _ctx: QueryContext, query: String) -> Box<dyn Display> {
715        let mut client = self.client.lock().await;
716
717        match client.grpc_query(&query).await {
718            Ok(rows) => Box::new(OutputFormatter::from(rows)),
719            Err(e) => Box::new(ErrorFormatter::from(e)),
720        }
721    }
722
723    /// Handle `QueryContext` directives for compat statement execution.
724    ///
725    /// Inspects `QueryContext` keys set by sqlness interceptors:
726    /// - `restart`: restarts the server (datanode-only) if not using external address.
727    /// - `version`: switches to the specified binary version and performs a full restart.
728    ///
729    /// This does **not** execute queries itself; it only prepares the server state.
730    /// Used by the compat runner.
731    pub(crate) async fn compat_prepare_query_context(&self, ctx: &QueryContext) {
732        if ctx.context.contains_key("restart") && self.env.server_addrs.server_addr.is_none() {
733            self.env.restart_server(self, false).await;
734        } else if let Some(version) = ctx.context.get("version") {
735            let version_bin_dir = self
736                .env
737                .versioned_bins_dirs
738                .lock()
739                .expect("lock poison")
740                .get(version.as_str())
741                .cloned();
742
743            match version_bin_dir {
744                Some(path) if path.join(PROGRAM).is_file() => {
745                    *self.active_bins_dir.lock().unwrap() = Some(path);
746                }
747                _ => {
748                    maybe_pull_binary(version, self.env.pull_version_on_need).await;
749                    let root = get_workspace_root();
750                    let new_path = PathBuf::from_iter([&root, version]);
751                    *self.active_bins_dir.lock().unwrap() = Some(new_path);
752                }
753            }
754
755            self.env.restart_server(self, true).await;
756            // sleep for a while to wait for the server to fully boot up
757            tokio::time::sleep(Duration::from_secs(5)).await;
758        }
759    }
760
761    pub(crate) async fn compat_query(
762        &self,
763        query: &str,
764        ctx: &QueryContext,
765    ) -> Result<String, String> {
766        let mut client = self.client.lock().await;
767
768        // Handle protocol switching
769        if let Some(protocol) = ctx.context.get(PROTOCOL_KEY) {
770            if protocol == MYSQL {
771                return match client.mysql_query(query).await {
772                    Ok(res) => Ok(crate::formatter::MysqlFormatter::from(res).to_string()),
773                    Err(e) => Err(e),
774                };
775            } else {
776                // postgres
777                return match client.postgres_query(query).await {
778                    Ok(rows) => Ok(crate::formatter::PostgresqlFormatter::from(rows).to_string()),
779                    Err(e) => Err(e),
780                };
781            }
782        }
783
784        // Default: gRPC
785        match client.grpc_query(query).await {
786            Ok(output) => Ok(OutputFormatter::from(output).to_string()),
787            Err(e) => {
788                let status_code = e.status_code();
789                let root_cause = e.output_msg();
790                Err(format!(
791                    "Error: {}({status_code}), {root_cause}",
792                    status_code as u32
793                ))
794            }
795        }
796    }
797}
798
799#[async_trait]
800impl Database for GreptimeDB {
801    async fn query(&self, ctx: QueryContext, query: String) -> Box<dyn Display> {
802        if ctx.context.contains_key("restart") && self.env.server_addrs.server_addr.is_none() {
803            self.env.restart_server(self, false).await;
804        } else if let Some(version) = ctx.context.get("version") {
805            let version_bin_dir = self
806                .env
807                .versioned_bins_dirs
808                .lock()
809                .expect("lock poison")
810                .get(version.as_str())
811                .cloned();
812
813            match version_bin_dir {
814                Some(path) if path.join(PROGRAM).is_file() => {
815                    // use version in versioned_bins_dirs
816                    *self.active_bins_dir.lock().unwrap() = Some(path);
817                }
818                _ => {
819                    // use version in dir files
820                    maybe_pull_binary(version, self.env.pull_version_on_need).await;
821                    let root = get_workspace_root();
822                    let new_path = PathBuf::from_iter([&root, version]);
823                    *self.active_bins_dir.lock().unwrap() = Some(new_path);
824                }
825            }
826
827            self.env.restart_server(self, true).await;
828            // sleep for a while to wait for the server to fully boot up
829            tokio::time::sleep(Duration::from_secs(5)).await;
830        }
831
832        if let Some(protocol) = ctx.context.get(PROTOCOL_KEY) {
833            // protocol is bound to be either "mysql" or "postgres"
834            if protocol == MYSQL {
835                self.mysql_query(ctx, query).await
836            } else {
837                self.postgres_query(ctx, query).await
838            }
839        } else {
840            self.grpc_query(ctx, query).await
841        }
842    }
843}
844
845impl GreptimeDB {
846    fn stop(&mut self) {
847        if let Some(server_processes) = self.server_processes.clone() {
848            let mut server_processes = server_processes.lock().unwrap();
849            for mut server_process in server_processes.drain(..) {
850                Env::stop_server(&mut server_process);
851                println!(
852                    "Standalone or Datanode (pid = {}) is stopped",
853                    server_process.id()
854                );
855            }
856        }
857        if let Some(mut metasrv) = self
858            .metasrv_process
859            .lock()
860            .expect("someone else panic when holding lock")
861            .take()
862        {
863            Env::stop_server(&mut metasrv);
864            println!("Metasrv (pid = {}) is stopped", metasrv.id());
865        }
866        if let Some(mut frontend) = self
867            .frontend_process
868            .lock()
869            .expect("someone else panic when holding lock")
870            .take()
871        {
872            Env::stop_server(&mut frontend);
873            println!("Frontend (pid = {}) is stopped", frontend.id());
874        }
875        if let Some(mut flownode) = self
876            .flownode_process
877            .lock()
878            .expect("someone else panic when holding lock")
879            .take()
880        {
881            Env::stop_server(&mut flownode);
882            println!("Flownode (pid = {}) is stopped", flownode.id());
883        }
884        if matches!(self.ctx.wal, WalConfig::Kafka { needs_kafka_cluster, .. } if needs_kafka_cluster)
885        {
886            util::teardown_wal();
887        }
888    }
889
890    /// Stop all processes managed by this GreptimeDB. Exposed for compat runner.
891    pub(crate) fn compat_stop(&mut self) {
892        self.stop();
893    }
894}
895
896impl Drop for GreptimeDB {
897    fn drop(&mut self) {
898        if self.env.server_addrs.server_addr.is_none() {
899            self.stop();
900        }
901    }
902}
903
/// Per-run state for a GreptimeDB test environment: creation timestamp, datanode id
/// counter, WAL and store configuration, and the server modes registered by index.
pub struct GreptimeDBContext {
    /// Start time in milliseconds, captured by `new` from
    /// `common_time::util::current_time_millis()`.
    time: i64,
    /// Counter that starts at 0, is bumped by `incr_datanode_id` and zeroed by
    /// `reset_datanode_id`.
    datanode_id: AtomicU32,
    /// WAL backend configuration (`RaftEngine` or `Kafka`).
    wal: WalConfig,
    /// Store configuration; callers get a clone through `store_config()`.
    store_config: StoreConfig,
    /// Server modes addressed by position; written by `set_server_mode` and read by
    /// `get_server_mode`.
    server_modes: Vec<ServerMode>,
}
912
913impl GreptimeDBContext {
914    pub fn new(wal: WalConfig, store_config: StoreConfig) -> Self {
915        Self {
916            time: common_time::util::current_time_millis(),
917            datanode_id: AtomicU32::new(0),
918            wal,
919            store_config,
920            server_modes: Vec::new(),
921        }
922    }
923
924    pub(crate) fn time(&self) -> i64 {
925        self.time
926    }
927
928    pub fn is_raft_engine(&self) -> bool {
929        matches!(self.wal, WalConfig::RaftEngine)
930    }
931
932    pub fn kafka_wal_broker_endpoints(&self) -> String {
933        match &self.wal {
934            WalConfig::RaftEngine => String::new(),
935            WalConfig::Kafka {
936                broker_endpoints, ..
937            } => serde_json::to_string(&broker_endpoints).unwrap(),
938        }
939    }
940
941    fn incr_datanode_id(&self) {
942        let _ = self.datanode_id.fetch_add(1, Ordering::Relaxed);
943    }
944
945    fn reset_datanode_id(&self) {
946        self.datanode_id.store(0, Ordering::Relaxed);
947    }
948
949    pub(crate) fn store_config(&self) -> StoreConfig {
950        self.store_config.clone()
951    }
952
953    fn set_server_mode(&mut self, mode: ServerMode, idx: usize) {
954        if idx >= self.server_modes.len() {
955            self.server_modes.resize(idx + 1, mode.clone());
956        }
957        self.server_modes[idx] = mode;
958    }
959
960    fn get_server_mode(&self, idx: usize) -> Option<&ServerMode> {
961        self.server_modes.get(idx)
962    }
963}