// tests_integration/cluster.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{BTreeSet, HashMap};
16use std::env;
17use std::net::TcpListener;
18use std::ops::RangeInclusive;
19use std::sync::Arc;
20use std::time::Duration;
21
22use api::v1::region::region_server::RegionServer;
23use arrow_flight::flight_service_server::FlightServiceServer;
24use cache::{
25    build_datanode_cache_registry, build_fundamental_cache_registry,
26    with_default_composite_cache_registry,
27};
28use catalog::information_extension::DistributedInformationExtension;
29use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, MetaKvBackend};
30use catalog::process_manager::ProcessManager;
31use client::Client;
32use client::client_manager::NodeClients;
33use cmd::frontend::create_heartbeat_task;
34use common_base::Plugins;
35use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
36use common_meta::DatanodeId;
37use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
38use common_meta::kv_backend::KvBackendRef;
39use common_meta::kv_backend::chroot::ChrootKvBackend;
40use common_meta::kv_backend::etcd::EtcdStore;
41use common_meta::kv_backend::memory::MemoryKvBackend;
42use common_meta::peer::Peer;
43use common_runtime::Builder as RuntimeBuilder;
44use common_runtime::runtime::BuilderBuild;
45use common_test_util::temp_dir::create_temp_dir;
46use common_time::util::DefaultSystemTimer;
47use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig};
48use datanode::config::DatanodeOptions;
49use datanode::datanode::{Datanode, DatanodeBuilder, ProcedureConfig};
50use frontend::frontend::{Frontend, FrontendOptions};
51use frontend::instance::Instance as FeInstance;
52use frontend::instance::builder::FrontendBuilder;
53use frontend::server::Services;
54use futures::TryStreamExt;
55use hyper_util::rt::TokioIo;
56use meta_client::client::MetaClientBuilder;
57use meta_srv::cluster::MetaPeerClientRef;
58use meta_srv::discovery;
59use meta_srv::gc::GcSchedulerOptions;
60use meta_srv::metasrv::{Metasrv, MetasrvOptions, SelectorRef};
61use meta_srv::mocks::MockInfo;
62use mito2::gc::GcConfig;
63use mito2::region::MitoRegionRef;
64use object_store::config::ObjectStoreConfig;
65use rand::Rng;
66use servers::grpc::GrpcOptions;
67use servers::grpc::flight::FlightCraftWrapper;
68use servers::grpc::region_server::RegionServerRequestHandler;
69use servers::server::ServerHandlers;
70use store_api::storage::RegionId;
71use tempfile::TempDir;
72use tonic::codec::CompressionEncoding;
73use tonic::transport::Server;
74use tower::service_fn;
75use uuid::Uuid;
76
77use crate::test_util::{
78    self, FileDirGuard, PEER_PLACEHOLDER_ADDR, StorageType, TestGuard, create_datanode_opts,
79    create_tmp_dir_and_datanode_opts,
80};
81
/// A mock GreptimeDB cluster for integration tests: one metasrv, one frontend,
/// and several datanodes wired together over in-memory gRPC channels.
pub struct GreptimeDbCluster {
    /// Guards that keep the temporary home/storage directories alive for the
    /// lifetime of the cluster.
    pub guards: Vec<TestGuard>,
    /// The options each datanode was built with.
    pub datanode_options: Vec<DatanodeOptions>,

    /// Running datanode instances, keyed by node id.
    pub datanode_instances: HashMap<DatanodeId, Datanode>,
    /// The metadata kv backend shared with the metasrv (in-memory or etcd).
    pub kv_backend: KvBackendRef,
    /// The metasrv coordinating the cluster.
    pub metasrv: Arc<Metasrv>,
    /// The frontend serving queries against the datanodes.
    pub frontend: Arc<Frontend>,
}
91
92impl GreptimeDbCluster {
93    pub fn fe_instance(&self) -> &Arc<FeInstance> {
94        &self.frontend.instance
95    }
96
97    /// List all SST files from all datanodes.
98    pub async fn list_sst_files_from_all_datanodes(&self) -> BTreeSet<String> {
99        let mut sst_files = BTreeSet::new();
100
101        for datanode in self.datanode_instances.values() {
102            let region_server = datanode.region_server();
103            let mito = region_server.mito_engine().unwrap();
104            let all_files = mito
105                .all_ssts_from_storage()
106                .try_collect::<Vec<_>>()
107                .await
108                .unwrap()
109                .into_iter()
110                .map(|e| e.file_path)
111                .collect::<Vec<_>>();
112            sst_files.extend(all_files);
113        }
114
115        sst_files
116    }
117
118    /// List all SST files from the manifests of all datanodes.
119    pub async fn list_sst_files_from_manifests(&self) -> BTreeSet<String> {
120        let mut sst_files = BTreeSet::new();
121
122        for datanode in self.datanode_instances.values() {
123            let region_server = datanode.region_server();
124            let mito = region_server.mito_engine().unwrap();
125            let all_files = mito
126                .all_ssts_from_manifest()
127                .await
128                .into_iter()
129                .flat_map(|e| {
130                    if e.index_file_path.is_some() {
131                        vec![e.file_path, e.index_file_path.unwrap()]
132                    } else {
133                        vec![e.file_path]
134                    }
135                })
136                .collect::<BTreeSet<_>>();
137
138            sst_files.extend(all_files);
139        }
140
141        sst_files
142    }
143
144    pub async fn list_all_regions(&self) -> HashMap<RegionId, MitoRegionRef> {
145        let mut regions = HashMap::new();
146
147        for datanode in self.datanode_instances.values() {
148            let region_server = datanode.region_server();
149            let mito = region_server.mito_engine().unwrap();
150            for region in mito.regions() {
151                regions.insert(region.region_id(), region);
152            }
153        }
154
155        regions
156    }
157}
158
/// Builder for a test [`GreptimeDbCluster`]: configures storage, WAL, GC and
/// the number of datanodes before assembling and starting the cluster.
pub struct GreptimeDbClusterBuilder {
    /// Name used to namespace temp dirs and the etcd chroot.
    cluster_name: String,
    /// Metadata kv backend (in-memory, or etcd when `GT_ETCD_ENDPOINTS` is set).
    kv_backend: KvBackendRef,
    /// Object store config shared by all datanodes; when `None`, each datanode
    /// gets temp-dir-backed file storage instead.
    store_config: Option<ObjectStoreConfig>,
    /// Extra storage providers for datanodes (only used when `store_config` is `None`).
    store_providers: Option<Vec<StorageType>>,
    /// Number of datanodes to start; defaults to 4 when `None`.
    datanodes: Option<u32>,
    /// WAL configuration applied to every datanode.
    datanode_wal_config: DatanodeWalConfig,
    /// WAL configuration for the metasrv.
    metasrv_wal_config: MetasrvWalConfig,
    /// GC configuration applied to every datanode.
    datanode_gc_config: GcConfig,
    /// GC scheduler options for the metasrv.
    metasrv_gc_config: GcSchedulerOptions,
    /// When set, all datanodes share this home directory instead of per-node temp dirs.
    shared_home_dir: Option<Arc<TempDir>>,
    /// Custom datanode selector for the metasrv; metasrv default when `None`.
    meta_selector: Option<SelectorRef>,
}
172
173impl GreptimeDbClusterBuilder {
174    pub async fn new(cluster_name: &str) -> Self {
175        let endpoints = env::var("GT_ETCD_ENDPOINTS").unwrap_or_default();
176
177        let kv_backend: KvBackendRef = if endpoints.is_empty() {
178            Arc::new(MemoryKvBackend::new())
179        } else {
180            let endpoints = endpoints
181                .split(',')
182                .map(|s| s.to_string())
183                .collect::<Vec<String>>();
184            let backend = EtcdStore::with_endpoints(endpoints, 128)
185                .await
186                .expect("malformed endpoints");
187            // Each retry requires a new isolation namespace.
188            let chroot = format!("{}{}", cluster_name, Uuid::new_v4());
189            Arc::new(ChrootKvBackend::new(chroot.into(), backend))
190        };
191
192        Self {
193            cluster_name: cluster_name.to_string(),
194            kv_backend,
195            store_config: None,
196            store_providers: None,
197            datanodes: None,
198            datanode_wal_config: DatanodeWalConfig::default(),
199            metasrv_wal_config: MetasrvWalConfig::default(),
200            datanode_gc_config: GcConfig::default(),
201            metasrv_gc_config: GcSchedulerOptions::default(),
202            shared_home_dir: None,
203            meta_selector: None,
204        }
205    }
206
207    #[must_use]
208    pub fn with_store_config(mut self, store_config: ObjectStoreConfig) -> Self {
209        self.store_config = Some(store_config);
210        self
211    }
212
213    #[must_use]
214    pub fn with_store_providers(mut self, store_providers: Vec<StorageType>) -> Self {
215        self.store_providers = Some(store_providers);
216        self
217    }
218
219    #[must_use]
220    pub fn with_datanodes(mut self, datanodes: u32) -> Self {
221        self.datanodes = Some(datanodes);
222        self
223    }
224
225    #[must_use]
226    pub fn with_datanode_wal_config(mut self, datanode_wal_config: DatanodeWalConfig) -> Self {
227        self.datanode_wal_config = datanode_wal_config;
228        self
229    }
230
231    #[must_use]
232    pub fn with_metasrv_wal_config(mut self, metasrv_wal_config: MetasrvWalConfig) -> Self {
233        self.metasrv_wal_config = metasrv_wal_config;
234        self
235    }
236
237    #[must_use]
238    pub fn with_datanode_gc_config(mut self, datanode_gc_config: GcConfig) -> Self {
239        self.datanode_gc_config = datanode_gc_config;
240        self
241    }
242
243    pub fn with_metasrv_gc_config(mut self, metasrv_gc_config: GcSchedulerOptions) -> Self {
244        self.metasrv_gc_config = metasrv_gc_config;
245        self
246    }
247
248    #[must_use]
249    pub fn with_shared_home_dir(mut self, shared_home_dir: Arc<TempDir>) -> Self {
250        self.shared_home_dir = Some(shared_home_dir);
251        self
252    }
253
254    #[must_use]
255    pub fn with_meta_selector(mut self, selector: SelectorRef) -> Self {
256        self.meta_selector = Some(selector);
257        self
258    }
259
    /// Assembles and starts a cluster from pre-built datanode options.
    ///
    /// Boot order matters: the mock metasrv starts first, then the datanodes
    /// are built and a gRPC client registered for each, then we wait until the
    /// metasrv reports all datanodes alive, and finally the frontend is built
    /// and started.
    ///
    /// When `start_frontend_servers` is `false` the frontend is given empty
    /// `ServerHandlers`, i.e. no network servers are started. `guards` are
    /// stored in the returned cluster to keep temp dirs alive.
    pub async fn build_with(
        &self,
        datanode_options: Vec<DatanodeOptions>,
        start_frontend_servers: bool,
        guards: Vec<TestGuard>,
    ) -> GreptimeDbCluster {
        let datanodes = datanode_options.len();
        let channel_config = ChannelConfig::new().timeout(Some(Duration::from_secs(20)));
        let datanode_clients = Arc::new(NodeClients::new(channel_config));

        let opt = MetasrvOptions {
            procedure: ProcedureConfig {
                // Due to large network delay during cross data-center.
                // We only make max_retry_times and retry_delay large than the default in tests.
                max_retry_times: 5,
                retry_delay: Duration::from_secs(1),
                max_metadata_value_size: None,
                max_running_procedures: 128,
            },
            wal: self.metasrv_wal_config.clone(),
            grpc: GrpcOptions {
                server_addr: "127.0.0.1:3002".to_string(),
                ..Default::default()
            },
            gc: self.metasrv_gc_config.clone(),
            ..Default::default()
        };

        let metasrv = meta_srv::mocks::mock(
            opt,
            self.kv_backend.clone(),
            self.meta_selector.clone(),
            Some(datanode_clients.clone()),
            None,
        )
        .await;

        let datanode_instances = self
            .build_datanodes_with_options(&metasrv, &datanode_options)
            .await;

        // Register a client for every datanode (connected over an in-memory
        // duplex channel, see `create_datanode_client`).
        build_datanode_clients(datanode_clients.clone(), &datanode_instances, datanodes).await;

        self.wait_datanodes_alive(metasrv.metasrv.meta_peer_client(), datanodes)
            .await;

        let mut frontend = self
            .build_frontend(metasrv.clone(), datanode_clients, start_frontend_servers)
            .await;

        test_util::prepare_another_catalog_and_schema(&frontend.instance).await;

        frontend.start().await.unwrap();

        GreptimeDbCluster {
            datanode_options,
            guards,
            datanode_instances,
            kv_backend: self.kv_backend.clone(),
            metasrv: metasrv.metasrv,
            frontend: Arc::new(frontend),
        }
    }
323
324    pub async fn build(&self, start_frontend_servers: bool) -> GreptimeDbCluster {
325        let datanodes = self.datanodes.unwrap_or(4);
326        let (datanode_options, guards) = self.build_datanode_options_and_guards(datanodes).await;
327        self.build_with(datanode_options, start_frontend_servers, guards)
328            .await
329    }
330
    /// Builds one `DatanodeOptions` per datanode (node ids start at 1) plus the
    /// temp-dir guards that keep their storage alive.
    ///
    /// With a custom `store_config`, every datanode uses it, rooted either in
    /// the shared home dir (when set) or in a fresh temp dir per datanode.
    /// Without one, each datanode gets its own temp-dir-backed file storage
    /// via `create_tmp_dir_and_datanode_opts`.
    async fn build_datanode_options_and_guards(
        &self,
        datanodes: u32,
    ) -> (Vec<DatanodeOptions>, Vec<TestGuard>) {
        let mut options = Vec::with_capacity(datanodes as usize);
        let mut guards = Vec::with_capacity(datanodes as usize);

        for i in 0..datanodes {
            // Node ids are 1-based.
            let datanode_id = i as u64 + 1;
            let mut opts = if let Some(store_config) = &self.store_config {
                let home_dir = if let Some(home_dir) = &self.shared_home_dir {
                    // Shared home dir: its owner (the caller) keeps it alive,
                    // so no guard is pushed here.
                    home_dir.path().to_str().unwrap().to_string()
                } else {
                    let home_tmp_dir = create_temp_dir(&format!("gt_home_{}", &self.cluster_name));
                    let home_dir = home_tmp_dir.path().to_str().unwrap().to_string();
                    // Push a guard so the temp dir survives until the cluster drops.
                    guards.push(TestGuard {
                        home_guard: FileDirGuard::new(home_tmp_dir),
                        storage_guards: Vec::new(),
                    });

                    home_dir
                };

                create_datanode_opts(
                    store_config.clone(),
                    vec![],
                    home_dir,
                    self.datanode_wal_config.clone(),
                    self.datanode_gc_config.clone(),
                )
            } else {
                let (opts, guard) = create_tmp_dir_and_datanode_opts(
                    StorageType::File,
                    self.store_providers.clone().unwrap_or_default(),
                    &format!("{}-dn-{}", self.cluster_name, datanode_id),
                    self.datanode_wal_config.clone(),
                    self.datanode_gc_config.clone(),
                );
                guards.push(guard);

                opts
            };
            opts.node_id = Some(datanode_id);

            options.push(opts);
        }
        (options, guards)
    }
379
380    async fn build_datanodes_with_options(
381        &self,
382        metasrv: &MockInfo,
383        options: &[DatanodeOptions],
384    ) -> HashMap<DatanodeId, Datanode> {
385        let mut instances = HashMap::with_capacity(options.len());
386
387        for opts in options {
388            let datanode = self.create_datanode(opts.clone(), metasrv.clone()).await;
389            instances.insert(opts.node_id.unwrap(), datanode);
390        }
391
392        instances
393    }
394
395    async fn wait_datanodes_alive(
396        &self,
397        meta_peer_client: &MetaPeerClientRef,
398        expected_datanodes: usize,
399    ) {
400        for _ in 0..100 {
401            let alive_datanodes = discovery::utils::alive_datanodes(
402                &DefaultSystemTimer,
403                meta_peer_client.as_ref(),
404                Duration::from_secs(u64::MAX),
405                None,
406            )
407            .await
408            .unwrap()
409            .len();
410            if alive_datanodes == expected_datanodes {
411                return;
412            }
413            tokio::time::sleep(Duration::from_micros(100)).await
414        }
415        panic!("Some Datanodes are not alive in 10 seconds!")
416    }
417
    /// Builds and starts a single datanode: connects a meta client to the mock
    /// metasrv, wires up the datanode-side cache registry, builds the node,
    /// then starts its heartbeat loop (which is what lets the metasrv observe
    /// the node as alive — see `wait_datanodes_alive`).
    async fn create_datanode(&self, opts: DatanodeOptions, metasrv: MockInfo) -> Datanode {
        let mut meta_client = MetaClientBuilder::datanode_default_options(opts.node_id.unwrap())
            .channel_manager(metasrv.channel_manager)
            .build();
        meta_client.start(&[&metasrv.server_addr]).await.unwrap();
        let meta_client = Arc::new(meta_client);

        // Metadata reads/writes go through the meta client instead of a local store.
        let meta_backend = Arc::new(MetaKvBackend {
            client: meta_client.clone(),
        });

        let layered_cache_registry = Arc::new(
            LayeredCacheRegistryBuilder::default()
                .add_cache_registry(build_datanode_cache_registry(meta_backend.clone()))
                .build(),
        );

        let mut builder = DatanodeBuilder::new(opts, Plugins::default(), meta_backend);
        builder
            .with_cache_registry(layered_cache_registry)
            .with_meta_client(meta_client);
        let mut datanode = builder.build().await.unwrap();

        datanode.start_heartbeat().await.unwrap();

        datanode
    }
445
    /// Builds the frontend: a meta client pointed at the mock metasrv, the
    /// layered/composite cache registries over the cached meta backend, the
    /// catalog manager, the frontend instance itself, its heartbeat task, and
    /// (optionally) its network servers.
    ///
    /// When `start_frontend_servers` is `false`, `ServerHandlers::default()`
    /// is used so no sockets are bound.
    async fn build_frontend(
        &self,
        metasrv: MockInfo,
        datanode_clients: Arc<NodeClients>,
        start_frontend_servers: bool,
    ) -> Frontend {
        let mut meta_client = MetaClientBuilder::frontend_default_options()
            .channel_manager(metasrv.channel_manager)
            .enable_access_cluster_info()
            .build();
        meta_client.start(&[&metasrv.server_addr]).await.unwrap();
        let meta_client = Arc::new(meta_client);

        // Kv backend over the meta client, fronted by a cache.
        let cached_meta_backend = Arc::new(
            CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone()))).build(),
        );

        let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
            CacheRegistryBuilder::default()
                .add_cache(cached_meta_backend.clone())
                .build(),
        );
        let fundamental_cache_registry =
            build_fundamental_cache_registry(Arc::new(MetaKvBackend::new(meta_client.clone())));
        let cache_registry = Arc::new(
            with_default_composite_cache_registry(
                layered_cache_builder.add_cache_registry(fundamental_cache_registry),
            )
            .unwrap()
            .build(),
        );

        let information_extension = Arc::new(DistributedInformationExtension::new(
            meta_client.clone(),
            datanode_clients.clone(),
        ));
        let catalog_manager = KvBackendCatalogManagerBuilder::new(
            information_extension,
            cached_meta_backend.clone(),
            cache_registry.clone(),
        )
        .build();

        // Options carry randomly chosen free ports (see `build_frontend_options`).
        let fe_opts = self.build_frontend_options();

        let instance = FrontendBuilder::new(
            fe_opts.clone(),
            cached_meta_backend.clone(),
            cache_registry.clone(),
            catalog_manager,
            datanode_clients,
            meta_client.clone(),
            Arc::new(ProcessManager::new(fe_opts.grpc.server_addr.clone(), None)),
        )
        .with_local_cache_invalidator(cache_registry)
        .try_build()
        .await
        .unwrap();

        let heartbeat_task = create_heartbeat_task(&fe_opts, meta_client, &instance);

        let instance = Arc::new(instance);

        // Build the servers for the frontend.
        let servers = if start_frontend_servers {
            Services::new(fe_opts, instance.clone(), Plugins::default())
                .build()
                .unwrap()
        } else {
            ServerHandlers::default()
        };

        Frontend {
            instance,
            servers,
            heartbeat_task: Some(heartbeat_task),
        }
    }
524
525    fn build_frontend_options(&self) -> FrontendOptions {
526        let mut fe_opts = FrontendOptions::default();
527
528        // Choose a random unused port between [14000, 24000] for local test to avoid conflicts.
529        let port_range = 14000..=24000;
530        let max_attempts = 10;
531        let localhost = "127.0.0.1";
532        let construct_addr = |port: u16| format!("{}:{}", localhost, port);
533
534        fe_opts.http.addr = construct_addr(self.choose_random_unused_port(
535            port_range.clone(),
536            max_attempts,
537            localhost,
538        ));
539
540        let grpc_port = self.choose_random_unused_port(port_range.clone(), max_attempts, localhost);
541        fe_opts.grpc.bind_addr = construct_addr(grpc_port);
542        fe_opts.grpc.server_addr = construct_addr(grpc_port);
543
544        fe_opts.mysql.addr = construct_addr(self.choose_random_unused_port(
545            port_range.clone(),
546            max_attempts,
547            localhost,
548        ));
549        fe_opts.postgres.addr =
550            construct_addr(self.choose_random_unused_port(port_range, max_attempts, localhost));
551
552        fe_opts
553    }
554
555    // Choose a random unused port between [start, end].
556    fn choose_random_unused_port(
557        &self,
558        port_range: RangeInclusive<u16>,
559        max_attempts: u16,
560        addr: &str,
561    ) -> u16 {
562        let mut rng = rand::rng();
563
564        let mut attempts = 0;
565        while attempts < max_attempts {
566            let port = rng.random_range(port_range.clone());
567            if TcpListener::bind(format!("{}:{}", addr, port)).is_ok() {
568                return port;
569            }
570            attempts += 1;
571        }
572
573        panic!("No unused port found");
574    }
575}
576
577async fn build_datanode_clients(
578    clients: Arc<NodeClients>,
579    instances: &HashMap<DatanodeId, Datanode>,
580    datanodes: usize,
581) {
582    for i in 0..datanodes {
583        let datanode_id = i as u64 + 1;
584        let instance = instances.get(&datanode_id).unwrap();
585        let (addr, client) = create_datanode_client(instance).await;
586        clients
587            .insert_client(Peer::new(datanode_id, addr), client)
588            .await;
589    }
590}
591
/// Creates a [`Client`] that talks to `datanode`'s gRPC services (Arrow Flight
/// + region server) over an in-memory duplex stream — no TCP socket is bound.
///
/// Returns `(addr, client)` where `addr` is only the placeholder peer address
/// used as the channel-manager key.
async fn create_datanode_client(datanode: &Datanode) -> (String, Client) {
    // One end of the byte pipe is served by tonic below; the other becomes
    // the client transport handed to the channel manager's connector.
    let (client, server) = tokio::io::duplex(1024);

    let runtime = RuntimeBuilder::default()
        .worker_threads(2)
        .thread_name("grpc-handlers")
        .build()
        .unwrap();

    let flight_handler = FlightCraftWrapper(datanode.region_server());

    let region_server_handler =
        RegionServerRequestHandler::new(Arc::new(datanode.region_server()), runtime);

    // Serve both gRPC services on the single in-memory connection; the
    // incoming "stream" yields exactly one connection: the duplex server end.
    let _handle = tokio::spawn(async move {
        Server::builder()
            .add_service(
                FlightServiceServer::new(flight_handler)
                    .accept_compressed(CompressionEncoding::Gzip)
                    .accept_compressed(CompressionEncoding::Zstd)
                    .send_compressed(CompressionEncoding::Gzip)
                    .send_compressed(CompressionEncoding::Zstd),
            )
            .add_service(
                RegionServer::new(region_server_handler)
                    .accept_compressed(CompressionEncoding::Gzip)
                    .accept_compressed(CompressionEncoding::Zstd)
                    .send_compressed(CompressionEncoding::Gzip)
                    .send_compressed(CompressionEncoding::Zstd),
            )
            .serve_with_incoming(futures::stream::iter(vec![Ok::<_, std::io::Error>(server)]))
            .await
    });

    // Move client to an option so we can _move_ the inner value
    // on the first attempt to connect. All other attempts will fail.
    let mut client = Some(client);
    // `PEER_PLACEHOLDER_ADDR` is just a placeholder, does not actually connect to it.
    let addr = PEER_PLACEHOLDER_ADDR;
    let channel_manager = ChannelManager::new();
    let _ = channel_manager
        .reset_with_connector(
            addr,
            service_fn(move |_| {
                let client = client.take();

                async move {
                    if let Some(client) = client {
                        Ok(TokioIo::new(client))
                    } else {
                        Err(std::io::Error::other("Client already taken"))
                    }
                }
            }),
        )
        .unwrap();
    (
        addr.to_string(),
        Client::with_manager_and_urls(channel_manager, vec![addr]),
    )
}