// tests_integration/cluster.rs
1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{BTreeSet, HashMap};
16use std::env;
17use std::net::TcpListener;
18use std::ops::RangeInclusive;
19use std::sync::Arc;
20use std::time::Duration;
21
22use api::v1::region::region_server::RegionServer;
23use arrow_flight::flight_service_server::FlightServiceServer;
24use cache::{
25    build_datanode_cache_registry, build_fundamental_cache_registry,
26    with_default_composite_cache_registry,
27};
28use catalog::information_extension::DistributedInformationExtension;
29use catalog::kvbackend::{
30    CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, new_read_only_meta_kv_backend,
31};
32use catalog::process_manager::ProcessManager;
33use client::Client;
34use client::client_manager::NodeClients;
35use cmd::frontend::create_heartbeat_task;
36use common_base::Plugins;
37use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
38use common_meta::DatanodeId;
39use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
40use common_meta::kv_backend::KvBackendRef;
41use common_meta::kv_backend::chroot::ChrootKvBackend;
42use common_meta::kv_backend::etcd::EtcdStore;
43use common_meta::kv_backend::memory::MemoryKvBackend;
44use common_meta::peer::Peer;
45use common_runtime::Builder as RuntimeBuilder;
46use common_runtime::runtime::BuilderBuild;
47use common_test_util::temp_dir::create_temp_dir;
48use common_time::util::DefaultSystemTimer;
49use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig};
50use datanode::config::DatanodeOptions;
51use datanode::datanode::{Datanode, DatanodeBuilder, ProcedureConfig};
52use frontend::frontend::{Frontend, FrontendOptions};
53use frontend::instance::Instance as FeInstance;
54use frontend::instance::builder::FrontendBuilder;
55use frontend::server::Services;
56use futures::TryStreamExt;
57use hyper_util::rt::TokioIo;
58use meta_client::client::MetaClientBuilder;
59use meta_srv::cluster::MetaPeerClientRef;
60use meta_srv::discovery;
61use meta_srv::gc::GcSchedulerOptions;
62use meta_srv::metasrv::{Metasrv, MetasrvOptions, SelectorRef};
63use meta_srv::mocks::MockInfo;
64use mito2::gc::GcConfig;
65use mito2::region::MitoRegionRef;
66use object_store::config::ObjectStoreConfig;
67use rand::Rng;
68use servers::grpc::GrpcOptions;
69use servers::grpc::flight::FlightCraftWrapper;
70use servers::grpc::region_server::RegionServerRequestHandler;
71use servers::server::ServerHandlers;
72use store_api::storage::RegionId;
73use tempfile::TempDir;
74use tonic::codec::CompressionEncoding;
75use tonic::transport::Server;
76use tower::service_fn;
77use uuid::Uuid;
78
79use crate::test_util::{
80    self, FileDirGuard, PEER_PLACEHOLDER_ADDR, StorageType, TestGuard, create_datanode_opts,
81    create_tmp_dir_and_datanode_opts,
82};
83
/// A handle to a fully assembled in-process GreptimeDB cluster used by
/// integration tests: one metasrv, one frontend, and several datanodes.
pub struct GreptimeDbCluster {
    // Temp-dir guards that keep datanode home/storage directories alive
    // for the lifetime of the cluster.
    pub guards: Vec<TestGuard>,
    // The options each datanode was built with.
    pub datanode_options: Vec<DatanodeOptions>,

    // Running datanode instances keyed by node id (ids are 1-based,
    // assigned in `build_datanode_options_and_guards`).
    pub datanode_instances: HashMap<DatanodeId, Datanode>,
    // The metadata kv-backend shared by the whole cluster
    // (in-memory, or etcd when `GT_ETCD_ENDPOINTS` is set).
    pub kv_backend: KvBackendRef,
    pub metasrv: Arc<Metasrv>,
    pub frontend: Arc<Frontend>,
}
93
94impl GreptimeDbCluster {
95    pub fn fe_instance(&self) -> &Arc<FeInstance> {
96        &self.frontend.instance
97    }
98
99    /// List all SST files from all datanodes.
100    pub async fn list_sst_files_from_all_datanodes(&self) -> BTreeSet<String> {
101        let mut sst_files = BTreeSet::new();
102
103        for datanode in self.datanode_instances.values() {
104            let region_server = datanode.region_server();
105            let mito = region_server.mito_engine().unwrap();
106            let all_files = mito
107                .all_ssts_from_storage()
108                .try_collect::<Vec<_>>()
109                .await
110                .unwrap()
111                .into_iter()
112                .map(|e| e.file_path)
113                .collect::<Vec<_>>();
114            sst_files.extend(all_files);
115        }
116
117        sst_files
118    }
119
120    /// List all SST files from the manifests of all datanodes.
121    pub async fn list_sst_files_from_manifests(&self) -> BTreeSet<String> {
122        let mut sst_files = BTreeSet::new();
123
124        for datanode in self.datanode_instances.values() {
125            let region_server = datanode.region_server();
126            let mito = region_server.mito_engine().unwrap();
127            let all_files = mito
128                .all_ssts_from_manifest()
129                .await
130                .into_iter()
131                .flat_map(|e| {
132                    if let Some(index_file_path) = e.index_file_path {
133                        vec![e.file_path, index_file_path]
134                    } else {
135                        vec![e.file_path]
136                    }
137                })
138                .collect::<BTreeSet<_>>();
139
140            sst_files.extend(all_files);
141        }
142
143        sst_files
144    }
145
146    pub async fn list_all_regions(&self) -> HashMap<RegionId, MitoRegionRef> {
147        let mut regions = HashMap::new();
148
149        for datanode in self.datanode_instances.values() {
150            let region_server = datanode.region_server();
151            let mito = region_server.mito_engine().unwrap();
152            for region in mito.regions() {
153                regions.insert(region.region_id(), region);
154            }
155        }
156
157        regions
158    }
159}
160
/// Builder for an in-process test cluster: configures storage, WAL, GC,
/// and datanode count before assembly.
pub struct GreptimeDbClusterBuilder {
    cluster_name: String,
    // Shared metadata backend: in-memory by default, etcd-backed when the
    // `GT_ETCD_ENDPOINTS` env var is set (see `new`).
    kv_backend: KvBackendRef,
    // Object-store config for datanodes; when `None`, a local-file store
    // with a fresh temp dir per datanode is used.
    store_config: Option<ObjectStoreConfig>,
    store_providers: Option<Vec<StorageType>>,
    // Number of datanodes to start; `build` defaults this to 4.
    datanodes: Option<u32>,
    datanode_wal_config: DatanodeWalConfig,
    metasrv_wal_config: MetasrvWalConfig,
    datanode_gc_config: GcConfig,
    metasrv_gc_config: GcSchedulerOptions,
    // When set, all datanodes share this home directory instead of
    // getting per-datanode temp dirs.
    shared_home_dir: Option<Arc<TempDir>>,
    // Optional custom metasrv selector; `None` uses the mock's default.
    meta_selector: Option<SelectorRef>,
}
174
175impl GreptimeDbClusterBuilder {
176    pub async fn new(cluster_name: &str) -> Self {
177        let endpoints = env::var("GT_ETCD_ENDPOINTS").unwrap_or_default();
178
179        let kv_backend: KvBackendRef = if endpoints.is_empty() {
180            Arc::new(MemoryKvBackend::new())
181        } else {
182            let endpoints = endpoints
183                .split(',')
184                .map(|s| s.to_string())
185                .collect::<Vec<String>>();
186            let backend = EtcdStore::with_endpoints(endpoints, 128)
187                .await
188                .expect("malformed endpoints");
189            // Each retry requires a new isolation namespace.
190            let chroot = format!("{}{}", cluster_name, Uuid::new_v4());
191            Arc::new(ChrootKvBackend::new(chroot.into(), backend))
192        };
193
194        Self {
195            cluster_name: cluster_name.to_string(),
196            kv_backend,
197            store_config: None,
198            store_providers: None,
199            datanodes: None,
200            datanode_wal_config: DatanodeWalConfig::default(),
201            metasrv_wal_config: MetasrvWalConfig::default(),
202            datanode_gc_config: GcConfig::default(),
203            metasrv_gc_config: GcSchedulerOptions::default(),
204            shared_home_dir: None,
205            meta_selector: None,
206        }
207    }
208
209    #[must_use]
210    pub fn with_store_config(mut self, store_config: ObjectStoreConfig) -> Self {
211        self.store_config = Some(store_config);
212        self
213    }
214
215    #[must_use]
216    pub fn with_store_providers(mut self, store_providers: Vec<StorageType>) -> Self {
217        self.store_providers = Some(store_providers);
218        self
219    }
220
221    #[must_use]
222    pub fn with_datanodes(mut self, datanodes: u32) -> Self {
223        self.datanodes = Some(datanodes);
224        self
225    }
226
227    #[must_use]
228    pub fn with_datanode_wal_config(mut self, datanode_wal_config: DatanodeWalConfig) -> Self {
229        self.datanode_wal_config = datanode_wal_config;
230        self
231    }
232
233    #[must_use]
234    pub fn with_metasrv_wal_config(mut self, metasrv_wal_config: MetasrvWalConfig) -> Self {
235        self.metasrv_wal_config = metasrv_wal_config;
236        self
237    }
238
239    #[must_use]
240    pub fn with_datanode_gc_config(mut self, datanode_gc_config: GcConfig) -> Self {
241        self.datanode_gc_config = datanode_gc_config;
242        self
243    }
244
245    pub fn with_metasrv_gc_config(mut self, metasrv_gc_config: GcSchedulerOptions) -> Self {
246        self.metasrv_gc_config = metasrv_gc_config;
247        self
248    }
249
250    #[must_use]
251    pub fn with_shared_home_dir(mut self, shared_home_dir: Arc<TempDir>) -> Self {
252        self.shared_home_dir = Some(shared_home_dir);
253        self
254    }
255
256    #[must_use]
257    pub fn with_meta_selector(mut self, selector: SelectorRef) -> Self {
258        self.meta_selector = Some(selector);
259        self
260    }
261
262    pub async fn build_with(
263        &self,
264        datanode_options: Vec<DatanodeOptions>,
265        start_frontend_servers: bool,
266        guards: Vec<TestGuard>,
267    ) -> GreptimeDbCluster {
268        let datanodes = datanode_options.len();
269        let channel_config = ChannelConfig::new().timeout(Some(Duration::from_secs(20)));
270        let datanode_clients = Arc::new(NodeClients::new(channel_config));
271
272        let opt = MetasrvOptions {
273            procedure: ProcedureConfig {
274                // Due to large network delay during cross data-center.
275                // We only make max_retry_times and retry_delay large than the default in tests.
276                max_retry_times: 5,
277                retry_delay: Duration::from_secs(1),
278                max_metadata_value_size: None,
279                max_running_procedures: 128,
280            },
281            wal: self.metasrv_wal_config.clone(),
282            grpc: GrpcOptions {
283                server_addr: "127.0.0.1:3002".to_string(),
284                ..Default::default()
285            },
286            gc: self.metasrv_gc_config.clone(),
287            ..Default::default()
288        };
289
290        test_util::prepare_another_catalog_and_schema_with_kv_backend(self.kv_backend.clone())
291            .await;
292
293        let metasrv = meta_srv::mocks::mock(
294            opt,
295            self.kv_backend.clone(),
296            self.meta_selector.clone(),
297            Some(datanode_clients.clone()),
298            None,
299        )
300        .await;
301
302        let datanode_instances = self
303            .build_datanodes_with_options(&metasrv, &datanode_options)
304            .await;
305
306        build_datanode_clients(datanode_clients.clone(), &datanode_instances, datanodes).await;
307
308        self.wait_datanodes_alive(metasrv.metasrv.meta_peer_client(), datanodes)
309            .await;
310
311        let mut frontend = self
312            .build_frontend(metasrv.clone(), datanode_clients, start_frontend_servers)
313            .await;
314
315        frontend.start().await.unwrap();
316
317        GreptimeDbCluster {
318            datanode_options,
319            guards,
320            datanode_instances,
321            kv_backend: self.kv_backend.clone(),
322            metasrv: metasrv.metasrv,
323            frontend: Arc::new(frontend),
324        }
325    }
326
327    pub async fn build(&self, start_frontend_servers: bool) -> GreptimeDbCluster {
328        let datanodes = self.datanodes.unwrap_or(4);
329        let (datanode_options, guards) = self.build_datanode_options_and_guards(datanodes).await;
330        self.build_with(datanode_options, start_frontend_servers, guards)
331            .await
332    }
333
334    async fn build_datanode_options_and_guards(
335        &self,
336        datanodes: u32,
337    ) -> (Vec<DatanodeOptions>, Vec<TestGuard>) {
338        let mut options = Vec::with_capacity(datanodes as usize);
339        let mut guards = Vec::with_capacity(datanodes as usize);
340
341        for i in 0..datanodes {
342            let datanode_id = i as u64 + 1;
343            let mut opts = if let Some(store_config) = &self.store_config {
344                let home_dir = if let Some(home_dir) = &self.shared_home_dir {
345                    home_dir.path().to_str().unwrap().to_string()
346                } else {
347                    let home_tmp_dir = create_temp_dir(&format!("gt_home_{}", &self.cluster_name));
348                    let home_dir = home_tmp_dir.path().to_str().unwrap().to_string();
349                    guards.push(TestGuard {
350                        home_guard: FileDirGuard::new(home_tmp_dir),
351                        storage_guards: Vec::new(),
352                    });
353
354                    home_dir
355                };
356
357                create_datanode_opts(
358                    store_config.clone(),
359                    vec![],
360                    home_dir,
361                    self.datanode_wal_config.clone(),
362                    self.datanode_gc_config.clone(),
363                )
364            } else {
365                let (opts, guard) = create_tmp_dir_and_datanode_opts(
366                    StorageType::File,
367                    self.store_providers.clone().unwrap_or_default(),
368                    &format!("{}-dn-{}", self.cluster_name, datanode_id),
369                    self.datanode_wal_config.clone(),
370                    self.datanode_gc_config.clone(),
371                );
372                guards.push(guard);
373
374                opts
375            };
376            opts.node_id = Some(datanode_id);
377
378            options.push(opts);
379        }
380        (options, guards)
381    }
382
383    async fn build_datanodes_with_options(
384        &self,
385        metasrv: &MockInfo,
386        options: &[DatanodeOptions],
387    ) -> HashMap<DatanodeId, Datanode> {
388        let mut instances = HashMap::with_capacity(options.len());
389
390        for opts in options {
391            let datanode = self.create_datanode(opts.clone(), metasrv.clone()).await;
392            instances.insert(opts.node_id.unwrap(), datanode);
393        }
394
395        instances
396    }
397
398    async fn wait_datanodes_alive(
399        &self,
400        meta_peer_client: &MetaPeerClientRef,
401        expected_datanodes: usize,
402    ) {
403        for _ in 0..100 {
404            let alive_datanodes = discovery::utils::alive_datanodes(
405                &DefaultSystemTimer,
406                meta_peer_client.as_ref(),
407                Duration::from_secs(u64::MAX),
408                None,
409            )
410            .await
411            .unwrap()
412            .len();
413            if alive_datanodes == expected_datanodes {
414                return;
415            }
416            tokio::time::sleep(Duration::from_micros(100)).await
417        }
418        panic!("Some Datanodes are not alive in 10 seconds!")
419    }
420
421    async fn create_datanode(&self, opts: DatanodeOptions, metasrv: MockInfo) -> Datanode {
422        let mut meta_client = MetaClientBuilder::datanode_default_options(opts.node_id.unwrap())
423            .channel_manager(metasrv.channel_manager)
424            .build();
425        meta_client.start(&[&metasrv.server_addr]).await.unwrap();
426        let meta_client = Arc::new(meta_client);
427
428        let meta_backend = new_read_only_meta_kv_backend(meta_client.clone());
429
430        let layered_cache_registry = Arc::new(
431            LayeredCacheRegistryBuilder::default()
432                .add_cache_registry(build_datanode_cache_registry(meta_backend.clone()))
433                .build(),
434        );
435
436        let mut builder = DatanodeBuilder::new(opts, Plugins::default(), meta_backend);
437        builder
438            .with_cache_registry(layered_cache_registry)
439            .with_meta_client(meta_client);
440        let mut datanode = builder.build().await.unwrap();
441
442        datanode.start_heartbeat().await.unwrap();
443
444        datanode
445    }
446
447    async fn build_frontend(
448        &self,
449        metasrv: MockInfo,
450        datanode_clients: Arc<NodeClients>,
451        start_frontend_servers: bool,
452    ) -> Frontend {
453        let mut meta_client = MetaClientBuilder::frontend_default_options()
454            .channel_manager(metasrv.channel_manager)
455            .enable_access_cluster_info()
456            .build();
457        meta_client.start(&[&metasrv.server_addr]).await.unwrap();
458        let meta_client = Arc::new(meta_client);
459
460        let readonly_meta_backend = new_read_only_meta_kv_backend(meta_client.clone());
461        let cached_meta_backend =
462            Arc::new(CachedKvBackendBuilder::new(readonly_meta_backend.clone()).build());
463
464        let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
465            CacheRegistryBuilder::default()
466                .add_cache(cached_meta_backend.clone())
467                .build(),
468        );
469        let fundamental_cache_registry =
470            build_fundamental_cache_registry(readonly_meta_backend.clone());
471        let cache_registry = Arc::new(
472            with_default_composite_cache_registry(
473                layered_cache_builder.add_cache_registry(fundamental_cache_registry),
474            )
475            .unwrap()
476            .build(),
477        );
478
479        let information_extension = Arc::new(DistributedInformationExtension::new(
480            meta_client.clone(),
481            datanode_clients.clone(),
482        ));
483        let catalog_manager = KvBackendCatalogManagerBuilder::new(
484            information_extension,
485            cached_meta_backend.clone(),
486            cache_registry.clone(),
487        )
488        .build();
489
490        let fe_opts = self.build_frontend_options();
491
492        let instance = FrontendBuilder::new(
493            fe_opts.clone(),
494            cached_meta_backend.clone(),
495            cache_registry.clone(),
496            catalog_manager,
497            datanode_clients,
498            meta_client.clone(),
499            Arc::new(ProcessManager::new(fe_opts.grpc.server_addr.clone(), None)),
500        )
501        .with_local_cache_invalidator(cache_registry)
502        .try_build()
503        .await
504        .unwrap();
505
506        let heartbeat_task = create_heartbeat_task(&fe_opts, meta_client, &instance);
507
508        let instance = Arc::new(instance);
509
510        // Build the servers for the frontend.
511        let servers = if start_frontend_servers {
512            Services::new(fe_opts, instance.clone(), Plugins::default())
513                .build()
514                .unwrap()
515        } else {
516            ServerHandlers::default()
517        };
518
519        Frontend {
520            instance,
521            servers,
522            heartbeat_task: Some(heartbeat_task),
523        }
524    }
525
526    fn build_frontend_options(&self) -> FrontendOptions {
527        let mut fe_opts = FrontendOptions::default();
528
529        // Choose a random unused port between [14000, 24000] for local test to avoid conflicts.
530        let port_range = 14000..=24000;
531        let max_attempts = 10;
532        let localhost = "127.0.0.1";
533        let construct_addr = |port: u16| format!("{}:{}", localhost, port);
534
535        fe_opts.http.addr = construct_addr(self.choose_random_unused_port(
536            port_range.clone(),
537            max_attempts,
538            localhost,
539        ));
540
541        let grpc_port = self.choose_random_unused_port(port_range.clone(), max_attempts, localhost);
542        fe_opts.grpc.bind_addr = construct_addr(grpc_port);
543        fe_opts.grpc.server_addr = construct_addr(grpc_port);
544
545        fe_opts.mysql.addr = construct_addr(self.choose_random_unused_port(
546            port_range.clone(),
547            max_attempts,
548            localhost,
549        ));
550        fe_opts.postgres.addr =
551            construct_addr(self.choose_random_unused_port(port_range, max_attempts, localhost));
552
553        fe_opts
554    }
555
556    // Choose a random unused port between [start, end].
557    fn choose_random_unused_port(
558        &self,
559        port_range: RangeInclusive<u16>,
560        max_attempts: u16,
561        addr: &str,
562    ) -> u16 {
563        let mut rng = rand::rng();
564
565        let mut attempts = 0;
566        while attempts < max_attempts {
567            let port = rng.random_range(port_range.clone());
568            if TcpListener::bind(format!("{}:{}", addr, port)).is_ok() {
569                return port;
570            }
571            attempts += 1;
572        }
573
574        panic!("No unused port found");
575    }
576}
577
578async fn build_datanode_clients(
579    clients: Arc<NodeClients>,
580    instances: &HashMap<DatanodeId, Datanode>,
581    datanodes: usize,
582) {
583    for i in 0..datanodes {
584        let datanode_id = i as u64 + 1;
585        let instance = instances.get(&datanode_id).unwrap();
586        let (addr, client) = create_datanode_client(instance).await;
587        clients
588            .insert_client(Peer::new(datanode_id, addr), client)
589            .await;
590    }
591}
592
/// Creates an in-memory gRPC client for a datanode.
///
/// Spawns a tonic server (Flight + Region services, gzip/zstd compression)
/// serving over one end of an in-process duplex pipe, then wires the other
/// end into a `ChannelManager` via a custom connector so the returned
/// `Client` talks to the datanode without any real network socket.
async fn create_datanode_client(datanode: &Datanode) -> (String, Client) {
    // In-memory bidirectional pipe standing in for a TCP connection.
    let (client, server) = tokio::io::duplex(1024);

    let runtime = RuntimeBuilder::default()
        .worker_threads(2)
        .thread_name("grpc-handlers")
        .build()
        .unwrap();

    let flight_handler = FlightCraftWrapper(datanode.region_server());

    let region_server_handler =
        RegionServerRequestHandler::new(Arc::new(datanode.region_server()), runtime);

    // Serve exactly one "incoming connection": the server half of the pipe.
    let _handle = tokio::spawn(async move {
        Server::builder()
            .add_service(
                FlightServiceServer::new(flight_handler)
                    .accept_compressed(CompressionEncoding::Gzip)
                    .accept_compressed(CompressionEncoding::Zstd)
                    .send_compressed(CompressionEncoding::Gzip)
                    .send_compressed(CompressionEncoding::Zstd),
            )
            .add_service(
                RegionServer::new(region_server_handler)
                    .accept_compressed(CompressionEncoding::Gzip)
                    .accept_compressed(CompressionEncoding::Zstd)
                    .send_compressed(CompressionEncoding::Gzip)
                    .send_compressed(CompressionEncoding::Zstd),
            )
            .serve_with_incoming(futures::stream::iter(vec![Ok::<_, std::io::Error>(server)]))
            .await
    });

    // Move client to an option so we can _move_ the inner value
    // on the first attempt to connect. All other attempts will fail.
    let mut client = Some(client);
    // `PEER_PLACEHOLDER_ADDR` is just a placeholder, does not actually connect to it.
    let addr = PEER_PLACEHOLDER_ADDR;
    let channel_manager = ChannelManager::new();
    let _ = channel_manager
        .reset_with_connector(
            addr,
            // Connector that hands the duplex client half to the first
            // connection attempt; subsequent attempts error out.
            service_fn(move |_| {
                let client = client.take();

                async move {
                    if let Some(client) = client {
                        Ok(TokioIo::new(client))
                    } else {
                        Err(std::io::Error::other("Client already taken"))
                    }
                }
            }),
        )
        .unwrap();
    (
        addr.to_string(),
        Client::with_manager_and_urls(channel_manager, vec![addr]),
    )
}