Skip to main content

catalog/kvbackend/
client.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16use std::fmt::Debug;
17use std::sync::atomic::{AtomicUsize, Ordering};
18use std::sync::{Arc, Mutex};
19use std::time::Duration;
20
21use common_error::ext::BoxedError;
22use common_meta::cache_invalidator::KvCacheInvalidator;
23use common_meta::error::Error::CacheNotGet;
24use common_meta::error::{
25    CacheNotGetSnafu, Error, ExternalSnafu, GetKvCacheSnafu, Result, UnsupportedSnafu,
26};
27use common_meta::kv_backend::read_only::ReadOnlyKvBackend;
28use common_meta::kv_backend::txn::{Txn, TxnResponse};
29use common_meta::kv_backend::{KvBackend, KvBackendRef, TxnService};
30use common_meta::rpc::KeyValue;
31use common_meta::rpc::store::{
32    BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
33    BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
34    DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
35};
36use common_telemetry::debug;
37use meta_client::client::MetaClient;
38use moka::future::{Cache, CacheBuilder};
39use snafu::{OptionExt, ResultExt};
40
41use crate::metrics::{
42    METRIC_CATALOG_KV_BATCH_GET, METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET,
43};
44
45const DEFAULT_CACHE_MAX_CAPACITY: u64 = 10000;
46const DEFAULT_CACHE_TTL: Duration = Duration::from_secs(10 * 60);
47const DEFAULT_CACHE_TTI: Duration = Duration::from_secs(5 * 60);
48
/// Builder for [`CachedKvBackend`].
///
/// Any cache setting left unset falls back to the matching `DEFAULT_CACHE_*` constant when
/// [`CachedKvBackendBuilder::build`] is called.
pub struct CachedKvBackendBuilder {
    /// Capacity handed to the cache builder; `None` means `DEFAULT_CACHE_MAX_CAPACITY`.
    cache_max_capacity: Option<u64>,
    /// Time-to-live of cache entries; `None` means `DEFAULT_CACHE_TTL`.
    cache_ttl: Option<Duration>,
    /// Time-to-idle of cache entries; `None` means `DEFAULT_CACHE_TTI`.
    cache_tti: Option<Duration>,
    /// The backend that is going to be wrapped.
    inner: KvBackendRef,
}
55
56impl CachedKvBackendBuilder {
57    pub fn new(inner: KvBackendRef) -> Self {
58        Self {
59            cache_max_capacity: None,
60            cache_ttl: None,
61            cache_tti: None,
62            inner,
63        }
64    }
65
66    pub fn cache_max_capacity(mut self, cache_max_capacity: u64) -> Self {
67        self.cache_max_capacity.replace(cache_max_capacity);
68        self
69    }
70
71    pub fn cache_ttl(mut self, cache_ttl: Duration) -> Self {
72        self.cache_ttl.replace(cache_ttl);
73        self
74    }
75
76    pub fn cache_tti(mut self, cache_tti: Duration) -> Self {
77        self.cache_tti.replace(cache_tti);
78        self
79    }
80
81    pub fn build(self) -> CachedKvBackend {
82        let cache_max_capacity = self
83            .cache_max_capacity
84            .unwrap_or(DEFAULT_CACHE_MAX_CAPACITY);
85        let cache_ttl = self.cache_ttl.unwrap_or(DEFAULT_CACHE_TTL);
86        let cache_tti = self.cache_tti.unwrap_or(DEFAULT_CACHE_TTI);
87
88        let cache = CacheBuilder::new(cache_max_capacity)
89            .time_to_live(cache_ttl)
90            .time_to_idle(cache_tti)
91            .build();
92        let kv_backend = self.inner;
93        let name = format!("CachedKvBackend({})", kv_backend.name());
94        let version = AtomicUsize::new(0);
95
96        CachedKvBackend {
97            kv_backend,
98            cache,
99            name,
100            version,
101        }
102    }
103}
104
/// The cache type used by [`CachedKvBackend`]: raw key bytes mapped to the whole [`KeyValue`].
pub type CacheBackend = Cache<Vec<u8>, KeyValue>;
106
/// A caching wrapper around any [`KvBackendRef`].
///
/// `CachedKvBackend` is mainly used to read metadata information from Metasrv, and provides a
/// cache for `get` and `batch_get`. Cached entries are dropped when `put`, `batch_put`,
/// `compare_and_put`, `delete_range` or `batch_delete` succeeds through this wrapper (but not
/// on `txn`), or when `invalidate_key` is called. Per the original design note, Metasrv
/// broadcasts a metadata invalidation request when metadata information changes.
///
/// It is therefore recommended to use `CachedKvBackend` only for reading metadata-related
/// information. Note: other information read through it may be expired, for as long as the
/// cache's TTL and TTI allow.
pub struct CachedKvBackend {
    /// The wrapped backend; serves cache misses and every write.
    kv_backend: KvBackendRef,
    /// Cache of key bytes to [`KeyValue`].
    cache: CacheBackend,
    /// Display name, `CachedKvBackend(<inner backend name>)`.
    name: String,
    /// Counter bumped on every `invalidate_key`; `get` and `batch_get` compare snapshots of it
    /// to detect an invalidation that raced with a cache fill.
    version: AtomicUsize,
}
122
#[async_trait::async_trait]
impl TxnService for CachedKvBackend {
    type Error = Error;

    /// Runs `txn` on the inner backend.
    ///
    /// No cache entry is invalidated here, so keys written by the transaction can stay cached
    /// with their previous value until something else invalidates them.
    async fn txn(&self, txn: Txn) -> std::result::Result<TxnResponse, Self::Error> {
        // TODO(hl): txn of CachedKvBackend simply pass through to inner backend without invalidating caches.
        self.kv_backend.txn(txn).await
    }

    /// Reports the inner backend's transaction size limit.
    fn max_txn_ops(&self) -> usize {
        self.kv_backend.max_txn_ops()
    }
}
136
137#[async_trait::async_trait]
138impl KvBackend for CachedKvBackend {
    /// Returns the display name, `CachedKvBackend(<inner backend name>)`.
    fn name(&self) -> &str {
        &self.name
    }
142
    /// Exposes `self` as `&dyn Any`.
    fn as_any(&self) -> &dyn Any {
        self
    }
146
    /// Forwards the range query to the inner backend; the cache is neither read nor filled.
    async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
        self.kv_backend.range(req).await
    }
150
151    async fn put(&self, req: PutRequest) -> Result<PutResponse> {
152        let key = &req.key.clone();
153
154        let ret = self.kv_backend.put(req).await;
155
156        if ret.is_ok() {
157            self.invalidate_key(key).await;
158        }
159
160        ret
161    }
162
163    async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
164        let keys = req
165            .kvs
166            .iter()
167            .map(|kv| kv.key().to_vec())
168            .collect::<Vec<_>>();
169
170        let resp = self.kv_backend.batch_put(req).await;
171
172        if resp.is_ok() {
173            for key in keys {
174                self.invalidate_key(&key).await;
175            }
176        }
177
178        resp
179    }
180
181    async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
182        let _timer = METRIC_CATALOG_KV_BATCH_GET.start_timer();
183
184        let mut kvs = Vec::with_capacity(req.keys.len());
185        let mut miss_keys = Vec::with_capacity(req.keys.len());
186
187        for key in req.keys {
188            if let Some(val) = self.cache.get(&key).await {
189                kvs.push(val);
190            } else {
191                miss_keys.push(key);
192            }
193        }
194
195        let batch_get_req = BatchGetRequest::new().with_keys(miss_keys.clone());
196
197        let pre_version = self.version();
198
199        let unhit_kvs = self.kv_backend.batch_get(batch_get_req).await?.kvs;
200
201        for kv in unhit_kvs.iter() {
202            self.cache.insert(kv.key().to_vec(), kv.clone()).await;
203        }
204
205        if !self.validate_version(pre_version) {
206            for key in miss_keys.iter() {
207                self.cache.invalidate(key).await;
208            }
209        }
210
211        kvs.extend(unhit_kvs);
212
213        Ok(BatchGetResponse { kvs })
214    }
215
216    async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
217        let key = &req.key.clone();
218
219        let ret = self.kv_backend.compare_and_put(req).await;
220
221        if ret.is_ok() {
222            self.invalidate_key(key).await;
223        }
224
225        ret
226    }
227
228    async fn delete_range(&self, mut req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
229        let prev_kv = req.prev_kv;
230
231        req.prev_kv = true;
232        let resp = self.kv_backend.delete_range(req).await;
233        match resp {
234            Ok(mut resp) => {
235                for prev_kv in resp.prev_kvs.iter() {
236                    self.invalidate_key(prev_kv.key()).await;
237                }
238
239                if !prev_kv {
240                    resp.prev_kvs = vec![];
241                }
242                Ok(resp)
243            }
244            Err(e) => Err(e),
245        }
246    }
247
248    async fn batch_delete(&self, mut req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
249        let prev_kv = req.prev_kv;
250
251        req.prev_kv = true;
252        let resp = self.kv_backend.batch_delete(req).await;
253        match resp {
254            Ok(mut resp) => {
255                for prev_kv in resp.prev_kvs.iter() {
256                    self.invalidate_key(prev_kv.key()).await;
257                }
258
259                if !prev_kv {
260                    resp.prev_kvs = vec![];
261                }
262                Ok(resp)
263            }
264            Err(e) => Err(e),
265        }
266    }
267
    /// Returns the value stored under `key`, going through the cache's
    /// `try_get_with_by_ref` with a loader that reads from the inner backend.
    ///
    /// A key the inner backend does not have is turned into a `CacheNotGet` loader error and
    /// reported to the caller as `Ok(None)`. Any other loader error is returned as an error
    /// built from `GetKvCacheSnafu` that carries the original error's message.
    async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
        let _timer = METRIC_CATALOG_KV_GET.start_timer();

        // Slot through which the loader reports the version it observed. It is only written
        // inside `init`, so it is still `None` afterwards if this call's loader never ran.
        let pre_version = Arc::new(Mutex::new(None));

        let init = async {
            let version_clone = pre_version.clone();
            let _timer = METRIC_CATALOG_KV_REMOTE_GET.start_timer();

            // Record the version before the remote read starts.
            version_clone.lock().unwrap().replace(self.version());

            // `Ok(None)` from the backend becomes `Err(CacheNotGet)`, so the loader yields a
            // plain `KeyValue` on success.
            self.kv_backend.get(key).await.map(|val| {
                val.with_context(|| CacheNotGetSnafu {
                    key: String::from_utf8_lossy(key),
                })
            })?
        };

        // currently moka doesn't have `optionally_try_get_with_by_ref`
        // TODO(fys): change to moka method when available
        // https://github.com/moka-rs/moka/issues/254
        let ret = match self.cache.try_get_with_by_ref(key, init).await {
            Ok(val) => Ok(Some(val)),
            Err(e) => match e.as_ref() {
                // The "missing key" marker produced by the loader above.
                CacheNotGet { .. } => Ok(None),
                _ => Err(e),
            },
        }
        .map_err(|e| {
            GetKvCacheSnafu {
                err_msg: e.to_string(),
            }
            .build()
        });

        // "cache.invalidate_key" and "cache.try_get_with_by_ref" are not mutually exclusive. So we need
        // to use the version mechanism to prevent expired data from being put into the cache.
        if pre_version
            .lock()
            .unwrap()
            .as_ref()
            .is_some_and(|v| !self.validate_version(*v))
        {
            self.cache.invalidate(key).await;
        }

        ret
    }
316}
317
#[async_trait::async_trait]
impl KvCacheInvalidator for CachedKvBackend {
    /// Drops `key` from the cache.
    ///
    /// The version counter is bumped before the entry is removed; `get` and `batch_get`
    /// compare version snapshots to notice an invalidation that happened while they were
    /// loading, and then discard what they cached.
    async fn invalidate_key(&self, key: &[u8]) {
        self.create_new_version();
        self.cache.invalidate(key).await;
        debug!("invalidated cache key: {}", String::from_utf8_lossy(key));
    }
}
326
327impl CachedKvBackend {
328    // only for test
329    #[cfg(test)]
330    fn wrap(kv_backend: KvBackendRef) -> Self {
331        let cache = CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY)
332            .time_to_live(DEFAULT_CACHE_TTL)
333            .time_to_idle(DEFAULT_CACHE_TTI)
334            .build();
335
336        let name = format!("CachedKvBackend({})", kv_backend.name());
337        Self {
338            kv_backend,
339            cache,
340            name,
341            version: AtomicUsize::new(0),
342        }
343    }
344
345    pub fn cache(&self) -> &CacheBackend {
346        &self.cache
347    }
348
349    fn version(&self) -> usize {
350        self.version.load(Ordering::Relaxed)
351    }
352
353    fn validate_version(&self, pre_version: usize) -> bool {
354        self.version() == pre_version
355    }
356
357    fn create_new_version(&self) -> usize {
358        self.version.fetch_add(1, Ordering::Relaxed) + 1
359    }
360}
361
/// A `KvBackend` that forwards every operation to a [`MetaClient`].
#[derive(Debug)]
pub(crate) struct MetaKvBackend {
    /// Shared client used for all requests.
    client: Arc<MetaClient>,
}
366
367impl MetaKvBackend {
368    /// Constructs a [MetaKvBackend].
369    fn new(client: Arc<MetaClient>) -> MetaKvBackend {
370        MetaKvBackend { client }
371    }
372}
373
374pub fn new_read_only_meta_kv_backend(client: Arc<MetaClient>) -> KvBackendRef {
375    Arc::new(ReadOnlyKvBackend::new(Arc::new(MetaKvBackend::new(client))))
376}
377
#[async_trait::async_trait]
impl TxnService for MetaKvBackend {
    type Error = Error;

    /// Always fails with the error produced by `UnsupportedSnafu`; the transaction is never
    /// executed.
    async fn txn(&self, _txn: Txn) -> Result<TxnResponse> {
        UnsupportedSnafu {
            operation: "MetaKvBackend txn",
        }
        .fail()
    }

    /// Reports `usize::MAX` as the per-transaction operation limit.
    fn max_txn_ops(&self) -> usize {
        usize::MAX
    }
}
393
394/// Implement `KvBackend` trait for `MetaKvBackend` instead of opendal's `Accessor` since
395/// `MetaClient`'s range method can return both keys and values, which can reduce IO overhead
396/// comparing to `Accessor`'s list and get method.
397#[async_trait::async_trait]
398impl KvBackend for MetaKvBackend {
399    fn name(&self) -> &str {
400        "MetaKvBackend"
401    }
402
403    fn as_any(&self) -> &dyn Any {
404        self
405    }
406
407    async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
408        self.client
409            .range(req)
410            .await
411            .map_err(BoxedError::new)
412            .context(ExternalSnafu)
413    }
414
415    async fn put(&self, req: PutRequest) -> Result<PutResponse> {
416        self.client
417            .put(req)
418            .await
419            .map_err(BoxedError::new)
420            .context(ExternalSnafu)
421    }
422
423    async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
424        self.client
425            .batch_put(req)
426            .await
427            .map_err(BoxedError::new)
428            .context(ExternalSnafu)
429    }
430
431    async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
432        self.client
433            .batch_get(req)
434            .await
435            .map_err(BoxedError::new)
436            .context(ExternalSnafu)
437    }
438
439    async fn compare_and_put(
440        &self,
441        request: CompareAndPutRequest,
442    ) -> Result<CompareAndPutResponse> {
443        self.client
444            .compare_and_put(request)
445            .await
446            .map_err(BoxedError::new)
447            .context(ExternalSnafu)
448    }
449
450    async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
451        self.client
452            .delete_range(req)
453            .await
454            .map_err(BoxedError::new)
455            .context(ExternalSnafu)
456    }
457
458    async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
459        self.client
460            .batch_delete(req)
461            .await
462            .map_err(BoxedError::new)
463            .context(ExternalSnafu)
464    }
465
466    async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
467        let mut response = self
468            .client
469            .range(RangeRequest::new().with_key(key))
470            .await
471            .map_err(BoxedError::new)
472            .context(ExternalSnafu)?;
473        Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue {
474            key: kv.take_key(),
475            value: kv.take_value(),
476        }))
477    }
478}
479
480#[cfg(test)]
481mod tests {
482    use std::any::Any;
483    use std::sync::Arc;
484    use std::sync::atomic::{AtomicU32, Ordering};
485
486    use async_trait::async_trait;
487    use common_meta::kv_backend::memory::MemoryKvBackend;
488    use common_meta::kv_backend::read_only::ReadOnlyKvBackend;
489    use common_meta::kv_backend::txn::{Txn, TxnOp};
490    use common_meta::kv_backend::{KvBackend, TxnService};
491    use common_meta::rpc::KeyValue;
492    use common_meta::rpc::store::{
493        BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse,
494        BatchPutRequest, BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, PutRequest,
495        PutResponse, RangeRequest, RangeResponse,
496    };
497    use dashmap::DashMap;
498    use meta_client::client::MetaClientBuilder;
499
500    use super::{CachedKvBackend, new_read_only_meta_kv_backend};
501
    /// Minimal in-memory backend for the cache tests; it counts how often `get` is called.
    #[derive(Default)]
    pub struct SimpleKvBackend {
        /// Stored key/value pairs.
        inner_map: DashMap<Vec<u8>, Vec<u8>>,
        /// Number of `get` calls made so far; shared so a test can keep a handle to it.
        get_execute_times: Arc<AtomicU32>,
    }
507
    // Only the error type is supplied here; no `TxnService` method is overridden.
    impl TxnService for SimpleKvBackend {
        type Error = common_meta::error::Error;
    }
511
512    #[async_trait]
513    impl KvBackend for SimpleKvBackend {
514        fn name(&self) -> &str {
515            "SimpleKvBackend"
516        }
517
518        fn as_any(&self) -> &dyn Any {
519            self
520        }
521
522        async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse, Self::Error> {
523            let mut kvs = Vec::with_capacity(req.keys.len());
524            for key in req.keys.iter() {
525                if let Some(kv) = self.get(key).await? {
526                    kvs.push(kv);
527                }
528            }
529            Ok(BatchGetResponse { kvs })
530        }
531
532        async fn put(&self, req: PutRequest) -> Result<PutResponse, Self::Error> {
533            self.inner_map.insert(req.key, req.value);
534            // always return None as prev_kv, since we don't use it in this test.
535            Ok(PutResponse { prev_kv: None })
536        }
537
538        async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>, Self::Error> {
539            self.get_execute_times
540                .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
541            Ok(self.inner_map.get(key).map(|v| KeyValue {
542                key: key.to_vec(),
543                value: v.value().clone(),
544            }))
545        }
546
547        async fn range(&self, _req: RangeRequest) -> Result<RangeResponse, Self::Error> {
548            unimplemented!()
549        }
550
551        async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse, Self::Error> {
552            unimplemented!()
553        }
554
555        async fn delete_range(
556            &self,
557            _req: DeleteRangeRequest,
558        ) -> Result<DeleteRangeResponse, Self::Error> {
559            unimplemented!()
560        }
561
562        async fn batch_delete(
563            &self,
564            _req: BatchDeleteRequest,
565        ) -> Result<BatchDeleteResponse, Self::Error> {
566            unimplemented!()
567        }
568    }
569
    /// Counts calls to the inner backend's `get` to show that `batch_get` is answered from the
    /// cache once a key has been loaded.
    #[tokio::test]
    async fn test_cached_kv_backend() {
        let simple_kv = Arc::new(SimpleKvBackend::default());
        let get_execute_times = simple_kv.get_execute_times.clone();
        let cached_kv = CachedKvBackend::wrap(simple_kv);

        add_some_vals(&cached_kv).await;

        let batch_get_req = BatchGetRequest {
            keys: vec![b"k1".to_vec(), b"k2".to_vec()],
        };

        // Writing the values does not call the inner `get`.
        assert_eq!(get_execute_times.load(Ordering::SeqCst), 0);

        for _ in 0..10 {
            let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();

            // k1 and k2 are loaded on the first iteration (two inner `get` calls); the later
            // iterations are cache hits and add none.
            assert_eq!(get_execute_times.load(Ordering::SeqCst), 2);
        }

        let batch_get_req = BatchGetRequest {
            keys: vec![b"k1".to_vec(), b"k2".to_vec(), b"k3".to_vec()],
        };

        let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();

        // Only k3 was missing, so exactly one more inner `get` happened.
        assert_eq!(get_execute_times.load(Ordering::SeqCst), 3);

        for _ in 0..10 {
            let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();

            // All three keys are cached now.
            assert_eq!(get_execute_times.load(Ordering::SeqCst), 3);
        }
    }
604
    /// A `put` through `CachedKvBackend` surfaces the `ReadOnlyKvBackend` error of a read-only
    /// inner backend.
    #[tokio::test]
    async fn test_cached_kv_backend_rejects_writes_with_read_only_inner() {
        let inner = Arc::new(MemoryKvBackend::<common_meta::error::Error>::new());
        let cached_kv = CachedKvBackend::wrap(Arc::new(ReadOnlyKvBackend::new(inner)));

        let err = cached_kv
            .put(PutRequest {
                key: b"k1".to_vec(),
                value: b"v1".to_vec(),
                prev_kv: false,
            })
            .await
            .unwrap_err();

        assert!(matches!(
            err,
            common_meta::error::Error::ReadOnlyKvBackend { .. }
        ));
    }
624
    /// A `put` on the backend returned by `new_read_only_meta_kv_backend` fails with the
    /// `ReadOnlyKvBackend` error.
    #[tokio::test]
    async fn test_read_only_meta_kv_backend_rejects_writes() {
        let meta_client = Arc::new(MetaClientBuilder::frontend_default_options().build());
        let backend = new_read_only_meta_kv_backend(meta_client);

        let err = backend
            .put(PutRequest {
                key: b"k1".to_vec(),
                value: b"v1".to_vec(),
                prev_kv: false,
            })
            .await
            .unwrap_err();

        assert!(matches!(
            err,
            common_meta::error::Error::ReadOnlyKvBackend { .. }
        ));
    }
644
    /// A read-only transaction on the backend returned by `new_read_only_meta_kv_backend`
    /// fails with the `Unsupported` error instead of being answered.
    #[tokio::test]
    async fn test_read_only_meta_kv_backend_does_not_emulate_txn() {
        let meta_client = Arc::new(MetaClientBuilder::frontend_default_options().build());
        let backend = new_read_only_meta_kv_backend(meta_client);

        let result = backend
            .txn(Txn::new().and_then(vec![TxnOp::Get(b"k1".to_vec())]))
            .await;
        // NOTE(review): a manual match is used instead of `unwrap_err()`, presumably because
        // the success type does not implement `Debug` - confirm.
        let err = match result {
            Ok(_) => panic!("expected unsupported txn error"),
            Err(err) => err,
        };

        assert!(matches!(err, common_meta::error::Error::Unsupported { .. }));
    }
660
661    async fn add_some_vals(kv_backend: &impl KvBackend) {
662        kv_backend
663            .put(PutRequest {
664                key: b"k1".to_vec(),
665                value: b"v1".to_vec(),
666                prev_kv: false,
667            })
668            .await
669            .unwrap();
670
671        kv_backend
672            .put(PutRequest {
673                key: b"k2".to_vec(),
674                value: b"v2".to_vec(),
675                prev_kv: false,
676            })
677            .await
678            .unwrap();
679
680        kv_backend
681            .put(PutRequest {
682                key: b"k3".to_vec(),
683                value: b"v3".to_vec(),
684                prev_kv: false,
685            })
686            .await
687            .unwrap();
688    }
689}