Skip to main content

store_api/storage/
file.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{HashMap, HashSet};
16use std::fmt;
17use std::fmt::Debug;
18use std::str::FromStr;
19
20use serde::{Deserialize, Serialize};
21use snafu::{ResultExt, Snafu};
22use uuid::Uuid;
23
24use crate::ManifestVersion;
25use crate::storage::RegionId;
26
/// Version number of an index, represented as an unsigned 64-bit integer.
pub type IndexVersion = u64;
29
30#[derive(Debug, Snafu, PartialEq)]
31pub struct ParseIdError {
32    source: uuid::Error,
33}
34
35/// Unique id for [SST File].
36#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
37pub struct FileId(Uuid);
38
39impl FileId {
40    /// Returns a new unique [FileId] randomly.
41    pub fn random() -> FileId {
42        FileId(Uuid::new_v4())
43    }
44
45    /// Parses id from string.
46    pub fn parse_str(input: &str) -> std::result::Result<FileId, ParseIdError> {
47        Uuid::parse_str(input).map(FileId).context(ParseIdSnafu)
48    }
49
50    /// Converts [FileId] as byte slice.
51    pub fn as_bytes(&self) -> &[u8] {
52        self.0.as_bytes()
53    }
54}
55
56impl From<FileId> for Uuid {
57    fn from(value: FileId) -> Self {
58        value.0
59    }
60}
61
62impl fmt::Display for FileId {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        write!(f, "{}", self.0)
65    }
66}
67
68impl FromStr for FileId {
69    type Err = ParseIdError;
70
71    fn from_str(s: &str) -> std::result::Result<FileId, ParseIdError> {
72        FileId::parse_str(s)
73    }
74}
75
76#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
77pub struct FileRef {
78    pub region_id: RegionId,
79    pub file_id: FileId,
80    pub index_version: Option<IndexVersion>,
81}
82
83impl FileRef {
84    pub fn new(region_id: RegionId, file_id: FileId, index_version: Option<IndexVersion>) -> Self {
85        Self {
86            region_id,
87            file_id,
88            index_version,
89        }
90    }
91}
92
93/// The tmp file manifest which record a table's file references.
94/// Also record the manifest version when these tmp files are read.
95#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
96pub struct FileRefsManifest {
97    pub file_refs: HashMap<RegionId, HashSet<FileRef>>,
98    /// Manifest version when this manifest is read for its files
99    pub manifest_version: HashMap<RegionId, ManifestVersion>,
100    /// Cross-region file ownership mapping.
101    ///
102    /// Key is the source/original region id (before repartition); value is the set of
103    /// target/destination region ids (after repartition) that currently hold files
104    /// originally coming from that source region.
105    ///
106    pub cross_region_refs: HashMap<RegionId, HashSet<RegionId>>,
107}
108
109#[derive(Clone, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
110pub struct GcReport {
111    /// Deleted SST/parquet file ids per region. Index-only deletions are reported via
112    /// `deleted_indexes` because a naked `FileId` cannot distinguish index versions.
113    /// TODO(discord9): change to `RemovedFile`?
114    pub deleted_files: HashMap<RegionId, Vec<FileId>>,
115    pub deleted_indexes: HashMap<RegionId, Vec<(FileId, IndexVersion)>>,
116    /// Regions that need retry in next gc round, usually because their tmp ref files are outdated
117    pub need_retry_regions: HashSet<RegionId>,
118    /// Regions successfully processed in this GC run
119    pub processed_regions: HashSet<RegionId>,
120}
121
122impl GcReport {
123    pub fn new(
124        deleted_files: HashMap<RegionId, Vec<FileId>>,
125        deleted_indexes: HashMap<RegionId, Vec<(FileId, IndexVersion)>>,
126        need_retry_regions: HashSet<RegionId>,
127    ) -> Self {
128        Self {
129            deleted_files,
130            deleted_indexes,
131            need_retry_regions,
132            processed_regions: HashSet::new(),
133        }
134    }
135
136    pub fn merge(&mut self, other: GcReport) {
137        for (region, files) in other.deleted_files {
138            let self_files = self.deleted_files.entry(region).or_default();
139            let dedup: HashSet<FileId> = HashSet::from_iter(
140                std::mem::take(self_files)
141                    .into_iter()
142                    .chain(files.iter().cloned()),
143            );
144            *self_files = dedup.into_iter().collect();
145        }
146        for (region, files) in other.deleted_indexes {
147            let self_files = self.deleted_indexes.entry(region).or_default();
148            let dedup: HashSet<(FileId, IndexVersion)> = HashSet::from_iter(
149                std::mem::take(self_files)
150                    .into_iter()
151                    .chain(files.iter().cloned()),
152            );
153            *self_files = dedup.into_iter().collect();
154        }
155        self.need_retry_regions.extend(other.need_retry_regions);
156        self.processed_regions.extend(other.processed_regions);
157        // Remove regions that have succeeded from need_retry_regions
158        self.need_retry_regions
159            .retain(|region| !self.deleted_files.contains_key(region));
160    }
161}
162
#[cfg(test)]
mod tests {
    use super::*;

    /// A `FileId` round-trips through its string form via both parsing paths.
    #[test]
    fn test_file_id() {
        let original = FileId::random();
        let text = original.to_string();
        assert_eq!(original.0.to_string(), text);

        let via_parse_str = FileId::parse_str(&text).unwrap();
        assert_eq!(original, via_parse_str);

        let via_from_str: FileId = text.parse().unwrap();
        assert_eq!(original, via_from_str);
    }

    /// A `FileId` serializes to a plain JSON string and deserializes back.
    #[test]
    fn test_file_id_serialization() {
        let id = FileId::random();
        let json = serde_json::to_string(&id).unwrap();
        assert_eq!(format!("\"{id}\""), json);

        let decoded: FileId = serde_json::from_str(&json).unwrap();
        assert_eq!(id, decoded);
    }

    /// A populated `FileRefsManifest` survives a JSON round trip unchanged.
    #[test]
    fn test_file_refs_manifest_serialization() {
        let first = RegionId::new(1024, 1);
        let second = RegionId::new(1024, 2);

        let mut manifest = FileRefsManifest::default();

        let first_refs = [FileRef::new(first, FileId::random(), None)];
        manifest.file_refs.insert(first, first_refs.into());
        let second_refs = [FileRef::new(second, FileId::random(), None)];
        manifest.file_refs.insert(second, second_refs.into());

        manifest.manifest_version.insert(first, 10);
        manifest.manifest_version.insert(second, 20);

        manifest.cross_region_refs.insert(first, [second].into());
        manifest.cross_region_refs.insert(second, [first].into());

        let encoded = serde_json::to_string(&manifest).unwrap();
        let decoded: FileRefsManifest = serde_json::from_str(&encoded).unwrap();
        assert_eq!(manifest, decoded);
    }

    /// `FileRef::new` stores its arguments verbatim, with and without an index version.
    #[test]
    fn test_file_ref_new() {
        let region_id = RegionId::new(1024, 1);
        let file_id = FileId::random();

        // With an index version.
        let index_version: IndexVersion = 42;
        let with_version = FileRef::new(region_id, file_id, Some(index_version));
        assert_eq!(with_version.region_id, region_id);
        assert_eq!(with_version.file_id, file_id);
        assert_eq!(with_version.index_version, Some(index_version));

        // Without an index version.
        let without_version = FileRef::new(region_id, file_id, None);
        assert_eq!(without_version.region_id, region_id);
        assert_eq!(without_version.file_id, file_id);
        assert_eq!(without_version.index_version, None);
    }

    /// Equality of `FileRef` takes the index version into account.
    #[test]
    fn test_file_ref_equality() {
        let region_id = RegionId::new(1024, 1);
        let file_id = FileId::random();
        let make = |version: Option<IndexVersion>| FileRef::new(region_id, file_id, version);

        let v10_a = make(Some(10));
        let v10_b = make(Some(10));
        let v20 = make(Some(20));
        let unversioned = make(None);

        assert_eq!(v10_a, v10_b);
        assert_ne!(v10_a, v20);
        assert_ne!(v10_a, unversioned);
        assert_ne!(v20, unversioned);

        // `Some(0)` must not be conflated with `None`.
        let v0 = make(Some(0));
        assert_ne!(v0, unversioned);
    }

    /// `FileRef` survives a JSON round trip, with and without an index version.
    #[test]
    fn test_file_ref_serialization() {
        let region_id = RegionId::new(1024, 1);
        let file_id = FileId::random();

        // With an index version.
        let index_version: IndexVersion = 12345;
        let with_version = FileRef::new(region_id, file_id, Some(index_version));
        let encoded = serde_json::to_string(&with_version).unwrap();
        let decoded: FileRef = serde_json::from_str(&encoded).unwrap();
        assert_eq!(with_version, decoded);
        assert_eq!(decoded.index_version, Some(index_version));

        // Without an index version.
        let without_version = FileRef::new(region_id, file_id, None);
        let encoded_none = serde_json::to_string(&without_version).unwrap();
        let decoded_none: FileRef = serde_json::from_str(&encoded_none).unwrap();
        assert_eq!(without_version, decoded_none);
        assert_eq!(decoded_none.index_version, None);
    }
}