1use std::{fmt, str};
18
19use chrono::{DateTime, Utc};
20use serde::{Deserialize, Serialize};
21use uuid::Uuid;
22
/// Manifest format version written into [`Manifest::version`] by the
/// constructors in this file.
pub const MANIFEST_VERSION: u32 = 1;

/// File name used for the manifest.
// NOTE(review): presumably resolved relative to a snapshot root directory —
// confirm against the code that reads/writes the manifest.
pub const MANIFEST_FILE: &str = "manifest.json";
28
/// A time window described by an optional start and an optional end instant,
/// both expressed in UTC.
///
/// A bound that is `None` is skipped during serialization, so a range with no
/// bounds at all serializes to the empty JSON object `{}`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TimeRange {
    /// Start of the window; `None` means the range has no start bound.
    // NOTE(review): whether this bound is inclusive or exclusive is not
    // established in this file — confirm with the code that consumes it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub start: Option<DateTime<Utc>>,
    /// End of the window; `None` means the range has no end bound.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub end: Option<DateTime<Utc>>,
}
39
40impl TimeRange {
41 pub fn new(start: Option<DateTime<Utc>>, end: Option<DateTime<Utc>>) -> Self {
43 Self { start, end }
44 }
45
46 pub fn unbounded() -> Self {
48 Self {
49 start: None,
50 end: None,
51 }
52 }
53
54 pub fn is_unbounded(&self) -> bool {
56 self.start.is_none() && self.end.is_none()
57 }
58}
59
60impl Default for TimeRange {
61 fn default() -> Self {
62 Self::unbounded()
63 }
64}
65
/// Processing state of a single chunk.
///
/// Serialized in `snake_case` (`pending`, `in_progress`, `completed`,
/// `failed`). The default state is [`ChunkStatus::Pending`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ChunkStatus {
    /// Initial state of a freshly created chunk.
    #[default]
    Pending,
    /// Set by `ChunkMeta::mark_in_progress`.
    InProgress,
    /// Set by `ChunkMeta::mark_completed`.
    Completed,
    /// Set by `ChunkMeta::mark_failed`; the chunk then carries an error message.
    Failed,
}
80
/// Metadata record for one chunk listed in a [`Manifest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkMeta {
    /// Identifier of the chunk; `Manifest::update_chunk` looks chunks up by it.
    pub id: u32,
    /// Time window associated with this chunk.
    pub time_range: TimeRange,
    /// Current processing state.
    pub status: ChunkStatus,
    /// Files recorded for the chunk when it is marked completed.
    /// Missing in the serialized input is treated as an empty list.
    #[serde(default)]
    pub files: Vec<String>,
    /// Optional checksum supplied on completion; omitted from output when `None`.
    // NOTE(review): the checksum algorithm and what exactly it covers are not
    // visible in this file — confirm with the producer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub checksum: Option<String>,
    /// Error message from the last failure; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}
100
101impl ChunkMeta {
102 pub fn new(id: u32, time_range: TimeRange) -> Self {
104 Self {
105 id,
106 time_range,
107 status: ChunkStatus::Pending,
108 files: vec![],
109 checksum: None,
110 error: None,
111 }
112 }
113
114 pub fn mark_in_progress(&mut self) {
116 self.status = ChunkStatus::InProgress;
117 self.error = None;
118 }
119
120 pub fn mark_completed(&mut self, files: Vec<String>, checksum: Option<String>) {
122 self.status = ChunkStatus::Completed;
123 self.files = files;
124 self.checksum = checksum;
125 self.error = None;
126 }
127
128 pub fn mark_failed(&mut self, error: String) {
130 self.status = ChunkStatus::Failed;
131 self.error = Some(error);
132 }
133}
134
/// Data file format selectable for a snapshot.
///
/// The variant names are rendered in lowercase both by serde and as clap
/// argument values. The default is [`DataFormat::Parquet`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default, clap::ValueEnum)]
#[serde(rename_all = "lowercase")]
#[value(rename_all = "lowercase")]
pub enum DataFormat {
    /// Parquet; rendered as `parquet`.
    #[default]
    Parquet,
    /// CSV; rendered as `csv`.
    Csv,
    /// JSON; rendered as `json`.
    Json,
}
148
149impl fmt::Display for DataFormat {
150 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151 match self {
152 DataFormat::Parquet => write!(f, "parquet"),
153 DataFormat::Csv => write!(f, "csv"),
154 DataFormat::Json => write!(f, "json"),
155 }
156 }
157}
158
159impl str::FromStr for DataFormat {
160 type Err = String;
161
162 fn from_str(s: &str) -> Result<Self, Self::Err> {
163 match s.to_lowercase().as_str() {
164 "parquet" => Ok(DataFormat::Parquet),
165 "csv" => Ok(DataFormat::Csv),
166 "json" => Ok(DataFormat::Json),
167 _ => Err(format!(
168 "invalid format '{}': expected one of parquet, csv, json",
169 s
170 )),
171 }
172 }
173}
174
/// Top-level manifest record describing one snapshot and its chunks.
///
/// Instances are created with `Manifest::new_schema_only` or
/// `Manifest::new_full`, both of which assign a fresh random `snapshot_id`
/// and stamp `created_at`/`updated_at` with the current UTC time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
    /// Manifest format version; the constructors set it to `MANIFEST_VERSION`.
    pub version: u32,
    /// Identifier of the snapshot, generated as a random (v4) UUID on creation.
    pub snapshot_id: Uuid,
    /// Name of the catalog the snapshot refers to.
    pub catalog: String,
    /// Names of the schemas included in the snapshot.
    pub schemas: Vec<String>,
    /// Time window of the snapshot; unbounded for schema-only manifests
    /// created by `new_schema_only`.
    pub time_range: TimeRange,
    /// `true` when the manifest was created without data chunks in mind;
    /// such a manifest is always reported as complete by `is_complete`.
    pub schema_only: bool,
    /// Data format recorded for the snapshot.
    pub format: DataFormat,
    /// Chunk records; missing in the serialized input is treated as empty.
    #[serde(default)]
    pub chunks: Vec<ChunkMeta>,
    /// Optional checksum; omitted from output when `None`.
    // NOTE(review): nothing in this file sets it to `Some(..)` or says what it
    // covers — confirm with the code that finalizes a manifest.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub checksum: Option<String>,
    /// UTC time at which the manifest was constructed.
    pub created_at: DateTime<Utc>,
    /// UTC time of the last `touch`, `add_chunk` or successful `update_chunk`.
    pub updated_at: DateTime<Utc>,
}
211
212impl Manifest {
213 pub fn new_schema_only(catalog: String, schemas: Vec<String>) -> Self {
215 let now = Utc::now();
216 Self {
217 version: MANIFEST_VERSION,
218 snapshot_id: Uuid::new_v4(),
219 catalog,
220 schemas,
221 time_range: TimeRange::unbounded(),
222 schema_only: true,
223 format: DataFormat::Parquet,
224 chunks: vec![],
225 checksum: None,
226 created_at: now,
227 updated_at: now,
228 }
229 }
230
231 pub fn new_full(
233 catalog: String,
234 schemas: Vec<String>,
235 time_range: TimeRange,
236 format: DataFormat,
237 ) -> Self {
238 let now = Utc::now();
239 Self {
240 version: MANIFEST_VERSION,
241 snapshot_id: Uuid::new_v4(),
242 catalog,
243 schemas,
244 time_range,
245 schema_only: false,
246 format,
247 chunks: vec![],
248 checksum: None,
249 created_at: now,
250 updated_at: now,
251 }
252 }
253
254 pub fn is_complete(&self) -> bool {
256 self.schema_only
257 || (!self.chunks.is_empty()
258 && self
259 .chunks
260 .iter()
261 .all(|c| c.status == ChunkStatus::Completed))
262 }
263
264 pub fn pending_count(&self) -> usize {
266 self.chunks
267 .iter()
268 .filter(|c| c.status == ChunkStatus::Pending)
269 .count()
270 }
271
272 pub fn in_progress_count(&self) -> usize {
274 self.chunks
275 .iter()
276 .filter(|c| c.status == ChunkStatus::InProgress)
277 .count()
278 }
279
280 pub fn completed_count(&self) -> usize {
282 self.chunks
283 .iter()
284 .filter(|c| c.status == ChunkStatus::Completed)
285 .count()
286 }
287
288 pub fn failed_count(&self) -> usize {
290 self.chunks
291 .iter()
292 .filter(|c| c.status == ChunkStatus::Failed)
293 .count()
294 }
295
296 pub fn touch(&mut self) {
298 self.updated_at = Utc::now();
299 }
300
301 pub fn add_chunk(&mut self, chunk: ChunkMeta) {
303 self.chunks.push(chunk);
304 self.touch();
305 }
306
307 pub fn update_chunk(&mut self, id: u32, updater: impl FnOnce(&mut ChunkMeta)) {
309 if let Some(chunk) = self.chunks.iter_mut().find(|c| c.id == id) {
310 updater(chunk);
311 self.touch();
312 }
313 }
314}
315
#[cfg(test)]
mod tests {
    use super::*;

    /// An unbounded range serializes to `{}` and `{}` parses back as unbounded.
    #[test]
    fn test_time_range_serialization() {
        let encoded = serde_json::to_string(&TimeRange::unbounded()).unwrap();
        assert_eq!(encoded, "{}");

        let decoded = serde_json::from_str::<TimeRange>("{}").unwrap();
        assert!(decoded.is_unbounded());
    }

    /// A schema-only manifest has no chunks and is complete from the start.
    #[test]
    fn test_manifest_schema_only() {
        let catalog = String::from("greptime");
        let schemas = vec![String::from("public")];
        let manifest = Manifest::new_schema_only(catalog, schemas);

        assert_eq!(manifest.version, MANIFEST_VERSION);
        assert!(manifest.schema_only);
        assert!(manifest.chunks.is_empty());
        assert!(manifest.is_complete());
    }

    /// A full manifest without any chunks is not considered complete.
    #[test]
    fn test_manifest_full() {
        let catalog = String::from("greptime");
        let schemas = vec![String::from("public")];
        let manifest = Manifest::new_full(
            catalog,
            schemas,
            TimeRange::unbounded(),
            DataFormat::Parquet,
        );

        assert!(!manifest.schema_only);
        assert!(manifest.chunks.is_empty());
        assert!(!manifest.is_complete());
    }

    /// Format names parse regardless of case; unknown names are rejected.
    #[test]
    fn test_data_format_parsing() {
        let accepted = [
            ("parquet", DataFormat::Parquet),
            ("CSV", DataFormat::Csv),
            ("JSON", DataFormat::Json),
        ];
        for (input, expected) in accepted {
            assert_eq!(input.parse::<DataFormat>().unwrap(), expected);
        }

        assert!("invalid".parse::<DataFormat>().is_err());
    }

    /// A chunk moves Pending -> InProgress -> Completed and records its files.
    #[test]
    fn test_chunk_status_transitions() {
        let mut meta = ChunkMeta::new(1, TimeRange::unbounded());
        assert_eq!(meta.status, ChunkStatus::Pending);

        meta.mark_in_progress();
        assert_eq!(meta.status, ChunkStatus::InProgress);

        let files = vec![String::from("file1.parquet")];
        let checksum = Some(String::from("abc123"));
        meta.mark_completed(files, checksum);
        assert_eq!(meta.status, ChunkStatus::Completed);
        assert_eq!(meta.files.len(), 1);
    }
}