1pub mod encoder;
18pub(crate) mod entry_distributor;
19pub(crate) mod entry_reader;
20pub(crate) mod raw_entry_reader;
21
22use std::collections::HashMap;
23use std::mem;
24use std::sync::Arc;
25
26use api::v1::WalEntry;
27use common_error::ext::BoxedError;
28use common_telemetry::debug;
29use encoder::WalEntryEncoder;
30use entry_reader::NoopEntryReader;
31use futures::future::BoxFuture;
32use futures::stream::BoxStream;
33use snafu::ResultExt;
34use store_api::logstore::entry::Entry;
35use store_api::logstore::provider::Provider;
36use store_api::logstore::{AppendBatchResponse, LogStore, WalIndex};
37use store_api::storage::RegionId;
38
39use crate::error::{BuildEntrySnafu, DeleteWalSnafu, Result, WriteWalSnafu};
40use crate::wal::entry_reader::{LogStoreEntryReader, WalEntryReader};
41use crate::wal::raw_entry_reader::{LogStoreRawEntryReader, RegionRawEntryReader};
42
/// Identifier of a WAL entry; an alias of the log store's entry id type.
pub type EntryId = store_api::logstore::entry::Id;
/// Boxed stream whose items are either an `(EntryId, WalEntry)` pair or an error.
pub type WalEntryStream<'a> = BoxStream<'a, Result<(EntryId, WalEntry)>>;
47
/// Write-ahead log handle that wraps a shared log store of type `S`.
#[derive(Debug)]
pub struct Wal<S> {
    /// The underlying log store, shared through an [`Arc`].
    store: Arc<S>,
}
56
57impl<S> Wal<S> {
58 pub fn new(store: Arc<S>) -> Self {
60 Self { store }
61 }
62
63 pub fn store(&self) -> &Arc<S> {
64 &self.store
65 }
66}
67
68impl<S> Clone for Wal<S> {
69 fn clone(&self) -> Self {
70 Self {
71 store: Arc::clone(&self.store),
72 }
73 }
74}
75
76impl<S: LogStore> Wal<S> {
77 pub fn writer(&self) -> WalWriter<S> {
79 WalWriter {
80 store: self.store.clone(),
81 entries: Vec::new(),
82 providers: HashMap::new(),
83 encoder: WalEntryEncoder::new(),
84 }
85 }
86
87 pub(crate) fn on_region_opened(
89 &self,
90 ) -> impl FnOnce(RegionId, EntryId, &Provider) -> BoxFuture<Result<()>> {
91 let store = self.store.clone();
92 move |region_id, last_entry_id, provider| -> BoxFuture<'_, Result<()>> {
93 if let Provider::Noop = provider {
94 debug!("Skip obsolete for region: {}", region_id);
95 return Box::pin(async move { Ok(()) });
96 }
97 Box::pin(async move {
98 store
99 .obsolete(provider, region_id, last_entry_id)
100 .await
101 .map_err(BoxedError::new)
102 .context(DeleteWalSnafu { region_id })
103 })
104 }
105 }
106
107 pub(crate) fn wal_entry_reader(
109 &self,
110 provider: &Provider,
111 region_id: RegionId,
112 location_id: Option<u64>,
113 ) -> Box<dyn WalEntryReader> {
114 match provider {
115 Provider::RaftEngine(_) => Box::new(LogStoreEntryReader::new(
116 LogStoreRawEntryReader::new(self.store.clone()),
117 )),
118 Provider::Kafka(_) => {
119 let reader = if let Some(location_id) = location_id {
120 LogStoreRawEntryReader::new(self.store.clone())
121 .with_wal_index(WalIndex::new(region_id, location_id))
122 } else {
123 LogStoreRawEntryReader::new(self.store.clone())
124 };
125
126 Box::new(LogStoreEntryReader::new(RegionRawEntryReader::new(
127 reader, region_id,
128 )))
129 }
130 Provider::Noop => Box::new(NoopEntryReader),
131 }
132 }
133
134 pub fn scan<'a>(
137 &'a self,
138 region_id: RegionId,
139 start_id: EntryId,
140 provider: &'a Provider,
141 ) -> Result<WalEntryStream<'a>> {
142 let mut reader = self.wal_entry_reader(provider, region_id, None);
143 reader.read(provider, start_id)
144 }
145
146 pub async fn obsolete(
148 &self,
149 region_id: RegionId,
150 last_id: EntryId,
151 provider: &Provider,
152 ) -> Result<()> {
153 if let Provider::Noop = provider {
154 return Ok(());
155 }
156 self.store
157 .obsolete(provider, region_id, last_id)
158 .await
159 .map_err(BoxedError::new)
160 .context(DeleteWalSnafu { region_id })
161 }
162}
163
/// Batches WAL entries and appends them to the log store in a single call.
pub struct WalWriter<S: LogStore> {
    /// Log store that builds the entries and receives the batch.
    store: Arc<S>,
    /// Entries added so far and not yet handed to the store.
    entries: Vec<Entry>,
    /// Provider remembered per region; filled by the first `add_entry` call for that region.
    providers: HashMap<RegionId, Provider>,
    /// Encoder turning a `WalEntry` into the bytes stored in an entry.
    encoder: WalEntryEncoder,
}
175
176impl<S: LogStore> WalWriter<S> {
177 pub fn add_entry(
179 &mut self,
180 region_id: RegionId,
181 entry_id: EntryId,
182 wal_entry: &WalEntry,
183 provider: &Provider,
184 ) -> Result<()> {
185 let provider = self
187 .providers
188 .entry(region_id)
189 .or_insert_with(|| provider.clone());
190
191 let data = self.encoder.encode_to_vec(wal_entry);
192 let entry = self
193 .store
194 .entry(data, entry_id, region_id, provider)
195 .map_err(BoxedError::new)
196 .context(BuildEntrySnafu { region_id })?;
197
198 self.entries.push(entry);
199
200 Ok(())
201 }
202
203 pub async fn write_to_wal(&mut self) -> Result<AppendBatchResponse> {
205 let entries = mem::take(&mut self.entries);
208 self.store
209 .append_batch(entries)
210 .await
211 .map_err(BoxedError::new)
212 .context(WriteWalSnafu)
213 }
214}
215
216#[cfg(test)]
217mod tests {
218 use api::v1::helper::{tag_column_schema, time_index_column_schema};
219 use api::v1::{
220 ArrowIpc, BulkWalEntry, ColumnDataType, Mutation, OpType, Row, Rows, Value, bulk_wal_entry,
221 value,
222 };
223 use common_recordbatch::DfRecordBatch;
224 use common_test_util::flight::encode_to_flight_data;
225 use common_test_util::temp_dir::{TempDir, create_temp_dir};
226 use datatypes::arrow;
227 use datatypes::arrow::array::{ArrayRef, TimestampMillisecondArray};
228 use datatypes::arrow::datatypes::Field;
229 use datatypes::arrow_array::StringArray;
230 use futures::TryStreamExt;
231 use log_store::raft_engine::log_store::RaftEngineLogStore;
232 use log_store::test_util::log_store_util;
233 use store_api::storage::SequenceNumber;
234
235 use super::*;
236
/// Test fixture bundling a temporary directory with a log store created in it.
struct WalEnv {
    /// Held only so the temporary directory stays alive as long as the fixture.
    _wal_dir: TempDir,
    /// Log store created under `_wal_dir`; always `Some` after `WalEnv::new`.
    log_store: Option<Arc<RaftEngineLogStore>>,
}
241
242 impl WalEnv {
243 async fn new() -> WalEnv {
244 let wal_dir = create_temp_dir("");
245 let log_store =
246 log_store_util::create_tmp_local_file_log_store(wal_dir.path().to_str().unwrap())
247 .await;
248 WalEnv {
249 _wal_dir: wal_dir,
250 log_store: Some(Arc::new(log_store)),
251 }
252 }
253
254 fn new_wal(&self) -> Wal<RaftEngineLogStore> {
255 let log_store = self.log_store.clone().unwrap();
256 Wal::new(log_store)
257 }
258 }
259
260 fn new_mutation(op_type: OpType, sequence: SequenceNumber, rows: &[(&str, i64)]) -> Mutation {
264 let rows = rows
265 .iter()
266 .map(|(str_col, int_col)| {
267 let values = vec![
268 Value {
269 value_data: Some(value::ValueData::StringValue(str_col.to_string())),
270 },
271 Value {
272 value_data: Some(value::ValueData::TimestampMillisecondValue(*int_col)),
273 },
274 ];
275 Row { values }
276 })
277 .collect();
278 let schema = vec![
279 tag_column_schema("tag", ColumnDataType::String),
280 time_index_column_schema("ts", ColumnDataType::TimestampMillisecond),
281 ];
282
283 Mutation {
284 op_type: op_type as i32,
285 sequence,
286 rows: Some(Rows { schema, rows }),
287 write_hint: None,
288 }
289 }
290
291 #[tokio::test]
292 async fn test_write_wal() {
293 let env = WalEnv::new().await;
294 let wal = env.new_wal();
295
296 let entry = WalEntry {
297 mutations: vec![
298 new_mutation(OpType::Put, 1, &[("k1", 1), ("k2", 2)]),
299 new_mutation(OpType::Put, 2, &[("k3", 3), ("k4", 4)]),
300 ],
301 bulk_entries: vec![],
302 };
303 let mut writer = wal.writer();
304 let region_id = RegionId::new(1, 1);
306 writer
307 .add_entry(
308 region_id,
309 1,
310 &entry,
311 &Provider::raft_engine_provider(region_id.as_u64()),
312 )
313 .unwrap();
314 let region_id = RegionId::new(1, 2);
316 writer
317 .add_entry(
318 region_id,
319 1,
320 &entry,
321 &Provider::raft_engine_provider(region_id.as_u64()),
322 )
323 .unwrap();
324 let region_id = RegionId::new(1, 2);
326 writer
327 .add_entry(
328 region_id,
329 2,
330 &entry,
331 &Provider::raft_engine_provider(region_id.as_u64()),
332 )
333 .unwrap();
334
335 writer.write_to_wal().await.unwrap();
337 }
338
339 fn build_record_batch(rows: &[(&str, i64)]) -> DfRecordBatch {
340 let schema = Arc::new(arrow::datatypes::Schema::new(vec![
341 Field::new("tag", arrow::datatypes::DataType::Utf8, false),
342 Field::new(
343 "ts",
344 arrow::datatypes::DataType::Timestamp(
345 arrow::datatypes::TimeUnit::Millisecond,
346 None,
347 ),
348 false,
349 ),
350 ]));
351
352 let tag = Arc::new(StringArray::from_iter_values(
353 rows.iter().map(|r| r.0.to_string()),
354 )) as ArrayRef;
355 let ts = Arc::new(TimestampMillisecondArray::from_iter_values(
356 rows.iter().map(|r| r.1),
357 )) as ArrayRef;
358 DfRecordBatch::try_new(schema, vec![tag, ts]).unwrap()
359 }
360
361 fn build_bulk_wal_entry(sequence_number: SequenceNumber, rows: &[(&str, i64)]) -> BulkWalEntry {
362 let rb = build_record_batch(rows);
363 let (schema, rb) = encode_to_flight_data(rb);
364 let max_ts = rows.iter().map(|r| r.1).max().unwrap();
365 let min_ts = rows.iter().map(|r| r.1).min().unwrap();
366 BulkWalEntry {
367 sequence: sequence_number,
368 max_ts,
369 min_ts,
370 timestamp_index: 1,
371 body: Some(bulk_wal_entry::Body::ArrowIpc(ArrowIpc {
372 schema: schema.data_header,
373 data_header: rb.data_header,
374 payload: rb.data_body,
375 })),
376 }
377 }
378
379 fn sample_entries() -> Vec<WalEntry> {
380 vec![
381 WalEntry {
382 mutations: vec![
383 new_mutation(OpType::Put, 1, &[("k1", 1), ("k2", 2)]),
384 new_mutation(OpType::Put, 2, &[("k3", 3), ("k4", 4)]),
385 ],
386 bulk_entries: vec![],
387 },
388 WalEntry {
389 mutations: vec![new_mutation(OpType::Put, 3, &[("k1", 1), ("k2", 2)])],
390 bulk_entries: vec![],
391 },
392 WalEntry {
393 mutations: vec![
394 new_mutation(OpType::Put, 4, &[("k1", 1), ("k2", 2)]),
395 new_mutation(OpType::Put, 5, &[("k3", 3), ("k4", 4)]),
396 ],
397 bulk_entries: vec![],
398 },
399 WalEntry {
400 mutations: vec![new_mutation(OpType::Put, 6, &[("k1", 1), ("k2", 2)])],
401 bulk_entries: vec![build_bulk_wal_entry(7, &[("k1", 8), ("k2", 9)])],
402 },
403 ]
404 }
405
406 fn check_entries(
407 expect: &[WalEntry],
408 expect_start_id: EntryId,
409 actual: &[(EntryId, WalEntry)],
410 ) {
411 for (idx, (expect_entry, (actual_id, actual_entry))) in
412 expect.iter().zip(actual.iter()).enumerate()
413 {
414 let expect_id_entry = (expect_start_id + idx as u64, expect_entry);
415 assert_eq!(expect_id_entry, (*actual_id, actual_entry));
416 }
417 assert_eq!(expect.len(), actual.len());
418 }
419
420 #[tokio::test]
421 async fn test_scan_wal() {
422 let env = WalEnv::new().await;
423 let wal = env.new_wal();
424
425 let entries = sample_entries();
426 let (id1, id2) = (RegionId::new(1, 1), RegionId::new(1, 2));
427 let ns1 = Provider::raft_engine_provider(id1.as_u64());
428 let ns2 = Provider::raft_engine_provider(id2.as_u64());
429 let mut writer = wal.writer();
430 writer.add_entry(id1, 1, &entries[0], &ns1).unwrap();
431 writer.add_entry(id2, 1, &entries[0], &ns2).unwrap();
433 writer.add_entry(id1, 2, &entries[1], &ns1).unwrap();
434 writer.add_entry(id1, 3, &entries[2], &ns1).unwrap();
435 writer.add_entry(id1, 4, &entries[3], &ns1).unwrap();
436
437 writer.write_to_wal().await.unwrap();
438
439 let stream = wal.scan(id1, 1, &ns1).unwrap();
441 let actual: Vec<_> = stream.try_collect().await.unwrap();
442 check_entries(&entries, 1, &actual);
443
444 let stream = wal.scan(id1, 2, &ns1).unwrap();
446 let actual: Vec<_> = stream.try_collect().await.unwrap();
447 check_entries(&entries[1..], 2, &actual);
448
449 let stream = wal.scan(id1, 5, &ns1).unwrap();
451 let actual: Vec<_> = stream.try_collect().await.unwrap();
452 assert!(actual.is_empty());
453 }
454
455 #[tokio::test]
456 async fn test_obsolete_wal() {
457 let env = WalEnv::new().await;
458 let wal = env.new_wal();
459
460 let entries = sample_entries();
461 let mut writer = wal.writer();
462 let region_id = RegionId::new(1, 1);
463 let ns = Provider::raft_engine_provider(region_id.as_u64());
464 writer.add_entry(region_id, 1, &entries[0], &ns).unwrap();
465 writer.add_entry(region_id, 2, &entries[1], &ns).unwrap();
466 writer.add_entry(region_id, 3, &entries[2], &ns).unwrap();
467
468 writer.write_to_wal().await.unwrap();
469
470 wal.obsolete(region_id, 2, &ns).await.unwrap();
472
473 let mut writer = wal.writer();
475 writer.add_entry(region_id, 4, &entries[3], &ns).unwrap();
476 writer.write_to_wal().await.unwrap();
477
478 let stream = wal.scan(region_id, 1, &ns).unwrap();
480 let actual: Vec<_> = stream.try_collect().await.unwrap();
481 check_entries(&entries[2..], 3, &actual);
482 }
483}