Mirror of https://github.com/GreptimeTeam/greptimedb.git
fix: apply recovered metadata after last WAL entry (#461)
* fix: apply recovered metadata after last WAL entry
* fix: condition error
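The bug, in brief: during WAL replay, a recovered metadata change (e.g. from an alter) was applied only once a WAL entry with a higher sequence was replayed. If the metadata change was the last thing recorded before shutdown, no later entry exists, so the change was silently dropped on reopen. The hunks below add a drain step after the replay loop, a current_manifest_version accessor, a replay_inner test hook, and a regression test. First, a minimal, self-contained sketch of the control flow and the fix; all names and types here are illustrative stand-ins, not the engine's actual API:

use std::collections::BTreeMap;

type SequenceNumber = u64;
// Stand-in for (ManifestVersion, RawRegionMetadata).
type PendingMeta = &'static str;

fn replay(wal: &[SequenceNumber], mut pending: BTreeMap<SequenceNumber, PendingMeta>) {
    let mut last_sequence: SequenceNumber = 0;
    let mut next_apply = pending.pop_first();

    for &req_sequence in wal {
        // Apply every pending change this entry has moved past.
        while let Some((seq_before_alter, meta)) = next_apply {
            if req_sequence > seq_before_alter {
                println!("apply {meta} at sequence {seq_before_alter}");
                next_apply = pending.pop_first();
            } else {
                // Not reached yet; keep it for a later entry.
                next_apply = Some((seq_before_alter, meta));
                break;
            }
        }
        last_sequence = req_sequence;
    }

    // The fix: changes recorded at or after the last WAL entry have no
    // later entry to trigger them, so drain them here.
    while let Some((seq_before_alter, meta)) = next_apply {
        assert!(seq_before_alter >= last_sequence);
        println!("apply {meta} after last entry {last_sequence}");
        next_apply = pending.pop_first();
    }
}

fn main() {
    let mut pending = BTreeMap::new();
    pending.insert(3, "alter A"); // applied when entry 4 replays
    pending.insert(4, "alter B"); // previously lost: 4 is the last WAL entry
    replay(&[1, 2, 4], pending);
}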
@@ -97,6 +97,7 @@ pub struct StoreConfig<S> {
     pub flush_strategy: FlushStrategyRef,
 }
 
+pub type RecoverdMetadata = (SequenceNumber, (ManifestVersion, RawRegionMetadata));
 pub type RecoveredMetadataMap = BTreeMap<SequenceNumber, (ManifestVersion, RawRegionMetadata)>;
 
 impl<S: LogStore> RegionImpl<S> {
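A side note on the alias just added: the map is keyed by SequenceNumber and backed by a BTreeMap rather than a HashMap because replay consumes entries in ascending sequence order via pop_first(). A toy illustration of that ordering, with invented values and RawRegionMetadata elided:

use std::collections::BTreeMap;

fn main() {
    // sequence -> (manifest_version, description)
    let mut recovered: BTreeMap<u64, (u64, &str)> = BTreeMap::new();
    recovered.insert(42, (7, "second alter"));
    recovered.insert(10, (6, "first alter"));

    // pop_first() yields entries in ascending sequence order,
    // regardless of insertion order: 10 comes out before 42.
    while let Some((seq, (manifest, what))) = recovered.pop_first() {
        println!("seq={seq} manifest={manifest} {what}");
    }
}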
@@ -345,6 +346,10 @@ impl<S: LogStore> RegionImpl<S> {
         self.inner.version_control().committed_sequence()
     }
 
+    fn current_manifest_version(&self) -> ManifestVersion {
+        self.inner.version_control().current_manifest_version()
+    }
+
     async fn wait_flush_done(&self) -> Result<()> {
         self.inner.writer.wait_flush_done().await
     }
@@ -353,6 +358,22 @@ impl<S: LogStore> RegionImpl<S> {
     async fn write_inner(&self, ctx: &WriteContext, request: WriteBatch) -> Result<WriteResponse> {
         self.inner.write(ctx, request).await
     }
+
+    // Replay metadata to inner.
+    async fn replay_inner(&self, recovered_metadata: RecoveredMetadataMap) -> Result<()> {
+        let inner = &self.inner;
+        let writer_ctx = WriterContext {
+            shared: &inner.shared,
+            flush_strategy: &inner.flush_strategy,
+            flush_scheduler: &inner.flush_scheduler,
+            sst_layer: &inner.sst_layer,
+            wal: &inner.wal,
+            writer: &inner.writer,
+            manifest: &inner.manifest,
+        };
+
+        inner.writer.replay(recovered_metadata, writer_ctx).await
+    }
 }
 
 /// Shared data of region.
@@ -79,6 +79,10 @@ impl<S: LogStore> TesterBase<S> {
             .unwrap()
     }
 
+    pub async fn replay_inner(&self, recovered_metadata: RecoveredMetadataMap) {
+        self.region.replay_inner(recovered_metadata).await.unwrap()
+    }
+
     /// Scan all data.
     pub async fn full_scan(&self) -> Vec<(i64, Option<i64>)> {
         logging::info!("Full scan with ctx {:?}", self.read_ctx);
@@ -1,3 +1,4 @@
+use std::collections::BTreeMap;
 use std::sync::Arc;
 
 use common_time::Timestamp;
@@ -16,8 +17,10 @@ use tempdir::TempDir;
 use crate::region::tests::{self, FileTesterBase};
 use crate::region::OpenOptions;
 use crate::region::RegionImpl;
+use crate::region::{RawRegionMetadata, RegionMetadata};
 use crate::test_util;
 use crate::test_util::config_util;
+use crate::test_util::descriptor_util::RegionDescBuilder;
 use crate::write_batch::PutData;
 
 const REGION_NAME: &str = "region-alter-0";
@@ -235,6 +238,7 @@ fn drop_column_req(names: &[&str]) -> AlterRequest {
 
 fn check_schema_names(schema: &SchemaRef, names: &[&str]) {
     assert_eq!(names.len(), schema.num_columns());
+
     for (idx, name) in names.iter().enumerate() {
         assert_eq!(*name, schema.column_name_by_index(idx));
         assert!(schema.column_schema_by_name(name).is_some());
@@ -391,3 +395,34 @@ async fn test_put_old_schema_after_alter() {
     let scanned = tester.full_scan().await;
     assert_eq!(expect, scanned);
 }
+
+#[tokio::test]
+async fn test_replay_metadata_after_open() {
+    let dir = TempDir::new("replay-metadata-after-open").unwrap();
+    let store_dir = dir.path().to_str().unwrap();
+    let mut tester = AlterTester::new(store_dir).await;
+
+    let data = vec![(1000, Some(100)), (1001, Some(101)), (1002, Some(102))];
+
+    tester.put_with_init_schema(&data).await;
+
+    tester.reopen().await;
+
+    let committed_sequence = tester.base().committed_sequence();
+    let manifest_version = tester.base().region.current_manifest_version();
+    let version = tester.version();
+
+    let mut recovered_metadata = BTreeMap::new();
+
+    let desc = RegionDescBuilder::new(REGION_NAME)
+        .push_key_column(("k1", LogicalTypeId::Int32, false))
+        .push_value_column(("v0", LogicalTypeId::Float32, true))
+        .build();
+    let metadata: &RegionMetadata = &desc.try_into().unwrap();
+    let mut raw_metadata: RawRegionMetadata = metadata.into();
+    raw_metadata.version = version + 1;
+    recovered_metadata.insert(committed_sequence, (manifest_version + 1, raw_metadata));
+    tester.base().replay_inner(recovered_metadata).await;
+    let schema = tester.schema();
+    check_schema_names(&schema, &["k1", "timestamp", "v0"]);
+}
@@ -5,6 +5,7 @@ use futures::TryStreamExt;
 use snafu::ResultExt;
 use store_api::logstore::LogStore;
 use store_api::manifest::{Manifest, ManifestVersion, MetaAction};
+use store_api::storage::SequenceNumber;
 use store_api::storage::{AlterRequest, WriteContext, WriteResponse};
 use tokio::sync::Mutex;
 
@@ -17,9 +18,10 @@ use crate::manifest::action::{
 use crate::memtable::{Inserter, MemtableBuilderRef, MemtableId, MemtableRef};
 use crate::metadata::RegionMetadataRef;
 use crate::proto::wal::WalHeader;
-use crate::region::{RecoveredMetadataMap, RegionManifest, SharedDataRef};
+use crate::region::{RecoverdMetadata, RecoveredMetadataMap, RegionManifest, SharedDataRef};
 use crate::schema::compat::CompatWrite;
 use crate::sst::AccessLayerRef;
+use crate::version::VersionControl;
 use crate::version::{VersionControlRef, VersionEdit};
 use crate::wal::{Payload, Wal};
 use crate::write_batch::WriteBatch;
@@ -341,28 +343,14 @@ impl WriterInner {
             // There might be multiple metadata changes to be applied, so a loop is necessary.
             if req_sequence > sequence_before_alter {
                 // This is the first request that use the new metadata.
-                // It's safe to unwrap here. It's checked above. Move out metadata to avoid cloning it.
-                let (_, (manifest_version, metadata)) = next_apply_metadata.take().unwrap();
-                let region_metadata: RegionMetadataRef = Arc::new(
-                    metadata.try_into().context(error::InvalidRawRegionSnafu {
-                        region: &writer_ctx.shared.name,
-                    })?,
-                );
-                let new_mutable = self
-                    .memtable_builder
-                    .build(region_metadata.schema().clone());
-                version_control.freeze_mutable_and_apply_metadata(
-                    region_metadata,
-                    manifest_version,
-                    new_mutable,
-                );
-                num_recovered_metadata += 1;
-                logging::debug!(
-                    "Applied metadata to region: {} when replaying WAL: sequence={} manifest={} ",
-                    writer_ctx.shared.name,
-                    sequence_before_alter,
-                    manifest_version
-                );
+                self.apply_metadata(
+                    &writer_ctx,
+                    sequence_before_alter,
+                    next_apply_metadata,
+                    version_control,
+                )?;
+
+                num_recovered_metadata += 1;
                 next_apply_metadata = recovered_metadata.pop_first();
             } else {
                 // Keep the next_apply_metadata until req_sequence > sequence_before_alter
@@ -380,7 +368,7 @@ impl WriterInner {
             } else {
                 logging::error!(
                     "Sequence should not decrease during replay, found {} <= {}, \
                     region_id: {}, region_name: {}, flushed_sequence: {}, num_requests: {}",
                     req_sequence,
                     last_sequence,
                     writer_ctx.shared.id,
@@ -402,6 +390,29 @@ impl WriterInner {
                 }
             }
 
+            // Apply metadata after last WAL entry
+            while let Some((sequence_before_alter, _)) = next_apply_metadata {
+                assert!(
+                    sequence_before_alter >= last_sequence,
+                    "The sequence in metadata after last WAL entry is less than last sequence, \
+                    metadata sequence: {}, last_sequence: {}, region_id: {}, region_name: {}",
+                    sequence_before_alter,
+                    last_sequence,
+                    writer_ctx.shared.id,
+                    writer_ctx.shared.name
+                );
+
+                self.apply_metadata(
+                    &writer_ctx,
+                    sequence_before_alter,
+                    next_apply_metadata,
+                    version_control,
+                )?;
+
+                num_recovered_metadata += 1;
+                next_apply_metadata = recovered_metadata.pop_first();
+            }
+
             version_control.set_committed_sequence(last_sequence);
         }
 
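The assertion in the new drain loop encodes exactly the boundary case the old code missed: req_sequence > sequence_before_alter can never fire for a change keyed at or after the final WAL entry. A toy check of that condition, with invented sequence numbers:

fn main() {
    // WAL entries at sequences [3, 5]; an alter recorded at sequence 5.
    let wal = [3u64, 5];
    let sequence_before_alter = 5u64;

    // The in-loop condition `req_sequence > sequence_before_alter` is false
    // for every entry (3 > 5 and 5 > 5 both fail), so the alter survives the
    // loop and must be applied by the post-loop drain.
    let applied_in_loop = wal.iter().any(|&req| req > sequence_before_alter);
    assert!(!applied_in_loop);
}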
@@ -418,6 +429,39 @@ impl WriterInner {
         Ok(())
     }
 
+    fn apply_metadata<S: LogStore>(
+        &self,
+        writer_ctx: &WriterContext<'_, S>,
+        sequence: SequenceNumber,
+        mut metadata: Option<RecoverdMetadata>,
+        version_control: &VersionControl,
+    ) -> Result<()> {
+        // It's safe to unwrap here, it's checked outside.
+        // Move out metadata to avoid cloning it.
+
+        let (_, (manifest_version, metadata)) = metadata.take().unwrap();
+        let region_metadata: RegionMetadataRef =
+            Arc::new(metadata.try_into().context(error::InvalidRawRegionSnafu {
+                region: &writer_ctx.shared.name,
+            })?);
+        let new_mutable = self
+            .memtable_builder
+            .build(region_metadata.schema().clone());
+        version_control.freeze_mutable_and_apply_metadata(
+            region_metadata,
+            manifest_version,
+            new_mutable,
+        );
+        logging::debug!(
+            "Applied metadata to region: {} when replaying WAL: sequence={} manifest={} ",
+            writer_ctx.shared.name,
+            sequence,
+            manifest_version
+        );
+
+        Ok(())
+    }
+
 /// Preprocess before write.
 ///
 /// Creates needed mutable memtables, ensures there is enough capacity in memtable and trigger