mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-20 23:10:37 +00:00
feat: implement the LogStoreRawEntryReader and RawEntryReaderFilter (#4030)
* feat: implement the `LogStoreRawEntryReader` * feat: implement the `RawEntryReaderFilter` * test: add tests
This commit is contained in:
@@ -250,6 +250,14 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read WAL, topic: {}", topic))]
|
||||
ReadKafkaWal {
|
||||
topic: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to decode WAL entry, region_id: {}", region_id))]
|
||||
DecodeWal {
|
||||
region_id: RegionId,
|
||||
@@ -742,6 +750,7 @@ impl ErrorExt for Error {
|
||||
| ReadParquet { .. }
|
||||
| WriteWal { .. }
|
||||
| ReadWal { .. }
|
||||
| ReadKafkaWal { .. }
|
||||
| DeleteWal { .. } => StatusCode::StorageUnavailable,
|
||||
CompressObject { .. }
|
||||
| DecompressObject { .. }
|
||||
|
||||
@@ -12,11 +12,20 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use futures::stream::BoxStream;
|
||||
use store_api::logstore::entry::RawEntry;
|
||||
use store_api::storage::RegionId;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::error::Result;
|
||||
use async_stream::try_stream;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_wal::options::{KafkaWalOptions, WalOptions};
|
||||
use futures::stream::BoxStream;
|
||||
use futures::TryStreamExt;
|
||||
use snafu::ResultExt;
|
||||
use store_api::logstore::entry::{Entry, RawEntry};
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio_stream::StreamExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::wal::EntryId;
|
||||
|
||||
/// A stream that yields [RawEntry].
|
||||
@@ -32,6 +41,12 @@ pub struct RaftEngineNamespace {
|
||||
region_id: RegionId,
|
||||
}
|
||||
|
||||
impl RaftEngineNamespace {
|
||||
pub fn new(region_id: RegionId) -> Self {
|
||||
Self { region_id }
|
||||
}
|
||||
}
|
||||
|
||||
/// The namespace of [RawEntryReader].
|
||||
pub(crate) enum LogStoreNamespace<'a> {
|
||||
RaftEngine(RaftEngineNamespace),
|
||||
@@ -40,5 +55,298 @@ pub(crate) enum LogStoreNamespace<'a> {
|
||||
|
||||
/// [RawEntryReader] provides the ability to read [RawEntry] from the underlying [LogStore].
|
||||
pub(crate) trait RawEntryReader: Send + Sync {
|
||||
fn read(&self, ctx: LogStoreNamespace, start_id: EntryId) -> Result<RawEntryStream<'static>>;
|
||||
fn read<'a>(
|
||||
&'a self,
|
||||
ctx: LogStoreNamespace<'a>,
|
||||
start_id: EntryId,
|
||||
) -> Result<RawEntryStream<'a>>;
|
||||
}
|
||||
|
||||
/// Implement the [RawEntryReader] for the [LogStore].
|
||||
pub struct LogStoreRawEntryReader<S> {
|
||||
store: Arc<S>,
|
||||
}
|
||||
|
||||
impl<S: LogStore> LogStoreRawEntryReader<S> {
|
||||
pub fn new(store: Arc<S>) -> Self {
|
||||
Self { store }
|
||||
}
|
||||
|
||||
fn read_region(&self, ns: RaftEngineNamespace, start_id: EntryId) -> Result<RawEntryStream> {
|
||||
let region_id = ns.region_id;
|
||||
let stream = try_stream!({
|
||||
// TODO(weny): refactor the `namespace` method.
|
||||
let namespace = self.store.namespace(region_id.into(), &Default::default());
|
||||
let mut stream = self
|
||||
.store
|
||||
.read(&namespace, start_id)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ReadWalSnafu { region_id })?;
|
||||
|
||||
while let Some(entries) = stream.next().await {
|
||||
let entries = entries
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ReadWalSnafu { region_id })?;
|
||||
|
||||
for entry in entries {
|
||||
yield entry.into_raw_entry()
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
fn read_topic<'a>(
|
||||
&'a self,
|
||||
ns: KafkaNamespace<'a>,
|
||||
start_id: EntryId,
|
||||
) -> Result<RawEntryStream> {
|
||||
let topic = ns.topic;
|
||||
let stream = try_stream!({
|
||||
// TODO(weny): refactor the `namespace` method.
|
||||
let namespace = self.store.namespace(
|
||||
RegionId::from_u64(0).into(),
|
||||
&WalOptions::Kafka(KafkaWalOptions {
|
||||
topic: topic.to_string(),
|
||||
}),
|
||||
);
|
||||
|
||||
let mut stream = self
|
||||
.store
|
||||
.read(&namespace, start_id)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ReadKafkaWalSnafu { topic })?;
|
||||
|
||||
while let Some(entries) = stream.next().await {
|
||||
let entries = entries
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ReadKafkaWalSnafu { topic })?;
|
||||
|
||||
for entry in entries {
|
||||
yield entry.into_raw_entry()
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: LogStore> RawEntryReader for LogStoreRawEntryReader<S> {
|
||||
fn read<'a>(
|
||||
&'a self,
|
||||
ctx: LogStoreNamespace<'a>,
|
||||
start_id: EntryId,
|
||||
) -> Result<RawEntryStream<'a>> {
|
||||
let stream = match ctx {
|
||||
LogStoreNamespace::RaftEngine(ns) => self.read_region(ns, start_id)?,
|
||||
LogStoreNamespace::Kafka(ns) => self.read_topic(ns, start_id)?,
|
||||
};
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
/// A filter implement the [RawEntryReader]
|
||||
pub struct RawEntryReaderFilter<R, F> {
|
||||
reader: R,
|
||||
filter: F,
|
||||
}
|
||||
|
||||
impl<R, F> RawEntryReaderFilter<R, F>
|
||||
where
|
||||
R: RawEntryReader,
|
||||
F: Fn(&RawEntry) -> bool + Sync + Send,
|
||||
{
|
||||
pub fn new(reader: R, filter: F) -> Self {
|
||||
Self { reader, filter }
|
||||
}
|
||||
}
|
||||
|
||||
impl<R, F> RawEntryReader for RawEntryReaderFilter<R, F>
|
||||
where
|
||||
R: RawEntryReader,
|
||||
F: Fn(&RawEntry) -> bool + Sync + Send,
|
||||
{
|
||||
fn read<'a>(
|
||||
&'a self,
|
||||
ctx: LogStoreNamespace<'a>,
|
||||
start_id: EntryId,
|
||||
) -> Result<RawEntryStream<'a>> {
|
||||
let mut stream = self.reader.read(ctx, start_id)?;
|
||||
let filter = &(self.filter);
|
||||
let stream = try_stream!({
|
||||
while let Some(entry) = stream.next().await {
|
||||
let entry = entry?;
|
||||
if filter(&entry) {
|
||||
yield entry
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_wal::options::WalOptions;
|
||||
use futures::stream;
|
||||
use store_api::logstore::entry::{Entry, RawEntry};
|
||||
use store_api::logstore::entry_stream::SendableEntryStream;
|
||||
use store_api::logstore::namespace::Namespace;
|
||||
use store_api::logstore::{
|
||||
AppendBatchResponse, AppendResponse, EntryId, LogStore, NamespaceId,
|
||||
};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use super::*;
|
||||
use crate::error;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct MockLogStore {
|
||||
entries: Vec<RawEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Clone, Copy, Default, Hash)]
|
||||
struct MockNamespace;
|
||||
|
||||
impl Namespace for MockNamespace {
|
||||
fn id(&self) -> NamespaceId {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl LogStore for MockLogStore {
|
||||
type Entry = RawEntry;
|
||||
type Error = error::Error;
|
||||
type Namespace = MockNamespace;
|
||||
|
||||
async fn stop(&self) -> Result<(), Self::Error> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
async fn append(&self, entry: Self::Entry) -> Result<AppendResponse, Self::Error> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
async fn append_batch(
|
||||
&self,
|
||||
entries: Vec<Self::Entry>,
|
||||
) -> Result<AppendBatchResponse, Self::Error> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
async fn read(
|
||||
&self,
|
||||
ns: &Self::Namespace,
|
||||
id: EntryId,
|
||||
) -> Result<SendableEntryStream<Self::Entry, Self::Error>, Self::Error> {
|
||||
Ok(Box::pin(stream::iter(vec![Ok(self.entries.clone())])))
|
||||
}
|
||||
|
||||
async fn create_namespace(&self, ns: &Self::Namespace) -> Result<(), Self::Error> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
async fn delete_namespace(&self, ns: &Self::Namespace) -> Result<(), Self::Error> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
async fn list_namespaces(&self) -> Result<Vec<Self::Namespace>, Self::Error> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
async fn obsolete(
|
||||
&self,
|
||||
ns: Self::Namespace,
|
||||
entry_id: EntryId,
|
||||
) -> Result<(), Self::Error> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
fn entry(&self, data: &mut Vec<u8>, entry_id: EntryId, ns: Self::Namespace) -> Self::Entry {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
fn namespace(&self, _ns_id: NamespaceId, _wal_options: &WalOptions) -> Self::Namespace {
|
||||
MockNamespace
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_raw_entry_reader() {
|
||||
let expected_entries = vec![RawEntry {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
entry_id: 1,
|
||||
data: vec![],
|
||||
}];
|
||||
let store = MockLogStore {
|
||||
entries: expected_entries.clone(),
|
||||
};
|
||||
|
||||
let reader = LogStoreRawEntryReader::new(Arc::new(store));
|
||||
let entries = reader
|
||||
.read(
|
||||
LogStoreNamespace::RaftEngine(RaftEngineNamespace::new(RegionId::new(1024, 1))),
|
||||
0,
|
||||
)
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(expected_entries, entries);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_raw_entry_reader_filter() {
|
||||
let all_entries = vec![
|
||||
RawEntry {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
entry_id: 1,
|
||||
data: vec![1],
|
||||
},
|
||||
RawEntry {
|
||||
region_id: RegionId::new(1024, 2),
|
||||
entry_id: 2,
|
||||
data: vec![2],
|
||||
},
|
||||
RawEntry {
|
||||
region_id: RegionId::new(1024, 3),
|
||||
entry_id: 3,
|
||||
data: vec![3],
|
||||
},
|
||||
];
|
||||
let store = MockLogStore {
|
||||
entries: all_entries.clone(),
|
||||
};
|
||||
|
||||
let expected_region_id = RegionId::new(1024, 3);
|
||||
let reader =
|
||||
RawEntryReaderFilter::new(LogStoreRawEntryReader::new(Arc::new(store)), |entry| {
|
||||
entry.region_id == expected_region_id
|
||||
});
|
||||
let entries = reader
|
||||
.read(
|
||||
LogStoreNamespace::RaftEngine(RaftEngineNamespace::new(RegionId::new(1024, 1))),
|
||||
0,
|
||||
)
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
all_entries
|
||||
.into_iter()
|
||||
.filter(|entry| entry.region_id == expected_region_id)
|
||||
.collect::<Vec<_>>(),
|
||||
entries
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,12 +19,35 @@ use crate::storage::RegionId;
|
||||
pub type Id = u64;
|
||||
|
||||
/// The raw Wal entry.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct RawEntry {
|
||||
pub region_id: RegionId,
|
||||
pub entry_id: Id,
|
||||
pub data: Vec<u8>,
|
||||
}
|
||||
|
||||
impl Entry for RawEntry {
|
||||
fn into_raw_entry(self) -> RawEntry {
|
||||
self
|
||||
}
|
||||
|
||||
fn data(&self) -> &[u8] {
|
||||
&self.data
|
||||
}
|
||||
|
||||
fn id(&self) -> Id {
|
||||
self.entry_id
|
||||
}
|
||||
|
||||
fn region_id(&self) -> RegionId {
|
||||
self.region_id
|
||||
}
|
||||
|
||||
fn estimated_size(&self) -> usize {
|
||||
std::mem::size_of_val(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Entry is the minimal data storage unit through which users interact with the log store.
|
||||
/// The log store implementation may have larger or smaller data storage unit than an entry.
|
||||
pub trait Entry: Send + Sync {
|
||||
|
||||
Reference in New Issue
Block a user