feat(bloom-filter): integrate indexer with mito2 (#5236)

* feat(bloom-filter): integrate indexer with mito2

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* rename skippingindextype

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
Zhenchi
2024-12-25 22:30:07 +08:00
committed by GitHub
parent 039989f77b
commit a9f21915ef
22 changed files with 1032 additions and 254 deletions

View File

@@ -29,7 +29,7 @@ use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, R
use crate::prelude::ConcreteDataType;
pub use crate::schema::column_schema::{
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions,
COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
SKIPPING_INDEX_KEY, TIME_INDEX_KEY,

View File

@@ -543,7 +543,7 @@ pub struct SkippingIndexOptions {
pub granularity: u32,
/// The type of the skip index.
#[serde(default)]
pub index_type: SkipIndexType,
pub index_type: SkippingIndexType,
}
impl fmt::Display for SkippingIndexOptions {
@@ -556,15 +556,15 @@ impl fmt::Display for SkippingIndexOptions {
/// Skip index types.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
pub enum SkipIndexType {
pub enum SkippingIndexType {
#[default]
BloomFilter,
}
impl fmt::Display for SkipIndexType {
impl fmt::Display for SkippingIndexType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
SkipIndexType::BloomFilter => write!(f, "BLOOM"),
SkippingIndexType::BloomFilter => write!(f, "BLOOM"),
}
}
}
@@ -587,7 +587,7 @@ impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
// Parse index type with default value BloomFilter
let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
Some(typ) => match typ.to_ascii_uppercase().as_str() {
"BLOOM" => SkipIndexType::BloomFilter,
"BLOOM" => SkippingIndexType::BloomFilter,
_ => {
return error::InvalidSkippingIndexOptionSnafu {
msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"),
@@ -595,7 +595,7 @@ impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
.fail();
}
},
None => SkipIndexType::default(),
None => SkippingIndexType::default(),
};
Ok(SkippingIndexOptions {

View File

@@ -73,7 +73,7 @@ impl BloomFilterCreator {
/// `rows_per_segment` <= 0
pub fn new(
rows_per_segment: usize,
intermediate_provider: Box<dyn ExternalTempFileProvider>,
intermediate_provider: Arc<dyn ExternalTempFileProvider>,
global_memory_usage: Arc<AtomicUsize>,
global_memory_usage_threshold: Option<usize>,
) -> Self {
@@ -252,7 +252,7 @@ mod tests {
let mut writer = Cursor::new(Vec::new());
let mut creator = BloomFilterCreator::new(
2,
Box::new(MockExternalTempFileProvider::new()),
Arc::new(MockExternalTempFileProvider::new()),
Arc::new(AtomicUsize::new(0)),
None,
);
@@ -322,7 +322,7 @@ mod tests {
let mut writer = Cursor::new(Vec::new());
let mut creator = BloomFilterCreator::new(
2,
Box::new(MockExternalTempFileProvider::new()),
Arc::new(MockExternalTempFileProvider::new()),
Arc::new(AtomicUsize::new(0)),
None,
);

View File

@@ -43,7 +43,7 @@ pub struct FinalizedBloomFilterStorage {
intermediate_prefix: String,
/// The provider for intermediate Bloom filter files.
intermediate_provider: Box<dyn ExternalTempFileProvider>,
intermediate_provider: Arc<dyn ExternalTempFileProvider>,
/// The memory usage of the in-memory Bloom filters.
memory_usage: usize,
@@ -59,7 +59,7 @@ pub struct FinalizedBloomFilterStorage {
impl FinalizedBloomFilterStorage {
/// Creates a new `FinalizedBloomFilterStorage`.
pub fn new(
intermediate_provider: Box<dyn ExternalTempFileProvider>,
intermediate_provider: Arc<dyn ExternalTempFileProvider>,
global_memory_usage: Arc<AtomicUsize>,
global_memory_usage_threshold: Option<usize>,
) -> Self {
@@ -132,7 +132,7 @@ impl FinalizedBloomFilterStorage {
/// Drains the storage and returns a stream of finalized Bloom filter segments.
pub async fn drain(
&mut self,
) -> Result<Pin<Box<dyn Stream<Item = Result<FinalizedBloomFilterSegment>> + '_>>> {
) -> Result<Pin<Box<dyn Stream<Item = Result<FinalizedBloomFilterSegment>> + Send + '_>>> {
// FAST PATH: memory only
if self.intermediate_file_id_counter == 0 {
return Ok(Box::pin(stream::iter(self.in_memory.drain(..).map(Ok))));
@@ -257,7 +257,7 @@ mod tests {
let global_memory_usage = Arc::new(AtomicUsize::new(0));
let global_memory_usage_threshold = Some(1024 * 1024); // 1MB
let provider = Box::new(mock_provider);
let provider = Arc::new(mock_provider);
let mut storage = FinalizedBloomFilterStorage::new(
provider,
global_memory_usage.clone(),

View File

@@ -190,7 +190,7 @@ mod tests {
let mut writer = Cursor::new(vec![]);
let mut creator = BloomFilterCreator::new(
2,
Box::new(MockExternalTempFileProvider::new()),
Arc::new(MockExternalTempFileProvider::new()),
Arc::new(AtomicUsize::new(0)),
None,
);

View File

@@ -21,7 +21,6 @@ use common_telemetry::{info, warn};
use common_time::TimeToLive;
use object_store::manager::ObjectStoreManagerRef;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId;
@@ -41,7 +40,7 @@ use crate::region::options::RegionOptions;
use crate::region::version::VersionRef;
use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState};
use crate::schedule::scheduler::LocalScheduler;
use crate::sst::file::{FileMeta, IndexType};
use crate::sst::file::FileMeta;
use crate::sst::file_purger::LocalFilePurger;
use crate::sst::index::intermediate::IntermediateManager;
use crate::sst::index::puffin_manager::PuffinManagerFactory;
@@ -336,16 +335,7 @@ impl Compactor for DefaultCompactor {
time_range: sst_info.time_range,
level: output.output_level,
file_size: sst_info.file_size,
available_indexes: {
let mut indexes = SmallVec::new();
if sst_info.index_metadata.inverted_index.is_available() {
indexes.push(IndexType::InvertedIndex);
}
if sst_info.index_metadata.fulltext_index.is_available() {
indexes.push(IndexType::FulltextIndex);
}
indexes
},
available_indexes: sst_info.index_metadata.build_available_indexes(),
index_file_size: sst_info.index_metadata.file_size,
num_rows: sst_info.num_rows as u64,
num_row_groups: sst_info.num_row_groups,

View File

@@ -816,8 +816,8 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to retrieve fulltext options from column metadata"))]
FulltextOptions {
#[snafu(display("Failed to retrieve index options from column metadata"))]
IndexOptions {
#[snafu(implicit)]
location: Location,
source: datatypes::error::Error,
@@ -904,6 +904,20 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to push value to bloom filter"))]
PushBloomFilterValue {
source: index::bloom_filter::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to finish bloom filter"))]
BloomFilterFinish {
source: index::bloom_filter::error::Error,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -1029,7 +1043,7 @@ impl ErrorExt for Error {
UnsupportedOperation { .. } => StatusCode::Unsupported,
RemoteCompaction { .. } => StatusCode::Unexpected,
FulltextOptions { source, .. } => source.status_code(),
IndexOptions { source, .. } => source.status_code(),
CreateFulltextCreator { source, .. } => source.status_code(),
CastVector { source, .. } => source.status_code(),
FulltextPushText { source, .. }
@@ -1039,7 +1053,12 @@ impl ErrorExt for Error {
RegionBusy { .. } => StatusCode::RegionBusy,
GetSchemaMetadata { source, .. } => source.status_code(),
Timeout { .. } => StatusCode::Cancelled,
DecodeArrowRowGroup { .. } => StatusCode::Internal,
PushBloomFilterValue { source, .. } | BloomFilterFinish { source, .. } => {
source.status_code()
}
}
}

View File

@@ -19,7 +19,6 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use common_telemetry::{debug, error, info, trace};
use smallvec::SmallVec;
use snafu::ResultExt;
use store_api::storage::RegionId;
use strum::IntoStaticStr;
@@ -45,7 +44,7 @@ use crate::request::{
SenderWriteRequest, WorkerRequest,
};
use crate::schedule::scheduler::{Job, SchedulerRef};
use crate::sst::file::{FileId, FileMeta, IndexType};
use crate::sst::file::{FileId, FileMeta};
use crate::sst::parquet::WriteOptions;
use crate::worker::WorkerListener;
@@ -378,16 +377,7 @@ impl RegionFlushTask {
time_range: sst_info.time_range,
level: 0,
file_size: sst_info.file_size,
available_indexes: {
let mut indexes = SmallVec::new();
if sst_info.index_metadata.inverted_index.is_available() {
indexes.push(IndexType::InvertedIndex);
}
if sst_info.index_metadata.fulltext_index.is_available() {
indexes.push(IndexType::FulltextIndex);
}
indexes
},
available_indexes: sst_info.index_metadata.build_available_indexes(),
index_file_size: sst_info.index_metadata.file_size,
num_rows: sst_info.num_rows as u64,
num_row_groups: sst_info.num_row_groups,

View File

@@ -143,6 +143,8 @@ pub enum IndexType {
InvertedIndex,
/// Full-text index.
FulltextIndex,
/// Bloom filter.
BloomFilter,
}
impl FileMeta {
@@ -156,6 +158,11 @@ impl FileMeta {
self.available_indexes.contains(&IndexType::FulltextIndex)
}
/// Returns true if the file has a bloom filter
pub fn bloom_filter_available(&self) -> bool {
self.available_indexes.contains(&IndexType::BloomFilter)
}
/// Returns the size of the inverted index file
pub fn inverted_index_size(&self) -> Option<u64> {
if self.available_indexes.len() == 1 && self.inverted_index_available() {

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod bloom_filter;
mod codec;
pub(crate) mod fulltext_index;
mod indexer;
pub(crate) mod intermediate;
@@ -22,8 +24,10 @@ pub(crate) mod store;
use std::num::NonZeroUsize;
use bloom_filter::creator::BloomFilterIndexer;
use common_telemetry::{debug, warn};
use puffin_manager::SstPuffinManager;
use smallvec::SmallVec;
use statistics::{ByteCount, RowCount};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::{ColumnId, RegionId};
@@ -33,13 +37,14 @@ use crate::config::{FulltextIndexConfig, InvertedIndexConfig};
use crate::metrics::INDEX_CREATE_MEMORY_USAGE;
use crate::read::Batch;
use crate::region::options::IndexOptions;
use crate::sst::file::FileId;
use crate::sst::file::{FileId, IndexType};
use crate::sst::index::fulltext_index::creator::FulltextIndexer;
use crate::sst::index::intermediate::IntermediateManager;
use crate::sst::index::inverted_index::creator::InvertedIndexer;
pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index";
pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index";
pub(crate) const TYPE_BLOOM_FILTER: &str = "bloom_filter";
/// Output of the index creation.
#[derive(Debug, Clone, Default)]
@@ -50,6 +55,24 @@ pub struct IndexOutput {
pub inverted_index: InvertedIndexOutput,
/// Fulltext index output.
pub fulltext_index: FulltextIndexOutput,
/// Bloom filter output.
pub bloom_filter: BloomFilterOutput,
}
impl IndexOutput {
pub fn build_available_indexes(&self) -> SmallVec<[IndexType; 4]> {
let mut indexes = SmallVec::new();
if self.inverted_index.is_available() {
indexes.push(IndexType::InvertedIndex);
}
if self.fulltext_index.is_available() {
indexes.push(IndexType::FulltextIndex);
}
if self.bloom_filter.is_available() {
indexes.push(IndexType::BloomFilter);
}
indexes
}
}
/// Base output of the index creation.
@@ -73,6 +96,8 @@ impl IndexBaseOutput {
pub type InvertedIndexOutput = IndexBaseOutput;
/// Output of the fulltext index creation.
pub type FulltextIndexOutput = IndexBaseOutput;
/// Output of the bloom filter creation.
pub type BloomFilterOutput = IndexBaseOutput;
/// The index creator that hides the error handling details.
#[derive(Default)]
@@ -86,6 +111,8 @@ pub struct Indexer {
last_mem_inverted_index: usize,
fulltext_indexer: Option<FulltextIndexer>,
last_mem_fulltext_index: usize,
bloom_filter_indexer: Option<BloomFilterIndexer>,
last_mem_bloom_filter: usize,
}
impl Indexer {
@@ -129,6 +156,15 @@ impl Indexer {
.with_label_values(&[TYPE_FULLTEXT_INDEX])
.add(fulltext_mem as i64 - self.last_mem_fulltext_index as i64);
self.last_mem_fulltext_index = fulltext_mem;
let bloom_filter_mem = self
.bloom_filter_indexer
.as_ref()
.map_or(0, |creator| creator.memory_usage());
INDEX_CREATE_MEMORY_USAGE
.with_label_values(&[TYPE_BLOOM_FILTER])
.add(bloom_filter_mem as i64 - self.last_mem_bloom_filter as i64);
self.last_mem_bloom_filter = bloom_filter_mem;
}
}
@@ -158,7 +194,11 @@ impl<'a> IndexerBuilder<'a> {
indexer.inverted_indexer = self.build_inverted_indexer();
indexer.fulltext_indexer = self.build_fulltext_indexer().await;
if indexer.inverted_indexer.is_none() && indexer.fulltext_indexer.is_none() {
indexer.bloom_filter_indexer = self.build_bloom_filter_indexer();
if indexer.inverted_indexer.is_none()
&& indexer.fulltext_indexer.is_none()
&& indexer.bloom_filter_indexer.is_none()
{
indexer.abort().await;
return Indexer::default();
}
@@ -266,7 +306,7 @@ impl<'a> IndexerBuilder<'a> {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to create full-text indexer, region_id: {}, file_id: {}, err: {}",
"Failed to create full-text indexer, region_id: {}, file_id: {}, err: {:?}",
self.metadata.region_id, self.file_id, err
);
} else {
@@ -278,6 +318,53 @@ impl<'a> IndexerBuilder<'a> {
None
}
fn build_bloom_filter_indexer(&self) -> Option<BloomFilterIndexer> {
let create = true; // TODO(zhongzc): add config for bloom filter
if !create {
debug!(
"Skip creating bloom filter due to config, region_id: {}, file_id: {}",
self.metadata.region_id, self.file_id,
);
return None;
}
let mem_limit = Some(16 * 1024 * 1024); // TODO(zhongzc): add config for bloom filter
let indexer = BloomFilterIndexer::new(
self.file_id,
self.metadata,
self.intermediate_manager.clone(),
mem_limit,
);
let err = match indexer {
Ok(indexer) => {
if indexer.is_none() {
debug!(
"Skip creating bloom filter due to no columns require indexing, region_id: {}, file_id: {}",
self.metadata.region_id, self.file_id,
);
}
return indexer;
}
Err(err) => err,
};
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to create bloom filter, region_id: {}, file_id: {}, err: {:?}",
self.metadata.region_id, self.file_id, err
);
} else {
warn!(
err; "Failed to create bloom filter, region_id: {}, file_id: {}",
self.metadata.region_id, self.file_id,
);
}
None
}
}
#[cfg(test)]
@@ -286,7 +373,9 @@ mod tests {
use api::v1::SemanticType;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, FulltextOptions};
use datatypes::schema::{
ColumnSchema, FulltextOptions, SkippingIndexOptions, SkippingIndexType,
};
use object_store::services::Memory;
use object_store::ObjectStore;
use puffin_manager::PuffinManagerFactory;
@@ -298,12 +387,14 @@ mod tests {
struct MetaConfig {
with_tag: bool,
with_fulltext: bool,
with_skipping_bloom: bool,
}
fn mock_region_metadata(
MetaConfig {
with_tag,
with_fulltext,
with_skipping_bloom,
}: MetaConfig,
) -> RegionMetadataRef {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 2));
@@ -354,6 +445,24 @@ mod tests {
builder.push_column_metadata(column);
}
if with_skipping_bloom {
let column_schema =
ColumnSchema::new("bloom", ConcreteDataType::string_datatype(), false)
.with_skipping_options(SkippingIndexOptions {
granularity: 42,
index_type: SkippingIndexType::BloomFilter,
})
.unwrap();
let column = ColumnMetadata {
column_schema,
semantic_type: SemanticType::Field,
column_id: 5,
};
builder.push_column_metadata(column);
}
Arc::new(builder.build().unwrap())
}
@@ -374,6 +483,7 @@ mod tests {
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_fulltext: true,
with_skipping_bloom: true,
});
let indexer = IndexerBuilder {
op_type: OperationType::Flush,
@@ -392,6 +502,7 @@ mod tests {
assert!(indexer.inverted_indexer.is_some());
assert!(indexer.fulltext_indexer.is_some());
assert!(indexer.bloom_filter_indexer.is_some());
}
#[tokio::test]
@@ -403,6 +514,7 @@ mod tests {
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_fulltext: true,
with_skipping_bloom: true,
});
let indexer = IndexerBuilder {
op_type: OperationType::Flush,
@@ -456,6 +568,7 @@ mod tests {
let metadata = mock_region_metadata(MetaConfig {
with_tag: false,
with_fulltext: true,
with_skipping_bloom: true,
});
let indexer = IndexerBuilder {
op_type: OperationType::Flush,
@@ -474,10 +587,36 @@ mod tests {
assert!(indexer.inverted_indexer.is_none());
assert!(indexer.fulltext_indexer.is_some());
assert!(indexer.bloom_filter_indexer.is_some());
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_fulltext: false,
with_skipping_bloom: true,
});
let indexer = IndexerBuilder {
op_type: OperationType::Flush,
file_id: FileId::random(),
file_path: "test".to_string(),
metadata: &metadata,
row_group_size: 1024,
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager.clone(),
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig::default(),
fulltext_index_config: FulltextIndexConfig::default(),
}
.build()
.await;
assert!(indexer.inverted_indexer.is_some());
assert!(indexer.fulltext_indexer.is_none());
assert!(indexer.bloom_filter_indexer.is_some());
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_fulltext: true,
with_skipping_bloom: false,
});
let indexer = IndexerBuilder {
op_type: OperationType::Flush,
@@ -495,7 +634,8 @@ mod tests {
.await;
assert!(indexer.inverted_indexer.is_some());
assert!(indexer.fulltext_indexer.is_none());
assert!(indexer.fulltext_indexer.is_some());
assert!(indexer.bloom_filter_indexer.is_none());
}
#[tokio::test]
@@ -507,6 +647,7 @@ mod tests {
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_fulltext: true,
with_skipping_bloom: true,
});
let indexer = IndexerBuilder {
op_type: OperationType::Flush,

View File

@@ -0,0 +1,17 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod creator;
const INDEX_BLOB_TYPE: &str = "greptime-bloom-filter-v1";

View File

@@ -0,0 +1,530 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::atomic::AtomicUsize;
use std::sync::Arc;
use common_telemetry::warn;
use datatypes::schema::SkippingIndexType;
use index::bloom_filter::creator::BloomFilterCreator;
use puffin::puffin_manager::{PuffinWriter, PutOptions};
use snafu::{ensure, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::ColumnId;
use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
use crate::error::{
BiErrorsSnafu, BloomFilterFinishSnafu, IndexOptionsSnafu, OperateAbortedIndexSnafu,
PuffinAddBlobSnafu, PushBloomFilterValueSnafu, Result,
};
use crate::read::Batch;
use crate::row_converter::SortField;
use crate::sst::file::FileId;
use crate::sst::index::bloom_filter::INDEX_BLOB_TYPE;
use crate::sst::index::codec::{IndexValueCodec, IndexValuesCodec};
use crate::sst::index::intermediate::{
IntermediateLocation, IntermediateManager, TempFileProvider,
};
use crate::sst::index::puffin_manager::SstPuffinWriter;
use crate::sst::index::statistics::{ByteCount, RowCount, Statistics};
use crate::sst::index::TYPE_BLOOM_FILTER;
/// The buffer size for the pipe used to send index data to the puffin blob.
const PIPE_BUFFER_SIZE_FOR_SENDING_BLOB: usize = 8192;
/// The indexer for the bloom filter index.
pub struct BloomFilterIndexer {
/// The bloom filter creators.
creators: HashMap<ColumnId, BloomFilterCreator>,
/// The provider for intermediate files.
temp_file_provider: Arc<TempFileProvider>,
/// Codec for decoding primary keys.
codec: IndexValuesCodec,
/// Whether the indexing process has been aborted.
aborted: bool,
/// The statistics of the indexer.
stats: Statistics,
/// The global memory usage.
global_memory_usage: Arc<AtomicUsize>,
}
impl BloomFilterIndexer {
/// Creates a new bloom filter indexer.
pub fn new(
sst_file_id: FileId,
metadata: &RegionMetadataRef,
intermediate_manager: IntermediateManager,
memory_usage_threshold: Option<usize>,
) -> Result<Option<Self>> {
let mut creators = HashMap::new();
let temp_file_provider = Arc::new(TempFileProvider::new(
IntermediateLocation::new(&metadata.region_id, &sst_file_id),
intermediate_manager,
));
let global_memory_usage = Arc::new(AtomicUsize::new(0));
for column in &metadata.column_metadatas {
let options =
column
.column_schema
.skipping_index_options()
.context(IndexOptionsSnafu {
column_name: &column.column_schema.name,
})?;
let options = match options {
Some(options) if options.index_type == SkippingIndexType::BloomFilter => options,
_ => continue,
};
let creator = BloomFilterCreator::new(
options.granularity as _,
temp_file_provider.clone(),
global_memory_usage.clone(),
memory_usage_threshold,
);
creators.insert(column.column_id, creator);
}
if creators.is_empty() {
return Ok(None);
}
let codec = IndexValuesCodec::from_tag_columns(metadata.primary_key_columns());
let indexer = Self {
creators,
temp_file_provider,
codec,
aborted: false,
stats: Statistics::new(TYPE_BLOOM_FILTER),
global_memory_usage,
};
Ok(Some(indexer))
}
/// Updates index with a batch of rows.
/// Garbage will be cleaned up if failed to update.
///
/// TODO(zhongzc): duplicate with `mito2::sst::index::inverted_index::creator::InvertedIndexCreator`
pub async fn update(&mut self, batch: &Batch) -> Result<()> {
ensure!(!self.aborted, OperateAbortedIndexSnafu);
if self.creators.is_empty() {
return Ok(());
}
if let Err(update_err) = self.do_update(batch).await {
// clean up garbage if failed to update
if let Err(err) = self.do_cleanup().await {
if cfg!(any(test, feature = "test")) {
panic!("Failed to clean up index creator, err: {err:?}",);
} else {
warn!(err; "Failed to clean up index creator");
}
}
return Err(update_err);
}
Ok(())
}
/// Finishes index creation and cleans up garbage.
/// Returns the number of rows and bytes written.
///
/// TODO(zhongzc): duplicate with `mito2::sst::index::inverted_index::creator::InvertedIndexCreator`
pub async fn finish(
&mut self,
puffin_writer: &mut SstPuffinWriter,
) -> Result<(RowCount, ByteCount)> {
ensure!(!self.aborted, OperateAbortedIndexSnafu);
if self.stats.row_count() == 0 {
// no IO is performed, no garbage to clean up, just return
return Ok((0, 0));
}
let finish_res = self.do_finish(puffin_writer).await;
// clean up garbage no matter finish successfully or not
if let Err(err) = self.do_cleanup().await {
if cfg!(any(test, feature = "test")) {
panic!("Failed to clean up index creator, err: {err:?}",);
} else {
warn!(err; "Failed to clean up index creator");
}
}
finish_res.map(|_| (self.stats.row_count(), self.stats.byte_count()))
}
/// Aborts index creation and clean up garbage.
///
/// TODO(zhongzc): duplicate with `mito2::sst::index::inverted_index::creator::InvertedIndexCreator`
pub async fn abort(&mut self) -> Result<()> {
if self.aborted {
return Ok(());
}
self.aborted = true;
self.do_cleanup().await
}
async fn do_update(&mut self, batch: &Batch) -> Result<()> {
let mut guard = self.stats.record_update();
let n = batch.num_rows();
guard.inc_row_count(n);
// Tags
for ((col_id, _), field, value) in self.codec.decode(batch.primary_key())? {
let Some(creator) = self.creators.get_mut(col_id) else {
continue;
};
let elems = value
.map(|v| {
let mut buf = vec![];
IndexValueCodec::encode_nonnull_value(v.as_value_ref(), field, &mut buf)?;
Ok(buf)
})
.transpose()?;
creator
.push_n_row_elems(n, elems)
.await
.context(PushBloomFilterValueSnafu)?;
}
// Fields
for field in batch.fields() {
let Some(creator) = self.creators.get_mut(&field.column_id) else {
continue;
};
let sort_field = SortField::new(field.data.data_type());
for i in 0..n {
let value = field.data.get_ref(i);
let elems = (!value.is_null())
.then(|| {
let mut buf = vec![];
IndexValueCodec::encode_nonnull_value(value, &sort_field, &mut buf)?;
Ok(buf)
})
.transpose()?;
creator
.push_row_elems(elems)
.await
.context(PushBloomFilterValueSnafu)?;
}
}
Ok(())
}
/// TODO(zhongzc): duplicate with `mito2::sst::index::inverted_index::creator::InvertedIndexCreator`
async fn do_finish(&mut self, puffin_writer: &mut SstPuffinWriter) -> Result<()> {
let mut guard = self.stats.record_finish();
for (id, creator) in &mut self.creators {
let written_bytes = Self::do_finish_single_creator(id, creator, puffin_writer).await?;
guard.inc_byte_count(written_bytes);
}
Ok(())
}
async fn do_cleanup(&mut self) -> Result<()> {
let mut _guard = self.stats.record_cleanup();
self.creators.clear();
self.temp_file_provider.cleanup().await
}
/// Data flow of finishing index:
///
/// ```text
/// (In Memory Buffer)
/// ┌──────┐
/// ┌─────────────┐ │ PIPE │
/// │ │ write index data │ │
/// │ IndexWriter ├──────────────────►│ tx │
/// │ │ │ │
/// └─────────────┘ │ │
/// ┌─────────────────┤ rx │
/// ┌─────────────┐ │ read as blob └──────┘
/// │ │ │
/// │ PuffinWriter├─┤
/// │ │ │ copy to file ┌──────┐
/// └─────────────┘ └────────────────►│ File │
/// └──────┘
/// ```
///
/// TODO(zhongzc): duplicate with `mito2::sst::index::inverted_index::creator::InvertedIndexCreator`
async fn do_finish_single_creator(
col_id: &ColumnId,
creator: &mut BloomFilterCreator,
puffin_writer: &mut SstPuffinWriter,
) -> Result<ByteCount> {
let (tx, rx) = tokio::io::duplex(PIPE_BUFFER_SIZE_FOR_SENDING_BLOB);
let blob_name = format!("{}-{}", INDEX_BLOB_TYPE, col_id);
let (index_finish, puffin_add_blob) = futures::join!(
creator.finish(tx.compat_write()),
puffin_writer.put_blob(&blob_name, rx.compat(), PutOptions::default())
);
match (
puffin_add_blob.context(PuffinAddBlobSnafu),
index_finish.context(BloomFilterFinishSnafu),
) {
(Err(e1), Err(e2)) => BiErrorsSnafu {
first: Box::new(e1),
second: Box::new(e2),
}
.fail()?,
(Ok(_), e @ Err(_)) => e?,
(e @ Err(_), Ok(_)) => e.map(|_| ())?,
(Ok(written_bytes), Ok(_)) => {
return Ok(written_bytes);
}
}
Ok(0)
}
/// Returns the memory usage of the indexer.
pub fn memory_usage(&self) -> usize {
self.global_memory_usage
.load(std::sync::atomic::Ordering::Relaxed)
}
/// Returns the column ids to be indexed.
pub fn column_ids(&self) -> impl Iterator<Item = ColumnId> + use<'_> {
self.creators.keys().copied()
}
}
#[cfg(test)]
mod tests {
use std::iter;
use api::v1::SemanticType;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SkippingIndexOptions};
use datatypes::value::ValueRef;
use datatypes::vectors::{UInt64Vector, UInt8Vector};
use index::bloom_filter::reader::{BloomFilterReader, BloomFilterReaderImpl};
use object_store::services::Memory;
use object_store::ObjectStore;
use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader};
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
use store_api::storage::RegionId;
use super::*;
use crate::read::BatchColumn;
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
use crate::sst::index::puffin_manager::PuffinManagerFactory;
fn mock_object_store() -> ObjectStore {
ObjectStore::new(Memory::default()).unwrap().finish()
}
async fn new_intm_mgr(path: impl AsRef<str>) -> IntermediateManager {
IntermediateManager::init_fs(path).await.unwrap()
}
/// tag_str:
/// - type: string
/// - index: bloom filter
/// - granularity: 2
/// - column_id: 1
///
/// ts:
/// - type: timestamp
/// - index: time index
/// - column_id: 2
///
/// field_u64:
/// - type: uint64
/// - index: bloom filter
/// - granularity: 4
/// - column_id: 3
fn mock_region_metadata() -> RegionMetadataRef {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 2));
builder
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"tag_str",
ConcreteDataType::string_datatype(),
false,
)
.with_skipping_options(SkippingIndexOptions {
index_type: SkippingIndexType::BloomFilter,
granularity: 2,
})
.unwrap(),
semantic_type: SemanticType::Tag,
column_id: 1,
})
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id: 2,
})
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"field_u64",
ConcreteDataType::uint64_datatype(),
false,
)
.with_skipping_options(SkippingIndexOptions {
index_type: SkippingIndexType::BloomFilter,
granularity: 4,
})
.unwrap(),
semantic_type: SemanticType::Field,
column_id: 3,
})
.primary_key(vec![1]);
Arc::new(builder.build().unwrap())
}
fn new_batch(str_tag: impl AsRef<str>, u64_field: impl IntoIterator<Item = u64>) -> Batch {
let fields = vec![SortField::new(ConcreteDataType::string_datatype())];
let codec = McmpRowCodec::new(fields);
let row: [ValueRef; 1] = [str_tag.as_ref().into()];
let primary_key = codec.encode(row.into_iter()).unwrap();
let u64_field = BatchColumn {
column_id: 3,
data: Arc::new(UInt64Vector::from_iter_values(u64_field)),
};
let num_rows = u64_field.data.len();
Batch::new(
primary_key,
Arc::new(UInt64Vector::from_iter_values(
iter::repeat(0).take(num_rows),
)),
Arc::new(UInt64Vector::from_iter_values(
iter::repeat(0).take(num_rows),
)),
Arc::new(UInt8Vector::from_iter_values(
iter::repeat(1).take(num_rows),
)),
vec![u64_field],
)
.unwrap()
}
#[tokio::test]
async fn test_bloom_filter_indexer() {
let prefix = "test_bloom_filter_indexer_";
let object_store = mock_object_store();
let intm_mgr = new_intm_mgr(prefix).await;
let region_metadata = mock_region_metadata();
let memory_usage_threshold = Some(1024);
let mut indexer = BloomFilterIndexer::new(
FileId::random(),
&region_metadata,
intm_mgr,
memory_usage_threshold,
)
.unwrap()
.unwrap();
// push 20 rows
let batch = new_batch("tag1", 0..10);
indexer.update(&batch).await.unwrap();
let batch = new_batch("tag2", 10..20);
indexer.update(&batch).await.unwrap();
let (_d, factory) = PuffinManagerFactory::new_for_test_async(prefix).await;
let puffin_manager = factory.build(object_store);
let index_file_name = "index_file";
let mut puffin_writer = puffin_manager.writer(index_file_name).await.unwrap();
let (row_count, byte_count) = indexer.finish(&mut puffin_writer).await.unwrap();
assert_eq!(row_count, 20);
assert!(byte_count > 0);
puffin_writer.finish().await.unwrap();
let puffin_reader = puffin_manager.reader(index_file_name).await.unwrap();
// tag_str
{
let blob_guard = puffin_reader
.blob("greptime-bloom-filter-v1-1")
.await
.unwrap();
let reader = blob_guard.reader().await.unwrap();
let mut bloom_filter = BloomFilterReaderImpl::new(reader);
let metadata = bloom_filter.metadata().await.unwrap();
assert_eq!(metadata.bloom_filter_segments.len(), 10);
for i in 0..5 {
let bf = bloom_filter
.bloom_filter(&metadata.bloom_filter_segments[i])
.await
.unwrap();
assert!(bf.contains(b"tag1"));
}
for i in 5..10 {
let bf = bloom_filter
.bloom_filter(&metadata.bloom_filter_segments[i])
.await
.unwrap();
assert!(bf.contains(b"tag2"));
}
}
// field_u64
{
let sort_field = SortField::new(ConcreteDataType::uint64_datatype());
let blob_guard = puffin_reader
.blob("greptime-bloom-filter-v1-3")
.await
.unwrap();
let reader = blob_guard.reader().await.unwrap();
let mut bloom_filter = BloomFilterReaderImpl::new(reader);
let metadata = bloom_filter.metadata().await.unwrap();
assert_eq!(metadata.bloom_filter_segments.len(), 5);
for i in 0u64..20 {
let bf = bloom_filter
.bloom_filter(&metadata.bloom_filter_segments[i as usize / 4])
.await
.unwrap();
let mut buf = vec![];
IndexValueCodec::encode_nonnull_value(ValueRef::UInt64(i), &sort_field, &mut buf)
.unwrap();
assert!(bf.contains(&buf));
}
}
}
}

View File

@@ -27,8 +27,7 @@ use store_api::storage::{ColumnId, ConcreteDataType, RegionId};
use crate::error::{
CastVectorSnafu, CreateFulltextCreatorSnafu, FieldTypeMismatchSnafu, FulltextFinishSnafu,
FulltextOptionsSnafu, FulltextPushTextSnafu, OperateAbortedIndexSnafu, PuffinAddBlobSnafu,
Result,
FulltextPushTextSnafu, IndexOptionsSnafu, OperateAbortedIndexSnafu, PuffinAddBlobSnafu, Result,
};
use crate::read::Batch;
use crate::sst::file::FileId;
@@ -61,13 +60,12 @@ impl FulltextIndexer {
let mut creators = HashMap::new();
for column in &metadata.column_metadatas {
let options =
column
.column_schema
.fulltext_options()
.context(FulltextOptionsSnafu {
column_name: &column.column_schema.name,
})?;
let options = column
.column_schema
.fulltext_options()
.context(IndexOptionsSnafu {
column_name: &column.column_schema.name,
})?;
// Relax the type constraint here as many types can be casted to string.

View File

@@ -20,6 +20,7 @@ impl Indexer {
pub(crate) async fn do_abort(&mut self) {
self.do_abort_inverted_index().await;
self.do_abort_fulltext_index().await;
self.do_abort_bloom_filter().await;
self.puffin_manager = None;
}
@@ -33,7 +34,7 @@ impl Indexer {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to abort inverted index, region_id: {}, file_id: {}, err: {}",
"Failed to abort inverted index, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
@@ -54,7 +55,7 @@ impl Indexer {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to abort full-text index, region_id: {}, file_id: {}, err: {}",
"Failed to abort full-text index, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
@@ -64,4 +65,25 @@ impl Indexer {
);
}
}
async fn do_abort_bloom_filter(&mut self) {
let Some(mut indexer) = self.bloom_filter_indexer.take() else {
return;
};
let Err(err) = indexer.abort().await else {
return;
};
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to abort bloom filter, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
warn!(
err; "Failed to abort bloom filter, region_id: {}, file_id: {}",
self.region_id, self.file_id,
);
}
}
}

View File

@@ -15,11 +15,14 @@
use common_telemetry::{debug, warn};
use puffin::puffin_manager::{PuffinManager, PuffinWriter};
use crate::sst::index::bloom_filter::creator::BloomFilterIndexer;
use crate::sst::index::fulltext_index::creator::FulltextIndexer;
use crate::sst::index::inverted_index::creator::InvertedIndexer;
use crate::sst::index::puffin_manager::SstPuffinWriter;
use crate::sst::index::statistics::{ByteCount, RowCount};
use crate::sst::index::{FulltextIndexOutput, IndexOutput, Indexer, InvertedIndexOutput};
use crate::sst::index::{
BloomFilterOutput, FulltextIndexOutput, IndexOutput, Indexer, InvertedIndexOutput,
};
impl Indexer {
pub(crate) async fn do_finish(&mut self) -> IndexOutput {
@@ -46,6 +49,12 @@ impl Indexer {
return IndexOutput::default();
}
let success = self.do_finish_bloom_filter(&mut writer, &mut output).await;
if !success {
self.do_abort().await;
return IndexOutput::default();
}
output.file_size = self.do_finish_puffin_writer(writer).await;
output
}
@@ -60,7 +69,7 @@ impl Indexer {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to create puffin writer, region_id: {}, file_id: {}, err: {}",
"Failed to create puffin writer, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
@@ -81,7 +90,7 @@ impl Indexer {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to finish puffin writer, region_id: {}, file_id: {}, err: {}",
"Failed to finish puffin writer, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
@@ -119,7 +128,7 @@ impl Indexer {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to finish inverted index, region_id: {}, file_id: {}, err: {}",
"Failed to finish inverted index, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
@@ -156,7 +165,7 @@ impl Indexer {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to finish full-text index, region_id: {}, file_id: {}, err: {}",
"Failed to finish full-text index, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
@@ -169,6 +178,43 @@ impl Indexer {
false
}
async fn do_finish_bloom_filter(
&mut self,
puffin_writer: &mut SstPuffinWriter,
index_output: &mut IndexOutput,
) -> bool {
let Some(mut indexer) = self.bloom_filter_indexer.take() else {
return true;
};
let err = match indexer.finish(puffin_writer).await {
Ok((row_count, byte_count)) => {
self.fill_bloom_filter_output(
&mut index_output.bloom_filter,
row_count,
byte_count,
&indexer,
);
return true;
}
Err(err) => err,
};
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to finish bloom filter, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
warn!(
err; "Failed to finish bloom filter, region_id: {}, file_id: {}",
self.region_id, self.file_id,
);
}
false
}
fn fill_inverted_index_output(
&mut self,
output: &mut InvertedIndexOutput,
@@ -202,4 +248,21 @@ impl Indexer {
output.row_count = row_count;
output.columns = indexer.column_ids().collect();
}
fn fill_bloom_filter_output(
&mut self,
output: &mut BloomFilterOutput,
row_count: RowCount,
byte_count: ByteCount,
indexer: &BloomFilterIndexer,
) {
debug!(
"Bloom filter created, region_id: {}, file_id: {}, written_bytes: {}, written_rows: {}",
self.region_id, self.file_id, byte_count, row_count
);
output.index_size = byte_count;
output.row_count = row_count;
output.columns = indexer.column_ids().collect();
}
}

View File

@@ -29,6 +29,9 @@ impl Indexer {
if !self.do_update_fulltext_index(batch).await {
self.do_abort().await;
}
if !self.do_update_bloom_filter(batch).await {
self.do_abort().await;
}
}
/// Returns false if the update failed.
@@ -43,7 +46,7 @@ impl Indexer {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to update inverted index, region_id: {}, file_id: {}, err: {}",
"Failed to update inverted index, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
@@ -68,7 +71,7 @@ impl Indexer {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to update full-text index, region_id: {}, file_id: {}, err: {}",
"Failed to update full-text index, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
@@ -80,4 +83,29 @@ impl Indexer {
false
}
/// Returns false if the update failed.
async fn do_update_bloom_filter(&mut self, batch: &Batch) -> bool {
let Some(creator) = self.bloom_filter_indexer.as_mut() else {
return true;
};
let Err(err) = creator.update(batch).await else {
return true;
};
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to update bloom filter, region_id: {}, file_id: {}, err: {:?}",
self.region_id, self.file_id, err
);
} else {
warn!(
err; "Failed to update bloom filter, region_id: {}, file_id: {}",
self.region_id, self.file_id,
);
}
false
}
}

View File

@@ -14,13 +14,25 @@
use std::path::PathBuf;
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_telemetry::warn;
use futures::{AsyncRead, AsyncWrite};
use index::error as index_error;
use index::error::Result as IndexResult;
use index::external_provider::ExternalTempFileProvider;
use object_store::util::{self, normalize_dir};
use snafu::ResultExt;
use store_api::storage::{ColumnId, RegionId};
use uuid::Uuid;
use crate::access_layer::new_fs_cache_store;
use crate::error::Result;
use crate::metrics::{
INDEX_INTERMEDIATE_FLUSH_OP_TOTAL, INDEX_INTERMEDIATE_READ_BYTES_TOTAL,
INDEX_INTERMEDIATE_READ_OP_TOTAL, INDEX_INTERMEDIATE_SEEK_OP_TOTAL,
INDEX_INTERMEDIATE_WRITE_BYTES_TOTAL, INDEX_INTERMEDIATE_WRITE_OP_TOTAL,
};
use crate::sst::file::FileId;
use crate::sst::index::store::InstrumentedStore;
@@ -129,14 +141,105 @@ impl IntermediateLocation {
}
}
/// `TempFileProvider` implements `ExternalTempFileProvider`.
/// It uses `InstrumentedStore` to create and read intermediate files.
pub(crate) struct TempFileProvider {
/// Provides the location of intermediate files.
location: IntermediateLocation,
/// Provides store to access to intermediate files.
manager: IntermediateManager,
}
#[async_trait]
impl ExternalTempFileProvider for TempFileProvider {
async fn create(
&self,
file_group: &str,
file_id: &str,
) -> IndexResult<Box<dyn AsyncWrite + Unpin + Send>> {
let path = self.location.file_path(file_group, file_id);
let writer = self
.manager
.store()
.writer(
&path,
&INDEX_INTERMEDIATE_WRITE_BYTES_TOTAL,
&INDEX_INTERMEDIATE_WRITE_OP_TOTAL,
&INDEX_INTERMEDIATE_FLUSH_OP_TOTAL,
)
.await
.map_err(BoxedError::new)
.context(index_error::ExternalSnafu)?;
Ok(Box::new(writer))
}
async fn read_all(
&self,
file_group: &str,
) -> IndexResult<Vec<(String, Box<dyn AsyncRead + Unpin + Send>)>> {
let file_group_path = self.location.file_group_path(file_group);
let entries = self
.manager
.store()
.list(&file_group_path)
.await
.map_err(BoxedError::new)
.context(index_error::ExternalSnafu)?;
let mut readers = Vec::with_capacity(entries.len());
for entry in entries {
if entry.metadata().is_dir() {
warn!("Unexpected entry in index creation dir: {:?}", entry.path());
continue;
}
let im_file_id = self.location.im_file_id_from_path(entry.path());
let reader = self
.manager
.store()
.reader(
entry.path(),
&INDEX_INTERMEDIATE_READ_BYTES_TOTAL,
&INDEX_INTERMEDIATE_READ_OP_TOTAL,
&INDEX_INTERMEDIATE_SEEK_OP_TOTAL,
)
.await
.map_err(BoxedError::new)
.context(index_error::ExternalSnafu)?;
readers.push((im_file_id, Box::new(reader) as _));
}
Ok(readers)
}
}
impl TempFileProvider {
/// Creates a new `TempFileProvider`.
pub fn new(location: IntermediateLocation, manager: IntermediateManager) -> Self {
Self { location, manager }
}
/// Removes all intermediate files.
pub async fn cleanup(&self) -> Result<()> {
self.manager
.store()
.remove_all(self.location.dir_to_cleanup())
.await
}
}
#[cfg(test)]
mod tests {
use std::ffi::OsStr;
use common_test_util::temp_dir;
use futures::{AsyncReadExt, AsyncWriteExt};
use regex::Regex;
use store_api::storage::RegionId;
use super::*;
use crate::sst::file::FileId;
#[tokio::test]
async fn test_manager() {
@@ -212,4 +315,58 @@ mod tests {
.is_match(&pi.next().unwrap().to_string_lossy())); // fulltext path
assert!(pi.next().is_none());
}
#[tokio::test]
async fn test_temp_file_provider_basic() {
let temp_dir = temp_dir::create_temp_dir("intermediate");
let path = temp_dir.path().display().to_string();
let location = IntermediateLocation::new(&RegionId::new(0, 0), &FileId::random());
let store = IntermediateManager::init_fs(path).await.unwrap();
let provider = TempFileProvider::new(location.clone(), store);
let file_group = "tag0";
let file_id = "0000000010";
let mut writer = provider.create(file_group, file_id).await.unwrap();
writer.write_all(b"hello").await.unwrap();
writer.flush().await.unwrap();
writer.close().await.unwrap();
let file_id = "0000000100";
let mut writer = provider.create(file_group, file_id).await.unwrap();
writer.write_all(b"world").await.unwrap();
writer.flush().await.unwrap();
writer.close().await.unwrap();
let file_group = "tag1";
let file_id = "0000000010";
let mut writer = provider.create(file_group, file_id).await.unwrap();
writer.write_all(b"foo").await.unwrap();
writer.flush().await.unwrap();
writer.close().await.unwrap();
let readers = provider.read_all("tag0").await.unwrap();
assert_eq!(readers.len(), 2);
for (_, mut reader) in readers {
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert!(matches!(buf.as_slice(), b"hello" | b"world"));
}
let readers = provider.read_all("tag1").await.unwrap();
assert_eq!(readers.len(), 1);
let mut reader = readers.into_iter().map(|x| x.1).next().unwrap();
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, b"foo");
provider.cleanup().await.unwrap();
assert!(provider
.manager
.store()
.list(location.dir_to_cleanup())
.await
.unwrap()
.is_empty());
}
}

View File

@@ -13,7 +13,6 @@
// limitations under the License.
pub(crate) mod applier;
mod codec;
pub(crate) mod creator;
const INDEX_BLOB_TYPE: &str = "greptime-inverted-index-v1";

View File

@@ -37,8 +37,8 @@ use crate::cache::file_cache::FileCacheRef;
use crate::cache::index::inverted_index::InvertedIndexCacheRef;
use crate::error::{BuildIndexApplierSnafu, ColumnNotFoundSnafu, ConvertValueSnafu, Result};
use crate::row_converter::SortField;
use crate::sst::index::codec::IndexValueCodec;
use crate::sst::index::inverted_index::applier::InvertedIndexApplier;
use crate::sst::index::inverted_index::codec::IndexValueCodec;
use crate::sst::index::puffin_manager::PuffinManagerFactory;
/// Constructs an [`InvertedIndexApplier`] which applies predicates to SST files during scan.

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod temp_provider;
use std::collections::HashSet;
use std::num::NonZeroUsize;
use std::sync::atomic::AtomicUsize;
@@ -38,9 +36,10 @@ use crate::error::{
use crate::read::Batch;
use crate::row_converter::SortField;
use crate::sst::file::FileId;
use crate::sst::index::intermediate::{IntermediateLocation, IntermediateManager};
use crate::sst::index::inverted_index::codec::{IndexValueCodec, IndexValuesCodec};
use crate::sst::index::inverted_index::creator::temp_provider::TempFileProvider;
use crate::sst::index::codec::{IndexValueCodec, IndexValuesCodec};
use crate::sst::index::intermediate::{
IntermediateLocation, IntermediateManager, TempFileProvider,
};
use crate::sst::index::inverted_index::INDEX_BLOB_TYPE;
use crate::sst::index::puffin_manager::SstPuffinWriter;
use crate::sst::index::statistics::{ByteCount, RowCount, Statistics};

View File

@@ -1,182 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_telemetry::warn;
use futures::{AsyncRead, AsyncWrite};
use index::error as index_error;
use index::error::Result as IndexResult;
use index::external_provider::ExternalTempFileProvider;
use snafu::ResultExt;
use crate::error::Result;
use crate::metrics::{
INDEX_INTERMEDIATE_FLUSH_OP_TOTAL, INDEX_INTERMEDIATE_READ_BYTES_TOTAL,
INDEX_INTERMEDIATE_READ_OP_TOTAL, INDEX_INTERMEDIATE_SEEK_OP_TOTAL,
INDEX_INTERMEDIATE_WRITE_BYTES_TOTAL, INDEX_INTERMEDIATE_WRITE_OP_TOTAL,
};
use crate::sst::index::intermediate::{IntermediateLocation, IntermediateManager};
/// `TempFileProvider` implements `ExternalTempFileProvider`.
/// It uses `InstrumentedStore` to create and read intermediate files.
pub(crate) struct TempFileProvider {
/// Provides the location of intermediate files.
location: IntermediateLocation,
/// Provides store to access to intermediate files.
manager: IntermediateManager,
}
#[async_trait]
impl ExternalTempFileProvider for TempFileProvider {
async fn create(
&self,
file_group: &str,
file_id: &str,
) -> IndexResult<Box<dyn AsyncWrite + Unpin + Send>> {
let path = self.location.file_path(file_group, file_id);
let writer = self
.manager
.store()
.writer(
&path,
&INDEX_INTERMEDIATE_WRITE_BYTES_TOTAL,
&INDEX_INTERMEDIATE_WRITE_OP_TOTAL,
&INDEX_INTERMEDIATE_FLUSH_OP_TOTAL,
)
.await
.map_err(BoxedError::new)
.context(index_error::ExternalSnafu)?;
Ok(Box::new(writer))
}
async fn read_all(
&self,
file_group: &str,
) -> IndexResult<Vec<(String, Box<dyn AsyncRead + Unpin + Send>)>> {
let file_group_path = self.location.file_group_path(file_group);
let entries = self
.manager
.store()
.list(&file_group_path)
.await
.map_err(BoxedError::new)
.context(index_error::ExternalSnafu)?;
let mut readers = Vec::with_capacity(entries.len());
for entry in entries {
if entry.metadata().is_dir() {
warn!("Unexpected entry in index creation dir: {:?}", entry.path());
continue;
}
let im_file_id = self.location.im_file_id_from_path(entry.path());
let reader = self
.manager
.store()
.reader(
entry.path(),
&INDEX_INTERMEDIATE_READ_BYTES_TOTAL,
&INDEX_INTERMEDIATE_READ_OP_TOTAL,
&INDEX_INTERMEDIATE_SEEK_OP_TOTAL,
)
.await
.map_err(BoxedError::new)
.context(index_error::ExternalSnafu)?;
readers.push((im_file_id, Box::new(reader) as _));
}
Ok(readers)
}
}
impl TempFileProvider {
/// Creates a new `TempFileProvider`.
pub fn new(location: IntermediateLocation, manager: IntermediateManager) -> Self {
Self { location, manager }
}
/// Removes all intermediate files.
pub async fn cleanup(&self) -> Result<()> {
self.manager
.store()
.remove_all(self.location.dir_to_cleanup())
.await
}
}
#[cfg(test)]
mod tests {
use common_test_util::temp_dir;
use futures::{AsyncReadExt, AsyncWriteExt};
use store_api::storage::RegionId;
use super::*;
use crate::sst::file::FileId;
#[tokio::test]
async fn test_temp_file_provider_basic() {
let temp_dir = temp_dir::create_temp_dir("intermediate");
let path = temp_dir.path().display().to_string();
let location = IntermediateLocation::new(&RegionId::new(0, 0), &FileId::random());
let store = IntermediateManager::init_fs(path).await.unwrap();
let provider = TempFileProvider::new(location.clone(), store);
let file_group = "tag0";
let file_id = "0000000010";
let mut writer = provider.create(file_group, file_id).await.unwrap();
writer.write_all(b"hello").await.unwrap();
writer.flush().await.unwrap();
writer.close().await.unwrap();
let file_id = "0000000100";
let mut writer = provider.create(file_group, file_id).await.unwrap();
writer.write_all(b"world").await.unwrap();
writer.flush().await.unwrap();
writer.close().await.unwrap();
let file_group = "tag1";
let file_id = "0000000010";
let mut writer = provider.create(file_group, file_id).await.unwrap();
writer.write_all(b"foo").await.unwrap();
writer.flush().await.unwrap();
writer.close().await.unwrap();
let readers = provider.read_all("tag0").await.unwrap();
assert_eq!(readers.len(), 2);
for (_, mut reader) in readers {
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert!(matches!(buf.as_slice(), b"hello" | b"world"));
}
let readers = provider.read_all("tag1").await.unwrap();
assert_eq!(readers.len(), 1);
let mut reader = readers.into_iter().map(|x| x.1).next().unwrap();
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, b"foo");
provider.cleanup().await.unwrap();
assert!(provider
.manager
.store()
.list(location.dir_to_cleanup())
.await
.unwrap()
.is_empty());
}
}