diff --git a/config/config.md b/config/config.md
index a594e73680..32f34304c6 100644
--- a/config/config.md
+++ b/config/config.md
@@ -118,12 +118,15 @@
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores). - `0`: using the default value (1/4 of cpu cores). - `1`: scan in current thread. - `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
+| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
+| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`. The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories: - `__intm`: for storing intermediate files used during creating index. - `staging`: for storing staging files used during searching index. |
+| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush. - `auto`: automatically - `disable`: never |
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction. - `auto`: automatically - `disable`: never |
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query - `auto`: automatically - `disable`: never |
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation. Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
-| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
+| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.memtable` | -- | -- | -- |
| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type. - `time_series`: time-series memtable - `partition_tree`: partition tree memtable (experimental) |
| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard. Only available for `partition_tree` memtable. |
@@ -399,12 +402,15 @@
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores). - `0`: using the default value (1/4 of cpu cores). - `1`: scan in current thread. - `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
+| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
+| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`. The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories: - `__intm`: for storing intermediate files used during creating index. - `staging`: for storing staging files used during searching index. |
+| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush. - `auto`: automatically - `disable`: never |
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction. - `auto`: automatically - `disable`: never |
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query - `auto`: automatically - `disable`: never |
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation. Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
-| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
+| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.memtable` | -- | -- | -- |
| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type. - `time_series`: time-series memtable - `partition_tree`: partition tree memtable (experimental) |
| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard. Only available for `partition_tree` memtable. |
diff --git a/config/datanode.example.toml b/config/datanode.example.toml
index b3be8b5836..c12606110f 100644
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -394,6 +394,21 @@ parallel_scan_channel_size = 32
## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false
+## The options for index in Mito engine.
+[region_engine.mito.index]
+
+## Auxiliary directory path for the index in filesystem, used to store intermediate files for
+## creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.
+## The default name for this directory is `index_intermediate` for backward compatibility.
+##
+## This path contains two subdirectories:
+## - `__intm`: for storing intermediate files used during creating index.
+## - `staging`: for storing staging files used during searching index.
+aux_path = ""
+
+## The max capacity of the staging directory.
+staging_size = "2GB"
+
## The options for inverted index in Mito engine.
[region_engine.mito.inverted_index]
@@ -416,7 +431,7 @@ apply_on_query = "auto"
## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
mem_threshold_on_create = "64M"
-## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
+## Deprecated, use `region_engine.mito.index.aux_path` instead.
intermediate_path = ""
[region_engine.mito.memtable]
diff --git a/config/standalone.example.toml b/config/standalone.example.toml
index 0a2544a772..32c1840eea 100644
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -417,6 +417,21 @@ parallel_scan_channel_size = 32
## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false
+## The options for index in Mito engine.
+[region_engine.mito.index]
+
+## Auxiliary directory path for the index in filesystem, used to store intermediate files for
+## creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.
+## The default name for this directory is `index_intermediate` for backward compatibility.
+##
+## This path contains two subdirectories:
+## - `__intm`: for storing intermediate files used during creating index.
+## - `staging`: for storing staging files used during searching index.
+aux_path = ""
+
+## The max capacity of the staging directory.
+staging_size = "2GB"
+
## The options for inverted index in Mito engine.
[region_engine.mito.inverted_index]
@@ -439,7 +454,7 @@ apply_on_query = "auto"
## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
mem_threshold_on_create = "64M"
-## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
+## Deprecated, use `region_engine.mito.index.aux_path` instead.
intermediate_path = ""
[region_engine.mito.memtable]
diff --git a/src/mito2/src/access_layer.rs b/src/mito2/src/access_layer.rs
index 40308124f5..98d9396bf7 100644
--- a/src/mito2/src/access_layer.rs
+++ b/src/mito2/src/access_layer.rs
@@ -27,6 +27,7 @@ use crate::read::Source;
use crate::region::options::IndexOptions;
use crate::sst::file::{FileHandle, FileId, FileMeta};
use crate::sst::index::intermediate::IntermediateManager;
+use crate::sst::index::puffin_manager::PuffinManagerFactory;
use crate::sst::index::IndexerBuilder;
use crate::sst::location;
use crate::sst::parquet::reader::ParquetReaderBuilder;
@@ -40,6 +41,8 @@ pub struct AccessLayer {
region_dir: String,
/// Target object store.
object_store: ObjectStore,
+ /// Puffin manager factory for index.
+ puffin_manager_factory: PuffinManagerFactory,
/// Intermediate manager for inverted index.
intermediate_manager: IntermediateManager,
}
@@ -57,11 +60,13 @@ impl AccessLayer {
pub fn new(
region_dir: impl Into,
object_store: ObjectStore,
+ puffin_manager_factory: PuffinManagerFactory,
intermediate_manager: IntermediateManager,
) -> AccessLayer {
AccessLayer {
region_dir: region_dir.into(),
object_store,
+ puffin_manager_factory,
intermediate_manager,
}
}
@@ -76,6 +81,11 @@ impl AccessLayer {
&self.object_store
}
+ /// Returns a reference to the puffin manager factory held by this access layer.
+ pub fn puffin_manager_factory(&self) -> &PuffinManagerFactory {
+ &self.puffin_manager_factory
+ }
+
/// Deletes a SST file (and its index file if it has one) with given file id.
pub(crate) async fn delete_sst(&self, file_meta: &FileMeta) -> Result<()> {
let path = location::sst_file_path(&self.region_dir, file_meta.file_id);
@@ -86,15 +96,13 @@ impl AccessLayer {
file_id: file_meta.file_id,
})?;
- if file_meta.inverted_index_available() {
- let path = location::index_file_path(&self.region_dir, file_meta.file_id);
- self.object_store
- .delete(&path)
- .await
- .context(DeleteIndexSnafu {
- file_id: file_meta.file_id,
- })?;
- }
+ let path = location::index_file_path(&self.region_dir, file_meta.file_id);
+ self.object_store
+ .delete(&path)
+ .await
+ .context(DeleteIndexSnafu {
+ file_id: file_meta.file_id,
+ })?;
Ok(())
}
diff --git a/src/mito2/src/cache/file_cache.rs b/src/mito2/src/cache/file_cache.rs
index 931e506269..008a717593 100644
--- a/src/mito2/src/cache/file_cache.rs
+++ b/src/mito2/src/cache/file_cache.rs
@@ -117,6 +117,7 @@ impl FileCache {
}
/// Reads a file from the cache.
+ #[allow(unused)]
pub(crate) async fn reader(&self, key: IndexKey) -> Option {
// We must use `get()` to update the estimator of the cache.
// See https://docs.rs/moka/latest/moka/future/struct.Cache.html#method.contains_key
diff --git a/src/mito2/src/compaction/compactor.rs b/src/mito2/src/compaction/compactor.rs
index 062e5423c4..a303367a34 100644
--- a/src/mito2/src/compaction/compactor.rs
+++ b/src/mito2/src/compaction/compactor.rs
@@ -45,6 +45,7 @@ use crate::schedule::scheduler::LocalScheduler;
use crate::sst::file::{FileMeta, IndexType};
use crate::sst::file_purger::LocalFilePurger;
use crate::sst::index::intermediate::IntermediateManager;
+use crate::sst::index::puffin_manager::PuffinManagerFactory;
use crate::sst::parquet::WriteOptions;
/// CompactionRegion represents a region that needs to be compacted.
@@ -93,13 +94,19 @@ pub async fn open_compaction_region(
};
let access_layer = {
+ let puffin_manager_factory = PuffinManagerFactory::new(
+ &mito_config.index.aux_path,
+ mito_config.index.staging_size.as_bytes(),
+ Some(mito_config.index.write_buffer_size.as_bytes() as _),
+ )
+ .await?;
let intermediate_manager =
- IntermediateManager::init_fs(mito_config.inverted_index.intermediate_path.clone())
- .await?;
+ IntermediateManager::init_fs(mito_config.index.aux_path.clone()).await?;
Arc::new(AccessLayer::new(
req.region_dir.as_str(),
object_store.clone(),
+ puffin_manager_factory,
intermediate_manager,
))
};
@@ -266,7 +273,7 @@ impl Compactor for DefaultCompactor {
let index_write_buffer_size = Some(
compaction_region
.engine_config
- .inverted_index
+ .index
.write_buffer_size
.as_bytes() as usize,
);
diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs
index 5f5799ec2f..04d085dda8 100644
--- a/src/mito2/src/config.rs
+++ b/src/mito2/src/config.rs
@@ -15,6 +15,7 @@
//! Configurations.
use std::cmp;
+use std::path::Path;
use std::time::Duration;
use common_base::readable_size::ReadableSize;
@@ -104,6 +105,8 @@ pub struct MitoConfig {
/// Whether to allow stale entries read during replay.
pub allow_stale_entries: bool,
+ /// Index configs.
+ pub index: IndexConfig,
/// Inverted index configs.
pub inverted_index: InvertedIndexConfig,
@@ -134,6 +137,7 @@ impl Default for MitoConfig {
scan_parallelism: divide_num_cpus(4),
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
allow_stale_entries: false,
+ index: IndexConfig::default(),
inverted_index: InvertedIndexConfig::default(),
memtable: MemtableConfig::default(),
};
@@ -202,7 +206,7 @@ impl MitoConfig {
self.experimental_write_cache_path = join_dir(data_home, "write_cache");
}
- self.inverted_index.sanitize(data_home)?;
+ self.index.sanitize(data_home, &self.inverted_index)?;
Ok(())
}
@@ -246,6 +250,70 @@ impl MitoConfig {
}
}
+/// The options for index in Mito engine.
+///
+/// Because of `#[serde(default)]`, fields missing from the deserialized
+/// input take their values from [`IndexConfig::default`].
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
+#[serde(default)]
+pub struct IndexConfig {
+ /// Auxiliary directory path for the index in filesystem, used to
+ /// store intermediate files for creating the index and staging files
+ /// for searching the index, defaults to `{data_home}/index_intermediate`.
+ ///
+ /// This path contains two subdirectories:
+ /// - `__intm`: for storing intermediate files used during creating index.
+ /// - `staging`: for storing staging files used during searching index.
+ ///
+ /// The default name for this directory is `index_intermediate` for backward compatibility.
+ ///
+ /// An empty value is replaced by [`IndexConfig::sanitize`].
+ pub aux_path: String,
+
+ /// The max capacity of the staging directory.
+ pub staging_size: ReadableSize,
+
+ /// Write buffer size for creating the index.
+ ///
+ /// [`IndexConfig::sanitize`] raises it to `MULTIPART_UPLOAD_MINIMUM_SIZE`
+ /// when it is configured below that minimum.
+ pub write_buffer_size: ReadableSize,
+}
+
+impl Default for IndexConfig {
+    /// Builds the default index options: an empty `aux_path`, a staging
+    /// capacity of `ReadableSize::gb(2)` and a write buffer of
+    /// `ReadableSize::mb(8)`.
+    fn default() -> Self {
+        let staging_size = ReadableSize::gb(2);
+        let write_buffer_size = ReadableSize::mb(8);
+        IndexConfig {
+            aux_path: String::default(),
+            staging_size,
+            write_buffer_size,
+        }
+    }
+}
+
+impl IndexConfig {
+    /// Normalizes the index options in place.
+    ///
+    /// - When `aux_path` is empty and the deprecated
+    ///   `inverted_index.intermediate_path` is not, the deprecated value is
+    ///   copied into `aux_path` and a deprecation warning is logged.
+    /// - When `aux_path` is still empty, it is set to
+    ///   `{data_home}/index_intermediate`.
+    /// - A `write_buffer_size` below `MULTIPART_UPLOAD_MINIMUM_SIZE` is raised
+    ///   to that minimum and a warning is logged.
+    ///
+    /// The current implementation never returns an error.
+    pub fn sanitize(
+        &mut self,
+        data_home: &str,
+        inverted_index: &InvertedIndexConfig,
+    ) -> Result<()> {
+        // Honor the deprecated option so existing configuration files keep working.
+        #[allow(deprecated)]
+        if self.aux_path.is_empty() && !inverted_index.intermediate_path.is_empty() {
+            self.aux_path.clone_from(&inverted_index.intermediate_path);
+            // The trailing `\` joins the literal so the logged message stays on
+            // a single line instead of embedding a newline and indentation.
+            warn!(
+                "`inverted_index.intermediate_path` is deprecated, use \
+                 `index.aux_path` instead. Set `index.aux_path` to {}",
+                &inverted_index.intermediate_path
+            );
+        }
+        if self.aux_path.is_empty() {
+            let path = Path::new(data_home).join("index_intermediate");
+            self.aux_path = path.as_os_str().to_string_lossy().to_string();
+        }
+
+        if self.write_buffer_size < MULTIPART_UPLOAD_MINIMUM_SIZE {
+            self.write_buffer_size = MULTIPART_UPLOAD_MINIMUM_SIZE;
+            warn!(
+                "Sanitize index write buffer size to {}",
+                self.write_buffer_size
+            );
+        }
+
+        Ok(())
+    }
+}
+
/// Operational mode for certain actions.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
@@ -280,17 +348,23 @@ pub struct InvertedIndexConfig {
pub create_on_compaction: Mode,
/// Whether to apply the index on query: automatically or never.
pub apply_on_query: Mode,
- /// Write buffer size for creating the index.
- pub write_buffer_size: ReadableSize,
+
/// Memory threshold for performing an external sort during index creation.
/// `None` means all sorting will happen in memory.
#[serde_as(as = "NoneAsEmptyString")]
pub mem_threshold_on_create: Option,
- /// File system path to store intermediate files for external sort, defaults to `{data_home}/index_intermediate`.
+
+ #[deprecated = "use [IndexConfig::aux_path] instead"]
+ #[serde(skip_serializing)]
pub intermediate_path: String,
+
+ #[deprecated = "use [IndexConfig::write_buffer_size] instead"]
+ #[serde(skip_serializing)]
+ pub write_buffer_size: ReadableSize,
}
impl Default for InvertedIndexConfig {
+ #[allow(deprecated)]
fn default() -> Self {
Self {
create_on_flush: Mode::Auto,
@@ -303,24 +377,6 @@ impl Default for InvertedIndexConfig {
}
}
-impl InvertedIndexConfig {
- pub fn sanitize(&mut self, data_home: &str) -> Result<()> {
- if self.intermediate_path.is_empty() {
- self.intermediate_path = join_dir(data_home, "index_intermediate");
- }
-
- if self.write_buffer_size < MULTIPART_UPLOAD_MINIMUM_SIZE {
- self.write_buffer_size = MULTIPART_UPLOAD_MINIMUM_SIZE;
- warn!(
- "Sanitize index write buffer size to {}",
- self.write_buffer_size
- );
- }
-
- Ok(())
- }
-}
-
/// Divide cpu num by a non-zero `divisor` and returns at least 1.
fn divide_num_cpus(divisor: usize) -> usize {
debug_assert!(divisor > 0);
diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs
index 1306edf09d..ed665e445c 100644
--- a/src/mito2/src/error.rs
+++ b/src/mito2/src/error.rs
@@ -597,13 +597,6 @@ pub enum Error {
location: Location,
},
- #[snafu(display("Blob type not found, blob_type: {blob_type}"))]
- PuffinBlobTypeNotFound {
- blob_type: String,
- #[snafu(implicit)]
- location: Location,
- },
-
#[snafu(display("Failed to write puffin completely"))]
PuffinFinish {
source: puffin::error::Error,
@@ -783,6 +776,20 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
+
+ #[snafu(display("Failed to initialize puffin stager"))]
+ PuffinInitStager {
+ source: puffin::error::Error,
+ #[snafu(implicit)]
+ location: Location,
+ },
+
+ #[snafu(display("Failed to build puffin reader"))]
+ PuffinBuildReader {
+ source: puffin::error::Error,
+ #[snafu(implicit)]
+ location: Location,
+ },
}
pub type Result = std::result::Result;
@@ -821,7 +828,6 @@ impl ErrorExt for Error {
| CreateDefault { .. }
| InvalidParquet { .. }
| OperateAbortedIndex { .. }
- | PuffinBlobTypeNotFound { .. }
| UnexpectedReplay { .. }
| IndexEncodeNull { .. } => StatusCode::Unexpected,
RegionNotFound { .. } => StatusCode::RegionNotFound,
@@ -886,7 +892,9 @@ impl ErrorExt for Error {
PuffinReadMetadata { source, .. }
| PuffinReadBlob { source, .. }
| PuffinFinish { source, .. }
- | PuffinAddBlob { source, .. } => source.status_code(),
+ | PuffinAddBlob { source, .. }
+ | PuffinInitStager { source, .. }
+ | PuffinBuildReader { source, .. } => source.status_code(),
CleanDir { .. } => StatusCode::Unexpected,
InvalidConfig { .. } => StatusCode::InvalidArguments,
StaleLogEntry { .. } => StatusCode::Unexpected,
diff --git a/src/mito2/src/flush.rs b/src/mito2/src/flush.rs
index 971295e08d..2d573b423b 100644
--- a/src/mito2/src/flush.rs
+++ b/src/mito2/src/flush.rs
@@ -327,12 +327,8 @@ impl RegionFlushTask {
.inverted_index
.mem_threshold_on_create
.map(|m| m.as_bytes() as _);
- let index_write_buffer_size = Some(
- self.engine_config
- .inverted_index
- .write_buffer_size
- .as_bytes() as usize,
- );
+ let index_write_buffer_size =
+ Some(self.engine_config.index.write_buffer_size.as_bytes() as usize);
// Flush to level 0.
let write_request = SstWriteRequest {
diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs
index e29b1611a2..c25a040295 100644
--- a/src/mito2/src/read/scan_region.rs
+++ b/src/mito2/src/read/scan_region.rs
@@ -343,6 +343,7 @@ impl ScanRegion {
.iter()
.copied()
.collect(),
+ self.access_layer.puffin_manager_factory().clone(),
)
.build(&self.request.filters)
.inspect_err(|err| warn!(err; "Failed to build index applier"))
diff --git a/src/mito2/src/region/opener.rs b/src/mito2/src/region/opener.rs
index 50aa7c68cd..65429478f5 100644
--- a/src/mito2/src/region/opener.rs
+++ b/src/mito2/src/region/opener.rs
@@ -48,6 +48,7 @@ use crate::request::OptionOutputTx;
use crate::schedule::scheduler::SchedulerRef;
use crate::sst::file_purger::LocalFilePurger;
use crate::sst::index::intermediate::IntermediateManager;
+use crate::sst::index::puffin_manager::PuffinManagerFactory;
use crate::time_provider::{StdTimeProvider, TimeProviderRef};
use crate::wal::entry_reader::WalEntryReader;
use crate::wal::{EntryId, Wal};
@@ -63,6 +64,7 @@ pub(crate) struct RegionOpener {
options: Option,
cache_manager: Option,
skip_wal_replay: bool,
+ puffin_manager_factory: PuffinManagerFactory,
intermediate_manager: IntermediateManager,
time_provider: Option,
stats: ManifestStats,
@@ -77,6 +79,7 @@ impl RegionOpener {
memtable_builder_provider: MemtableBuilderProvider,
object_store_manager: ObjectStoreManagerRef,
purge_scheduler: SchedulerRef,
+ puffin_manager_factory: PuffinManagerFactory,
intermediate_manager: IntermediateManager,
) -> RegionOpener {
RegionOpener {
@@ -89,6 +92,7 @@ impl RegionOpener {
options: None,
cache_manager: None,
skip_wal_replay: false,
+ puffin_manager_factory,
intermediate_manager,
time_provider: None,
stats: Default::default(),
@@ -216,6 +220,7 @@ impl RegionOpener {
let access_layer = Arc::new(AccessLayer::new(
self.region_dir,
object_store,
+ self.puffin_manager_factory,
self.intermediate_manager,
));
let time_provider = self
@@ -317,6 +322,7 @@ impl RegionOpener {
let access_layer = Arc::new(AccessLayer::new(
self.region_dir.clone(),
object_store,
+ self.puffin_manager_factory.clone(),
self.intermediate_manager.clone(),
));
let file_purger = Arc::new(LocalFilePurger::new(
diff --git a/src/mito2/src/sst/file_purger.rs b/src/mito2/src/sst/file_purger.rs
index 4f81170933..0753b1a3eb 100644
--- a/src/mito2/src/sst/file_purger.rs
+++ b/src/mito2/src/sst/file_purger.rs
@@ -97,7 +97,6 @@ impl FilePurger for LocalFilePurger {
mod tests {
use common_test_util::temp_dir::create_temp_dir;
use object_store::services::Fs;
- use object_store::util::join_dir;
use object_store::ObjectStore;
use smallvec::SmallVec;
@@ -106,6 +105,7 @@ mod tests {
use crate::schedule::scheduler::{LocalScheduler, Scheduler};
use crate::sst::file::{FileHandle, FileId, FileMeta, FileTimeRange, IndexType};
use crate::sst::index::intermediate::IntermediateManager;
+ use crate::sst::index::puffin_manager::PuffinManagerFactory;
use crate::sst::location;
#[tokio::test]
@@ -119,7 +119,12 @@ mod tests {
let sst_file_id = FileId::random();
let sst_dir = "table1";
let path = location::sst_file_path(sst_dir, sst_file_id);
- let intm_mgr = IntermediateManager::init_fs(join_dir(&dir_path, "intm"))
+
+ let index_aux_path = dir.path().join("index_aux");
+ let puffin_mgr = PuffinManagerFactory::new(&index_aux_path, 4096, None)
+ .await
+ .unwrap();
+ let intm_mgr = IntermediateManager::init_fs(index_aux_path.to_str().unwrap())
.await
.unwrap();
@@ -127,7 +132,12 @@ mod tests {
object_store.write(&path, vec![0; 4096]).await.unwrap();
let scheduler = Arc::new(LocalScheduler::new(3));
- let layer = Arc::new(AccessLayer::new(sst_dir, object_store.clone(), intm_mgr));
+ let layer = Arc::new(AccessLayer::new(
+ sst_dir,
+ object_store.clone(),
+ puffin_mgr,
+ intm_mgr,
+ ));
let file_purger = Arc::new(LocalFilePurger::new(scheduler.clone(), layer, None));
@@ -165,11 +175,16 @@ mod tests {
builder.root(&dir_path);
let sst_file_id = FileId::random();
let sst_dir = "table1";
- let intm_mgr = IntermediateManager::init_fs(join_dir(&dir_path, "intm"))
+
+ let index_aux_path = dir.path().join("index_aux");
+ let puffin_mgr = PuffinManagerFactory::new(&index_aux_path, 4096, None)
+ .await
+ .unwrap();
+ let intm_mgr = IntermediateManager::init_fs(index_aux_path.to_str().unwrap())
.await
.unwrap();
- let path = location::sst_file_path(sst_dir, sst_file_id);
+ let path = location::sst_file_path(sst_dir, sst_file_id);
let object_store = ObjectStore::new(builder).unwrap().finish();
object_store.write(&path, vec![0; 4096]).await.unwrap();
@@ -180,7 +195,12 @@ mod tests {
.unwrap();
let scheduler = Arc::new(LocalScheduler::new(3));
- let layer = Arc::new(AccessLayer::new(sst_dir, object_store.clone(), intm_mgr));
+ let layer = Arc::new(AccessLayer::new(
+ sst_dir,
+ object_store.clone(),
+ puffin_mgr,
+ intm_mgr,
+ ));
let file_purger = Arc::new(LocalFilePurger::new(scheduler.clone(), layer, None));
diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs
index ebc561c829..5bfee47ef7 100644
--- a/src/mito2/src/sst/index.rs
+++ b/src/mito2/src/sst/index.rs
@@ -16,6 +16,7 @@ pub(crate) mod applier;
mod codec;
pub(crate) mod creator;
pub(crate) mod intermediate;
+pub(crate) mod puffin_manager;
mod store;
use std::num::NonZeroUsize;
diff --git a/src/mito2/src/sst/index/applier.rs b/src/mito2/src/sst/index/applier.rs
index a823de56c8..d99d5ea8cd 100644
--- a/src/mito2/src/sst/index/applier.rs
+++ b/src/mito2/src/sst/index/applier.rs
@@ -16,27 +16,21 @@ pub mod builder;
use std::sync::Arc;
-use futures::{AsyncRead, AsyncSeek};
+use common_telemetry::warn;
use index::inverted_index::format::reader::InvertedIndexBlobReader;
use index::inverted_index::search::index_apply::{
ApplyOutput, IndexApplier, IndexNotFoundStrategy, SearchContext,
};
use object_store::ObjectStore;
-use puffin::file_format::reader::{AsyncReader, PuffinFileReader};
-use snafu::{OptionExt, ResultExt};
+use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader};
+use snafu::ResultExt;
use store_api::storage::RegionId;
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
-use crate::error::{
- ApplyIndexSnafu, OpenDalSnafu, PuffinBlobTypeNotFoundSnafu, PuffinReadBlobSnafu,
- PuffinReadMetadataSnafu, Result,
-};
-use crate::metrics::{
- INDEX_APPLY_ELAPSED, INDEX_APPLY_MEMORY_USAGE, INDEX_PUFFIN_READ_BYTES_TOTAL,
- INDEX_PUFFIN_READ_OP_TOTAL, INDEX_PUFFIN_SEEK_OP_TOTAL,
-};
+use crate::error::{ApplyIndexSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result};
+use crate::metrics::{INDEX_APPLY_ELAPSED, INDEX_APPLY_MEMORY_USAGE};
use crate::sst::file::FileId;
-use crate::sst::index::store::InstrumentedStore;
+use crate::sst::index::puffin_manager::{BlobReader, PuffinManagerFactory};
use crate::sst::index::INDEX_BLOB_TYPE;
use crate::sst::location;
@@ -50,7 +44,7 @@ pub(crate) struct SstIndexApplier {
region_id: RegionId,
/// Store responsible for accessing remote index files.
- store: InstrumentedStore,
+ store: ObjectStore,
/// The cache of index files.
file_cache: Option,
@@ -58,6 +52,9 @@ pub(crate) struct SstIndexApplier {
/// Predefined index applier used to apply predicates to index files
/// and return the relevant row group ids for further scan.
index_applier: Box,
+
+ /// The puffin manager factory.
+ puffin_manager_factory: PuffinManagerFactory,
}
pub(crate) type SstIndexApplierRef = Arc;
@@ -67,18 +64,20 @@ impl SstIndexApplier {
pub fn new(
region_dir: String,
region_id: RegionId,
- object_store: ObjectStore,
+ store: ObjectStore,
file_cache: Option,
index_applier: Box,
+ puffin_manager_factory: PuffinManagerFactory,
) -> Self {
INDEX_APPLY_MEMORY_USAGE.add(index_applier.memory_usage() as i64);
Self {
region_dir,
region_id,
- store: InstrumentedStore::new(object_store),
+ store,
file_cache,
index_applier,
+ puffin_manager_factory,
}
}
@@ -91,94 +90,65 @@ impl SstIndexApplier {
index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty,
};
- match self.cached_puffin_reader(file_id).await? {
- Some(mut puffin_reader) => {
- let blob_reader = Self::index_blob_reader(&mut puffin_reader).await?;
- let mut index_reader = InvertedIndexBlobReader::new(blob_reader);
- self.index_applier
- .apply(context, &mut index_reader)
- .await
- .context(ApplyIndexSnafu)
+ let blob = match self.cached_blob_reader(file_id).await {
+ Ok(Some(puffin_reader)) => puffin_reader,
+ other => {
+ if let Err(err) = other {
+ warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.")
+ }
+ self.remote_blob_reader(file_id).await?
}
- None => {
- let mut puffin_reader = self.remote_puffin_reader(file_id).await?;
- let blob_reader = Self::index_blob_reader(&mut puffin_reader).await?;
- let mut index_reader = InvertedIndexBlobReader::new(blob_reader);
- self.index_applier
- .apply(context, &mut index_reader)
- .await
- .context(ApplyIndexSnafu)
- }
- }
+ };
+ let mut blob_reader = InvertedIndexBlobReader::new(blob);
+ let output = self
+ .index_applier
+ .apply(context, &mut blob_reader)
+ .await
+ .context(ApplyIndexSnafu)?;
+ Ok(output)
}
- /// Helper function to create a [`PuffinFileReader`] from the cached index file.
- async fn cached_puffin_reader(
- &self,
- file_id: FileId,
- ) -> Result