From 8dc3e7704ca034433439e7781d74922fb2aa6f4d Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 22 Apr 2021 21:07:39 +0200 Subject: [PATCH] add IndexSettings to Index, use Indexbuilder in Index --- src/core/index.rs | 127 ++++++++++++++++++++++++-------- src/directory/mmap_directory.rs | 4 +- src/indexer/segment_updater.rs | 6 +- 3 files changed, 102 insertions(+), 35 deletions(-) diff --git a/src/core/index.rs b/src/core/index.rs index 73a0b6e37..33e8eadab 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -78,21 +78,18 @@ fn load_metas( pub struct IndexBuilder { index_meta: IndexMeta, } -impl Default for IndexBuilder { - fn default() -> Self { +impl IndexBuilder { + pub fn new() -> Self { Self { index_meta: IndexMeta::with_schema(SchemaBuilder::new().build()), } } -} -impl IndexBuilder { - pub fn new() -> Self { - Self::default() - } + /// Set the settings pub fn settings(mut self, settings: IndexSettings) -> Self { self.index_meta.index_settings = settings; self } + /// Set the schema pub fn schema(mut self, schema: Schema) -> Self { self.index_meta.schema = schema; self @@ -102,7 +99,13 @@ impl IndexBuilder { /// The index will be allocated in anonymous memory. /// This should only be used for unit tests. pub fn create_in_ram(self) -> Index { - Index::create_in_ram(self.index_meta.schema) + let ram_directory = RAMDirectory::create(); + Index::create( + ram_directory, + self.index_meta.schema, + self.index_meta.index_settings, + ) + .expect("Creating a RAMDirectory should never fail") } /// Creates a new index in a given filepath. /// The index will use the `MMapDirectory`. @@ -110,9 +113,33 @@ impl IndexBuilder { /// If a previous index was in this directory, then its meta file will be destroyed. #[cfg(feature = "mmap")] pub fn create_in_dir>(self, directory_path: P) -> crate::Result { - Index::create_in_dir(directory_path, self.index_meta.schema) + let mmap_directory = MmapDirectory::open(directory_path)?; + if Index::exists(&mmap_directory)? { + return Err(TantivyError::IndexAlreadyExists); + } + Index::create( + mmap_directory, + self.index_meta.schema, + self.index_meta.index_settings, + ) + } + /// Creates a new index in a temp directory. + /// + /// The index will use the `MMapDirectory` in a newly created directory. + /// The temp directory will be destroyed automatically when the `Index` object + /// is destroyed. + /// + /// The temp directory is only used for testing the `MmapDirectory`. + /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`. + #[cfg(feature = "mmap")] + pub fn create_from_tempdir(self) -> crate::Result { + let mmap_directory = MmapDirectory::create_from_tempdir()?; + Index::create( + mmap_directory, + self.index_meta.schema, + self.index_meta.index_settings, + ) } - // todo add rest, change Index methods to accept IndexMeta or schema + IndexSettings } /// Search Index @@ -120,6 +147,7 @@ impl IndexBuilder { pub struct Index { directory: ManagedDirectory, schema: Schema, + settings: IndexSettings, executor: Arc, tokenizers: TokenizerManager, inventory: SegmentMetaInventory, @@ -166,8 +194,7 @@ impl Index { /// The index will be allocated in anonymous memory. /// This should only be used for unit tests. pub fn create_in_ram(schema: Schema) -> Index { - let ram_directory = RAMDirectory::create(); - Index::create(ram_directory, schema).expect("Creating a RAMDirectory should never fail") + IndexBuilder::new().schema(schema).create_in_ram() } /// Creates a new index in a given filepath. @@ -179,17 +206,15 @@ impl Index { directory_path: P, schema: Schema, ) -> crate::Result { - let mmap_directory = MmapDirectory::open(directory_path)?; - if Index::exists(&mmap_directory)? { - return Err(TantivyError::IndexAlreadyExists); - } - Index::create(mmap_directory, schema) + IndexBuilder::new() + .schema(schema) + .create_in_dir(directory_path) } /// Opens or creates a new index in the provided directory pub fn open_or_create(dir: Dir, schema: Schema) -> crate::Result { if !Index::exists(&dir)? { - return Index::create(dir, schema); + return Index::create(dir, schema, IndexSettings::default()); } let index = Index::open(dir)?; if index.schema() == schema { @@ -211,24 +236,32 @@ impl Index { /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`. #[cfg(feature = "mmap")] pub fn create_from_tempdir(schema: Schema) -> crate::Result { - let mmap_directory = MmapDirectory::create_from_tempdir()?; - Index::create(mmap_directory, schema) + IndexBuilder::new().schema(schema).create_from_tempdir() } /// Creates a new index given an implementation of the trait `Directory`. /// /// If a directory previously existed, it will be erased. - pub fn create(dir: Dir, schema: Schema) -> crate::Result { + pub fn create( + dir: Dir, + schema: Schema, + settings: IndexSettings, + ) -> crate::Result { let directory = ManagedDirectory::wrap(dir)?; - Index::from_directory(directory, schema) + Index::from_directory(directory, schema, settings) } /// Create a new index from a directory. /// /// This will overwrite existing meta.json - fn from_directory(directory: ManagedDirectory, schema: Schema) -> crate::Result { - save_new_metas(schema.clone(), &directory)?; - let metas = IndexMeta::with_schema(schema); + fn from_directory( + directory: ManagedDirectory, + schema: Schema, + settings: IndexSettings, + ) -> crate::Result { + save_new_metas(schema.clone(), settings.clone(), &directory)?; + let mut metas = IndexMeta::with_schema(schema); + metas.index_settings = settings; let index = Index::create_from_metas(directory, &metas, SegmentMetaInventory::default()); Ok(index) } @@ -241,6 +274,7 @@ impl Index { ) -> Index { let schema = metas.schema.clone(); Index { + settings: metas.index_settings.clone(), directory, schema, tokenizers: TokenizerManager::default(), @@ -407,6 +441,11 @@ impl Index { self.writer_with_num_threads(num_threads, overall_heap_size_in_bytes) } + /// Accessor to the index settings + /// + pub fn settings(&self) -> &IndexSettings { + &self.settings + } /// Accessor to the index schema /// /// The schema is actually cloned. @@ -479,6 +518,7 @@ mod tests { use crate::schema::Field; use crate::schema::{Schema, INDEXED, TEXT}; use crate::IndexReader; + use crate::IndexSettings; use crate::ReloadPolicy; use crate::{Directory, Index}; @@ -500,7 +540,12 @@ mod tests { fn test_index_exists() { let directory = RAMDirectory::create(); assert!(!Index::exists(&directory).unwrap()); - assert!(Index::create(directory.clone(), throw_away_schema()).is_ok()); + assert!(Index::create( + directory.clone(), + throw_away_schema(), + IndexSettings::default() + ) + .is_ok()); assert!(Index::exists(&directory).unwrap()); } @@ -515,7 +560,12 @@ mod tests { #[test] fn open_or_create_should_open() { let directory = RAMDirectory::create(); - assert!(Index::create(directory.clone(), throw_away_schema()).is_ok()); + assert!(Index::create( + directory.clone(), + throw_away_schema(), + IndexSettings::default() + ) + .is_ok()); assert!(Index::exists(&directory).unwrap()); assert!(Index::open_or_create(directory, throw_away_schema()).is_ok()); } @@ -523,15 +573,30 @@ mod tests { #[test] fn create_should_wipeoff_existing() { let directory = RAMDirectory::create(); - assert!(Index::create(directory.clone(), throw_away_schema()).is_ok()); + assert!(Index::create( + directory.clone(), + throw_away_schema(), + IndexSettings::default() + ) + .is_ok()); assert!(Index::exists(&directory).unwrap()); - assert!(Index::create(directory.clone(), Schema::builder().build()).is_ok()); + assert!(Index::create( + directory.clone(), + Schema::builder().build(), + IndexSettings::default() + ) + .is_ok()); } #[test] fn open_or_create_exists_but_schema_does_not_match() { let directory = RAMDirectory::create(); - assert!(Index::create(directory.clone(), throw_away_schema()).is_ok()); + assert!(Index::create( + directory.clone(), + throw_away_schema(), + IndexSettings::default() + ) + .is_ok()); assert!(Index::exists(&directory).unwrap()); assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok()); let err = Index::open_or_create(directory, Schema::builder().build()); @@ -666,7 +731,7 @@ mod tests { let directory = RAMDirectory::create(); let schema = throw_away_schema(); let field = schema.get_field("num_likes").unwrap(); - let index = Index::create(directory.clone(), schema).unwrap(); + let index = Index::create(directory.clone(), schema, IndexSettings::default()).unwrap(); let mut writer = index.writer_with_num_threads(8, 24_000_000).unwrap(); for i in 0u64..8_000u64 { diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index 2e8af1d0d..dc2804ebd 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -487,6 +487,7 @@ mod tests { use super::*; use crate::schema::{Schema, SchemaBuilder, TEXT}; use crate::Index; + use crate::IndexSettings; use crate::ReloadPolicy; use crate::{common::HasLen, indexer::LogMergePolicy}; @@ -585,7 +586,8 @@ mod tests { let schema = schema_builder.build(); { - let index = Index::create(mmap_directory.clone(), schema).unwrap(); + let index = + Index::create(mmap_directory.clone(), schema, IndexSettings::default()).unwrap(); let mut index_writer = index.writer_for_tests().unwrap(); let mut log_merge_policy = LogMergePolicy::default(); diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 2bf00ceaf..9b066362e 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -44,10 +44,10 @@ const NUM_MERGE_THREADS: usize = 4; /// and flushed. /// /// This method is not part of tantivy's public API -pub fn save_new_metas(schema: Schema, directory: &dyn Directory) -> crate::Result<()> { +pub fn save_new_metas(schema: Schema, index_settings: IndexSettings, directory: &dyn Directory) -> crate::Result<()> { save_metas( &IndexMeta { - index_settings: IndexSettings::default(), + index_settings, segments: Vec::new(), schema, opstamp: 0u64, @@ -184,7 +184,7 @@ pub fn merge_segments( segments.extend(index.searchable_segments()?); } - let mut merged_index = Index::create(output_directory, target_schema.clone())?; + let mut merged_index = Index::create(output_directory, target_schema.clone(), indices[0].settings().clone())?; let merged_segment = merged_index.new_segment(); let merged_segment_id = merged_segment.id(); let merger: IndexMerger = IndexMerger::open(merged_index.schema(), &segments[..])?;