Merge branch 'main' into indexmeta

This commit is contained in:
Paul Masurel
2021-04-26 14:34:58 +09:00
committed by GitHub
74 changed files with 839 additions and 797 deletions

View File

@@ -10,7 +10,7 @@ use crate::directory::ManagedDirectory;
#[cfg(feature = "mmap")]
use crate::directory::MmapDirectory;
use crate::directory::INDEX_WRITER_LOCK;
use crate::directory::{Directory, RAMDirectory};
use crate::directory::{Directory, RamDirectory};
use crate::error::DataCorruption;
use crate::error::TantivyError;
use crate::indexer::index_writer::HEAP_SIZE_MIN;
@@ -222,7 +222,7 @@ impl Index {
self.set_multithread_executor(default_num_threads)
}
/// Creates a new index using the `RAMDirectory`.
/// Creates a new index using the `RamDirectory`.
///
/// The index will be allocated in anonymous memory.
/// This should only be used for unit tests.
@@ -256,7 +256,7 @@ impl Index {
/// is destroyed.
///
/// The temp directory is only used for testing the `MmapDirectory`.
/// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
/// For other unit tests, prefer the `RamDirectory`, see: `create_in_ram`.
#[cfg(feature = "mmap")]
pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
IndexBuilder::new().schema(schema).create_from_tempdir()
@@ -390,7 +390,7 @@ impl Index {
/// Each thread will receive a budget of `overall_heap_size_in_bytes / num_threads`.
///
/// # Errors
/// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IOError`.
/// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IoError`.
///
/// # Panics
/// If the heap size per thread is too small, panics.
@@ -524,7 +524,7 @@ impl fmt::Debug for Index {
#[cfg(test)]
mod tests {
use crate::directory::{RAMDirectory, WatchCallback};
use crate::directory::{RamDirectory, WatchCallback};
use crate::schema::Field;
use crate::schema::{Schema, INDEXED, TEXT};
use crate::IndexReader;
@@ -548,7 +548,7 @@ mod tests {
#[test]
fn test_index_exists() {
let directory = RAMDirectory::create();
let directory = RamDirectory::create();
assert!(!Index::exists(&directory).unwrap());
assert!(Index::create(
directory.clone(),
@@ -561,7 +561,7 @@ mod tests {
#[test]
fn open_or_create_should_create() {
let directory = RAMDirectory::create();
let directory = RamDirectory::create();
assert!(!Index::exists(&directory).unwrap());
assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok());
assert!(Index::exists(&directory).unwrap());
@@ -569,7 +569,7 @@ mod tests {
#[test]
fn open_or_create_should_open() {
let directory = RAMDirectory::create();
let directory = RamDirectory::create();
assert!(Index::create(
directory.clone(),
throw_away_schema(),
@@ -582,7 +582,7 @@ mod tests {
#[test]
fn create_should_wipeoff_existing() {
let directory = RAMDirectory::create();
let directory = RamDirectory::create();
assert!(Index::create(
directory.clone(),
throw_away_schema(),
@@ -600,7 +600,7 @@ mod tests {
#[test]
fn open_or_create_exists_but_schema_does_not_match() {
let directory = RAMDirectory::create();
let directory = RamDirectory::create();
assert!(Index::create(
directory.clone(),
throw_away_schema(),
@@ -738,7 +738,7 @@ mod tests {
#[cfg(not(target_os = "windows"))]
#[test]
fn garbage_collect_works_as_intended() {
let directory = RAMDirectory::create();
let directory = RamDirectory::create();
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let index = Index::create(directory.clone(), schema, IndexSettings::default()).unwrap();

View File

@@ -108,14 +108,13 @@ impl SegmentMeta {
pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
let mut path = self.id().uuid_string();
path.push_str(&*match component {
SegmentComponent::POSTINGS => ".idx".to_string(),
SegmentComponent::POSITIONS => ".pos".to_string(),
SegmentComponent::POSITIONSSKIP => ".posidx".to_string(),
SegmentComponent::TERMS => ".term".to_string(),
SegmentComponent::STORE => ".store".to_string(),
SegmentComponent::FASTFIELDS => ".fast".to_string(),
SegmentComponent::FIELDNORMS => ".fieldnorm".to_string(),
SegmentComponent::DELETE => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
SegmentComponent::Postings => ".idx".to_string(),
SegmentComponent::Positions => ".pos".to_string(),
SegmentComponent::Terms => ".term".to_string(),
SegmentComponent::Store => ".store".to_string(),
SegmentComponent::FastFields => ".fast".to_string(),
SegmentComponent::FieldNorms => ".fieldnorm".to_string(),
SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
});
PathBuf::from(path)
}

View File

@@ -26,7 +26,6 @@ pub struct InvertedIndexReader {
termdict: TermDictionary,
postings_file_slice: FileSlice,
positions_file_slice: FileSlice,
positions_idx_file_slice: FileSlice,
record_option: IndexRecordOption,
total_num_tokens: u64,
}
@@ -37,7 +36,6 @@ impl InvertedIndexReader {
termdict: TermDictionary,
postings_file_slice: FileSlice,
positions_file_slice: FileSlice,
positions_idx_file_slice: FileSlice,
record_option: IndexRecordOption,
) -> io::Result<InvertedIndexReader> {
let (total_num_tokens_slice, postings_body) = postings_file_slice.split(8);
@@ -46,7 +44,6 @@ impl InvertedIndexReader {
termdict,
postings_file_slice: postings_body,
positions_file_slice,
positions_idx_file_slice,
record_option,
total_num_tokens,
})
@@ -59,7 +56,6 @@ impl InvertedIndexReader {
termdict: TermDictionary::empty(),
postings_file_slice: FileSlice::empty(),
positions_file_slice: FileSlice::empty(),
positions_idx_file_slice: FileSlice::empty(),
record_option,
total_num_tokens: 0u64,
}
@@ -141,12 +137,12 @@ impl InvertedIndexReader {
option: IndexRecordOption,
) -> io::Result<SegmentPostings> {
let block_postings = self.read_block_postings_from_terminfo(term_info, option)?;
let position_stream = {
let position_reader = {
if option.has_positions() {
let position_reader = self.positions_file_slice.clone();
let skip_reader = self.positions_idx_file_slice.clone();
let position_reader =
PositionReader::new(position_reader, skip_reader, term_info.positions_idx)?;
let positions_data = self
.positions_file_slice
.read_bytes_slice(term_info.positions_range.clone())?;
let position_reader = PositionReader::open(positions_data)?;
Some(position_reader)
} else {
None
@@ -154,7 +150,7 @@ impl InvertedIndexReader {
};
Ok(SegmentPostings::from_block_postings(
block_postings,
position_stream,
position_reader,
))
}

View File

@@ -7,39 +7,36 @@ use std::slice;
#[derive(Copy, Clone)]
pub enum SegmentComponent {
/// Postings (or inverted list). Sorted lists of document ids, associated with terms.
POSTINGS,
Postings,
/// Positions of terms in each document.
POSITIONS,
/// Index to seek within the position file
POSITIONSSKIP,
Positions,
/// Column-oriented random-access storage of fields.
FASTFIELDS,
FastFields,
/// Stores the sum of the length (in terms) of each field for each document.
/// Field norms are stored as a special u64 fast field.
FIELDNORMS,
FieldNorms,
/// Dictionary associating `Term`s with `TermInfo`s, each of which is
/// simply an address into the `postings` file and the `positions` file.
TERMS,
Terms,
/// Row-oriented, compressed storage of the documents.
/// Accessing a document from the store is relatively slow, as it
/// requires decompressing the entire block it belongs to.
STORE,
Store,
/// Bitset describing which documents of the segment are deleted.
DELETE,
Delete,
}
impl SegmentComponent {
/// Iterates through the components.
pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
SegmentComponent::POSTINGS,
SegmentComponent::POSITIONS,
SegmentComponent::POSITIONSSKIP,
SegmentComponent::FASTFIELDS,
SegmentComponent::FIELDNORMS,
SegmentComponent::TERMS,
SegmentComponent::STORE,
SegmentComponent::DELETE,
static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [
SegmentComponent::Postings,
SegmentComponent::Positions,
SegmentComponent::FastFields,
SegmentComponent::FieldNorms,
SegmentComponent::Terms,
SegmentComponent::Store,
SegmentComponent::Delete,
];
SEGMENT_COMPONENTS.iter()
}

View File

@@ -46,7 +46,6 @@ pub struct SegmentReader {
termdict_composite: CompositeFile,
postings_composite: CompositeFile,
positions_composite: CompositeFile,
positions_idx_composite: CompositeFile,
fast_fields_readers: Arc<FastFieldReaders>,
fieldnorm_readers: FieldNormReaders,
@@ -151,44 +150,36 @@ impl SegmentReader {
/// Open a new segment for reading.
pub fn open(segment: &Segment) -> crate::Result<SegmentReader> {
let termdict_file = segment.open_read(SegmentComponent::TERMS)?;
let termdict_file = segment.open_read(SegmentComponent::Terms)?;
let termdict_composite = CompositeFile::open(&termdict_file)?;
let store_file = segment.open_read(SegmentComponent::STORE)?;
let store_file = segment.open_read(SegmentComponent::Store)?;
fail_point!("SegmentReader::open#middle");
let postings_file = segment.open_read(SegmentComponent::POSTINGS)?;
let postings_file = segment.open_read(SegmentComponent::Postings)?;
let postings_composite = CompositeFile::open(&postings_file)?;
let positions_composite = {
if let Ok(positions_file) = segment.open_read(SegmentComponent::POSITIONS) {
if let Ok(positions_file) = segment.open_read(SegmentComponent::Positions) {
CompositeFile::open(&positions_file)?
} else {
CompositeFile::empty()
}
};
let positions_idx_composite = {
if let Ok(positions_skip_file) = segment.open_read(SegmentComponent::POSITIONSSKIP) {
CompositeFile::open(&positions_skip_file)?
} else {
CompositeFile::empty()
}
};
let schema = segment.schema();
let fast_fields_data = segment.open_read(SegmentComponent::FASTFIELDS)?;
let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?;
let fast_fields_composite = CompositeFile::open(&fast_fields_data)?;
let fast_field_readers =
Arc::new(FastFieldReaders::new(schema.clone(), fast_fields_composite));
let fieldnorm_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
let delete_bitset_opt = if segment.meta().has_deletes() {
let delete_data = segment.open_read(SegmentComponent::DELETE)?;
let delete_data = segment.open_read(SegmentComponent::Delete)?;
let delete_bitset = DeleteBitSet::open(delete_data)?;
Some(delete_bitset)
} else {
@@ -207,7 +198,6 @@ impl SegmentReader {
store_file,
delete_bitset_opt,
positions_composite,
positions_idx_composite,
schema,
})
}
@@ -263,18 +253,15 @@ impl SegmentReader {
let positions_file = self
.positions_composite
.open_read(field)
.expect("Index corrupted. Failed to open field positions in composite file.");
let positions_idx_file = self
.positions_idx_composite
.open_read(field)
.expect("Index corrupted. Failed to open field positions in composite file.");
.ok_or_else(|| {
let error_msg = format!("Failed to open field {:?}'s positions in the composite file. Has the schema been modified?", field_entry.name());
DataCorruption::comment_only(error_msg)
})?;
let inv_idx_reader = Arc::new(InvertedIndexReader::new(
TermDictionary::open(termdict_file)?,
postings_file,
positions_file,
positions_idx_file,
record_option,
)?);
@@ -319,7 +306,6 @@ impl SegmentReader {
self.termdict_composite.space_usage(),
self.postings_composite.space_usage(),
self.positions_composite.space_usage(),
self.positions_idx_composite.space_usage(),
self.fast_fields_readers.space_usage(),
self.fieldnorm_readers.space_usage(),
self.get_store_reader()?.space_usage(),