mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-02 08:30:41 +00:00
Merge branch 'main' into indexmeta
This commit is contained in:
@@ -10,7 +10,7 @@ use crate::directory::ManagedDirectory;
|
||||
#[cfg(feature = "mmap")]
|
||||
use crate::directory::MmapDirectory;
|
||||
use crate::directory::INDEX_WRITER_LOCK;
|
||||
use crate::directory::{Directory, RAMDirectory};
|
||||
use crate::directory::{Directory, RamDirectory};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::error::TantivyError;
|
||||
use crate::indexer::index_writer::HEAP_SIZE_MIN;
|
||||
@@ -222,7 +222,7 @@ impl Index {
|
||||
self.set_multithread_executor(default_num_threads)
|
||||
}
|
||||
|
||||
/// Creates a new index using the `RAMDirectory`.
|
||||
/// Creates a new index using the `RamDirectory`.
|
||||
///
|
||||
/// The index will be allocated in anonymous memory.
|
||||
/// This should only be used for unit tests.
|
||||
@@ -256,7 +256,7 @@ impl Index {
|
||||
/// is destroyed.
|
||||
///
|
||||
/// The temp directory is only used for testing the `MmapDirectory`.
|
||||
/// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
|
||||
/// For other unit tests, prefer the `RamDirectory`, see: `create_in_ram`.
|
||||
#[cfg(feature = "mmap")]
|
||||
pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
|
||||
IndexBuilder::new().schema(schema).create_from_tempdir()
|
||||
@@ -390,7 +390,7 @@ impl Index {
|
||||
/// Each thread will receive a budget of `overall_heap_size_in_bytes / num_threads`.
|
||||
///
|
||||
/// # Errors
|
||||
/// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IOError`.
|
||||
/// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IoError`.
|
||||
///
|
||||
/// # Panics
|
||||
/// If the heap size per thread is too small, panics.
|
||||
@@ -524,7 +524,7 @@ impl fmt::Debug for Index {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::directory::{RAMDirectory, WatchCallback};
|
||||
use crate::directory::{RamDirectory, WatchCallback};
|
||||
use crate::schema::Field;
|
||||
use crate::schema::{Schema, INDEXED, TEXT};
|
||||
use crate::IndexReader;
|
||||
@@ -548,7 +548,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_index_exists() {
|
||||
let directory = RAMDirectory::create();
|
||||
let directory = RamDirectory::create();
|
||||
assert!(!Index::exists(&directory).unwrap());
|
||||
assert!(Index::create(
|
||||
directory.clone(),
|
||||
@@ -561,7 +561,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn open_or_create_should_create() {
|
||||
let directory = RAMDirectory::create();
|
||||
let directory = RamDirectory::create();
|
||||
assert!(!Index::exists(&directory).unwrap());
|
||||
assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok());
|
||||
assert!(Index::exists(&directory).unwrap());
|
||||
@@ -569,7 +569,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn open_or_create_should_open() {
|
||||
let directory = RAMDirectory::create();
|
||||
let directory = RamDirectory::create();
|
||||
assert!(Index::create(
|
||||
directory.clone(),
|
||||
throw_away_schema(),
|
||||
@@ -582,7 +582,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn create_should_wipeoff_existing() {
|
||||
let directory = RAMDirectory::create();
|
||||
let directory = RamDirectory::create();
|
||||
assert!(Index::create(
|
||||
directory.clone(),
|
||||
throw_away_schema(),
|
||||
@@ -600,7 +600,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn open_or_create_exists_but_schema_does_not_match() {
|
||||
let directory = RAMDirectory::create();
|
||||
let directory = RamDirectory::create();
|
||||
assert!(Index::create(
|
||||
directory.clone(),
|
||||
throw_away_schema(),
|
||||
@@ -738,7 +738,7 @@ mod tests {
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[test]
|
||||
fn garbage_collect_works_as_intended() {
|
||||
let directory = RAMDirectory::create();
|
||||
let directory = RamDirectory::create();
|
||||
let schema = throw_away_schema();
|
||||
let field = schema.get_field("num_likes").unwrap();
|
||||
let index = Index::create(directory.clone(), schema, IndexSettings::default()).unwrap();
|
||||
|
||||
@@ -108,14 +108,13 @@ impl SegmentMeta {
|
||||
pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
|
||||
let mut path = self.id().uuid_string();
|
||||
path.push_str(&*match component {
|
||||
SegmentComponent::POSTINGS => ".idx".to_string(),
|
||||
SegmentComponent::POSITIONS => ".pos".to_string(),
|
||||
SegmentComponent::POSITIONSSKIP => ".posidx".to_string(),
|
||||
SegmentComponent::TERMS => ".term".to_string(),
|
||||
SegmentComponent::STORE => ".store".to_string(),
|
||||
SegmentComponent::FASTFIELDS => ".fast".to_string(),
|
||||
SegmentComponent::FIELDNORMS => ".fieldnorm".to_string(),
|
||||
SegmentComponent::DELETE => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
|
||||
SegmentComponent::Postings => ".idx".to_string(),
|
||||
SegmentComponent::Positions => ".pos".to_string(),
|
||||
SegmentComponent::Terms => ".term".to_string(),
|
||||
SegmentComponent::Store => ".store".to_string(),
|
||||
SegmentComponent::FastFields => ".fast".to_string(),
|
||||
SegmentComponent::FieldNorms => ".fieldnorm".to_string(),
|
||||
SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
|
||||
});
|
||||
PathBuf::from(path)
|
||||
}
|
||||
|
||||
@@ -26,7 +26,6 @@ pub struct InvertedIndexReader {
|
||||
termdict: TermDictionary,
|
||||
postings_file_slice: FileSlice,
|
||||
positions_file_slice: FileSlice,
|
||||
positions_idx_file_slice: FileSlice,
|
||||
record_option: IndexRecordOption,
|
||||
total_num_tokens: u64,
|
||||
}
|
||||
@@ -37,7 +36,6 @@ impl InvertedIndexReader {
|
||||
termdict: TermDictionary,
|
||||
postings_file_slice: FileSlice,
|
||||
positions_file_slice: FileSlice,
|
||||
positions_idx_file_slice: FileSlice,
|
||||
record_option: IndexRecordOption,
|
||||
) -> io::Result<InvertedIndexReader> {
|
||||
let (total_num_tokens_slice, postings_body) = postings_file_slice.split(8);
|
||||
@@ -46,7 +44,6 @@ impl InvertedIndexReader {
|
||||
termdict,
|
||||
postings_file_slice: postings_body,
|
||||
positions_file_slice,
|
||||
positions_idx_file_slice,
|
||||
record_option,
|
||||
total_num_tokens,
|
||||
})
|
||||
@@ -59,7 +56,6 @@ impl InvertedIndexReader {
|
||||
termdict: TermDictionary::empty(),
|
||||
postings_file_slice: FileSlice::empty(),
|
||||
positions_file_slice: FileSlice::empty(),
|
||||
positions_idx_file_slice: FileSlice::empty(),
|
||||
record_option,
|
||||
total_num_tokens: 0u64,
|
||||
}
|
||||
@@ -141,12 +137,12 @@ impl InvertedIndexReader {
|
||||
option: IndexRecordOption,
|
||||
) -> io::Result<SegmentPostings> {
|
||||
let block_postings = self.read_block_postings_from_terminfo(term_info, option)?;
|
||||
let position_stream = {
|
||||
let position_reader = {
|
||||
if option.has_positions() {
|
||||
let position_reader = self.positions_file_slice.clone();
|
||||
let skip_reader = self.positions_idx_file_slice.clone();
|
||||
let position_reader =
|
||||
PositionReader::new(position_reader, skip_reader, term_info.positions_idx)?;
|
||||
let positions_data = self
|
||||
.positions_file_slice
|
||||
.read_bytes_slice(term_info.positions_range.clone())?;
|
||||
let position_reader = PositionReader::open(positions_data)?;
|
||||
Some(position_reader)
|
||||
} else {
|
||||
None
|
||||
@@ -154,7 +150,7 @@ impl InvertedIndexReader {
|
||||
};
|
||||
Ok(SegmentPostings::from_block_postings(
|
||||
block_postings,
|
||||
position_stream,
|
||||
position_reader,
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@@ -7,39 +7,36 @@ use std::slice;
|
||||
#[derive(Copy, Clone)]
|
||||
pub enum SegmentComponent {
|
||||
/// Postings (or inverted list). Sorted lists of document ids, associated to terms
|
||||
POSTINGS,
|
||||
Postings,
|
||||
/// Positions of terms in each document.
|
||||
POSITIONS,
|
||||
/// Index to seek within the position file
|
||||
POSITIONSSKIP,
|
||||
Positions,
|
||||
/// Column-oriented random-access storage of fields.
|
||||
FASTFIELDS,
|
||||
FastFields,
|
||||
/// Stores the sum of the length (in terms) of each field for each document.
|
||||
/// Field norms are stored as a special u64 fast field.
|
||||
FIELDNORMS,
|
||||
FieldNorms,
|
||||
/// Dictionary associating `Term`s to `TermInfo`s which is
|
||||
/// simply an address into the `postings` file and the `positions` file.
|
||||
TERMS,
|
||||
Terms,
|
||||
/// Row-oriented, compressed storage of the documents.
|
||||
/// Accessing a document from the store is relatively slow, as it
|
||||
/// requires to decompress the entire block it belongs to.
|
||||
STORE,
|
||||
Store,
|
||||
/// Bitset describing which document of the segment is deleted.
|
||||
DELETE,
|
||||
Delete,
|
||||
}
|
||||
|
||||
impl SegmentComponent {
|
||||
/// Iterates through the components.
|
||||
pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
|
||||
static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
|
||||
SegmentComponent::POSTINGS,
|
||||
SegmentComponent::POSITIONS,
|
||||
SegmentComponent::POSITIONSSKIP,
|
||||
SegmentComponent::FASTFIELDS,
|
||||
SegmentComponent::FIELDNORMS,
|
||||
SegmentComponent::TERMS,
|
||||
SegmentComponent::STORE,
|
||||
SegmentComponent::DELETE,
|
||||
static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [
|
||||
SegmentComponent::Postings,
|
||||
SegmentComponent::Positions,
|
||||
SegmentComponent::FastFields,
|
||||
SegmentComponent::FieldNorms,
|
||||
SegmentComponent::Terms,
|
||||
SegmentComponent::Store,
|
||||
SegmentComponent::Delete,
|
||||
];
|
||||
SEGMENT_COMPONENTS.iter()
|
||||
}
|
||||
|
||||
@@ -46,7 +46,6 @@ pub struct SegmentReader {
|
||||
termdict_composite: CompositeFile,
|
||||
postings_composite: CompositeFile,
|
||||
positions_composite: CompositeFile,
|
||||
positions_idx_composite: CompositeFile,
|
||||
fast_fields_readers: Arc<FastFieldReaders>,
|
||||
fieldnorm_readers: FieldNormReaders,
|
||||
|
||||
@@ -151,44 +150,36 @@ impl SegmentReader {
|
||||
|
||||
/// Open a new segment for reading.
|
||||
pub fn open(segment: &Segment) -> crate::Result<SegmentReader> {
|
||||
let termdict_file = segment.open_read(SegmentComponent::TERMS)?;
|
||||
let termdict_file = segment.open_read(SegmentComponent::Terms)?;
|
||||
let termdict_composite = CompositeFile::open(&termdict_file)?;
|
||||
|
||||
let store_file = segment.open_read(SegmentComponent::STORE)?;
|
||||
let store_file = segment.open_read(SegmentComponent::Store)?;
|
||||
|
||||
fail_point!("SegmentReader::open#middle");
|
||||
|
||||
let postings_file = segment.open_read(SegmentComponent::POSTINGS)?;
|
||||
let postings_file = segment.open_read(SegmentComponent::Postings)?;
|
||||
let postings_composite = CompositeFile::open(&postings_file)?;
|
||||
|
||||
let positions_composite = {
|
||||
if let Ok(positions_file) = segment.open_read(SegmentComponent::POSITIONS) {
|
||||
if let Ok(positions_file) = segment.open_read(SegmentComponent::Positions) {
|
||||
CompositeFile::open(&positions_file)?
|
||||
} else {
|
||||
CompositeFile::empty()
|
||||
}
|
||||
};
|
||||
|
||||
let positions_idx_composite = {
|
||||
if let Ok(positions_skip_file) = segment.open_read(SegmentComponent::POSITIONSSKIP) {
|
||||
CompositeFile::open(&positions_skip_file)?
|
||||
} else {
|
||||
CompositeFile::empty()
|
||||
}
|
||||
};
|
||||
|
||||
let schema = segment.schema();
|
||||
|
||||
let fast_fields_data = segment.open_read(SegmentComponent::FASTFIELDS)?;
|
||||
let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?;
|
||||
let fast_fields_composite = CompositeFile::open(&fast_fields_data)?;
|
||||
let fast_field_readers =
|
||||
Arc::new(FastFieldReaders::new(schema.clone(), fast_fields_composite));
|
||||
|
||||
let fieldnorm_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
|
||||
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
|
||||
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
|
||||
|
||||
let delete_bitset_opt = if segment.meta().has_deletes() {
|
||||
let delete_data = segment.open_read(SegmentComponent::DELETE)?;
|
||||
let delete_data = segment.open_read(SegmentComponent::Delete)?;
|
||||
let delete_bitset = DeleteBitSet::open(delete_data)?;
|
||||
Some(delete_bitset)
|
||||
} else {
|
||||
@@ -207,7 +198,6 @@ impl SegmentReader {
|
||||
store_file,
|
||||
delete_bitset_opt,
|
||||
positions_composite,
|
||||
positions_idx_composite,
|
||||
schema,
|
||||
})
|
||||
}
|
||||
@@ -263,18 +253,15 @@ impl SegmentReader {
|
||||
let positions_file = self
|
||||
.positions_composite
|
||||
.open_read(field)
|
||||
.expect("Index corrupted. Failed to open field positions in composite file.");
|
||||
|
||||
let positions_idx_file = self
|
||||
.positions_idx_composite
|
||||
.open_read(field)
|
||||
.expect("Index corrupted. Failed to open field positions in composite file.");
|
||||
.ok_or_else(|| {
|
||||
let error_msg = format!("Failed to open field {:?}'s positions in the composite file. Has the schema been modified?", field_entry.name());
|
||||
DataCorruption::comment_only(error_msg)
|
||||
})?;
|
||||
|
||||
let inv_idx_reader = Arc::new(InvertedIndexReader::new(
|
||||
TermDictionary::open(termdict_file)?,
|
||||
postings_file,
|
||||
positions_file,
|
||||
positions_idx_file,
|
||||
record_option,
|
||||
)?);
|
||||
|
||||
@@ -319,7 +306,6 @@ impl SegmentReader {
|
||||
self.termdict_composite.space_usage(),
|
||||
self.postings_composite.space_usage(),
|
||||
self.positions_composite.space_usage(),
|
||||
self.positions_idx_composite.space_usage(),
|
||||
self.fast_fields_readers.space_usage(),
|
||||
self.fieldnorm_readers.space_usage(),
|
||||
self.get_store_reader()?.space_usage(),
|
||||
|
||||
Reference in New Issue
Block a user