diff --git a/Cargo.toml b/Cargo.toml
index 7c64ecbc4..87107078f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,11 @@ readme = "README.md"
 keywords = ["search", "information", "retrieval"]
 edition = "2018"
 
+
+[[bin]]
+name = "debug_position"
+path = "src/debug_position.rs"
+
 [dependencies]
 base64 = "0.13"
 byteorder = "1.4.3"
diff --git a/src/debug_position.rs b/src/debug_position.rs
new file mode 100644
index 000000000..ca4dff58b
--- /dev/null
+++ b/src/debug_position.rs
@@ -0,0 +1,68 @@
+//! Debugging binary: reads the positions of every posting list of an index.
+//!
+//! Opens the index located in the current working directory (`"."`) and, for every
+//! field indexed with `IndexRecordOption::WithFreqsAndPositions`, reads the positions
+//! of each posting in each segment. The positions themselves are discarded.
+
+use std::panic;
+
+use tantivy;
+use tantivy::DocSet;
+use tantivy::Postings;
+use tantivy::Searcher;
+use tantivy::TERMINATED;
+use tantivy::schema::Field;
+use tantivy::schema::IndexRecordOption;
+
+/// Reads the positions of every posting of `field` in every segment of `searcher`.
+///
+/// For each segment, prints its id, `max_doc`, number of deleted docs and number of
+/// terms. Any error returned by the inverted index, the term stream or the posting
+/// list reader is propagated to the caller.
+fn test_field(searcher: &Searcher, field: Field) -> tantivy::Result<()> {
+    for segment_reader in searcher.segment_readers() {
+        println!("\n\n====\nsegment {:?}", segment_reader.segment_id());
+        println!("maxdoc {} del {} ", segment_reader.max_doc(), segment_reader.num_deleted_docs());
+        let inv_idx = segment_reader.inverted_index(field)?;
+        let termdict = inv_idx.terms();
+        println!("num terms {}", termdict.num_terms());
+        let mut terms = termdict.stream()?;
+        while terms.advance() {
+            let term_info = terms.value();
+            let mut postings = inv_idx.read_postings_from_terminfo(term_info, tantivy::schema::IndexRecordOption::WithFreqsAndPositions)?;
+            let mut seen_doc = 0;
+            while postings.doc() != TERMINATED {
+                // Positions are read from a clone; `postings` itself is only advanced.
+                let mut postings_clone= postings.clone();
+                // Disabled debug trace; it is the only reader of `seen_doc`.
+                // println!("termord {} seen_doc {} termpositions {:?} docfreq {}", terms.term_ord(), seen_doc, term_info.positions_range, term_info.doc_freq);
+                let mut positions = Vec::new();
+                postings_clone.positions(&mut positions);
+                seen_doc += 1;
+                postings.advance();
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Runs `test_field` on every field of the index in the current directory whose
+/// index record option is `WithFreqsAndPositions`; all other fields are skipped.
+fn main() -> tantivy::Result<()> {
+    let index = tantivy::Index::open_in_dir(".")?;
+    let reader = index.reader()?;
+    let searcher = reader.searcher();
+    let schema = index.schema();
+    for (field, field_entry) in schema.fields() {
+        let field_type = field_entry.field_type();
+        let has_position = field_type.get_index_record_option()
+            .map(|opt| opt == IndexRecordOption::WithFreqsAndPositions)
+            .unwrap_or(false);
+        if !has_position {
+            continue;
+        }
+        test_field(&*searcher, field)?;
+    }
+
+    Ok(())
+}
diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs
index bcd85a2f7..8c9f3d5bf 100644
--- a/src/directory/file_slice.rs
+++ b/src/directory/file_slice.rs
@@ -4,11 +4,12 @@ use crate::common::HasLen;
 use crate::directory::OwnedBytes;
 use std::fmt;
 use std::ops::Range;
+use std::panic::{RefUnwindSafe, UnwindSafe};
 use std::sync::{Arc, Weak};
 use std::{io, ops::Deref};
 
-pub type ArcBytes = Arc<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
-pub type WeakArcBytes = Weak<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
+pub type ArcBytes = Arc<dyn Deref<Target = [u8]> + Send + Sync + UnwindSafe + RefUnwindSafe + 'static>;
+pub type WeakArcBytes = Weak<dyn Deref<Target = [u8]> + Send + Sync + UnwindSafe + RefUnwindSafe + 'static>;
 
 /// Objects that represents files sections in tantivy.
 ///
@@ -40,7 +41,7 @@ impl<T: Deref<Target = [u8]>> HasLen for T {
 
 impl<B> From<B> for FileSlice
 where
-    B: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync,
+    B: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync + UnwindSafe + RefUnwindSafe,
 {
     fn from(bytes: B) -> FileSlice {
         FileSlice::new(Box::new(OwnedBytes::new(bytes)))
diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs
index be0603271..29af3421e 100644
--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -20,6 +20,7 @@ use std::fs::OpenOptions;
 use std::fs::{self, File};
 use std::io::{self, Seek, SeekFrom};
 use std::io::{BufWriter, Read, Write};
+use std::panic::{RefUnwindSafe, UnwindSafe};
 use std::path::{Path, PathBuf};
 use std::result;
 use std::sync::Arc;
@@ -314,7 +315,7 @@ impl TerminatingWrite for SafeFileWriter {
 }
 
 #[derive(Clone)]
-struct MmapArc(Arc<dyn Deref<Target = [u8]> + Send + Sync>);
+struct MmapArc(Arc<dyn Deref<Target = [u8]> + Send + Sync + RefUnwindSafe + UnwindSafe>);
 
 impl Deref for MmapArc {
     type Target = [u8];