diff --git a/src/functional_test.rs b/src/functional_test.rs index 7e188738b..8c93bbb91 100644 --- a/src/functional_test.rs +++ b/src/functional_test.rs @@ -1,31 +1,76 @@ -use rand::thread_rng; use std::collections::HashSet; - -use crate::schema::*; +use rand::thread_rng; use crate::Index; use crate::Searcher; +use crate::{doc, schema::*}; use rand::Rng; -fn check_index_content(searcher: &Searcher, vals: &HashSet) { +fn check_index_content(searcher: &Searcher, vals: &[u64]) -> crate::Result<()> { assert!(searcher.segment_readers().len() < 20); assert_eq!(searcher.num_docs() as usize, vals.len()); + for segment_reader in searcher.segment_readers() { + let store_reader = segment_reader.get_store_reader()?; + for doc_id in 0..segment_reader.max_doc() { + let _doc = store_reader.get(doc_id)?; + } + } + Ok(()) } #[test] #[ignore] -fn test_indexing() { +fn test_functional_store() -> crate::Result<()> { + let mut schema_builder = Schema::builder(); + + let id_field = schema_builder.add_u64_field("id", INDEXED | STORED); + let schema = schema_builder.build(); + + let index = Index::create_in_ram(schema); + let reader = index.reader()?; + + let mut rng = thread_rng(); + + let mut index_writer = index.writer_with_num_threads(3, 12_000_000)?; + + let mut doc_set: Vec = Vec::new(); + + let mut doc_id = 0u64; + for iteration in 0..500 { + dbg!(iteration); + let num_docs: usize = rng.gen_range(0..4); + if doc_set.len() >= 1 { + let doc_to_remove_id = rng.gen_range(0..doc_set.len()); + let removed_doc_id = doc_set.swap_remove(doc_to_remove_id); + index_writer.delete_term(Term::from_field_u64(id_field, removed_doc_id)); + } + for _ in 0..num_docs { + doc_set.push(doc_id); + index_writer.add_document(doc!(id_field=>doc_id)); + doc_id += 1; + } + index_writer.commit()?; + reader.reload()?; + let searcher = reader.searcher(); + check_index_content(&searcher, &doc_set)?; + } + Ok(()) +} + +#[test] +#[ignore] +fn test_functional_indexing() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let id_field = schema_builder.add_u64_field("id", INDEXED); let multiples_field = schema_builder.add_u64_field("multiples", INDEXED); let schema = schema_builder.build(); - let index = Index::create_from_tempdir(schema).unwrap(); - let reader = index.reader().unwrap(); + let index = Index::create_from_tempdir(schema)?; + let reader = index.reader()?; let mut rng = thread_rng(); - let mut index_writer = index.writer_with_num_threads(3, 120_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(3, 120_000_000)?; let mut committed_docs: HashSet = HashSet::new(); let mut uncommitted_docs: HashSet = HashSet::new(); @@ -33,13 +78,13 @@ fn test_indexing() { for _ in 0..200 { let random_val = rng.gen_range(0..20); if random_val == 0 { - index_writer.commit().expect("Commit failed"); + index_writer.commit()?; committed_docs.extend(&uncommitted_docs); uncommitted_docs.clear(); - reader.reload().unwrap(); + reader.reload()?; let searcher = reader.searcher(); // check that everything is correct. - check_index_content(&searcher, &committed_docs); + check_index_content(&searcher, &committed_docs.iter().cloned().collect::>())?; } else { if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) { let doc_id_term = Term::from_field_u64(id_field, random_val); @@ -55,4 +100,5 @@ fn test_indexing() { } } } + Ok(()) } diff --git a/src/store/index/mod.rs b/src/store/index/mod.rs index f0779f60a..f0fe8fe2e 100644 --- a/src/store/index/mod.rs +++ b/src/store/index/mod.rs @@ -54,7 +54,7 @@ mod tests { let mut output: Vec = Vec::new(); let skip_index_builder: SkipIndexBuilder = SkipIndexBuilder::new(); skip_index_builder.write(&mut output)?; - let skip_index: SkipIndex = SkipIndex::from(OwnedBytes::new(output)); + let skip_index: SkipIndex = SkipIndex::open(OwnedBytes::new(output)); let mut skip_cursor = skip_index.checkpoints(); assert!(skip_cursor.next().is_none()); Ok(()) @@ -72,7 +72,7 @@ mod tests { }; skip_index_builder.insert(checkpoint); skip_index_builder.write(&mut output)?; - let skip_index: SkipIndex = SkipIndex::from(OwnedBytes::new(output)); + let skip_index: SkipIndex = SkipIndex::open(OwnedBytes::new(output)); let mut skip_cursor = skip_index.checkpoints(); assert_eq!(skip_cursor.next(), Some(checkpoint)); assert_eq!(skip_cursor.next(), None); @@ -121,7 +121,7 @@ mod tests { } skip_index_builder.write(&mut output)?; - let skip_index: SkipIndex = SkipIndex::from(OwnedBytes::new(output)); + let skip_index: SkipIndex = SkipIndex::open(OwnedBytes::new(output)); assert_eq!( &skip_index.checkpoints().collect::>()[..], &checkpoints[..] @@ -150,7 +150,7 @@ mod tests { } skip_index_builder.write(&mut output)?; assert_eq!(output.len(), 4035); - let resulting_checkpoints: Vec = SkipIndex::from(OwnedBytes::new(output)) + let resulting_checkpoints: Vec = SkipIndex::open(OwnedBytes::new(output)) .checkpoints() .collect(); assert_eq!(&resulting_checkpoints, &checkpoints); @@ -221,7 +221,7 @@ mod tests { } let mut buffer = Vec::new(); skip_index_builder.write(&mut buffer).unwrap(); - let skip_index = SkipIndex::from(OwnedBytes::new(buffer)); + let skip_index = SkipIndex::open(OwnedBytes::new(buffer)); let iter_checkpoints: Vec = skip_index.checkpoints().collect(); assert_eq!(&checkpoints[..], &iter_checkpoints[..]); test_skip_index_aux(skip_index, &checkpoints[..]); diff --git a/src/store/index/skip_index.rs b/src/store/index/skip_index.rs index 43816c2b0..c78acc96e 100644 --- a/src/store/index/skip_index.rs +++ b/src/store/index/skip_index.rs @@ -59,6 +59,25 @@ pub struct SkipIndex { } impl SkipIndex { + + pub fn open(mut data: OwnedBytes) -> SkipIndex { + let offsets: Vec = Vec::::deserialize(&mut data) + .unwrap() + .into_iter() + .map(|el| el.0) + .collect(); + let mut start_offset = 0; + let mut layers = Vec::new(); + for end_offset in offsets { + let layer = Layer { + data: data.slice(start_offset as usize, end_offset as usize), + }; + layers.push(layer); + start_offset = end_offset; + } + SkipIndex { layers } + } + pub(crate) fn checkpoints<'a>(&'a self) -> impl Iterator + 'a { self.layers .last() @@ -89,23 +108,6 @@ impl SkipIndex { } Some(cur_checkpoint) } -} -impl From for SkipIndex { - fn from(mut data: OwnedBytes) -> SkipIndex { - let offsets: Vec = Vec::::deserialize(&mut data) - .unwrap() - .into_iter() - .map(|el| el.0) - .collect(); - let mut start_offset = 0; - let mut layers = Vec::new(); - for end_offset in offsets { - layers.push(Layer { - data: data.slice(start_offset as usize, end_offset as usize), - }); - start_offset = end_offset; - } - SkipIndex { layers } - } + } diff --git a/src/store/reader.rs b/src/store/reader.rs index fa62a3257..4a47b716c 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -35,7 +35,7 @@ impl StoreReader { let (data_file, offset_index_file) = split_file(store_file)?; let index_data = offset_index_file.read_bytes()?; let space_usage = StoreSpaceUsage::new(data_file.len(), offset_index_file.len()); - let skip_index = SkipIndex::from(index_data); + let skip_index = SkipIndex::open(index_data); Ok(StoreReader { data: data_file, cache: Arc::new(Mutex::new(LruCache::new(LRU_CACHE_CAPACITY))),