diff --git a/Cargo.toml b/Cargo.toml index 77fede634..c1526a286 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,10 +53,11 @@ lru = "0.7.5" fastdivide = "0.4.0" itertools = "0.10.3" measure_time = "0.8.2" -ciborium = { version = "0.2", optional = true} async-trait = "0.1.53" arc-swap = "1.5.0" +sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optional = true } +stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" } tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" } tantivy-bitpacker = { version= "0.3", path="./bitpacker" } common = { version= "0.4", path = "./common/", package = "tantivy-common" } @@ -104,10 +105,10 @@ zstd-compression = ["zstd"] failpoints = ["fail/failpoints"] unstable = [] # useful for benches. -quickwit = ["ciborium"] +quickwit = ["sstable"] [workspace] -members = ["query-grammar", "bitpacker", "common", "fastfield_codecs", "ownedbytes"] +members = ["query-grammar", "bitpacker", "common", "fastfield_codecs", "ownedbytes", "stacker", "sstable"] # Following the "fail" crate best practises, we isolate # tests that define specific behavior in fail check points diff --git a/src/postings/indexing_context.rs b/src/postings/indexing_context.rs index 9fc8a922f..074361d86 100644 --- a/src/postings/indexing_context.rs +++ b/src/postings/indexing_context.rs @@ -1,11 +1,11 @@ -use crate::postings::stacker::{MemoryArena, TermHashMap}; +use stacker::{ArenaHashMap, MemoryArena}; /// IndexingContext contains all of the transient memory arenas /// required for building the inverted index. pub(crate) struct IndexingContext { /// The term index is an adhoc hashmap, /// itself backed by a dedicated memory arena. - pub term_index: TermHashMap, + pub term_index: ArenaHashMap, /// Arena is a memory arena that stores posting lists / term frequencies / positions. pub arena: MemoryArena, } @@ -13,9 +13,9 @@ pub(crate) struct IndexingContext { impl IndexingContext { /// Create a new IndexingContext given the size of the term hash map. pub(crate) fn new(table_size: usize) -> IndexingContext { - let term_index = TermHashMap::new(table_size); + let term_index = ArenaHashMap::new(table_size); IndexingContext { - arena: MemoryArena::new(), + arena: MemoryArena::default(), term_index, } } diff --git a/src/postings/json_postings_writer.rs b/src/postings/json_postings_writer.rs index 34a5092e9..2e29aa0a0 100644 --- a/src/postings/json_postings_writer.rs +++ b/src/postings/json_postings_writer.rs @@ -1,10 +1,11 @@ use std::io; +use stacker::Addr; + use crate::fastfield::MultiValuedFastFieldWriter; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::postings_writer::SpecializedPostingsWriter; use crate::postings::recorder::{BufferLender, DocIdRecorder, Recorder}; -use crate::postings::stacker::Addr; use crate::postings::{ FieldSerializer, IndexingContext, IndexingPosition, PostingsWriter, UnorderedTermId, }; diff --git a/src/postings/mod.rs b/src/postings/mod.rs index fe42cbff6..df10e4e40 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -15,9 +15,10 @@ mod recorder; mod segment_postings; mod serializer; mod skip; -mod stacker; mod term_info; +pub(crate) use stacker::compute_table_size; + pub use self::block_segment_postings::BlockSegmentPostings; pub(crate) use self::indexing_context::IndexingContext; pub(crate) use self::per_field_postings_writer::PerFieldPostingsWriter; @@ -26,10 +27,9 @@ pub(crate) use self::postings_writer::{serialize_postings, IndexingPosition, Pos pub use self::segment_postings::SegmentPostings; pub use self::serializer::{FieldSerializer, InvertedIndexSerializer}; pub(crate) use self::skip::{BlockInfo, SkipReader}; -pub(crate) use self::stacker::compute_table_size; pub use self::term_info::TermInfo; -pub(crate) type UnorderedTermId = u64; +pub(crate) type UnorderedTermId = stacker::UnorderedId; #[allow(clippy::enum_variant_names)] #[derive(Debug, PartialEq, Clone, Copy, Eq)] diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 656a05bc3..19c127475 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -4,8 +4,8 @@ use std::marker::PhantomData; use std::ops::Range; use rustc_hash::FxHashMap; +use stacker::Addr; -use super::stacker::Addr; use crate::fastfield::MultiValuedFastFieldWriter; use crate::fieldnorm::FieldNormReaders; use crate::indexer::doc_id_mapping::DocIdMapping; @@ -59,7 +59,11 @@ pub(crate) fn serialize_postings( ) -> crate::Result>> { let mut term_offsets: Vec<(Term<&[u8]>, Addr, UnorderedTermId)> = Vec::with_capacity(ctx.term_index.len()); - term_offsets.extend(ctx.term_index.iter()); + term_offsets.extend( + ctx.term_index + .iter() + .map(|(bytes, addr, unordered_id)| (Term::wrap(bytes), addr, unordered_id)), + ); term_offsets.sort_unstable_by_key(|(k, _, _)| k.clone()); let mut unordered_term_mappings: HashMap> = HashMap::new(); diff --git a/src/postings/recorder.rs b/src/postings/recorder.rs index e66993e09..35788787e 100644 --- a/src/postings/recorder.rs +++ b/src/postings/recorder.rs @@ -1,6 +1,6 @@ use common::read_u32_vint; +use stacker::{ExpUnrolledLinkedList, MemoryArena}; -use super::stacker::{ExpUnrolledLinkedList, MemoryArena}; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::FieldSerializer; use crate::DocId; @@ -91,7 +91,7 @@ pub struct DocIdRecorder { impl Default for DocIdRecorder { fn default() -> Self { DocIdRecorder { - stack: ExpUnrolledLinkedList::new(), + stack: ExpUnrolledLinkedList::default(), current_doc: u32::MAX, } } @@ -144,7 +144,7 @@ impl Recorder for DocIdRecorder { } /// Recorder encoding document ids, and term frequencies -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Default)] pub struct TermFrequencyRecorder { stack: ExpUnrolledLinkedList, current_doc: DocId, @@ -152,17 +152,6 @@ pub struct TermFrequencyRecorder { term_doc_freq: u32, } -impl Default for TermFrequencyRecorder { - fn default() -> Self { - TermFrequencyRecorder { - stack: ExpUnrolledLinkedList::new(), - current_doc: 0, - current_tf: 0u32, - term_doc_freq: 0u32, - } - } -} - impl Recorder for TermFrequencyRecorder { fn current_doc(&self) -> DocId { self.current_doc @@ -229,7 +218,7 @@ pub struct TfAndPositionRecorder { impl Default for TfAndPositionRecorder { fn default() -> Self { TfAndPositionRecorder { - stack: ExpUnrolledLinkedList::new(), + stack: ExpUnrolledLinkedList::default(), current_doc: u32::MAX, term_doc_freq: 0u32, } diff --git a/src/postings/stacker/mod.rs b/src/postings/stacker/mod.rs deleted file mode 100644 index 19fff9216..000000000 --- a/src/postings/stacker/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod expull; -mod memory_arena; -mod term_hashmap; - -pub(crate) use self::expull::ExpUnrolledLinkedList; -pub(crate) use self::memory_arena::{Addr, MemoryArena}; -pub(crate) use self::term_hashmap::{compute_table_size, TermHashMap}; diff --git a/src/termdict/sstable_termdict/mod.rs b/src/termdict/sstable_termdict/mod.rs index 5b1174686..729fd73da 100644 --- a/src/termdict/sstable_termdict/mod.rs +++ b/src/termdict/sstable_termdict/mod.rs @@ -1,17 +1,16 @@ use std::io; mod merger; -mod sstable; mod streamer; mod termdict; use std::iter::ExactSizeIterator; use common::VInt; +use sstable::value::{ValueReader, ValueWriter}; +use sstable::{BlockReader, SSTable}; pub use self::merger::TermMerger; -use self::sstable::value::{ValueReader, ValueWriter}; -use self::sstable::{BlockReader, SSTable}; pub use self::streamer::{TermStreamer, TermStreamerBuilder}; pub use self::termdict::{TermDictionary, TermDictionaryBuilder}; use crate::postings::TermInfo; diff --git a/src/termdict/sstable_termdict/streamer.rs b/src/termdict/sstable_termdict/streamer.rs index ef388dc17..8289862ec 100644 --- a/src/termdict/sstable_termdict/streamer.rs +++ b/src/termdict/sstable_termdict/streamer.rs @@ -87,7 +87,7 @@ where { automaton: A, states: Vec, - delta_reader: super::sstable::DeltaReader<'a, TermInfoReader>, + delta_reader: sstable::DeltaReader<'a, TermInfoReader>, key: Vec, term_ord: Option, lower_bound: Bound>, diff --git a/src/termdict/sstable_termdict/termdict.rs b/src/termdict/sstable_termdict/termdict.rs index 7c4f8ae5f..c618d7ecf 100644 --- a/src/termdict/sstable_termdict/termdict.rs +++ b/src/termdict/sstable_termdict/termdict.rs @@ -3,15 +3,12 @@ use std::sync::Arc; use common::BinarySerializable; use once_cell::sync::Lazy; +use sstable::{BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, Writer}; use tantivy_fst::automaton::AlwaysMatch; use tantivy_fst::Automaton; use crate::directory::{FileSlice, OwnedBytes}; use crate::postings::TermInfo; -use crate::termdict::sstable_termdict::sstable::sstable_index::BlockAddr; -use crate::termdict::sstable_termdict::sstable::{ - DeltaReader, Reader, SSTable, SSTableIndex, Writer, -}; use crate::termdict::sstable_termdict::{ TermInfoReader, TermInfoWriter, TermSSTable, TermStreamer, TermStreamerBuilder, }; @@ -132,7 +129,8 @@ impl TermDictionary { let num_terms = u64::deserialize(&mut footer_len_bytes)?; let (sstable_slice, index_slice) = main_slice.split(index_offset as usize); let sstable_index_bytes = index_slice.read_bytes()?; - let sstable_index = SSTableIndex::load(sstable_index_bytes.as_slice())?; + let sstable_index = SSTableIndex::load(sstable_index_bytes.as_slice()) + .map_err(|_| crate::error::DataCorruption::comment_only("SSTable corruption"))?; Ok(TermDictionary { sstable_slice, sstable_index, diff --git a/sstable/Cargo.toml b/sstable/Cargo.toml new file mode 100644 index 000000000..1f50efa7d --- /dev/null +++ b/sstable/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "tantivy-sstable" +version = "0.1.0" +edition = "2021" + +[dependencies] +common = {path="../common", package="tantivy-common"} +ciborium = "0.2" +byteorder = "1" +serde = "1" + +[dev-dependencies] +proptest = "1" diff --git a/src/termdict/sstable_termdict/sstable/block_reader.rs b/sstable/src/block_reader.rs similarity index 100% rename from src/termdict/sstable_termdict/sstable/block_reader.rs rename to sstable/src/block_reader.rs diff --git a/src/termdict/sstable_termdict/sstable/delta.rs b/sstable/src/delta.rs similarity index 100% rename from src/termdict/sstable_termdict/sstable/delta.rs rename to sstable/src/delta.rs diff --git a/src/termdict/sstable_termdict/sstable/mod.rs b/sstable/src/lib.rs similarity index 97% rename from src/termdict/sstable_termdict/sstable/mod.rs rename to sstable/src/lib.rs index 0afef761c..59288f165 100644 --- a/src/termdict/sstable_termdict/sstable/mod.rs +++ b/sstable/src/lib.rs @@ -7,21 +7,19 @@ mod delta; pub mod merge; pub mod value; -pub(crate) mod sstable_index; - -pub(crate) use self::sstable_index::{SSTableIndex, SSTableIndexBuilder}; +mod sstable_index; +pub use sstable_index::{BlockAddr, SSTableIndex, SSTableIndexBuilder}; pub(crate) mod vint; mod block_reader; pub use self::block_reader::BlockReader; -pub use self::delta::DeltaReader; -use self::delta::DeltaWriter; +pub use self::delta::{DeltaReader, DeltaWriter}; pub use self::merge::VoidMerge; use self::value::{U64MonotonicReader, U64MonotonicWriter, ValueReader, ValueWriter}; const DEFAULT_KEY_CAPACITY: usize = 50; -pub(crate) fn common_prefix_len(left: &[u8], right: &[u8]) -> usize { +fn common_prefix_len(left: &[u8], right: &[u8]) -> usize { left.iter() .cloned() .zip(right.iter().cloned()) @@ -29,6 +27,9 @@ pub(crate) fn common_prefix_len(left: &[u8], right: &[u8]) -> usize { .count() } +#[derive(Debug, Copy, Clone)] +pub struct SSTableDataCorruption; + pub trait SSTable: Sized { type Value; type Reader: ValueReader; diff --git a/src/termdict/sstable_termdict/sstable/merge/heap_merge.rs b/sstable/src/merge/heap_merge.rs similarity index 96% rename from src/termdict/sstable_termdict/sstable/merge/heap_merge.rs rename to sstable/src/merge/heap_merge.rs index 50af21962..9e852918d 100644 --- a/src/termdict/sstable_termdict/sstable/merge/heap_merge.rs +++ b/sstable/src/merge/heap_merge.rs @@ -4,7 +4,7 @@ use std::collections::BinaryHeap; use std::io; use super::{SingleValueMerger, ValueMerger}; -use crate::termdict::sstable_termdict::sstable::{Reader, SSTable, Writer}; +use crate::{Reader, SSTable, Writer}; struct HeapItem>(B); diff --git a/src/termdict/sstable_termdict/sstable/merge/mod.rs b/sstable/src/merge/mod.rs similarity index 100% rename from src/termdict/sstable_termdict/sstable/merge/mod.rs rename to sstable/src/merge/mod.rs diff --git a/src/termdict/sstable_termdict/sstable/sstable_index.rs b/sstable/src/sstable_index.rs similarity index 92% rename from src/termdict/sstable_termdict/sstable/sstable_index.rs rename to sstable/src/sstable_index.rs index 14f795178..c7e132e89 100644 --- a/src/termdict/sstable_termdict/sstable/sstable_index.rs +++ b/sstable/src/sstable_index.rs @@ -3,8 +3,7 @@ use std::ops::Range; use serde::{Deserialize, Serialize}; -use crate::error::DataCorruption; -use crate::termdict::sstable_termdict::sstable::common_prefix_len; +use crate::{common_prefix_len, SSTableDataCorruption}; #[derive(Default, Debug, Serialize, Deserialize)] pub struct SSTableIndex { @@ -12,9 +11,8 @@ pub struct SSTableIndex { } impl SSTableIndex { - pub(crate) fn load(data: &[u8]) -> Result { - ciborium::de::from_reader(data) - .map_err(|_| DataCorruption::comment_only("SSTable index is corrupted")) + pub fn load(data: &[u8]) -> Result { + ciborium::de::from_reader(data).map_err(|_| SSTableDataCorruption) } pub fn search(&self, key: &[u8]) -> Option { @@ -94,6 +92,7 @@ impl SSTableIndexBuilder { #[cfg(test)] mod tests { use super::{BlockAddr, SSTableIndex, SSTableIndexBuilder}; + use crate::SSTableDataCorruption; #[test] fn test_sstable_index() { @@ -125,10 +124,7 @@ mod tests { sstable_builder.serialize(&mut buffer).unwrap(); buffer[1] = 9u8; let data_corruption_err = SSTableIndex::load(&buffer[..]).err().unwrap(); - assert_eq!( - format!("{data_corruption_err:?}"), - "Data corruption: SSTable index is corrupted." - ); + assert!(matches!(data_corruption_err, SSTableDataCorruption)); } #[track_caller] diff --git a/src/termdict/sstable_termdict/sstable/value.rs b/sstable/src/value.rs similarity index 100% rename from src/termdict/sstable_termdict/sstable/value.rs rename to sstable/src/value.rs diff --git a/src/termdict/sstable_termdict/sstable/vint.rs b/sstable/src/vint.rs similarity index 100% rename from src/termdict/sstable_termdict/sstable/vint.rs rename to sstable/src/vint.rs diff --git a/stacker/Cargo.toml b/stacker/Cargo.toml new file mode 100644 index 000000000..f683ff5ec --- /dev/null +++ b/stacker/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "tantivy-stacker" +version = "0.1.0" +edition = "2021" + +[dependencies] +murmurhash32 = "0.2" +byteorder = "1" +common = { version = "0.4", path = "../common/", package = "tantivy-common" } diff --git a/src/postings/stacker/term_hashmap.rs b/stacker/src/arena_hashmap.rs similarity index 86% rename from src/postings/stacker/term_hashmap.rs rename to stacker/src/arena_hashmap.rs index 19c29b7ca..5bd8eaf22 100644 --- a/src/postings/stacker/term_hashmap.rs +++ b/stacker/src/arena_hashmap.rs @@ -4,14 +4,13 @@ use byteorder::{ByteOrder, NativeEndian}; use murmurhash32::murmurhash2; use super::{Addr, MemoryArena}; -use crate::postings::stacker::memory_arena::store; -use crate::postings::UnorderedTermId; -use crate::Term; +use crate::memory_arena::store; +use crate::UnorderedId; /// Returns the actual memory size in bytes /// required to create a table with a given capacity. /// required to create a table of size -pub(crate) fn compute_table_size(capacity: usize) -> usize { +pub fn compute_table_size(capacity: usize) -> usize { capacity * mem::size_of::() } @@ -22,7 +21,7 @@ pub(crate) fn compute_table_size(capacity: usize) -> usize { struct KeyValue { key_value_addr: Addr, hash: u32, - unordered_term_id: UnorderedTermId, + unordered_id: UnorderedId, } impl Default for KeyValue { @@ -30,7 +29,7 @@ impl Default for KeyValue { KeyValue { key_value_addr: Addr::null_pointer(), hash: 0u32, - unordered_term_id: UnorderedTermId::default(), + unordered_id: UnorderedId::default(), } } } @@ -41,15 +40,16 @@ impl KeyValue { } } -/// Customized `HashMap` with string keys +/// Customized `HashMap` with `&[u8]` keys /// -/// This `HashMap` takes String as keys. Keys are -/// stored in a user defined memory arena. +/// Its main particularity is that rather than storing its +/// keys in the heap, keys are stored in a memory arena +/// inline with the values. /// /// The quirky API has the benefit of avoiding /// the computation of the hash of the key twice, /// or copying the key as long as there is no insert. -pub struct TermHashMap { +pub struct ArenaHashMap { table: Box<[KeyValue]>, memory_arena: MemoryArena, mask: usize, @@ -64,6 +64,7 @@ struct QuadraticProbing { } impl QuadraticProbing { + #[inline] fn compute(hash: usize, mask: usize) -> QuadraticProbing { QuadraticProbing { hash, i: 0, mask } } @@ -76,18 +77,18 @@ impl QuadraticProbing { } pub struct Iter<'a> { - hashmap: &'a TermHashMap, + hashmap: &'a ArenaHashMap, inner: slice::Iter<'a, usize>, } impl<'a> Iterator for Iter<'a> { - type Item = (Term<&'a [u8]>, Addr, UnorderedTermId); + type Item = (&'a [u8], Addr, UnorderedId); fn next(&mut self) -> Option { self.inner.next().cloned().map(move |bucket: usize| { let kv = self.hashmap.table[bucket]; let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr); - (Term::wrap(key), offset, kv.unordered_term_id) + (key, offset, kv.unordered_id) }) } } @@ -102,15 +103,15 @@ fn compute_previous_power_of_two(n: usize) -> usize { 1 << msb } -impl TermHashMap { - pub(crate) fn new(table_size: usize) -> TermHashMap { +impl ArenaHashMap { + pub fn new(table_size: usize) -> ArenaHashMap { assert!(table_size > 0); let table_size_power_of_2 = compute_previous_power_of_two(table_size); - let memory_arena = MemoryArena::new(); + let memory_arena = MemoryArena::default(); let table: Vec = iter::repeat(KeyValue::default()) .take(table_size_power_of_2) .collect(); - TermHashMap { + ArenaHashMap { table: table.into_boxed_slice(), memory_arena, mask: table_size_power_of_2 - 1, @@ -119,18 +120,22 @@ impl TermHashMap { } } + #[inline] pub fn read(&self, addr: Addr) -> Item { self.memory_arena.read(addr) } + #[inline] fn probe(&self, hash: u32) -> QuadraticProbing { QuadraticProbing::compute(hash as usize, self.mask) } + #[inline] pub fn mem_usage(&self) -> usize { self.table.len() * mem::size_of::() } + #[inline] fn is_saturated(&self) -> bool { self.table.len() < self.occupied.len() * 3 } @@ -153,22 +158,30 @@ impl TermHashMap { } } - fn set_bucket(&mut self, hash: u32, key_value_addr: Addr, bucket: usize) -> UnorderedTermId { + #[inline] + fn set_bucket(&mut self, hash: u32, key_value_addr: Addr, bucket: usize) -> UnorderedId { self.occupied.push(bucket); - let unordered_term_id = self.len as UnorderedTermId; + let unordered_id = self.len as UnorderedId; self.len += 1; self.table[bucket] = KeyValue { key_value_addr, hash, - unordered_term_id, + unordered_id, }; - unordered_term_id + unordered_id } + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + #[inline] pub fn len(&self) -> usize { self.len } + #[inline] pub fn iter(&self) -> Iter<'_> { Iter { inner: self.occupied.iter(), @@ -210,7 +223,7 @@ impl TermHashMap { &mut self, key: &[u8], mut updater: TMutator, - ) -> UnorderedTermId + ) -> UnorderedId where V: Copy + 'static, TMutator: FnMut(Option) -> V, @@ -241,7 +254,7 @@ impl TermHashMap { let v = self.memory_arena.read(val_addr); let new_v = updater(Some(v)); self.memory_arena.write_at(val_addr, new_v); - return kv.unordered_term_id; + return kv.unordered_id; } } } @@ -253,11 +266,11 @@ mod tests { use std::collections::HashMap; - use super::{compute_previous_power_of_two, TermHashMap}; + use super::{compute_previous_power_of_two, ArenaHashMap}; #[test] fn test_hash_map() { - let mut hash_map: TermHashMap = TermHashMap::new(1 << 18); + let mut hash_map: ArenaHashMap = ArenaHashMap::new(1 << 18); hash_map.mutate_or_create(b"abc", |opt_val: Option| { assert_eq!(opt_val, None); 3u32 diff --git a/src/postings/stacker/expull.rs b/stacker/src/expull.rs similarity index 93% rename from src/postings/stacker/expull.rs rename to stacker/src/expull.rs index cbd66ceb8..7ebb59f50 100644 --- a/src/postings/stacker/expull.rs +++ b/stacker/src/expull.rs @@ -2,8 +2,8 @@ use std::mem; use common::serialize_vint_u32; -use super::{Addr, MemoryArena}; -use crate::postings::stacker::memory_arena::{load, store}; +use crate::memory_arena::{load, store}; +use crate::{Addr, MemoryArena}; const MAX_BLOCK_LEN: u32 = 1u32 << 15; const FIRST_BLOCK: usize = 16; @@ -120,15 +120,17 @@ impl<'a> ExpUnrolledLinkedListWriter<'a> { } } -impl ExpUnrolledLinkedList { - pub fn new() -> ExpUnrolledLinkedList { +impl Default for ExpUnrolledLinkedList { + fn default() -> ExpUnrolledLinkedList { ExpUnrolledLinkedList { len: 0u32, tail: Addr::null_pointer(), inlined_data: [0u8; INLINED_BLOCK_LEN as usize], } } +} +impl ExpUnrolledLinkedList { #[inline] pub fn writer<'a>(&'a mut self, arena: &'a mut MemoryArena) -> ExpUnrolledLinkedListWriter<'a> { ExpUnrolledLinkedListWriter { eull: self, arena } @@ -169,8 +171,8 @@ mod tests { #[test] fn test_eull() { - let mut arena = MemoryArena::new(); - let mut stack = ExpUnrolledLinkedList::new(); + let mut arena = MemoryArena::default(); + let mut stack = ExpUnrolledLinkedList::default(); stack.writer(&mut arena).extend_from_slice(&[1u8]); stack.writer(&mut arena).extend_from_slice(&[2u8]); stack.writer(&mut arena).extend_from_slice(&[3u8, 4u8]); @@ -184,8 +186,8 @@ mod tests { #[test] fn test_eull_long() { - let mut arena = MemoryArena::new(); - let mut eull = ExpUnrolledLinkedList::new(); + let mut arena = MemoryArena::default(); + let mut eull = ExpUnrolledLinkedList::default(); let data: Vec = (0..100).collect(); for &el in &data { eull.writer(&mut arena).write_u32_vint(el); @@ -202,9 +204,9 @@ mod tests { #[test] fn test_eull_interlaced() { - let mut eull = MemoryArena::new(); - let mut stack = ExpUnrolledLinkedList::new(); - let mut stack2 = ExpUnrolledLinkedList::new(); + let mut eull = MemoryArena::default(); + let mut stack = ExpUnrolledLinkedList::default(); + let mut stack2 = ExpUnrolledLinkedList::default(); let mut vec1: Vec = vec![]; let mut vec2: Vec = vec![]; @@ -306,9 +308,9 @@ mod bench { #[bench] fn bench_push_stack(bench: &mut Bencher) { bench.iter(|| { - let mut arena = MemoryArena::new(); + let mut arena = MemoryArena::default(); let mut stacks: Vec = - iter::repeat_with(ExpUnrolledLinkedList::new) + iter::repeat_with(ExpUnrolledLinkedList::default) .take(NUM_STACK) .collect(); for s in 0..NUM_STACK { diff --git a/stacker/src/lib.rs b/stacker/src/lib.rs new file mode 100644 index 000000000..add06359a --- /dev/null +++ b/stacker/src/lib.rs @@ -0,0 +1,10 @@ +mod arena_hashmap; +mod expull; +mod memory_arena; + +pub use self::arena_hashmap::{compute_table_size, ArenaHashMap}; +pub use self::expull::ExpUnrolledLinkedList; +pub use self::memory_arena::{Addr, MemoryArena}; + +/// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key. +pub type UnorderedId = u64; diff --git a/src/postings/stacker/memory_arena.rs b/stacker/src/memory_arena.rs similarity index 97% rename from src/postings/stacker/memory_arena.rs rename to stacker/src/memory_arena.rs index 41ec95e53..2155d4a0f 100644 --- a/src/postings/stacker/memory_arena.rs +++ b/stacker/src/memory_arena.rs @@ -86,15 +86,16 @@ pub struct MemoryArena { pages: Vec, } -impl MemoryArena { - /// Creates a new memory arena. - pub fn new() -> MemoryArena { +impl Default for MemoryArena { + fn default() -> MemoryArena { let first_page = Page::new(0); MemoryArena { pages: vec![first_page], } } +} +impl MemoryArena { fn add_page(&mut self) -> &mut Page { let new_page_id = self.pages.len(); self.pages.push(Page::new(new_page_id)); @@ -197,7 +198,7 @@ mod tests { #[test] fn test_arena_allocate_slice() { - let mut arena = MemoryArena::new(); + let mut arena = MemoryArena::default(); let a = b"hello"; let b = b"happy tax payer"; @@ -220,7 +221,7 @@ mod tests { #[test] fn test_store_object() { - let mut arena = MemoryArena::new(); + let mut arena = MemoryArena::default(); let a = MyTest { a: 143, b: 21,