Using hashmap

This commit is contained in:
Paul Masurel
2016-08-12 09:40:28 +09:00
parent e644a371d6
commit 6bfe41e963
2 changed files with 12 additions and 6 deletions

View File

@@ -13,9 +13,10 @@ find solution to "I have a docaddress but the segment does not exist anymore pro
pass over offset from previous block
test untokenized
test field with more than one value
doc values for other types
use a skip list for each block
find a clear way to put the tokenized/untokenized thing upstream

View File

@@ -1,5 +1,5 @@
use DocId;
use std::collections::BTreeMap;
use std::collections::HashMap;
use schema::Term;
use postings::PostingsSerializer;
use std::io;
@@ -62,7 +62,7 @@ pub trait PostingsWriter {
pub struct SpecializedPostingsWriter<Rec: Recorder + 'static> {
postings: Vec<TermPostingsWriter<Rec>>,
term_index: BTreeMap<Term, usize>, // remove btree map
term_index: HashMap<Term, usize>,
}
impl<Rec: Recorder + 'static> SpecializedPostingsWriter<Rec> {
@@ -70,7 +70,7 @@ impl<Rec: Recorder + 'static> SpecializedPostingsWriter<Rec> {
pub fn new() -> SpecializedPostingsWriter<Rec> {
SpecializedPostingsWriter {
postings: Vec::new(),
term_index: BTreeMap::new(),
term_index: HashMap::new(),
}
}
@@ -107,8 +107,13 @@ impl<Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<Rec>
}
fn serialize(&self, serializer: &mut PostingsSerializer) -> io::Result<()> {
for (term, postings_id) in &self.term_index {
let term_postings_writer = &self.postings[postings_id.clone()];
let mut term_offsets: Vec<(Term, usize)> = self.term_index
.iter()
.map(|(k,v)| (k.clone(), *v)) // Get rid of the clone
.collect();
term_offsets.sort();
for (term, postings_id) in term_offsets {
let term_postings_writer = &self.postings[postings_id];
let term_docfreq = term_postings_writer.doc_freq();
try!(serializer.new_term(&term, term_docfreq));
try!(term_postings_writer.serialize(serializer));