commit 920f086e1d
parent 13aaca7e11
Author: Paul Masurel
Date:   2018-02-03 11:46:01 +09:00

13 changed files with 55 additions and 53 deletions

View File

@@ -217,7 +217,7 @@ fn skip<'a, I: Iterator<Item = &'a Facet>>(
 ) -> SkipResult {
     loop {
         match collapse_it.peek() {
-            Some(facet_bytes) => match facet_bytes.encoded_bytes().cmp(&target) {
+            Some(facet_bytes) => match facet_bytes.encoded_bytes().cmp(target) {
                 Ordering::Less => {}
                 Ordering::Greater => {
                     return SkipResult::OverStep;
@@ -274,7 +274,7 @@ impl FacetCollector {
"Tried to add a facet which is a descendant of an already added facet."
);
assert!(
!facet.is_prefix_of(&old_facet),
!facet.is_prefix_of(old_facet),
"Tried to add a facet which is an ancestor of an already added facet."
);
}
@@ -305,7 +305,8 @@ impl FacetCollector {
             let depth = facet_depth(facet_streamer.key());
             if depth <= collapse_depth {
                 continue 'outer;
-            } else if depth == collapse_depth + 1 {
+            }
+            if depth == collapse_depth + 1 {
                 collapsed_id = self.current_collapse_facet_ords.len();
                 self.current_collapse_facet_ords
                     .push(facet_streamer.term_ord());
@@ -428,6 +429,8 @@ pub struct FacetCounts {
 }
 
 impl FacetCounts {
+    #[allow(needless_lifetimes)] //< compiler fails if we remove the lifetime
     pub fn get<'a, T>(&'a self, facet_from: T) -> impl Iterator<Item = (&'a Facet, u64)>
     where
         Facet: From<T>,
@@ -455,7 +458,7 @@ impl FacetCounts {
         let mut heap = BinaryHeap::with_capacity(k);
         let mut it = self.get(facet);
-        for (ref facet, count) in (&mut it).take(k) {
+        for (facet, count) in (&mut it).take(k) {
             heap.push(Hit { count, facet });
         }
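The changes in this file drop borrows that clippy's needless_borrow lint flags: `target` and `old_facet` are already references, and the `ref` in the `for` pattern added one more level of indirection. A minimal sketch of the same fix, with illustrative names (`compare` is not a tantivy function):

    use std::cmp::Ordering;

    // `target` is already `&[u8]`, so `encoded.cmp(&target)` would build a
    // `&&[u8]` that auto-deref papers over; passing it straight through
    // is enough and is what clippy suggests.
    fn compare(encoded: &[u8], target: &[u8]) -> Ordering {
        encoded.cmp(target)
    }

    fn main() {
        assert_eq!(compare(b"/a/b", b"/a/c"), Ordering::Less);
    }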

View File

@@ -10,7 +10,7 @@ mod murmurhash2 {
     #[inline(always)]
     pub fn murmurhash2(key: &[u8]) -> u32 {
         let mut key_ptr: *const u32 = key.as_ptr() as *const u32;
-        let m: u32 = 0x5bd1e995;
+        let m: u32 = 0x5bd1_e995;
         let r = 24;
         let len = key.len() as u32;
@@ -31,18 +31,18 @@ mod murmurhash2 {
         let key_ptr_u8: *const u8 = key_ptr as *const u8;
         match remaining {
             3 => {
-                h ^= unsafe { *key_ptr_u8.wrapping_offset(2) as u32 } << 16;
-                h ^= unsafe { *key_ptr_u8.wrapping_offset(1) as u32 } << 8;
-                h ^= unsafe { *key_ptr_u8 as u32 };
+                h ^= unsafe { u32::from(*key_ptr_u8.wrapping_offset(2)) } << 16;
+                h ^= unsafe { u32::from(*key_ptr_u8.wrapping_offset(1)) } << 8;
+                h ^= unsafe { u32::from(*key_ptr_u8) };
                 h = h.wrapping_mul(m);
             }
             2 => {
-                h ^= unsafe { *key_ptr_u8.wrapping_offset(1) as u32 } << 8;
-                h ^= unsafe { *key_ptr_u8 as u32 };
+                h ^= unsafe { u32::from(*key_ptr_u8.wrapping_offset(1)) } << 8;
+                h ^= unsafe { u32::from(*key_ptr_u8) };
                 h = h.wrapping_mul(m);
             }
             1 => {
-                h ^= unsafe { *key_ptr_u8 as u32 };
+                h ^= unsafe { u32::from(*key_ptr_u8) };
                 h = h.wrapping_mul(m);
             }
             _ => {}
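Two cosmetic lints are addressed here: grouping hex digits with an underscore (clippy's unreadable_literal) and replacing `as u32` casts from `u8` with `u32::from` (cast_lossless). A small sketch, with an illustrative function name:

    // `u32::from(b)` widens and can never truncate, while `b as u32` uses
    // the same syntax as truncating casts; clippy prefers the explicit
    // `From` conversion when it is lossless.
    fn widen(b: u8) -> u32 {
        u32::from(b)
    }

    const M: u32 = 0x5bd1_e995; // digit-grouped, same value as 0x5bd1e995

    fn main() {
        assert_eq!(widen(0xff), 255);
        assert_eq!(M, 0x5bd1e995);
    }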

View File

@@ -23,10 +23,10 @@ use tempdir::TempDir;
 /// Returns None iff the file exists, can be read, but is empty (and hence
 /// cannot be mmapped).
 ///
-fn open_mmap(full_path: &PathBuf) -> result::Result<Option<MmapReadOnly>, OpenReadError> {
-    let file = File::open(&full_path).map_err(|e| {
+fn open_mmap(full_path: &Path) -> result::Result<Option<MmapReadOnly>, OpenReadError> {
+    let file = File::open(full_path).map_err(|e| {
         if e.kind() == io::ErrorKind::NotFound {
-            OpenReadError::FileDoesNotExist(full_path.clone())
+            OpenReadError::FileDoesNotExist(full_path.to_owned())
         } else {
             OpenReadError::IOError(IOError::with_path(full_path.to_owned(), e))
         }
@@ -88,8 +88,8 @@ impl MmapCache {
         }
     }
 
-    fn get_mmap(&mut self, full_path: PathBuf) -> Result<Option<MmapReadOnly>, OpenReadError> {
-        Ok(match self.cache.entry(full_path.clone()) {
+    fn get_mmap(&mut self, full_path: &Path) -> Result<Option<MmapReadOnly>, OpenReadError> {
+        Ok(match self.cache.entry(full_path.to_owned()) {
             HashMapEntry::Occupied(occupied_entry) => {
                 let mmap = occupied_entry.get();
                 self.counters.hit += 1;
@@ -97,7 +97,7 @@ impl MmapCache {
             }
             HashMapEntry::Vacant(vacant_entry) => {
                 self.counters.miss += 1;
-                if let Some(mmap) = open_mmap(&full_path)? {
+                if let Some(mmap) = open_mmap(full_path)? {
                     vacant_entry.insert(mmap.clone());
                     Some(mmap)
                 } else {
@@ -252,7 +252,7 @@ impl Directory for MmapDirectory {
         })?;
         Ok(mmap_cache
-            .get_mmap(full_path)?
+            .get_mmap(&full_path)?
             .map(ReadOnlySource::Mmap)
             .unwrap_or_else(|| ReadOnlySource::Anonymous(SharedVecSlice::empty())))
     }
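Switching these parameters from `&PathBuf` (or an owned `PathBuf`) to `&Path` follows clippy's ptr_arg advice: `&Path` accepts both borrowed `PathBuf`s and bare `Path`s without forcing a clone or a move at the call site. A minimal sketch with an illustrative function:

    use std::path::{Path, PathBuf};

    // Accepting `&Path` lets callers pass `&PathBuf` (via deref coercion)
    // or a `Path` built from a string, with no allocation.
    fn describe(path: &Path) -> String {
        path.display().to_string()
    }

    fn main() {
        let owned: PathBuf = PathBuf::from("/tmp/index");
        println!("{}", describe(&owned));
        println!("{}", describe(Path::new("meta.json")));
    }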

View File

@@ -96,7 +96,7 @@ mod tests {
         add_single_field_doc(&mut fast_field_writers, *FIELD, 14u64);
         add_single_field_doc(&mut fast_field_writers, *FIELD, 2u64);
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }
@@ -132,7 +132,7 @@ mod tests {
         add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u64);
         add_single_field_doc(&mut fast_field_writers, *FIELD, 215u64);
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }
@@ -169,7 +169,7 @@ mod tests {
             add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u64);
         }
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }
@@ -206,7 +206,7 @@ mod tests {
             );
         }
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }
@@ -247,7 +247,7 @@ mod tests {
             fast_field_writers.add_document(&doc);
         }
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }
@@ -288,7 +288,7 @@ mod tests {
         let doc = Document::default();
         fast_field_writers.add_document(&doc);
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }
@@ -324,7 +324,7 @@ mod tests {
             add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
         }
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }
@@ -381,7 +381,7 @@ mod tests {
             add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
         }
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }
@@ -415,7 +415,7 @@ mod tests {
             add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
         }
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }

View File

@@ -149,7 +149,7 @@ impl From<Vec<u64>> for U64FastFieldReader {
             }
         }
         fast_field_writers
-            .serialize(&mut serializer, HashMap::new())
+            .serialize(&mut serializer, &HashMap::new())
             .unwrap();
         serializer.close().unwrap();
     }

View File

@@ -105,7 +105,7 @@ impl FastFieldsWriter {
     pub fn serialize(
         &self,
         serializer: &mut FastFieldSerializer,
-        mapping: HashMap<Field, HashMap<UnorderedTermId, usize>>,
+        mapping: &HashMap<Field, HashMap<UnorderedTermId, usize>>,
     ) -> io::Result<()> {
         for field_writer in &self.single_value_writers {
             field_writer.serialize(serializer)?;
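This signature change explains the long run of test edits above: `serialize` now borrows the mapping instead of consuming it, so callers write `&HashMap::new()` and keep ownership. A sketch of the idea with stand-in types:

    use std::collections::HashMap;

    // Borrowing the mapping means a caller can reuse it across several
    // calls (or pass a temporary), instead of moving it into the first.
    fn serialize_with(mapping: &HashMap<u64, usize>) -> usize {
        mapping.len()
    }

    fn main() {
        let mapping: HashMap<u64, usize> = HashMap::new();
        assert_eq!(serialize_with(&mapping), 0);
        assert_eq!(serialize_with(&mapping), 0); // still owned by the caller
        assert_eq!(serialize_with(&HashMap::new()), 0); // temporaries work too
    }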

View File

@@ -23,7 +23,6 @@ use indexer::SegmentWriter;
 use postings::DocSet;
 use schema::IndexRecordOption;
 use schema::Document;
-use schema::Schema;
 use schema::Term;
 use std::mem;
 use std::mem::swap;
@@ -250,17 +249,17 @@ fn index_documents(
     heap: &mut Heap,
     table_size: usize,
     segment: &Segment,
-    schema: &Schema,
     generation: usize,
     document_iterator: &mut Iterator<Item = AddOperation>,
     segment_updater: &mut SegmentUpdater,
     mut delete_cursor: DeleteCursor,
 ) -> Result<bool> {
     heap.clear();
+    let schema = segment.schema();
     let segment_id = segment.id();
-    let mut segment_writer = SegmentWriter::for_segment(heap, table_size, segment.clone(), schema)?;
+    let mut segment_writer = SegmentWriter::for_segment(heap, table_size, segment.clone(), &schema)?;
     for doc in document_iterator {
-        segment_writer.add_document(doc, schema)?;
+        segment_writer.add_document(doc, &schema)?;
         // There is two possible conditions to close the segment.
         // One is the memory arena dedicated to the segment is
         // getting full.
@@ -368,7 +367,6 @@ impl IndexWriter {
     /// The thread consumes documents from the pipeline.
     ///
     fn add_indexing_worker(&mut self) -> Result<()> {
-        let schema = self.index.schema();
         let document_receiver_clone = self.document_receiver.clone();
         let mut segment_updater = self.segment_updater.clone();
         let (heap_size, table_size) = split_memory(self.heap_size_in_bytes_per_thread);
@@ -409,7 +407,6 @@ impl IndexWriter {
                 &mut heap,
                 table_size,
                 &segment,
-                &schema,
                 generation,
                 &mut document_iterator,
                 &mut segment_updater,
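The `schema` parameter disappears from `index_documents` and the worker: the segment already knows its schema, so deriving it inside the function removes an argument that always had to agree with the segment. A stand-in sketch of the design choice (the types here are placeholders, not tantivy's):

    #[derive(Clone)]
    struct Schema;

    struct Segment {
        schema: Schema,
    }

    impl Segment {
        fn schema(&self) -> Schema {
            self.schema.clone()
        }
    }

    // Before, callers passed `&Schema` alongside `&Segment`; deriving it
    // from the segment drops the redundant parameter.
    fn index_documents(segment: &Segment) {
        let _schema = segment.schema();
    }

    fn main() {
        index_documents(&Segment { schema: Schema });
    }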

View File

@@ -69,7 +69,7 @@ pub fn save_metas(
         segments: segment_metas,
         schema,
         opstamp,
-        payload: payload.clone(),
+        payload,
     };
     let mut buffer = serde_json::to_vec_pretty(&metas)?;
     write!(&mut buffer, "\n")?;

View File

@@ -142,12 +142,13 @@ impl<'a> SegmentWriter<'a> {
             FieldType::HierarchicalFacet => {
                 let facets: Vec<&[u8]> = field_values
                     .iter()
-                    .flat_map(|field_value| match field_value.value() {
-                        &Value::Facet(ref facet) => Some(facet.encoded_bytes()),
-                        _ => {
-                            panic!("Expected hierarchical facet");
-                        }
-                    })
+                    .flat_map(|field_value|
+                        match *field_value.value() {
+                            Value::Facet(ref facet) => Some(facet.encoded_bytes()),
+                            _ => {
+                                panic!("Expected hierarchical facet");
+                            }
+                        })
                     .collect();
                 let mut term = unsafe { Term::with_capacity(100) };
                 term.set_field(field);
@@ -155,8 +156,8 @@ impl<'a> SegmentWriter<'a> {
                     let mut unordered_term_id_opt = None;
                     let fake_str = unsafe { str::from_utf8_unchecked(facet_bytes) };
                     FacetTokenizer
-                        .token_stream(&fake_str)
-                        .process(&mut |ref token| {
+                        .token_stream(fake_str)
+                        .process(&mut |token| {
                             term.set_text(&token.text);
                             let unordered_term_id =
                                 self.multifield_postings.subscribe(doc_id, &term);
@@ -259,8 +260,8 @@ fn write(
     mut serializer: SegmentSerializer,
 ) -> Result<()> {
     let term_ord_map = multifield_postings.serialize(serializer.get_postings_serializer())?;
-    fast_field_writers.serialize(serializer.get_fast_field_serializer(), term_ord_map)?;
-    fieldnorms_writer.serialize(serializer.get_fieldnorms_serializer(), HashMap::new())?;
+    fast_field_writers.serialize(serializer.get_fast_field_serializer(), &term_ord_map)?;
+    fieldnorms_writer.serialize(serializer.get_fieldnorms_serializer(), &HashMap::new())?;
     serializer.close()?;
     Ok(())
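The facet branch now matches on the dereferenced value (`match *field_value.value()`) so its arms can be written without a leading `&`. A compact sketch of the pattern, with a stand-in `Value` enum:

    enum Value {
        Facet(String),
        U64(u64),
    }

    // `match *value` lets the arms say `Value::Facet(ref facet)` rather
    // than `&Value::Facet(ref facet)`.
    fn facet_bytes(value: &Value) -> Option<&[u8]> {
        match *value {
            Value::Facet(ref facet) => Some(facet.as_bytes()),
            _ => None,
        }
    }

    fn main() {
        let v = Value::Facet("/tools/rust".to_string());
        assert_eq!(facet_bytes(&v), Some(&b"/tools/rust"[..]));
        assert!(facet_bytes(&Value::U64(7)).is_none());
    }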

View File

@@ -171,7 +171,7 @@ pub trait PostingsWriter {
     ) -> io::Result<()>;
 
     /// Tokenize a text and subscribe all of its token.
-    fn index_text<'a>(
+    fn index_text(
         &mut self,
         term_index: &mut TermHashMap,
         doc_id: DocId,
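`index_text` declared a lifetime `'a` that nothing in the signature used, which clippy reports as needless_lifetimes; the same fix appears on `StoreWriter::store` further down. A sketch with illustrative types:

    struct Writer {
        buf: Vec<u8>,
    }

    impl Writer {
        // before: fn store<'a>(&mut self, bytes: &[u8]) { ... }
        // The `'a` bound nothing; lifetime elision covers this signature.
        fn store(&mut self, bytes: &[u8]) {
            self.buf.extend_from_slice(bytes);
        }
    }

    fn main() {
        let mut w = Writer { buf: Vec::new() };
        w.store(b"doc");
        assert_eq!(w.buf, b"doc".to_vec());
    }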

View File

@@ -7,8 +7,8 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use std::borrow::Cow;
 use common::BinarySerializable;
 
-const SLASH_BYTE: u8 = '/' as u8;
-const ESCAPE_BYTE: u8 = '\\' as u8;
+const SLASH_BYTE: u8 = b'/';
+const ESCAPE_BYTE: u8 = b'\\';
 
 /// BYTE used as a level separation in the binary
 /// representation of facets.
@@ -63,7 +63,7 @@ impl Facet {
     /// It is conceptually, if one of the steps of this path
     /// contains a `/` or a `\`, it should be escaped
     /// using an anti-slash `/`.
-    pub fn from_text<'a, T>(path: &'a T) -> Facet
+    pub fn from_text<T>(path: &T) -> Facet
     where
         T: ?Sized + AsRef<str>,
     {
@@ -97,6 +97,7 @@ impl Facet {
     }
 
     /// Returns `true` iff other is a subfacet of `self`.
+    #[allow(collapsible_if)]
     pub fn is_prefix_of(&self, other: &Facet) -> bool {
         let self_bytes: &[u8] = self.encoded_bytes();
         let other_bytes: &[u8] = other.encoded_bytes();
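Byte literals replace the `'x' as u8` casts (clippy's char_lit_as_u8 lint). Both forms yield the same constants:

    const SLASH_BYTE: u8 = b'/'; // a byte literal is directly a u8
    const ESCAPE_BYTE: u8 = b'\\';

    fn main() {
        assert_eq!(SLASH_BYTE, '/' as u8); // equal, but the cast round-trips a char
        assert_eq!(ESCAPE_BYTE, 0x5c);
    }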

View File

@@ -46,7 +46,7 @@ impl StoreWriter {
     /// The document id is implicitely the number of times
     /// this method has been called.
     ///
-    pub fn store<'a>(&mut self, stored_document: &Document) -> io::Result<()> {
+    pub fn store(&mut self, stored_document: &Document) -> io::Result<()> {
         self.intermediary_buffer.clear();
         stored_document.serialize(&mut self.intermediary_buffer)?;
         let doc_num_bytes = self.intermediary_buffer.len();

View File

@@ -62,7 +62,7 @@ where
     fn insert<K: AsRef<[u8]>>(&mut self, key_ref: K, value: &TermInfo) -> io::Result<()> {
         let key = key_ref.as_ref();
-        self.insert_key(key.as_ref())?;
+        self.insert_key(key)?;
         self.insert_value(value)?;
         Ok(())
     }
@@ -137,7 +137,7 @@ impl<'a> TermDictionary<'a> for TermDictionaryImpl {
                 return false;
             }
         }
-        return true;
+        true
     }
 
     fn term_ord<K: AsRef<[u8]>>(&self, key: K) -> Option<TermOrdinal> {
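The trailing `return true;` becomes a tail expression (clippy's needless_return); `key.as_ref()` on an already-converted `&[u8]` is likewise dropped. A minimal sketch of the same shape:

    fn all_positive(xs: &[i64]) -> bool {
        for x in xs {
            if *x <= 0 {
                return false; // early exits still use `return`
            }
        }
        true // tail expression: the block's value, no `return` needed
    }

    fn main() {
        assert!(all_positive(&[1, 2, 3]));
        assert!(!all_positive(&[1, -2]));
    }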