NOBUG Code cleaning, cargo clippy

Paul Masurel
2016-09-19 00:22:43 +09:00
parent 89cac9225b
commit f3a24f5b3c
61 changed files with 437 additions and 442 deletions

View File

@@ -44,5 +44,3 @@ gcc = "0.3"
# [profile.release]
# debug = true

View File

@@ -24,7 +24,7 @@ fn main() {
fn create_schema() -> Schema {
// We need to declare a schema
// to create a new index.
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
// TEXT | STORED is some syntactic sugar to describe
// how tantivy should index this field.
@@ -69,7 +69,7 @@ fn run(index_path: &Path) -> tantivy::Result<()> {
let body = schema.get_field("body").unwrap();
let mut old_man_doc = Document::new();
let mut old_man_doc = Document::default();
old_man_doc.add_text(title, "The Old Man and the Sea");
old_man_doc.add_text(body, "He was an old man who fished alone in a skiff in the Gulf Stream and he had gone eighty-four days now without taking a fish.");
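
Many hunks in this commit follow the same pattern, presumably driven by clippy's `new_without_default` lint: a zero-argument `new()` gives way to an explicit `Default` impl, and call sites switch to `T::default()`. A minimal runnable sketch of the pattern, on a hypothetical `Counter` type rather than any tantivy type:

    // Hypothetical type illustrating the `Default` pattern.
    struct Counter {
        count: usize,
    }

    impl Default for Counter {
        fn default() -> Counter {
            Counter { count: 0 }
        }
    }

    fn main() {
        // Call sites use `default()` instead of `new()`.
        let counter = Counter::default();
        assert_eq!(counter.count, 0);
    }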

View File

@@ -18,29 +18,22 @@ pub trait StreamingIterator<'a, T> {
impl<'a, 'b> TokenIter<'b> {
fn consume_token(&'a mut self) -> Option<&'a str> {
loop {
match self.chars.next() {
Some(c) => {
if c.is_alphanumeric() {
append_char_lowercase(c, &mut self.term_buffer);
}
else {
break;
}
},
None => {
break;
}
for c in &mut self.chars {
if c.is_alphanumeric() {
append_char_lowercase(c, &mut self.term_buffer);
}
else {
break;
}
}
return Some(&self.term_buffer);
Some(&self.term_buffer)
}
}
impl<'a, 'b> StreamingIterator<'a, &'a str> for TokenIter<'b> {
#[inline(always)]
#[inline]
fn next(&'a mut self,) -> Option<&'a str> {
self.term_buffer.clear();
// skipping non-letter characters.

View File

@@ -7,11 +7,11 @@ use ScoredDoc;
pub struct DoNothingCollector;
impl Collector for DoNothingCollector {
#[inline(always)]
#[inline]
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
Ok(())
}
#[inline(always)]
#[inline]
fn collect(&mut self, _: ScoredDoc) {}
}
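
The `#[inline(always)]` to `#[inline]` substitution recurs throughout this commit; clippy's `inline_always` lint argues that forcing inlining rarely beats the compiler's own heuristics, while plain `#[inline]` merely makes cross-crate inlining possible. A trivial sketch:

    // `#[inline]` is a hint; the optimizer still decides.
    #[inline]
    fn add(a: u32, b: u32) -> u32 {
        a + b
    }

    fn main() {
        assert_eq!(add(1, 2), 3);
    }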
@@ -29,7 +29,7 @@ impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
}
}
pub fn add<'b, C: Collector>(self, new_collector: &'b mut C) -> ChainedCollector<Self, MutRefCollector<'b, C>> {
pub fn push<C: Collector>(self, new_collector: &mut C) -> ChainedCollector<Self, MutRefCollector<C>> {
ChainedCollector {
left: self,
right: MutRefCollector(new_collector),
@@ -79,11 +79,11 @@ mod tests {
#[test]
fn test_chained_collector() {
let mut top_collector = TopCollector::with_limit(2);
let mut count_collector = CountCollector::new();
let mut count_collector = CountCollector::default();
{
let mut collectors = chain()
.add(&mut top_collector)
.add(&mut count_collector);
.push(&mut top_collector)
.push(&mut count_collector);
collectors.collect(ScoredDoc(0.2, 1));
collectors.collect(ScoredDoc(0.1, 2));
collectors.collect(ScoredDoc(0.5, 3));

View File

@@ -11,13 +11,6 @@ pub struct CountCollector {
}
impl CountCollector {
pub fn new() -> CountCollector {
CountCollector {
count: 0,
}
}
// Returns the count of documents that were
// collected.
pub fn count(&self,) -> usize {
@@ -25,6 +18,14 @@ impl CountCollector {
}
}
impl Default for CountCollector {
fn default() -> CountCollector {
CountCollector {
count: 0,
}
}
}
impl Collector for CountCollector {
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
@@ -47,7 +48,7 @@ mod tests {
#[bench]
fn build_collector(b: &mut Bencher) {
b.iter(|| {
let mut count_collector = CountCollector::new();
let mut count_collector = CountCollector::default();
let docs: Vec<u32> = (0..1_000_000).collect();
for doc in docs {
count_collector.collect(ScoredDoc(1f32, doc));

View File

@@ -29,17 +29,19 @@ pub struct TestCollector {
}
impl TestCollector {
pub fn new() -> TestCollector {
pub fn docs(self,) -> Vec<DocId> {
self.docs
}
}
impl Default for TestCollector {
fn default() -> TestCollector {
TestCollector {
docs: Vec::new(),
offset: 0,
segment_max_doc: 0,
}
}
pub fn docs(self,) -> Vec<DocId> {
self.docs
}
}
impl Collector for TestCollector {
@@ -101,7 +103,7 @@ mod tests {
#[bench]
fn build_collector(b: &mut Bencher) {
b.iter(|| {
let mut count_collector = CountCollector::new();
let mut count_collector = CountCollector::default();
let docs: Vec<u32> = (0..1_000_000).collect();
for doc in docs {
count_collector.collect(ScoredDoc(1f32, doc));

View File

@@ -19,14 +19,14 @@ impl<'a> MultiCollector<'a> {
impl<'a> Collector for MultiCollector<'a> {
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
for collector in self.collectors.iter_mut() {
for collector in &mut self.collectors {
try!(collector.set_segment(segment_local_id, segment));
}
Ok(())
}
fn collect(&mut self, scored_doc: ScoredDoc) {
for collector in self.collectors.iter_mut() {
for collector in &mut self.collectors {
collector.collect(scored_doc);
}
}
@@ -44,7 +44,7 @@ mod tests {
#[test]
fn test_multi_collector() {
let mut top_collector = TopCollector::with_limit(2);
let mut count_collector = CountCollector::new();
let mut count_collector = CountCollector::default();
{
let mut collectors = MultiCollector::from(vec!(&mut top_collector, &mut count_collector));
collectors.collect(ScoredDoc(0.2, 1));

View File

@@ -19,7 +19,7 @@ impl PartialOrd for GlobalScoredDoc {
}
impl Ord for GlobalScoredDoc {
#[inline(always)]
#[inline]
fn cmp(&self, other: &GlobalScoredDoc) -> Ordering {
other.0.partial_cmp(&self.0)
.unwrap_or(
@@ -30,7 +30,7 @@ impl Ord for GlobalScoredDoc {
impl PartialEq for GlobalScoredDoc {
fn eq(&self, other: &GlobalScoredDoc) -> bool {
self.cmp(&other) == Ordering::Equal
self.cmp(other) == Ordering::Equal
}
}
@@ -77,7 +77,7 @@ impl TopCollector {
.collect()
}
#[inline(always)]
#[inline]
pub fn at_capacity(&self, ) -> bool {
self.heap.len() >= self.limit
}

View File

@@ -59,7 +59,7 @@ impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for
impl BinarySerializable for u32 {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
writer.write_u32::<NativeEndian>(self.clone())
writer.write_u32::<NativeEndian>(*self)
.map(|_| 4)
.map_err(convert_byte_order_error)
}
@@ -73,7 +73,7 @@ impl BinarySerializable for u32 {
impl BinarySerializable for u64 {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
writer.write_u64::<NativeEndian>(self.clone())
writer.write_u64::<NativeEndian>(*self)
.map(|_| 8)
.map_err(convert_byte_order_error)
}
@@ -87,7 +87,7 @@ impl BinarySerializable for u64 {
impl BinarySerializable for u8 {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
// TODO error
try!(writer.write_u8(self.clone()).map_err(convert_byte_order_error));
try!(writer.write_u8(*self).map_err(convert_byte_order_error));
Ok(1)
}
fn deserialize(reader: &mut Read) -> io::Result<u8> {
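
These hunks apply clippy's `clone_on_copy` fix: for `Copy` types such as `u32` and `u64`, `self.clone()` is just a verbose copy, and dereferencing expresses the same thing. A minimal sketch, not tied to the `BinarySerializable` trait:

    // For `Copy` types, `*x` is the idiomatic copy; `x.clone()` is noise.
    fn double(x: &u32) -> u32 {
        *x * 2
    }

    fn main() {
        assert_eq!(double(&21), 42);
    }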

View File

@@ -41,11 +41,6 @@ pub struct TimerTree {
}
impl TimerTree {
pub fn new() -> TimerTree {
TimerTree {
timings: Vec::new(),
}
}
pub fn total_time(&self,) -> i64 {
self.timings.last().unwrap().duration
@@ -61,6 +56,13 @@ impl TimerTree {
}
}
impl Default for TimerTree {
fn default() -> TimerTree {
TimerTree {
timings: Vec::new(),
}
}
}
#[cfg(test)]
@@ -70,7 +72,7 @@ mod tests {
#[test]
fn test_timer() {
let mut timer_tree = TimerTree::new();
let mut timer_tree = TimerTree::default();
{
let mut a = timer_tree.open("a");
{

View File

@@ -11,13 +11,13 @@ pub struct VInt(pub u64);
impl VInt {
pub fn val(&self,) -> u64 {
self.0.clone()
self.0
}
}
impl BinarySerializable for VInt {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let mut remaining = self.0.clone();
let mut remaining = self.0;
let mut written: usize = 0;
let mut buffer = [0u8; 10];
loop {

View File

@@ -22,7 +22,7 @@ impl CompositeEncoder {
let mut offset = 0u32;
for i in 0..num_blocks {
let vals_slice = &vals[i * NUM_DOCS_PER_BLOCK .. (i + 1) * NUM_DOCS_PER_BLOCK];
let block_compressed = self.block_encoder.compress_block_sorted(&vals_slice, offset);
let block_compressed = self.block_encoder.compress_block_sorted(vals_slice, offset);
offset = vals_slice[NUM_DOCS_PER_BLOCK - 1];
self.output.extend_from_slice(block_compressed);
}
@@ -36,7 +36,7 @@ impl CompositeEncoder {
let num_blocks = vals.len() / NUM_DOCS_PER_BLOCK;
for i in 0..num_blocks {
let vals_slice = &vals[i * NUM_DOCS_PER_BLOCK .. (i + 1) * NUM_DOCS_PER_BLOCK];
let block_compressed = self.block_encoder.compress_block_unsorted(&vals_slice);
let block_compressed = self.block_encoder.compress_block_unsorted(vals_slice);
self.output.extend_from_slice(block_compressed);
}
let vint_compressed = self.block_encoder.compress_vint_unsorted(&vals[num_blocks * NUM_DOCS_PER_BLOCK..]);

View File

@@ -67,7 +67,7 @@ impl SIMDBlockEncoder {
}
}
}
return &self.output[..byte_written];
&self.output[..byte_written]
}
pub fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] {
@@ -88,7 +88,7 @@ impl SIMDBlockEncoder {
}
}
}
return &self.output[..byte_written];
&self.output[..byte_written]
}
}
@@ -170,12 +170,12 @@ impl SIMDBlockDecoder {
&compressed_data[read_byte..]
}
#[inline(always)]
#[inline]
pub fn output_array(&self,) -> &[u32] {
&self.output[..self.output_len]
}
#[inline(always)]
#[inline]
pub fn output(&self, idx: usize) -> u32 {
self.output[idx]
}

View File

@@ -131,7 +131,7 @@ impl Index {
docstamp: u64) -> Result<()> {
{
let mut meta_write = try!(self.metas.write());
meta_write.segments.extend(segment_ids);
meta_write.segments.extend_from_slice(segment_ids);
meta_write.docstamp = docstamp;
}
try!(self.save_metas());
@@ -167,7 +167,7 @@ impl Index {
}
pub fn segment(&self, segment_id: SegmentId) -> Segment {
fn segment(&self, segment_id: SegmentId) -> Segment {
Segment::new(self.clone(), segment_id)
}
@@ -193,7 +193,7 @@ impl Index {
}
pub fn new_segment(&self,) -> Segment {
self.segment(SegmentId::new())
self.segment(SegmentId::generate_random())
}
pub fn save_metas(&mut self,) -> Result<()> {

View File

@@ -8,7 +8,7 @@ use std::path::PathBuf;
pub struct SegmentId(Uuid);
impl SegmentId {
pub fn new() -> SegmentId {
pub fn generate_random() -> SegmentId {
SegmentId(Uuid::new_v4())
}

View File

@@ -53,11 +53,11 @@ impl SegmentReader {
pub fn get_fast_field_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
let field_entry = self.schema.get_field_entry(field);
match field_entry.field_type() {
&FieldType::Str(_) => {
match *field_entry.field_type() {
FieldType::Str(_) => {
Err(io::Error::new(io::ErrorKind::Other, "fast fields are not yet supported for text fields."))
},
&FieldType::U32(_) => {
FieldType::U32(_) => {
// TODO check that the schema allows that
//Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields."))
self.fast_fields_reader.get_field(field)
@@ -106,8 +106,8 @@ impl SegmentReader {
let fieldnorms_reader = try!(U32FastFieldsReader::open(fieldnorms_data));
let positions_data = segment
.open_read(SegmentComponent::POSITIONS)
.unwrap_or(ReadOnlySource::empty());
.open_read(SegmentComponent::POSITIONS)
.unwrap_or_else(|_| ReadOnlySource::empty());
let schema = segment.schema();
Ok(SegmentReader {
@@ -145,19 +145,19 @@ impl SegmentReader {
let term_info = get!(self.get_term_info(&term));
let offset = term_info.postings_offset as usize;
let postings_data = &self.postings_data[offset..];
let freq_handler = match field_entry.field_type() {
&FieldType::Str(ref options) => {
let freq_handler = match *field_entry.field_type() {
FieldType::Str(ref options) => {
let indexing_options = options.get_indexing_options();
match option {
SegmentPostingsOption::NoFreq => {
FreqHandler::new()
FreqHandler::new_without_freq()
}
SegmentPostingsOption::Freq => {
if indexing_options.is_termfreq_enabled() {
FreqHandler::new_with_freq()
}
else {
FreqHandler::new()
FreqHandler::new_without_freq()
}
}
SegmentPostingsOption::FreqAndPositions => {
@@ -170,34 +170,34 @@ impl SegmentReader {
FreqHandler::new_with_freq()
}
else {
FreqHandler::new()
FreqHandler::new_without_freq()
}
}
}
}
_ => {
FreqHandler::new()
FreqHandler::new_without_freq()
}
};
Some(SegmentPostings::from_data(term_info.doc_freq, &postings_data, freq_handler))
Some(SegmentPostings::from_data(term_info.doc_freq, postings_data, freq_handler))
}
pub fn read_postings_all_info(&self, term: &Term) -> SegmentPostings {
let field_entry = self.schema.get_field_entry(term.get_field());
let segment_posting_option = match field_entry.field_type() {
&FieldType::Str(ref text_options) => {
let segment_posting_option = match *field_entry.field_type() {
FieldType::Str(ref text_options) => {
match text_options.get_indexing_options() {
TextIndexingOptions::TokenizedWithFreq => SegmentPostingsOption::Freq,
TextIndexingOptions::TokenizedWithFreqAndPosition => SegmentPostingsOption::FreqAndPositions,
_ => SegmentPostingsOption::NoFreq,
}
}
&FieldType::U32(_) => SegmentPostingsOption::NoFreq
FieldType::U32(_) => SegmentPostingsOption::NoFreq
};
self.read_postings(term, segment_posting_option).expect("Read postings all info should not return None")
}
pub fn get_term_info<'a>(&'a self, term: &Term) -> Option<TermInfo> {
pub fn get_term_info(&self, term: &Term) -> Option<TermInfo> {
self.term_infos.get(term.as_slice())
}
}

View File

@@ -1,3 +1,5 @@
#![allow(should_implement_trait)]
use std::io;
use std::io::Seek;
use std::io::Write;
@@ -79,9 +81,10 @@ impl<'a, V: 'static + BinarySerializable> FstKeyIter<'a, V> {
}
}
impl<V: BinarySerializable> FstMap<V> {
pub fn keys<'a>(&'a self,) -> FstKeyIter<'a, V> {
pub fn keys(&self,) -> FstKeyIter<V> {
FstKeyIter {
streamer: self.fst_index.stream(),
__phantom__: PhantomData,

View File

@@ -106,16 +106,14 @@ impl<'a, T: BinarySerializable> SkipList<'a, T> {
let mut next_layer_skip: Option<(DocId, u32)> = None;
for skip_layer_id in 0..self.skip_layers.len() {
let mut skip_layer: &mut Layer<'a, u32> = &mut self.skip_layers[skip_layer_id];
match next_layer_skip {
Some((_, offset)) => { skip_layer.seek_offset(offset as usize); },
None => {}
};
next_layer_skip = skip_layer.seek(doc_id);
if let Some((_, offset)) = next_layer_skip {
skip_layer.seek_offset(offset as usize);
}
next_layer_skip = skip_layer.seek(doc_id);
}
if let Some((_, offset)) = next_layer_skip {
self.data_layer.seek_offset(offset as usize);
}
match next_layer_skip {
Some((_, offset)) => { self.data_layer.seek_offset(offset as usize); },
None => {}
};
self.data_layer.seek(doc_id)
}
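
This is clippy's `single_match` rewrite: a `match` on an `Option` whose `None` arm is empty reads better as `if let`. A runnable sketch with hypothetical names:

    // `if let` replaces a `match` whose `None` arm did nothing.
    fn apply_offset(next: Option<(u32, u32)>, pos: &mut usize) {
        if let Some((_, offset)) = next {
            *pos = offset as usize;
        }
    }

    fn main() {
        let mut pos = 0;
        apply_offset(Some((1, 8)), &mut pos);
        assert_eq!(pos, 8);
    }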

View File

@@ -37,14 +37,14 @@ impl<T: BinarySerializable> LayerBuilder<T> {
self.remaining -= 1;
self.len += 1;
let offset = self.written_size() as u32; // TODO not sure if we want after or here
let res;
if self.remaining == 0 {
self.remaining = self.period;
res = Some((doc_id, offset));
}
else {
res = None;
}
let res =
if self.remaining == 0 {
self.remaining = self.period;
Some((doc_id, offset))
}
else {
None
};
try!(doc_id.serialize(&mut self.buffer));
try!(value.serialize(&mut self.buffer));
Ok(res)
@@ -69,7 +69,7 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
}
}
fn get_skip_layer<'a>(&'a mut self, layer_id: usize) -> &mut LayerBuilder<u32> {
fn get_skip_layer(&mut self, layer_id: usize) -> &mut LayerBuilder<u32> {
if layer_id == self.skip_layers.len() {
let layer_builder = LayerBuilder::with_period(self.period);
self.skip_layers.push(layer_builder);
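
The `LayerBuilder` change above is clippy's `useless_let_if_seq` pattern: instead of declaring `res` and assigning it in each branch, the `if`/`else` is used directly as an expression. A self-contained sketch with hypothetical names:

    // The branch is an expression; no separate `res` assignment needed.
    fn tick(remaining: &mut u32, period: u32, doc_id: u32) -> Option<u32> {
        *remaining -= 1;
        if *remaining == 0 {
            *remaining = period;
            Some(doc_id)
        } else {
            None
        }
    }

    fn main() {
        let mut remaining = 1;
        assert_eq!(tick(&mut remaining, 4, 7), Some(7));
        assert_eq!(remaining, 4);
    }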

View File

@@ -43,7 +43,7 @@ impl ExpUnrolledLinkedList {
// and we need to add 1u32 to store the pointer
// to the next element.
let new_block_size: usize = (self.len as usize + 1) * mem::size_of::<u32>();
let new_block_addr: u32 = heap.allocate(new_block_size);
let new_block_addr: u32 = heap.allocate_space(new_block_size);
heap.set(self.end, &new_block_addr);
self.end = new_block_addr;
}
@@ -130,7 +130,7 @@ mod tests {
#[test]
fn test_stack() {
let heap = Heap::with_capacity(1_000_000);
let (addr, stack) = heap.new::<ExpUnrolledLinkedList>();
let (addr, stack) = heap.allocate_object::<ExpUnrolledLinkedList>();
stack.push(1u32, &heap);
stack.push(2u32, &heap);
stack.push(4u32, &heap);
@@ -167,7 +167,7 @@ mod tests {
bench.iter(|| {
let mut stacks = Vec::with_capacity(100);
for _ in 0..NUM_STACK {
let (_, stack) = heap.new::<ExpUnrolledLinkedList>();
let (_, stack) = heap.allocate_object::<ExpUnrolledLinkedList>();
stacks.push(stack);
}
for s in 0..NUM_STACK {

View File

@@ -61,7 +61,7 @@ impl<'a, V> HashMap<'a, V> where V: From<u32> {
}
}
#[inline(always)]
#[inline]
fn bucket(&self, key: &[u8]) -> usize {
let hash: u64 = djb2(key);
(hash as usize) & self.mask
@@ -81,7 +81,7 @@ impl<'a, V> HashMap<'a, V> where V: From<u32> {
}
pub fn iter<'b: 'a>(&'b self,) -> impl Iterator<Item=(&'a [u8], (u32, &'a V))> + 'b {
let heap: &'a Heap = &self.heap;
let heap: &'a Heap = self.heap;
let table: &'b [KeyValue] = &self.table;
self.occupied
.iter()
@@ -96,7 +96,7 @@ impl<'a, V> HashMap<'a, V> where V: From<u32> {
}
pub fn values_mut<'b: 'a>(&'b self,) -> impl Iterator<Item=&'a mut V> + 'b {
let heap: &'a Heap = &self.heap;
let heap: &'a Heap = self.heap;
let table: &'b [KeyValue] = &self.table;
self.occupied
.iter()
@@ -112,7 +112,7 @@ impl<'a, V> HashMap<'a, V> where V: From<u32> {
self.heap.get_mut_ref(addr)
}
Entry::Vacant(bucket) => {
let (addr, val): (u32, &mut V) = self.heap.new();
let (addr, val): (u32, &mut V) = self.heap.allocate_object();
self.set_bucket(key.as_ref(), bucket, addr);
val
}

View File

@@ -44,12 +44,12 @@ impl Heap {
self.inner().num_free_bytes()
}
pub fn allocate(&self, num_bytes: usize) -> u32 {
self.inner().allocate(num_bytes)
pub fn allocate_space(&self, num_bytes: usize) -> u32 {
self.inner().allocate_space(num_bytes)
}
pub fn new<V: From<u32>>(&self,) -> (u32, &mut V) {
let addr = self.inner().allocate(mem::size_of::<V>());
pub fn allocate_object<V: From<u32>>(&self,) -> (u32, &mut V) {
let addr = self.inner().allocate_space(mem::size_of::<V>());
let v: V = V::from(addr);
self.inner().set(addr, &v);
(addr, self.inner().get_mut_ref(addr))
@@ -115,7 +115,7 @@ impl InnerHeap {
}
}
pub fn allocate(&mut self, num_bytes: usize) -> u32 {
pub fn allocate_space(&mut self, num_bytes: usize) -> u32 {
let addr = self.used;
self.used += num_bytes as u32;
if self.used <= self.buffer_len {
@@ -126,7 +126,7 @@ impl InnerHeap {
warn!("Exceeded heap size. The margin was apparently unsufficient. The segment will be committed right after indexing this very last document.");
self.next_heap = Some(Box::new(InnerHeap::with_capacity(self.buffer_len as usize)));
}
self.next_heap.as_mut().unwrap().allocate(num_bytes) + self.buffer_len
self.next_heap.as_mut().unwrap().allocate_space(num_bytes) + self.buffer_len
}
@@ -151,7 +151,7 @@ impl InnerHeap {
}
fn allocate_and_set(&mut self, data: &[u8]) -> BytesRef {
let start = self.allocate(data.len());
let start = self.allocate_space(data.len());
let stop = start + data.len() as u32;
self.get_mut_slice(start, stop).clone_from_slice(data);
BytesRef {

View File

@@ -11,9 +11,9 @@ use std::marker::Sync;
///
/// There are currently two implementations of `Directory`
///
/// - The [MMapDirectory](struct.MmapDirectory.html), this
/// - The [`MMapDirectory`](struct.MmapDirectory.html), this
/// should be your default choice.
/// - The [RAMDirectory](struct.RAMDirectory.html), which
/// - The [`RAMDirectory`](struct.RAMDirectory.html), which
/// should be used mostly for tests.
///
pub trait Directory: fmt::Debug + Send + Sync + 'static {

View File

@@ -23,7 +23,7 @@ use std::fs;
use directory::shared_vec_slice::SharedVecSlice;
/// Directory storing data in files, read via MMap.
/// Directory storing data in files, read via mmap.
///
/// The Mmap objects are cached to limit the
/// system calls.
@@ -98,7 +98,7 @@ impl MmapDirectory {
}
/// This Write wraps a File, but has the specificity of
/// calling sync_all on flush.
/// calling `sync_all` on flush.
struct SafeFileWriter {
writer: BufWriter<File>,
}

View File

@@ -17,7 +17,7 @@ impl<T: Seek + Write> SeekableWrite for T {}
/// Write object for Directory.
///
/// WritePtr are required to implement both Write
/// `WritePtr` are required to implement both Write
/// and Seek.
pub type WritePtr = Box<SeekableWrite>;

View File

@@ -78,13 +78,13 @@ impl InnerDirectory {
InnerDirectory(Arc::new(RwLock::new(HashMap::new())))
}
fn write(&self, path: PathBuf, data: &Vec<u8>) -> io::Result<bool> {
fn write(&self, path: PathBuf, data: &[u8]) -> io::Result<bool> {
let mut map = try!(
self.0
.write()
.map_err(|_| make_io_err(format!("Failed to lock the directory, when trying to write {:?}", path)))
);
let prev_value = map.insert(path, Arc::new(data.clone()));
let prev_value = map.insert(path, Arc::new(Vec::from(data)));
Ok(prev_value.is_some())
}
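
Changing `&Vec<u8>` to `&[u8]` is clippy's `ptr_arg` lint: a `&Vec` parameter forces callers to materialize a `Vec`, while a slice accepts any contiguous bytes and the callee loses nothing. Sketch:

    // `&[u8]` accepts both vectors and array slices.
    fn checksum(data: &[u8]) -> u32 {
        data.iter().map(|&b| b as u32).sum()
    }

    fn main() {
        let owned: Vec<u8> = vec![1, 2, 3];
        assert_eq!(checksum(&owned), 6);
        assert_eq!(checksum(&[1, 2, 3]), 6);
    }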

View File

@@ -32,6 +32,10 @@ impl ReadOnlySource {
pub fn len(&self,) -> usize {
self.as_slice().len()
}
pub fn is_empty(&self,) -> bool {
self.len() == 0
}
/// Creates an empty ReadOnlySource
pub fn empty() -> ReadOnlySource {
@@ -52,7 +56,7 @@ impl ReadOnlySource {
/// Creates a cursor over the data.
pub fn cursor<'a>(&'a self) -> Cursor<&'a [u8]> {
pub fn cursor(&self) -> Cursor<&[u8]> {
Cursor::new(&*self)
}
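
The added `is_empty` answers clippy's `len_without_is_empty` lint: a type exposing `len` should also expose `is_empty`, conventionally defined as `len() == 0`. A minimal sketch on a hypothetical `Buffer` type:

    struct Buffer {
        data: Vec<u8>,
    }

    impl Buffer {
        fn len(&self) -> usize {
            self.data.len()
        }

        // By convention, `is_empty` is exactly `len() == 0`.
        fn is_empty(&self) -> bool {
            self.len() == 0
        }
    }

    fn main() {
        let buf = Buffer { data: Vec::new() };
        assert!(buf.is_empty());
    }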

View File

@@ -1,3 +1,5 @@
#![allow(enum_variant_names)]
use std::io;
use std::result;
use std::path::PathBuf;

View File

@@ -15,7 +15,7 @@ fn count_leading_zeros(mut val: u32) -> u8 {
val <<= 1;
result += 1;
}
return result;
result
}
@@ -44,7 +44,7 @@ mod tests {
lazy_static! {
static ref SCHEMA: Schema = {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
schema_builder.add_u32_field("field", FAST);
schema_builder.build()
};
@@ -65,7 +65,7 @@ mod tests {
}
fn add_single_field_doc(fast_field_writers: &mut U32FastFieldsWriter, field: Field, value: u32) {
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_u32(field, value);
fast_field_writers.add_document(&doc);
}

View File

@@ -58,7 +58,8 @@ impl U32FastFieldReader {
let bit_shift = (doc * self.num_bits) - addr * 8; //doc - long_addr * self.num_in_pack;
let val_unshifted_unmasked: u64 = unsafe { * (self.data_ptr.offset(addr as isize) as *const u64) };
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32;
return self.min_val + (val_shifted & self.mask);
self.min_val + (val_shifted & self.mask)
}
}
@@ -79,13 +80,13 @@ impl U32FastFieldsReader {
}
let mut end_offsets: Vec<u32> = field_offsets
.iter()
.map(|&(_, offset)| offset.clone())
.map(|&(_, offset)| offset)
.collect();
end_offsets.push(header_offset);
let mut field_offsets_map: HashMap<Field, (u32, u32)> = HashMap::new();
for (field_start_offsets, stop_offset) in field_offsets.iter().zip(end_offsets.iter().skip(1)) {
let (field, start_offset) = field_start_offsets.clone();
field_offsets_map.insert(field.clone(), (start_offset.clone(), stop_offset.clone()));
let (field, start_offset) = *field_start_offsets;
field_offsets_map.insert(field, (start_offset, *stop_offset));
}
Ok(U32FastFieldsReader {
field_offsets: field_offsets_map,

View File

@@ -30,14 +30,13 @@ impl U32FastFieldsWriter {
}
pub fn get_field_writer(&mut self, field: Field) -> Option<&mut U32FastFieldWriter> {
self.field_writers
self.field_writers
.iter_mut()
.filter(|field_writer| field_writer.field == field)
.next()
.find(|field_writer| field_writer.field == field)
}
pub fn add_document(&mut self, doc: &Document) {
for field_writer in self.field_writers.iter_mut() {
for field_writer in &mut self.field_writers {
field_writer.add_document(doc);
}
}
@@ -69,7 +68,7 @@ pub struct U32FastFieldWriter {
impl U32FastFieldWriter {
pub fn new(field: Field) -> U32FastFieldWriter {
U32FastFieldWriter {
field: field.clone(),
field: field,
vals: Vec::new(),
}
}
@@ -94,13 +93,13 @@ impl U32FastFieldWriter {
match doc.get_first(self.field) {
Some(v) => {
match *v {
Value::U32(ref val) => { return *val; }
Value::U32(ref val) => { *val }
_ => { panic!("Expected a u32field, got {:?} ", v) }
}
},
None => {
// TODO make default value configurable
return 0u32;
0u32
}
}
}

View File

@@ -29,14 +29,21 @@ pub const HEAP_SIZE_LIMIT: u32 = MARGIN_IN_BYTES * 3u32;
// `add_document` will block if the number of docs waiting in the queue to be indexed reaches PIPELINE_MAX_SIZE_IN_DOCS
const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;
type DocumentSender = chan::Sender<Document>;
type DocumentReceiver = chan::Receiver<Document>;
type NewSegmentSender = chan::Sender<Result<(SegmentId, usize)>>;
type NewSegmentReceiver = chan::Receiver<Result<(SegmentId, usize)>>;
pub struct IndexWriter {
index: Index,
heap_size_in_bytes_per_thread: usize,
workers_join_handle: Vec<JoinHandle<()>>,
segment_ready_sender: chan::Sender<Result<(SegmentId, usize)>>,
segment_ready_receiver: chan::Receiver<Result<(SegmentId, usize)>>,
document_receiver: chan::Receiver<Document>,
document_sender: chan::Sender<Document>,
segment_ready_sender: NewSegmentSender,
segment_ready_receiver: NewSegmentReceiver,
document_receiver: DocumentReceiver,
document_sender: DocumentSender,
num_threads: usize,
docstamp: u64,
}
@@ -109,8 +116,8 @@ impl IndexWriter {
if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize {
panic!(format!("The heap size per thread needs to be at least {}.", HEAP_SIZE_LIMIT));
}
let (document_sender, document_receiver): (chan::Sender<Document>, chan::Receiver<Document>) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
let (segment_ready_sender, segment_ready_receiver): (chan::Sender<Result<(SegmentId, usize)>>, chan::Receiver<Result<(SegmentId, usize)>>) = chan::async();
let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
let (segment_ready_sender, segment_ready_receiver): (NewSegmentSender, NewSegmentReceiver) = chan::async();
let mut index_writer = IndexWriter {
heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread,
index: index.clone(),
@@ -133,7 +140,7 @@ impl IndexWriter {
Ok(())
}
pub fn merge(&mut self, segments: &Vec<Segment>) -> Result<()> {
pub fn merge(&mut self, segments: &[Segment]) -> Result<()> {
let schema = self.index.schema();
let merger = try!(IndexMerger::open(schema, segments));
let mut merged_segment = self.index.new_segment();
@@ -152,9 +159,9 @@ impl IndexWriter {
/// when no documents are remaining.
///
/// Returns the former segment_ready channel.
fn recreate_channels(&mut self,) -> (chan::Receiver<Document>, chan::Receiver<Result<(SegmentId, usize)>>) {
let (mut document_sender, mut document_receiver): (chan::Sender<Document>, chan::Receiver<Document>) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
let (mut segment_ready_sender, mut segment_ready_receiver): (chan::Sender<Result<(SegmentId, usize)>>, chan::Receiver<Result<(SegmentId, usize)>>) = chan::async();
fn recreate_channels(&mut self,) -> (DocumentReceiver, chan::Receiver<Result<(SegmentId, usize)>>) {
let (mut document_sender, mut document_receiver): (DocumentSender, DocumentReceiver) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
let (mut segment_ready_sender, mut segment_ready_receiver): (NewSegmentSender, NewSegmentReceiver) = chan::async();
swap(&mut self.document_sender, &mut document_sender);
swap(&mut self.document_receiver, &mut document_receiver);
swap(&mut self.segment_ready_sender, &mut segment_ready_sender);
@@ -282,7 +289,7 @@ mod tests {
#[test]
fn test_commit_and_rollback() {
let mut schema_builder = schema::SchemaBuilder::new();
let mut schema_builder = schema::SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
@@ -297,7 +304,7 @@ mod tests {
// writing the segment
let mut index_writer = index.writer_with_num_threads(3, 40_000_000).unwrap();
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a");
index_writer.add_document(doc).unwrap();
}
@@ -305,12 +312,12 @@ mod tests {
assert_eq!(num_docs_containing("a"), 0);
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "b");
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "c");
index_writer.add_document(doc).unwrap();
}
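
The new `DocumentSender`/`NewSegmentSender` aliases address clippy's `type_complexity` lint: naming the channel types once keeps the struct fields and the channel-creation call sites readable. A sketch of the same idea using `std::sync::mpsc` (tantivy itself uses the `chan` crate here):

    use std::sync::mpsc;

    // Aliases name the channel endpoints once.
    type JobSender = mpsc::Sender<String>;
    type JobReceiver = mpsc::Receiver<String>;

    fn make_channel() -> (JobSender, JobReceiver) {
        mpsc::channel()
    }

    fn main() {
        let (sender, receiver) = make_channel();
        sender.send("doc".to_string()).unwrap();
        assert_eq!(receiver.recv().unwrap(), "doc");
    }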

View File

@@ -26,7 +26,7 @@ struct PostingsMerger<'a> {
doc_offsets: Vec<DocId>,
heap: BinaryHeap<HeapItem>,
term_streams: Vec<FstKeyIter<'a, TermInfo>>,
readers: &'a Vec<SegmentReader>,
readers: &'a [SegmentReader],
}
#[derive(PartialEq, Eq, Debug)]
@@ -43,12 +43,12 @@ impl PartialOrd for HeapItem {
impl Ord for HeapItem {
fn cmp(&self, other: &HeapItem) -> Ordering {
return (&other.term, &other.segment_ord).cmp(&(&self.term, &self.segment_ord))
(&other.term, &other.segment_ord).cmp(&(&self.term, &self.segment_ord))
}
}
impl<'a> PostingsMerger<'a> {
fn new(readers: &'a Vec<SegmentReader>) -> PostingsMerger<'a> {
fn new(readers: &'a [SegmentReader]) -> PostingsMerger<'a> {
let mut doc_offsets: Vec<DocId> = Vec::new();
let mut max_doc = 0;
for reader in readers {
@@ -74,15 +74,12 @@ impl<'a> PostingsMerger<'a> {
// pushes the term_reader associated with the given segment ordinal
// into the heap.
fn push_next_segment_el(&mut self, segment_ord: usize) {
match self.term_streams[segment_ord].next() {
Some(term) => {
let it = HeapItem {
term: Term::from(term),
segment_ord: segment_ord,
};
self.heap.push(it);
}
None => {}
if let Some(term) = self.term_streams[segment_ord].next() {
let it = HeapItem {
term: Term::from(term),
segment_ord: segment_ord,
};
self.heap.push(it);
}
}
@@ -100,6 +97,12 @@ impl<'a> PostingsMerger<'a> {
self.push_next_segment_el(heap_item.segment_ord);
}
}
impl<'a> Iterator for PostingsMerger<'a> {
type Item = (Term, ChainedPostings<'a>);
fn next(&mut self,) -> Option<(Term, ChainedPostings<'a>)> {
// TODO remove the Vec<u8> allocations
match self.heap.pop() {
@@ -122,6 +125,7 @@ impl<'a> PostingsMerger<'a> {
}
}
pub struct IndexMerger {
schema: Schema,
readers: Vec<SegmentReader>,
@@ -145,20 +149,18 @@ impl DeltaPositionComputer {
self.buffer.resize(positions.len(), 0u32);
}
let mut last_pos = 0u32;
let num_positions = positions.len();
for i in 0..num_positions {
let position = positions[i];
for (i, position) in positions.iter().cloned().enumerate() {
self.buffer[i] = position - last_pos;
last_pos = position;
}
&self.buffer[..num_positions]
&self.buffer[..positions.len()]
}
}
impl IndexMerger {
pub fn open(schema: Schema, segments: &Vec<Segment>) -> Result<IndexMerger> {
pub fn open(schema: Schema, segments: &[Segment]) -> Result<IndexMerger> {
let mut readers = Vec::new();
let mut max_doc = 0;
for segment in segments {
@@ -232,26 +234,21 @@ impl IndexMerger {
}
fn write_postings(&self, postings_serializer: &mut PostingsSerializer) -> Result<()> {
let mut postings_merger = PostingsMerger::new(&self.readers);
let postings_merger = PostingsMerger::new(&self.readers);
let mut delta_position_computer = DeltaPositionComputer::new();
loop {
match postings_merger.next() {
Some((term, mut merged_doc_ids)) => {
try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId));
while merged_doc_ids.advance() {
let delta_positions: &[u32] = delta_position_computer.compute_delta_positions(merged_doc_ids.positions());
try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), delta_positions));
}
try!(postings_serializer.close_term());
}
None => { break; }
for (term, mut merged_doc_ids) in postings_merger {
try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId));
while merged_doc_ids.advance() {
let delta_positions: &[u32] = delta_position_computer.compute_delta_positions(merged_doc_ids.positions());
try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), delta_positions));
}
try!(postings_serializer.close_term());
}
Ok(())
}
fn write_storable_fields(&self, store_writer: &mut StoreWriter) -> Result<()> {
for reader in self.readers.iter() {
for reader in &self.readers {
let store_reader = reader.get_store_reader();
try!(store_writer.stack_reader(store_reader));
}
@@ -284,10 +281,10 @@ mod tests {
#[test]
fn test_index_merger() {
let mut schema_builder = schema::SchemaBuilder::new();
let text_fieldtype = schema::TextOptions::new().set_indexing_options(TextIndexingOptions::TokenizedWithFreq).set_stored();
let mut schema_builder = schema::SchemaBuilder::default();
let text_fieldtype = schema::TextOptions::default().set_indexing_options(TextIndexingOptions::TokenizedWithFreq).set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let score_fieldtype = schema::U32Options::new().set_fast();
let score_fieldtype = schema::U32Options::default().set_fast();
let score_field = schema_builder.add_u32_field("score", score_fieldtype);
let index = Index::create_in_ram(schema_builder.build());
@@ -296,19 +293,19 @@ mod tests {
{
// writing the segment
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "af b");
doc.add_u32(score_field, 3);
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c");
doc.add_u32(score_field, 5);
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c d");
doc.add_u32(score_field, 7);
index_writer.add_document(doc).unwrap();
@@ -319,13 +316,13 @@ mod tests {
{
// writing the segment
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "af b");
doc.add_u32(score_field, 11);
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c g");
doc.add_u32(score_field, 13);
index_writer.add_document(doc).unwrap();
@@ -341,7 +338,7 @@ mod tests {
{
let searcher = index.searcher();
let get_doc_ids = |terms: Vec<Term>| {
let mut collector = TestCollector::new();
let mut collector = TestCollector::default();
let query = MultiTermQuery::from(terms);
assert!(searcher.search(&query, &mut collector).is_ok());
collector.docs()

View File

@@ -40,8 +40,8 @@ fn create_fieldnorms_writer(schema: &Schema) -> U32FastFieldsWriter {
}
fn posting_from_field_entry<'a>(field_entry: &FieldEntry, heap: &'a Heap) -> Box<PostingsWriter + 'a> {
match field_entry.field_type() {
&FieldType::Str(ref text_options) => {
match *field_entry.field_type() {
FieldType::Str(ref text_options) => {
match text_options.get_indexing_options() {
TextIndexingOptions::TokenizedWithFreq => {
SpecializedPostingsWriter::<TermFrequencyRecorder>::new_boxed(heap)
@@ -54,7 +54,7 @@ fn posting_from_field_entry<'a>(field_entry: &FieldEntry, heap: &'a Heap) -> Box
}
}
}
&FieldType::U32(_) => {
FieldType::U32(_) => {
SpecializedPostingsWriter::<NothingRecorder>::new_boxed(heap)
}
}
@@ -90,7 +90,7 @@ impl<'a> SegmentWriter<'a> {
// enforced by the fact that "self" is moved.
pub fn finalize(mut self,) -> Result<()> {
let segment_info = self.segment_info();
for per_field_postings_writer in self.per_field_postings_writers.iter_mut() {
for per_field_postings_writer in &mut self.per_field_postings_writers {
per_field_postings_writer.close(self.heap);
}
write(&self.per_field_postings_writers,
@@ -112,17 +112,18 @@ impl<'a> SegmentWriter<'a> {
let field_options = schema.get_field_entry(field);
match *field_options.field_type() {
FieldType::Str(ref text_options) => {
let mut num_tokens = 0;
if text_options.get_indexing_options().is_tokenized() {
num_tokens = field_posting_writer.index_text(doc_id, field, &field_values, self.heap);
}
else {
for field_value in field_values {
let term = Term::from_field_text(field, field_value.value().text());
field_posting_writer.suscribe(doc_id, 0, &term, self.heap);
num_tokens += 1u32;
let num_tokens: u32 =
if text_options.get_indexing_options().is_tokenized() {
field_posting_writer.index_text(doc_id, field, &field_values, self.heap)
}
}
else {
let num_field_values = field_values.len() as u32;
for field_value in field_values {
let term = Term::from_field_text(field, field_value.value().text());
field_posting_writer.suscribe(doc_id, 0, &term, self.heap);
}
num_field_values
};
self.fieldnorms_writer
.get_field_writer(field)
.map(|field_norms_writer| {
@@ -141,7 +142,7 @@ impl<'a> SegmentWriter<'a> {
}
self.fieldnorms_writer.fill_val_up_to(doc_id);
self.fast_field_writers.add_document(&doc);
self.fast_field_writers.add_document(doc);
let stored_fieldvalues: Vec<&FieldValue> = doc
.get_fields()
.iter()
@@ -166,7 +167,7 @@ impl<'a> SegmentWriter<'a> {
}
fn write<'a>(per_field_postings_writers: &Vec<Box<PostingsWriter + 'a>>,
fn write<'a>(per_field_postings_writers: &[Box<PostingsWriter + 'a>],
fast_field_writers: &U32FastFieldsWriter,
fieldnorms_writer: &U32FastFieldsWriter,
segment_info: SegmentInfo,

View File

@@ -1,3 +1,8 @@
#![allow(unknown_lints)]
#![allow(module_inception)]
/*!
Tantivy is a search engine library.
@@ -138,7 +143,7 @@ mod tests {
#[test]
fn test_indexing() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_from_tempdir(schema).unwrap();
@@ -146,17 +151,17 @@ mod tests {
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "af b");
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c");
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c d");
index_writer.add_document(doc).unwrap();
}
@@ -167,31 +172,31 @@ mod tests {
#[test]
fn test_docfreq() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c");
index_writer.add_document(doc).unwrap();
index_writer.commit().unwrap();
}
{
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a");
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a a");
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "c");
index_writer.add_document(doc).unwrap();
index_writer.commit().unwrap();
@@ -212,22 +217,22 @@ mod tests {
#[test]
fn test_fieldnorm() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c");
index_writer.add_document(doc).unwrap();
}
{
let doc = Document::new();
let doc = Document::default();
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b");
index_writer.add_document(doc).unwrap();
}
@@ -246,7 +251,7 @@ mod tests {
#[test]
fn test_termfreq() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -254,7 +259,7 @@ mod tests {
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "af af af bc bc");
index_writer.add_document(doc).unwrap();
}
@@ -273,7 +278,7 @@ mod tests {
#[test]
fn test_searcher() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -282,17 +287,17 @@ mod tests {
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "af af af b");
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c");
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c d");
index_writer.add_document(doc).unwrap();
}
@@ -302,7 +307,7 @@ mod tests {
let searcher = index.searcher();
let get_doc_ids = |terms: Vec<Term>| {
let query = MultiTermQuery::from(terms);
let mut collector = TestCollector::new();
let mut collector = TestCollector::default();
assert!(searcher.search(&query, &mut collector).is_ok());
collector.docs()
};
@@ -342,7 +347,7 @@ mod tests {
#[test]
fn test_searcher_2() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -351,17 +356,17 @@ mod tests {
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "af b");
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c");
index_writer.add_document(doc).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b c d");
index_writer.add_document(doc).unwrap();
}

View File

@@ -37,7 +37,7 @@ impl<'a> DocSet for ChainedPostings<'a> {
return false;
}
}
return true
true
}
fn doc(&self,) -> DocId {

View File

@@ -28,7 +28,7 @@ fn read_positions(data: &[u8]) -> Vec<u32> {
impl FreqHandler {
pub fn new() -> FreqHandler {
pub fn new_without_freq() -> FreqHandler {
FreqHandler {
freq_decoder: SIMDBlockDecoder::with_val(1u32),
positions: Vec::new(),
@@ -110,7 +110,7 @@ impl FreqHandler {
}
}
#[inline(always)]
#[inline]
pub fn freq(&self, idx: usize)-> u32 {
self.freq_decoder.output(idx)
}

View File

@@ -21,13 +21,13 @@ impl<'a> IntersectionDocSet<'a> {
pub fn new(mut postings: Vec<Box<DocSet + 'a>>) -> IntersectionDocSet<'a> {
let left = postings.pop().unwrap();
let right;
if postings.len() == 1 {
right = postings.pop().unwrap();
}
else {
right = Box::new(IntersectionDocSet::new(postings));
}
let right =
if postings.len() == 1 {
postings.pop().unwrap()
}
else {
Box::new(IntersectionDocSet::new(postings))
};
IntersectionDocSet::from_pair(left, right)
}
}
@@ -74,13 +74,11 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
}
}
#[inline(never)]
pub fn intersection<'a, TDocSet: DocSet + 'a>(postings: Vec<TDocSet>) -> IntersectionDocSet<'a> {
let boxed_postings: Vec<Box<DocSet + 'a>> = postings
.into_iter()
.map(|postings| {
let boxed_p: Box<DocSet + 'a> = Box::new(postings);
boxed_p
.map(|postings: TDocSet| {
Box::new(postings) as Box<DocSet + 'a>
})
.collect();
IntersectionDocSet::new(boxed_postings)
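
The rewritten closure shows why the explicit `as Box<DocSet + 'a>` cast is needed: `Box::new(postings)` infers the concrete `Box<TDocSet>`, and `collect` into a vector of trait objects will not coerce it implicitly. A self-contained sketch (modern syntax writes the trait object as `dyn DocSet`):

    trait DocSet {
        fn doc(&self) -> u32;
    }

    struct Single(u32);

    impl DocSet for Single {
        fn doc(&self) -> u32 {
            self.0
        }
    }

    fn boxed_all(sets: Vec<Single>) -> Vec<Box<dyn DocSet>> {
        sets.into_iter()
            // Without the cast, `map` yields `Box<Single>` and
            // `collect` cannot produce `Vec<Box<dyn DocSet>>`.
            .map(|s| Box::new(s) as Box<dyn DocSet>)
            .collect()
    }

    fn main() {
        let boxed = boxed_all(vec![Single(1), Single(2)]);
        assert_eq!(boxed[1].doc(), 2);
    }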

View File

@@ -46,7 +46,7 @@ mod tests {
#[test]
pub fn test_position_write() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -66,7 +66,7 @@ mod tests {
#[test]
pub fn test_position_and_fieldnorm_write_fullstack() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
@@ -75,18 +75,18 @@ mod tests {
{
let mut segment_writer = SegmentWriter::for_segment(&heap, segment.clone(), &schema).unwrap();
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "a b a c a d a a.");
doc.add_text(text_field, "d d d d a"); // checking that position works if the field has two values.
segment_writer.add_document(&doc, &schema).unwrap();
}
{
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "b a");
segment_writer.add_document(&doc, &schema).unwrap();
}
for i in 2..1000 {
let mut doc = Document::new();
let mut doc = Document::default();
let mut text = iter::repeat("e ").take(i).collect::<String>();
text.push_str(" a");
doc.add_text(text_field, &text);

View File

@@ -47,6 +47,9 @@ impl<'a, TPostings: Postings> Postings for &'a mut TPostings {
pub trait HasLen {
fn len(&self,) -> usize;
fn is_empty(&self,) -> bool {
self.len() == 0
}
}
impl<THasLen: HasLen> HasLen for Box<THasLen> {
@@ -56,7 +59,6 @@ impl<THasLen: HasLen> HasLen for Box<THasLen> {
}
}
impl<'a> HasLen for &'a HasLen {
fn len(&self,) -> usize {
let unref: &HasLen = *self;

View File

@@ -17,7 +17,7 @@ pub trait PostingsWriter {
fn serialize(&self, serializer: &mut PostingsSerializer, heap: &Heap) -> io::Result<()>;
fn index_text<'a>(&mut self, doc_id: DocId, field: Field, field_values: &Vec<&'a FieldValue>, heap: &Heap) -> u32 {
fn index_text<'a>(&mut self, doc_id: DocId, field: Field, field_values: &[&'a FieldValue], heap: &Heap) -> u32 {
let mut pos = 0u32;
let mut num_tokens: u32 = 0u32;
let mut term = Term::allocate(field, 100);
@@ -25,16 +25,11 @@ pub trait PostingsWriter {
let mut tokens = SimpleTokenizer.tokenize(field_value.value().text());
// right now num_tokens and pos are redundant, but it should
// change when we get proper analyzers
loop {
match tokens.next() {
Some(token) => {
term.set_text(token);
self.suscribe(doc_id, pos, &term, heap);
pos += 1u32;
num_tokens += 1u32;
},
None => { break; }
}
while let Some(token) = tokens.next() {
term.set_text(token);
self.suscribe(doc_id, pos, &term, heap);
pos += 1u32;
num_tokens += 1u32;
}
pos += 1;
// THIS is to avoid phrase queries across field repetition.
@@ -52,13 +47,13 @@ fn hashmap_size_in_bits(heap_capacity: u32) -> usize {
let num_buckets_usable = heap_capacity / 100;
let hash_table_size = num_buckets_usable * 2;
let mut pow = 512;
for num_bit in 10 .. 32 {
pow = pow << 1;
for num_bits in 10 .. 32 {
pow <<= 1;
if pow > hash_table_size {
return num_bit;
return num_bits;
}
}
return 32
32
}
impl<'a, Rec: Recorder + 'static> SpecializedPostingsWriter<'a, Rec> {
@@ -86,7 +81,7 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<'
}
}
#[inline(always)]
#[inline]
fn suscribe(&mut self, doc: DocId, position: u32, term: &Term, heap: &Heap) {
let mut recorder = self.term_index.get_or_create(term);
let current_doc = recorder.current_doc();
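
The tokenizer loop above is clippy's `while_let_loop` rewrite: a `loop` that matches on `tokens.next()` and breaks on `None` collapses to `while let`. A `for` loop is not available because the tokenizer is a streaming iterator rather than an `Iterator`. A sketch with a hypothetical streaming type:

    // A streaming source with its own `next`, not `Iterator::next`.
    struct Tokens {
        items: Vec<&'static str>,
        pos: usize,
    }

    impl Tokens {
        fn next(&mut self) -> Option<&'static str> {
            if self.pos < self.items.len() {
                self.pos += 1;
                Some(self.items[self.pos - 1])
            } else {
                None
            }
        }
    }

    fn main() {
        let mut tokens = Tokens { items: vec!["a", "b"], pos: 0 };
        let mut out = Vec::new();
        while let Some(token) = tokens.next() {
            out.push(token);
        }
        assert_eq!(out, vec!["a", "b"]);
    }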

View File

@@ -168,31 +168,26 @@ impl Recorder for TFAndPositionRecorder {
fn serialize(&self, self_addr: u32, serializer: &mut PostingsSerializer, heap: &Heap) -> io::Result<()> {
let mut doc_positions = Vec::with_capacity(100);
let mut positions_iter = self.stack.iter(self_addr, heap);
loop {
if let Some(doc) = positions_iter.next() {
let mut prev_position = 0;
doc_positions.clear();
loop {
match positions_iter.next() {
Some(position) => {
if position == POSITION_END {
break;
}
else {
doc_positions.push(position - prev_position);
prev_position = position;
}
while let Some(doc) = positions_iter.next() {
let mut prev_position = 0;
doc_positions.clear();
loop {
match positions_iter.next() {
Some(position) => {
if position == POSITION_END {
break;
}
None => {
panic!("This should never happen. Pleasee report the bug.");
else {
doc_positions.push(position - prev_position);
prev_position = position;
}
}
None => {
panic!("This should never happen. Pleasee report the bug.");
}
}
try!(serializer.write_doc(doc, doc_positions.len() as u32, &doc_positions));
}
else {
break;
}
try!(serializer.write_doc(doc, doc_positions.len() as u32, &doc_positions));
}
Ok(())
}

View File

@@ -25,7 +25,7 @@ impl<'a> SegmentPostings<'a> {
len: 0,
doc_offset: 0,
block_decoder: SIMDBlockDecoder::new(),
freq_handler: FreqHandler::new(),
freq_handler: FreqHandler::new_without_freq(),
remaining_data: &EMPTY_ARRAY,
cur: Wrapping(usize::max_value()),
}
@@ -55,7 +55,6 @@ impl<'a> SegmentPostings<'a> {
}
}
#[inline(always)]
fn index_within_block(&self,) -> usize {
self.cur.0 % NUM_DOCS_PER_BLOCK
}
@@ -67,7 +66,7 @@ impl<'a> DocSet for SegmentPostings<'a> {
// goes to the next element.
// next needs to be called a first time to point to the correct element.
#[inline(always)]
#[inline]
fn advance(&mut self,) -> bool {
self.cur += Wrapping(1);
if self.cur.0 >= self.len {
@@ -76,10 +75,10 @@ impl<'a> DocSet for SegmentPostings<'a> {
if self.index_within_block() == 0 {
self.load_next_block();
}
return true;
true
}
#[inline(always)]
#[inline]
fn doc(&self,) -> DocId {
self.block_decoder.output(self.index_within_block())
}

View File

@@ -62,11 +62,11 @@ impl PostingsSerializer {
pub fn load_indexing_options(&mut self, field: Field) {
let field_entry: &FieldEntry = self.schema.get_field_entry(field);
self.text_indexing_options = match field_entry.field_type() {
&FieldType::Str(ref text_options) => {
self.text_indexing_options = match *field_entry.field_type() {
FieldType::Str(ref text_options) => {
text_options.get_indexing_options()
}
&FieldType::U32(ref u32_options) => {
FieldType::U32(ref u32_options) => {
if u32_options.is_indexed() {
TextIndexingOptions::Unindexed
}

View File

@@ -80,10 +80,10 @@ impl DocSet for VecPostings {
}
self.cursor = Wrapping(start + 1);
if self.cursor.0 < self.doc_ids.len() {
return SkipResult::OverStep;
SkipResult::OverStep
}
else {
return SkipResult::End;
SkipResult::End
}
}
}
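
Another recurring fix is clippy's `needless_return`: the final expression of a block is its value, so `return` is only needed for early exits. A sketch mirroring the `SkipResult` branches with plain strings:

    fn skip_result(cursor: usize, len: usize) -> &'static str {
        // Each branch is the block's value; no `return` keyword.
        if cursor < len {
            "OverStep"
        } else {
            "End"
        }
    }

    fn main() {
        assert_eq!(skip_result(1, 3), "OverStep");
    }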

View File

@@ -14,8 +14,8 @@ use Score;
struct HeapItem(DocId, u32);
impl PartialOrd for HeapItem {
fn partial_cmp(&self, other:&Self) -> Option<Ordering> {
Some(self.cmp(&other))
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
@@ -35,7 +35,7 @@ impl Filter {
(self.and_mask & ord_set) == self.result
}
fn new(occurs: &Vec<Occur>) -> Filter {
fn new(occurs: &[Occur]) -> Filter {
let mut and_mask = 0u64;
let mut result = 0u64;
for (i, occur) in occurs.iter().enumerate() {
@@ -167,24 +167,19 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for DAATMul
}
}
self.advance_head();
loop {
match self.queue.peek() {
Some(&HeapItem(peek_doc, peek_ord)) => {
if peek_doc != self.doc {
break;
}
else {
let peek_ord: usize = peek_ord as usize;
let peek_tf = self.term_frequencies[peek_ord];
let peek_fieldnorm = self.get_field_norm(peek_ord, peek_doc);
self.similarity.update(peek_ord, peek_tf, peek_fieldnorm);
ord_bitset |= 1 << peek_ord;
}
}
None => { break; }
while let Some(&HeapItem(peek_doc, peek_ord)) = self.queue.peek() {
if peek_doc == self.doc {
let peek_ord: usize = peek_ord as usize;
let peek_tf = self.term_frequencies[peek_ord];
let peek_fieldnorm = self.get_field_norm(peek_ord, peek_doc);
self.similarity.update(peek_ord, peek_tf, peek_fieldnorm);
ord_bitset |= 1 << peek_ord;
}
else {
break;
}
self.advance_head();
}
}
if self.filter.accept(ord_bitset) {
return true;
}

View File

@@ -40,7 +40,7 @@ impl MultiTermQuery {
let num_docs = searcher.num_docs() as f32;
let idfs: Vec<f32> = self.occur_terms
.iter()
.map(|&(_, ref term)| searcher.doc_freq(&term))
.map(|&(_, ref term)| searcher.doc_freq(term))
.map(|doc_freq| {
if doc_freq == 0 {
1.
@@ -73,13 +73,10 @@ impl MultiTermQuery {
let mut decode_timer = timer.open("decode_all");
for &(occur, ref term) in &self.occur_terms {
let _decode_one_timer = decode_timer.open("decode_one");
match reader.read_postings(&term, SegmentPostingsOption::Freq) {
Some(postings) => {
let field = term.get_field();
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
postings_and_fieldnorms.push((occur, postings, fieldnorm_reader));
}
None => {}
if let Some(postings) = reader.read_postings(term, SegmentPostingsOption::Freq) {
let field = term.get_field();
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
postings_and_fieldnorms.push((occur, postings, fieldnorm_reader));
}
}
}
@@ -120,7 +117,7 @@ impl Query for MultiTermQuery {
doc_address: &DocAddress) -> Result<Explanation> {
let segment_reader = searcher.segment_reader(doc_address.segment_ord() as usize);
let similitude = SimilarityExplainer::from(self.similitude(searcher));
let mut timer_tree = TimerTree::new();
let mut timer_tree = TimerTree::default();
let mut postings = try!(
self.search_segment(
segment_reader,
@@ -144,7 +141,7 @@ impl Query for MultiTermQuery {
&self,
searcher: &Searcher,
collector: &mut C) -> Result<TimerTree> {
let mut timer_tree = TimerTree::new();
let mut timer_tree = TimerTree::default();
{
let mut search_timer = timer_tree.open("search");
for (segment_ord, segment_reader) in searcher.segment_readers().iter().enumerate() {

View File

@@ -18,7 +18,7 @@ pub struct PhraseQuery {
impl Query for PhraseQuery {
fn search<C: Collector>(&self, searcher: &Searcher, collector: &mut C) -> io::Result<TimerTree> {
let mut timer_tree = TimerTree::new();
let mut timer_tree = TimerTree::default();
{
let mut search_timer = timer_tree.open("search");
for (segment_ord, segment_reader) in searcher.segments().iter().enumerate() {

View File

@@ -30,8 +30,8 @@ pub enum StandardQuery {
impl StandardQuery {
pub fn num_terms(&self,) -> usize {
match self {
&StandardQuery::MultiTerm(ref q) => {
match *self {
StandardQuery::MultiTerm(ref q) => {
q.num_terms()
}
}
@@ -51,8 +51,8 @@ impl Query for StandardQuery {
&self,
searcher: &Searcher,
doc_address: &DocAddress) -> Result<Explanation>
match self {
&StandardQuery::MultiTerm(ref q) => q.explain(searcher, doc_address)
match *self {
StandardQuery::MultiTerm(ref q) => q.explain(searcher, doc_address)
}
}
}
@@ -62,13 +62,8 @@ fn compute_terms(field: Field, text: &str) -> Vec<Term> {
let tokenizer = SimpleTokenizer::new();
let mut tokens = Vec::new();
let mut token_it = tokenizer.tokenize(text);
loop {
match token_it.next() {
Some(token_str) => {
tokens.push(Term::from_field_text(field, token_str));
}
None => { break; }
}
while let Some(token_str) = token_it.next() {
tokens.push(Term::from_field_text(field, token_str));
}
tokens
}
@@ -86,11 +81,11 @@ impl QueryParser {
fn transform_field_and_value(&self, field: Field, val: &str) -> Result<Vec<Term>, ParsingError> {
let field_entry = self.schema.get_field_entry(field);
Ok(match field_entry.field_type() {
&FieldType::Str(_) => {
Ok(match *field_entry.field_type() {
FieldType::Str(_) => {
compute_terms(field, val)
},
&FieldType::U32(_) => {
FieldType::U32(_) => {
let u32_parsed: u32 = try!(val
.parse::<u32>()
.map_err(|_| {
@@ -282,7 +277,7 @@ mod tests {
#[test]
pub fn test_query_parser() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", STRING);
let title_field = schema_builder.add_text_field("title", STRING);
let author_field = schema_builder.add_text_field("author", STRING);

View File

@@ -4,5 +4,5 @@ use query::MultiTermAccumulator;
pub trait Similarity: MultiTermAccumulator {
fn score(&self, ) -> Score;
fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation;
fn explain(&self, vals: &[(usize, u32, u32)]) -> Explanation;
}

View File

@@ -42,7 +42,7 @@ impl<TSimilarity: Similarity + Sized> Similarity for SimilarityExplainer<TSimila
self.scorer.score()
}
fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation {
fn explain(&self, vals: &[(usize, u32, u32)]) -> Explanation {
self.scorer.explain(vals)
}
}

View File

@@ -14,14 +14,14 @@ pub struct TfIdf {
impl MultiTermAccumulator for TfIdf {
#[inline(always)]
#[inline]
fn update(&mut self, term_ord: usize, term_freq: u32, fieldnorm: u32) {
assert!(term_freq != 0u32);
self.score += self.term_score(term_ord, term_freq, fieldnorm);
self.num_fields += 1;
}
#[inline(always)]
#[inline]
fn clear(&mut self,) {
self.score = 0f32;
self.num_fields = 0;
@@ -39,7 +39,6 @@ impl TfIdf {
}
}
#[inline(always)]
fn coord(&self,) -> f32 {
self.coords[self.num_fields]
}
@@ -49,25 +48,25 @@ impl TfIdf {
}
fn term_name(&self, ord: usize) -> String {
match &self.term_names {
&Some(ref term_names_vec) => term_names_vec[ord].clone(),
&None => format!("Field({})", ord)
match self.term_names {
Some(ref term_names_vec) => term_names_vec[ord].clone(),
None => format!("Field({})", ord)
}
}
#[inline(always)]
#[inline]
fn term_score(&self, term_ord: usize, term_freq: u32, field_norm: u32) -> f32 {
(term_freq as f32 / field_norm as f32).sqrt() * self.idf[term_ord]
}
}
impl Similarity for TfIdf {
#[inline(always)]
#[inline]
fn score(&self, ) -> Score {
self.score * self.coord()
}
fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation {
fn explain(&self, vals: &[(usize, u32, u32)]) -> Explanation {
let score = self.score();
let mut explanation = Explanation::with_val(score);
let formula_components: Vec<String> = vals.iter()
@@ -76,7 +75,7 @@ impl Similarity for TfIdf {
.collect();
let formula = format!("<coord> * ({})", formula_components.join(" + "));
explanation.set_formula(&formula);
for &(ord, term_freq, field_norm) in vals.iter() {
for &(ord, term_freq, field_norm) in vals {
let term_score = self.term_score(ord, term_freq, field_norm);
let term_explanation = explanation.add_child(&self.term_name(ord), term_score);
term_explanation.set_formula(" sqrt(<term_freq> / <field_norm>) * <idf>");
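The per-term formula spelled out in `set_formula` above is `sqrt(term_freq / field_norm) * idf`, summed over the matching terms (and then scaled by the coord factor, omitted here). A worked sketch with invented idf values — these numbers are assumptions, not tantivy output:

```
// Self-contained restatement of the term score above.
fn term_score(term_freq: u32, field_norm: u32, idf: f32) -> f32 {
    (term_freq as f32 / field_norm as f32).sqrt() * idf
}

fn main() {
    let idf = [1.5f32, 0.5];
    // (term_ord, term_freq, field_norm) triples, as passed to `explain`.
    let vals = [(0usize, 4u32, 16u32), (1, 1, 16)];
    let score: f32 = vals
        .iter()
        .map(|&(ord, tf, norm)| term_score(tf, norm, idf[ord]))
        .sum();
    // sqrt(4/16) * 1.5 + sqrt(1/16) * 0.5 = 0.75 + 0.125 = 0.875
    assert!((score - 0.875).abs() < 1e-6);
}
```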

View File

@@ -31,18 +31,17 @@ impl PartialEq for Document {
impl Eq for Document {}
impl Document {
pub fn new() -> Document {
Document {
field_values: Vec::new(),
}
}
/// Returns the number of `(field, value)` pairs.
pub fn len(&self,) -> usize {
self.field_values.len()
}
/// Returns true iff the document contains no fields.
pub fn is_empty(&self,) -> bool {
self.field_values.is_empty()
}
/// Add a text field.
pub fn add_text(&mut self, field: Field, text: &str) {
self.add(FieldValue {
@@ -70,18 +69,17 @@ impl Document {
pub fn get_sorted_fields(&self) -> Vec<(Field, Vec<&FieldValue>)> {
let mut field_values: Vec<&FieldValue> = self.get_fields().iter().collect();
field_values.sort_by_key(|field_value| field_value.field());
let sorted_fields: Vec<(Field, Vec<&FieldValue>)> = field_values
field_values
.into_iter()
.group_by(|field_value| field_value.field())
.into_iter()
.map(|(key, group)| {
(key, group.into_iter().collect())
})
.collect();
sorted_fields
}
.collect::<Vec<(Field, Vec<&FieldValue>)>>()
}
pub fn get_all<'a>(&'a self, field: Field) -> Vec<&'a Value> {
pub fn get_all(&self, field: Field) -> Vec<&Value> {
self.field_values
.iter()
.filter(|field_value| field_value.field() == field)
@@ -89,7 +87,7 @@ impl Document {
.collect()
}
pub fn get_first<'a>(&'a self, field: Field) -> Option<&'a Value> {
pub fn get_first(&self, field: Field) -> Option<&Value> {
self.field_values
.iter()
.filter(|field_value| field_value.field() == field)
@@ -98,6 +96,15 @@ impl Document {
}
}
impl Default for Document {
fn default() -> Document {
Document {
field_values: Vec::new(),
}
}
}
impl From<Vec<FieldValue>> for Document {
fn from(field_values: Vec<FieldValue>) -> Document {
Document {
@@ -114,9 +121,9 @@ mod tests {
#[test]
fn test_doc() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("title", TEXT);
let mut doc = Document::new();
let mut doc = Document::default();
doc.add_text(text_field, "My title");
assert_eq!(doc.get_fields().len(), 1);
}
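Note: moving construction from an inherent `new()` into a `Default` impl is clippy's `new_without_default` lint, and the `From<Vec<FieldValue>>` impl above gives a second, non-empty constructor. A minimal sketch of the same pattern on a hypothetical type:

```
#[derive(Debug)]
struct Doc {
    field_values: Vec<(u32, String)>,
}

// clippy's `new_without_default`: a no-argument `new` should be
// (or be backed by) a `Default` impl, so generic code can build the type.
impl Default for Doc {
    fn default() -> Doc {
        Doc { field_values: Vec::new() }
    }
}

impl From<Vec<(u32, String)>> for Doc {
    fn from(field_values: Vec<(u32, String)>) -> Doc {
        Doc { field_values }
    }
}

fn main() {
    let empty = Doc::default();
    assert!(empty.field_values.is_empty());
    let doc = Doc::from(vec![(0, "My title".to_string())]);
    assert_eq!(doc.field_values.len(), 1);
}
```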

View File

@@ -16,20 +16,20 @@ pub enum FieldType {
impl FieldType {
pub fn value_from_json(&self, json: &Json) -> Result<Value, ValueParsingError> {
match json {
&Json::String(ref field_text) => {
match self {
&FieldType::Str(_) => {
match *json {
Json::String(ref field_text) => {
match *self {
FieldType::Str(_) => {
Ok(Value::Str(field_text.clone()))
}
&FieldType::U32(_) => {
FieldType::U32(_) => {
Err(ValueParsingError::TypeError(format!("Expected a u32 int, got {:?}", json)))
}
}
}
&Json::U64(ref field_val_u64) => {
match self {
&FieldType::U32(_) => {
Json::U64(ref field_val_u64) => {
match *self {
FieldType::U32(_) => {
if *field_val_u64 > (u32::max_value() as u64) {
Err(ValueParsingError::OverflowError(format!("Expected u32, but value {:?} overflows.", field_val_u64)))
}
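The `U32` arm above guards against JSON integers that do not fit in a `u32` before down-casting. A self-contained sketch of that validation, with a hypothetical error type standing in for `ValueParsingError`:

```
#[derive(Debug, PartialEq)]
enum ValueParsingError {
    Overflow(u64),
}

// Mirrors the guard above: a JSON integer arrives as u64 and is only
// narrowed to u32 after an explicit range check.
fn u32_from_json_u64(val: u64) -> Result<u32, ValueParsingError> {
    if val > u64::from(u32::max_value()) {
        Err(ValueParsingError::Overflow(val))
    } else {
        Ok(val as u32)
    }
}

fn main() {
    assert_eq!(u32_from_json_u64(42), Ok(42));
    assert_eq!(
        u32_from_json_u64(1u64 << 40),
        Err(ValueParsingError::Overflow(1u64 << 40))
    );
}
```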

View File

@@ -27,8 +27,8 @@ directory.
```
use tantivy::schema::*;
let mut schema_builder = SchemaBuilder::new();
let title_options = TextOptions::new()
let mut schema_builder = SchemaBuilder::default();
let title_options = TextOptions::default()
.set_stored()
.set_indexing_options(TextIndexingOptions::TokenizedWithFreqAndPosition);
schema_builder.add_text_field("title_options", title_options);
@@ -57,7 +57,7 @@ The example can be rewritten :
```
use tantivy::schema::*;
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
schema_builder.add_text_field("title_options", TEXT | STORED);
let schema = schema_builder.build();
```
@@ -70,8 +70,8 @@ let schema = schema_builder.build();
```
use tantivy::schema::*;
let mut schema_builder = SchemaBuilder::new();
let num_stars_options = U32Options::new()
let mut schema_builder = SchemaBuilder::default();
let num_stars_options = U32Options::default()
.set_stored()
.set_indexed();
schema_builder.add_u32_field("num_stars", num_stars_options);
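A sketch completing the example, assuming `Document::add_u32` as the u32 counterpart of the `add_text` call used elsewhere in these docs:

```
use tantivy::schema::*;
use tantivy::Document;

let mut schema_builder = SchemaBuilder::default();
let num_stars_options = U32Options::default()
    .set_stored()
    .set_indexed();
let num_stars = schema_builder.add_u32_field("num_stars", num_stars_options);
let schema = schema_builder.build();

// Fill the field on a document (`add_u32` is assumed here).
let mut doc = Document::default();
doc.add_u32(num_stars, 5);
```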

View File

@@ -14,9 +14,6 @@ use std::fmt;
/// Tantivy has a very strict schema.
/// You need to specify in advance whether a field is indexed or not,
/// stored or not, and RAM-based or not.
@@ -30,7 +27,7 @@ use std::fmt;
/// ```
/// use tantivy::schema::*;
///
/// let mut schema_builder = SchemaBuilder::new();
/// let mut schema_builder = SchemaBuilder::default();
/// let id_field = schema_builder.add_text_field("id", STRING);
/// let title_field = schema_builder.add_text_field("title", TEXT);
/// let body_field = schema_builder.add_text_field("body", TEXT);
@@ -39,19 +36,11 @@ use std::fmt;
/// ```
pub struct SchemaBuilder {
fields: Vec<FieldEntry>,
fields_map: HashMap<String, Field>, // transient
fields_map: HashMap<String, Field>,
}
impl SchemaBuilder {
pub fn new() -> SchemaBuilder {
SchemaBuilder {
fields: Vec::new(),
fields_map: HashMap::new(),
}
}
/// Adds a new u32 field.
/// Returns the associated field handle
@@ -108,6 +97,15 @@ impl SchemaBuilder {
}
impl Default for SchemaBuilder {
fn default() -> SchemaBuilder {
SchemaBuilder {
fields: Vec::new(),
fields_map: HashMap::new(),
}
}
}
#[derive(Debug)]
struct InnerSchema {
fields: Vec<FieldEntry>,
@@ -129,7 +127,7 @@ struct InnerSchema {
/// ```
/// use tantivy::schema::*;
///
/// let mut schema_builder = SchemaBuilder::new();
/// let mut schema_builder = SchemaBuilder::default();
/// let id_field = schema_builder.add_text_field("id", STRING);
/// let title_field = schema_builder.add_text_field("title", TEXT);
/// let body_field = schema_builder.add_text_field("body", TEXT);
@@ -163,7 +161,7 @@ impl Schema {
/// If panicking is not an option for you,
/// you may use `get(&self, field_name: &str)`.
pub fn get_field(&self, field_name: &str) -> Option<Field> {
self.0.fields_map.get(field_name).map(|field| field.clone())
self.0.fields_map.get(field_name).cloned()
}
pub fn to_named_doc(&self, doc: &Document) -> NamedFieldDocument {
@@ -190,24 +188,24 @@ impl Schema {
let json_node = try!(Json::from_str(doc_json));
let some_json_obj = json_node.as_object();
if !some_json_obj.is_some() {
let doc_json_sample: String;
if doc_json.len() < 20 {
doc_json_sample = String::from(doc_json);
}
else {
doc_json_sample = format!("{:?}...", &doc_json[0..20]);
}
let doc_json_sample: String =
if doc_json.len() < 20 {
String::from(doc_json)
}
else {
format!("{:?}...", &doc_json[0..20])
};
return Err(DocParsingError::NotJSONObject(doc_json_sample))
}
let json_obj = some_json_obj.unwrap();
let mut doc = Document::new();
let mut doc = Document::default();
for (field_name, json_value) in json_obj.iter() {
match self.get_field(field_name) {
Some(field) => {
let field_entry = self.get_field_entry(field);
let field_type = field_entry.field_type();
match json_value {
&Json::Array(ref json_items) => {
match *json_value {
Json::Array(ref json_items) => {
for json_item in json_items {
let value = try!(
field_type
@@ -251,7 +249,7 @@ impl fmt::Debug for Schema {
impl Decodable for Schema {
fn decode<D: Decoder>(d: &mut D) -> Result <Self, D::Error> {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
try!(d.read_seq(|d, num_fields| {
for _ in 0..num_fields {
let field_entry = try!(FieldEntry::decode(d));
@@ -312,8 +310,8 @@ mod tests {
#[test]
pub fn test_schema_serialization() {
let mut schema_builder = SchemaBuilder::new();
let count_options = U32Options::new().set_stored().set_fast();
let mut schema_builder = SchemaBuilder::default();
let count_options = U32Options::default().set_stored().set_fast();
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field("author", STRING);
schema_builder.add_u32_field("count", count_options);
@@ -355,8 +353,8 @@ mod tests {
#[test]
pub fn test_document_to_json() {
let mut schema_builder = SchemaBuilder::new();
let count_options = U32Options::new().set_stored().set_fast();
let mut schema_builder = SchemaBuilder::default();
let count_options = U32Options::default().set_stored().set_fast();
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field("author", STRING);
schema_builder.add_u32_field("count", count_options);
@@ -373,8 +371,8 @@ mod tests {
#[test]
pub fn test_parse_document() {
let mut schema_builder = SchemaBuilder::new();
let count_options = U32Options::new().set_stored().set_fast();
let mut schema_builder = SchemaBuilder::default();
let count_options = U32Options::default().set_stored().set_fast();
let title_field = schema_builder.add_text_field("title", TEXT);
let author_field = schema_builder.add_text_field("author", STRING);
let count_field = schema_builder.add_u32_field("count", count_options);
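The JSON path above is reached through document parsing; a sketch of the round trip, assuming the entry point is `Schema::parse_document` as the test name `test_parse_document` suggests:

```
use tantivy::schema::*;

let mut schema_builder = SchemaBuilder::default();
schema_builder.add_text_field("title", TEXT);
let schema = schema_builder.build();

// Accepted: a JSON object whose values match the declared field types.
let doc = schema.parse_document(r#"{"title": "Old Man"}"#).unwrap();
assert_eq!(doc.len(), 1);

// Rejected with NotJSONObject: anything that is not a JSON object.
assert!(schema.parse_document("[1, 2, 3]").is_err());
```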

View File

@@ -83,7 +83,7 @@ mod tests {
#[test]
pub fn test_term() {
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
schema_builder.add_text_field("text", STRING);
let title_field = schema_builder.add_text_field("title", STRING);
let count_field = schema_builder.add_text_field("count", STRING);

View File

@@ -30,7 +30,10 @@ impl TextOptions {
self
}
pub fn new() -> TextOptions {
}
impl Default for TextOptions {
fn default() -> TextOptions {
TextOptions {
indexing: TextIndexingOptions::Unindexed,
stored: false,
@@ -90,17 +93,16 @@ impl Decodable for TextIndexingOptions {
impl TextIndexingOptions {
pub fn is_termfreq_enabled(&self) -> bool {
match *self {
TextIndexingOptions::TokenizedWithFreq => true,
TextIndexingOptions::TokenizedWithFreqAndPosition => true,
TextIndexingOptions::TokenizedWithFreq | TextIndexingOptions::TokenizedWithFreqAndPosition => true,
_ => false,
}
}
pub fn is_tokenized(&self,) -> bool {
match *self {
TextIndexingOptions::TokenizedNoFreq => true,
TextIndexingOptions::TokenizedWithFreq => true,
TextIndexingOptions::TokenizedWithFreqAndPosition => true,
TextIndexingOptions::TokenizedNoFreq
| TextIndexingOptions::TokenizedWithFreq
| TextIndexingOptions::TokenizedWithFreqAndPosition => true,
_ => false,
}
}
@@ -129,10 +131,7 @@ impl BitOr for TextIndexingOptions {
if self == Unindexed {
other
}
else if other == Unindexed {
self
}
else if self == other {
else if other == Unindexed || self == other {
self
}
else {
@@ -156,7 +155,7 @@ pub const TEXT: TextOptions = TextOptions {
stored: false,
};
/// A stored fields of a document can be retrieved given its DocId.
/// The stored fields of a document can be retrieved given its `DocId`.
/// Stored fields are stored together and LZ4-compressed.
/// Reading the stored fields of a document is relatively slow.
/// (100 microsecs)
@@ -171,7 +170,7 @@ impl BitOr for TextOptions {
type Output = TextOptions;
fn bitor(self, other: TextOptions) -> TextOptions {
let mut res = TextOptions::new();
let mut res = TextOptions::default();
res.indexing = self.indexing | other.indexing;
res.stored = self.stored || other.stored;
res
@@ -191,7 +190,7 @@ mod tests {
assert!(field_options.get_indexing_options().is_tokenized());
}
{
let mut schema_builder = SchemaBuilder::new();
let mut schema_builder = SchemaBuilder::default();
schema_builder.add_text_field("body", TEXT);
let schema = schema_builder.build();
let field = schema.get_field("body").unwrap();
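Note: these `BitOr` impls are what back the `TEXT | STORED` shorthand: `Unindexed` acts as the neutral element for indexing options (a genuine conflict falls through to the final `else`), and the `stored` flags are OR-ed. A sketch of the observable result, assuming a `TextOptions::is_stored` getter mirroring the `U32Options` one shown later:

```
use tantivy::schema::*;

// TEXT: tokenized with freq and positions, not stored.
// STORED: not indexed, stored.
let opts = TEXT | STORED;
assert!(opts.is_stored());
assert!(opts.get_indexing_options().is_tokenized());
```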

View File

@@ -6,15 +6,7 @@ pub struct U32Options {
}
impl U32Options {
pub fn new() -> U32Options {
U32Options {
fast: false,
indexed: false,
stored: false,
}
}
pub fn is_stored(&self,) -> bool {
self.stored
}
@@ -43,6 +35,17 @@ impl U32Options {
}
}
impl Default for U32Options {
fn default() -> U32Options {
U32Options {
fast: false,
indexed: false,
stored: false,
}
}
}
/// The field will be kept in a fast field, loaded in RAM for fast random access
pub const FAST: U32Options = U32Options {
indexed: false,

View File

@@ -20,9 +20,9 @@ mod tests {
use directory::{RAMDirectory, Directory, MmapDirectory, WritePtr};
fn write_lorem_ipsum_store(writer: WritePtr) -> Schema {
let mut schema_builder = SchemaBuilder::new();
let field_body = schema_builder.add_text_field("body", TextOptions::new().set_stored());
let field_title = schema_builder.add_text_field("title", TextOptions::new().set_stored());
let mut schema_builder = SchemaBuilder::default();
let field_body = schema_builder.add_text_field("body", TextOptions::default().set_stored());
let field_title = schema_builder.add_text_field("title", TextOptions::default().set_stored());
let schema = schema_builder.build();
let lorem = String::from("Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
{

View File

@@ -46,7 +46,7 @@ impl StoreWriter {
}
pub fn stack_reader(&mut self, reader: &StoreReader) -> io::Result<()> {
if self.current_block.len() > 0 {
if !self.current_block.is_empty() {
try!(self.write_and_compress_block());
}
match reader.offsets.last() {
@@ -65,7 +65,7 @@ impl StoreWriter {
}
}
pub fn store<'a>(&mut self, field_values: &Vec<&'a FieldValue>) -> io::Result<()> {
pub fn store<'a>(&mut self, field_values: &[&'a FieldValue]) -> io::Result<()> {
self.intermediary_buffer.clear();
try!((field_values.len() as u32).serialize(&mut self.intermediary_buffer));
for field_value in field_values {
@@ -98,7 +98,7 @@ impl StoreWriter {
}
pub fn close(&mut self,) -> io::Result<()> {
if self.current_block.len() > 0 {
if !self.current_block.is_empty() {
try!(self.write_and_compress_block());
}
let header_offset: u64 = self.written;