From b93fa39910035da3fbae600a22337a89037c835c Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 19 Jun 2016 22:47:43 +0900 Subject: [PATCH] Added test for field entry json ser/deser --- src/core/merger.rs | 22 ++++++------ src/core/mod.rs | 4 ++- src/core/searcher.rs | 2 +- src/core/{reader.rs => segment_reader.rs} | 42 +++-------------------- src/fastfield/mod.rs | 32 ++++++++--------- src/lib.rs | 20 +++++------ src/query/multi_term_query.rs | 2 +- src/schema/document.rs | 25 ++++++++++++-- src/schema/field_entry.rs | 20 ++++++++++- src/schema/schema.rs | 2 -- 10 files changed, 89 insertions(+), 82 deletions(-) rename src/core/{reader.rs => segment_reader.rs} (81%) diff --git a/src/core/merger.rs b/src/core/merger.rs index e67b2153b..9a4071a35 100644 --- a/src/core/merger.rs +++ b/src/core/merger.rs @@ -1,5 +1,5 @@ use std::io; -use core::reader::SegmentReader; +use core::SegmentReader; use core::index::Segment; use DocId; use core::index::SerializableSegment; @@ -241,20 +241,20 @@ mod tests { let mut index_writer = index.writer_with_num_threads(1).unwrap(); { let mut doc = Document::new(); - doc.add_text(&text_field, "af b"); - doc.add_u32(&score_field, 3); + doc.add_text(text_field, "af b"); + doc.add_u32(score_field, 3); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c"); - doc.add_u32(&score_field, 5); + doc.add_text(text_field, "a b c"); + doc.add_u32(score_field, 5); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c d"); - doc.add_u32(&score_field, 7); + doc.add_text(text_field, "a b c d"); + doc.add_u32(score_field, 7); index_writer.add_document(doc).unwrap(); } index_writer.wait().unwrap(); @@ -265,14 +265,14 @@ mod tests { let mut index_writer = index.writer_with_num_threads(1).unwrap(); { let mut doc = Document::new(); - doc.add_text(&text_field, "af b"); - doc.add_u32(&score_field, 11); + doc.add_text(text_field, "af b"); + doc.add_u32(score_field, 11); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c g"); - doc.add_u32(&score_field, 13); + doc.add_text(text_field, "a b c g"); + doc.add_u32(score_field, 13); index_writer.add_document(doc).unwrap(); } index_writer.wait().unwrap(); diff --git a/src/core/mod.rs b/src/core/mod.rs index b97d15b35..3bf65f2c8 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -1,13 +1,15 @@ pub mod writer; -pub mod reader; pub mod codec; pub mod searcher; pub mod index; pub mod merger; +mod segment_reader; use std::error; use std::io; +pub use self::segment_reader::SegmentReader; + pub fn convert_to_ioerror(err: E) -> io::Error { io::Error::new( io::ErrorKind::InvalidData, diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 84d245ed3..021e77e82 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -1,4 +1,4 @@ -use core::reader::SegmentReader; +use core::SegmentReader; use core::index::Index; use core::index::Segment; use DocId; diff --git a/src/core/reader.rs b/src/core/segment_reader.rs similarity index 81% rename from src/core/reader.rs rename to src/core/segment_reader.rs index 5e0795bf3..a252d6cac 100644 --- a/src/core/reader.rs +++ b/src/core/segment_reader.rs @@ -22,11 +22,6 @@ use postings::intersection; use schema::FieldEntry; use schema::Schema; -impl fmt::Debug for SegmentReader { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "SegmentReader({:?})", self.segment_id) - } -} pub struct SegmentReader { @@ -153,35 +148,8 @@ impl SegmentReader { } } - -// impl SerializableSegment for SegmentReader { -// -// fn write_postings(&self, mut serializer: PostingsSerializer) -> io::Result<()> { -// let mut term_infos_it = self.term_infos.stream(); -// loop { -// match term_infos_it.next() { -// Some((term_data, term_info)) => { -// let term = Term::from(term_data); -// try!(serializer.new_term(&term, term_info.doc_freq)); -// let segment_postings = self.read_postings(term_info.postings_offset); -// try!(serializer.write_docs(&segment_postings.doc_ids[..])); -// }, -// None => { break; } -// } -// } -// Ok(()) -// } -// -// fn write_store(&self, ) -// -// fn write(&self, mut serializer: SegmentSerializer) -> io::Result<()> { -// try!(self.write_postings(serializer.get_postings_serializer())); -// try!(self.write_store(serializer.get_store_serializer())); -// -// for doc_id in 0..self.max_doc() { -// let doc = try!(self.store_reader.get(&doc_id)); -// try!(serializer.store_doc(&mut doc.text_fields())); -// } -// serializer.close() -// } -// } +impl fmt::Debug for SegmentReader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SegmentReader({:?})", self.segment_id) + } +} diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 88cd3e311..c33a2e764 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -45,7 +45,7 @@ mod tests { assert_eq!(compute_num_bits(256), 9u8); } - fn add_single_field_doc(fast_field_writers: &mut U32FastFieldsWriter, field: &Field, value: u32) { + fn add_single_field_doc(fast_field_writers: &mut U32FastFieldsWriter, field: Field, value: u32) { let mut doc = Document::new(); doc.add_u32(field, value); fast_field_writers.add_document(&doc); @@ -61,9 +61,9 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::new(write).unwrap(); let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema); - add_single_field_doc(&mut fast_field_writers, &field, 13u32); - add_single_field_doc(&mut fast_field_writers, &field, 14u32); - add_single_field_doc(&mut fast_field_writers, &field, 2u32); + add_single_field_doc(&mut fast_field_writers, field, 13u32); + add_single_field_doc(&mut fast_field_writers, field, 14u32); + add_single_field_doc(&mut fast_field_writers, field, 2u32); fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); } @@ -90,15 +90,15 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::new(write).unwrap(); let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema); - add_single_field_doc(&mut fast_field_writers, &field, 4u32); - add_single_field_doc(&mut fast_field_writers, &field, 14_082_001u32); - add_single_field_doc(&mut fast_field_writers, &field, 3_052u32); - add_single_field_doc(&mut fast_field_writers, &field, 9002u32); - add_single_field_doc(&mut fast_field_writers, &field, 15_001u32); - add_single_field_doc(&mut fast_field_writers, &field, 777u32); - add_single_field_doc(&mut fast_field_writers, &field, 1_002u32); - add_single_field_doc(&mut fast_field_writers, &field, 1_501u32); - add_single_field_doc(&mut fast_field_writers, &field, 215u32); + add_single_field_doc(&mut fast_field_writers, field, 4u32); + add_single_field_doc(&mut fast_field_writers, field, 14_082_001u32); + add_single_field_doc(&mut fast_field_writers, field, 3_052u32); + add_single_field_doc(&mut fast_field_writers, field, 9002u32); + add_single_field_doc(&mut fast_field_writers, field, 15_001u32); + add_single_field_doc(&mut fast_field_writers, field, 777u32); + add_single_field_doc(&mut fast_field_writers, field, 1_002u32); + add_single_field_doc(&mut fast_field_writers, field, 1_501u32); + add_single_field_doc(&mut fast_field_writers, field, 215u32); fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); } @@ -142,7 +142,7 @@ mod tests { let mut serializer = FastFieldSerializer::new(write).unwrap(); let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema); for x in permutation.iter() { - add_single_field_doc(&mut fast_field_writers, &field, x.clone()); + add_single_field_doc(&mut fast_field_writers, field, x.clone()); } fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); @@ -197,7 +197,7 @@ mod tests { let mut serializer = FastFieldSerializer::new(write).unwrap(); let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema); for x in permutation.iter() { - add_single_field_doc(&mut fast_field_writers, &field, x.clone()); + add_single_field_doc(&mut fast_field_writers, field, x.clone()); } fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); @@ -229,7 +229,7 @@ mod tests { let mut serializer = FastFieldSerializer::new(write).unwrap(); let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema); for x in permutation.iter() { - add_single_field_doc(&mut fast_field_writers, &field, x.clone()); + add_single_field_doc(&mut fast_field_writers, field, x.clone()); } fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 3ba8aff32..53f80c517 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,7 +47,7 @@ pub use core::searcher::Searcher; pub use core::index::Index; pub use schema::Term; pub use schema::Document; -pub use core::reader::SegmentReader; +pub use core::SegmentReader; pub use core::searcher::SegmentLocalId; pub use self::common::TimerTree; @@ -74,17 +74,17 @@ mod tests { let mut index_writer = index.writer_with_num_threads(1).unwrap(); { let mut doc = Document::new(); - doc.add_text(&text_field, "af b"); + doc.add_text(text_field, "af b"); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c"); + doc.add_text(text_field, "a b c"); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c d"); + doc.add_text(text_field, "a b c d"); index_writer.add_document(doc).unwrap(); } assert!(index_writer.wait().is_ok()); @@ -108,17 +108,17 @@ mod tests { let mut index_writer = index.writer_with_num_threads(1).unwrap(); { let mut doc = Document::new(); - doc.add_text(&text_field, "af b"); + doc.add_text(text_field, "af b"); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c"); + doc.add_text(text_field, "a b c"); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c d"); + doc.add_text(text_field, "a b c d"); index_writer.add_document(doc).unwrap(); } index_writer.wait().unwrap(); @@ -176,17 +176,17 @@ mod tests { let mut index_writer = index.writer_with_num_threads(1).unwrap(); { let mut doc = Document::new(); - doc.add_text(&text_field, "af b"); + doc.add_text(text_field, "af b"); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c"); + doc.add_text(text_field, "a b c"); index_writer.add_document(doc).unwrap(); } { let mut doc = Document::new(); - doc.add_text(&text_field, "a b c d"); + doc.add_text(text_field, "a b c d"); index_writer.add_document(doc).unwrap(); } index_writer.wait().unwrap(); diff --git a/src/query/multi_term_query.rs b/src/query/multi_term_query.rs index 36a673755..1a7d83f84 100644 --- a/src/query/multi_term_query.rs +++ b/src/query/multi_term_query.rs @@ -6,7 +6,7 @@ use std::io; use core::searcher::Searcher; use collector::Collector; use core::searcher::SegmentLocalId; -use core::reader::SegmentReader; +use core::SegmentReader; use postings::Postings; use postings::SegmentPostings; use postings::intersection; diff --git a/src/schema/document.rs b/src/schema/document.rs index c70e43ef2..dddcb39d0 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -21,11 +21,11 @@ impl Document { self.field_values.len() } - pub fn add_text(&mut self, field: &Field, text: &str) { + pub fn add_text(&mut self, field: Field, text: &str) { self.add(FieldValue::Text(field.clone(), String::from(text))); } - pub fn add_u32(&mut self, field: &Field, value: u32) { + pub fn add_u32(&mut self, field: Field, value: u32) { self.add(FieldValue::U32(field.clone(), value)); } @@ -59,3 +59,24 @@ impl From> for Document { } } } + + +#[cfg(test)] +mod tests { + + + + use super::*; + use schema::Schema; + use schema::TEXT; + + #[test] + fn test_doc() { + let mut schema = Schema::new(); + let text_field = schema.add_text_field("title", TEXT); + let mut doc = Document::new(); + doc.add_text(text_field, "My title"); + assert_eq!(doc.get_fields().len(), 1); + } + +} \ No newline at end of file diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 2253c4113..86bdb7e5b 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -11,6 +11,7 @@ pub enum FieldEntry { } impl FieldEntry { + pub fn get_field_name(&self,) -> &str { match self { &FieldEntry::Text(ref field_name, _) => { @@ -44,4 +45,21 @@ impl FieldEntry { } } } -} \ No newline at end of file +} + +// TODO implement a nicer JSON format + +#[cfg(test)] +mod tests { + + use super::*; + use schema::TEXT; + use rustc_serialize::json; + + #[test] + fn test_json_serialization() { + let field_value = FieldEntry::Text(String::from("title"), TEXT); + assert_eq!(format!("{}", json::as_json(&field_value)), + "{\"variant\":\"Text\",\"fields\":[\"title\",{\"indexing_options\":\"TokenizedWithFreqAndPosition\",\"stored\":false,\"fast\":false}]}" ); + } +} \ No newline at end of file diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 90ba3b594..9b86b745c 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -131,6 +131,4 @@ impl Schema { field } - - }