diff --git a/src/cli/commands/serve.rs b/src/cli/commands/serve.rs index a78eaec7d..809ee4c58 100644 --- a/src/cli/commands/serve.rs +++ b/src/cli/commands/serve.rs @@ -6,6 +6,7 @@ use iron::typemap::Key; use mount::Mount; use persistent::Read; use rustc_serialize::json::as_pretty_json; +use rustc_serialize::json::Json; use staticfile::Static; use std::convert::From; use std::path::Path; @@ -20,8 +21,9 @@ use tantivy::query::Explanation; use tantivy::query::Query; use tantivy::query::QueryParser; use tantivy::Result; -use tantivy::schema::Field; +use tantivy::schema::{Field, Schema}; use tantivy::Score; +use tantivy::schema::NamedFieldDocument; use urlencoded::UrlEncodedQuery; @@ -34,7 +36,7 @@ pub fn run_serve_cli(matches: &ArgMatches) -> tantivy::Result<()> { } -#[derive(RustcDecodable, RustcEncodable)] +#[derive(RustcEncodable)] struct Serp { q: String, num_hits: usize, @@ -42,15 +44,14 @@ struct Serp { timings: Vec, } -#[derive(RustcDecodable, RustcEncodable)] +#[derive(RustcEncodable)] struct Hit { - title: String, - body: String, + doc: NamedFieldDocument, explain: String, score: Score, } -#[derive(RustcDecodable, RustcEncodable)] +#[derive(RustcEncodable)] struct Timing { name: String, duration: i64, @@ -59,8 +60,7 @@ struct Timing { struct IndexServer { index: Index, query_parser: QueryParser, - body_field: Field, - title_field: Field, + schema: Schema, } impl IndexServer { @@ -70,19 +70,17 @@ impl IndexServer { let schema = index.schema(); let body_field = schema.get_field("body").unwrap(); let title_field = schema.get_field("title").unwrap(); - let query_parser = QueryParser::new(schema, vec!(body_field, title_field)); + let query_parser = QueryParser::new(schema.clone(), vec!(body_field, title_field)); IndexServer { index: index, query_parser: query_parser, - title_field: title_field, - body_field: body_field, + schema: schema, } } fn create_hit(&self, doc: &Document, explain: Explanation) -> Hit { Hit { - title: 
String::from(doc.get_first(self.title_field).unwrap().text()), - body: String::from(doc.get_first(self.body_field).unwrap().text().clone()), + doc: self.schema.to_named_doc(doc), explain: format!("{:?}", explain), score: explain.val(), } diff --git a/src/core/segment_writer.rs b/src/core/segment_writer.rs index 7cfa189af..40a7fe824 100644 --- a/src/core/segment_writer.rs +++ b/src/core/segment_writer.rs @@ -112,7 +112,7 @@ impl SegmentWriter { let mut num_tokens: usize = 0; for field_value in field_values { if text_options.get_indexing_options().is_tokenized() { - let mut tokens = self.tokenizer.tokenize(field_value.text()); + let mut tokens = self.tokenizer.tokenize(field_value.value().text()); // right now num_tokens and pos are redundant, but it should // change when we get proper analyzers let field = field_value.field(); @@ -129,7 +129,7 @@ impl SegmentWriter { } } else { - let term = Term::from_field_text(field, field_value.text()); + let term = Term::from_field_text(field, field_value.value().text()); field_posting_writers.suscribe(doc_id, 0, term); } pos += 1; @@ -145,7 +145,7 @@ impl SegmentWriter { FieldType::U32(ref u32_options) => { if u32_options.is_indexed() { for field_value in field_values { - let term = Term::from_field_u32(field_value.field(), field_value.u32_value()); + let term = Term::from_field_u32(field_value.field(), field_value.value().u32_value()); field_posting_writers.suscribe(doc_id, 0, term); } } diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 3ec43c42b..e1837fa9b 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -1,6 +1,7 @@ -use schema::{Schema, FieldValue, Field, Document}; +use schema::{Schema, Field, Document}; use fastfield::FastFieldSerializer; use std::io; +use schema::Value; use DocId; pub struct U32FastFieldsWriter { @@ -91,10 +92,10 @@ impl U32FastFieldWriter { fn extract_val(&self, doc: &Document) -> u32 { match doc.get_first(self.field) { - Some(field_value) => { - match 
field_value { - &FieldValue::U32(_, val) => { return val; } - _ => { panic!("Expected a u32field, got {:?} ", field_value) } + Some(v) => { + match *v { + Value::U32(ref val) => { return *val; } + _ => { panic!("Expected a u32field, got {:?} ", v) } } }, None => { diff --git a/src/schema/document.rs b/src/schema/document.rs index 45be5f653..895a0c9c5 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -32,18 +32,24 @@ impl Document { /// Add a text field. pub fn add_text(&mut self, field: Field, text: &str) { - self.add(FieldValue::Text(field.clone(), String::from(text))); + self.add(FieldValue { + field: field, + value: Value::Str(String::from(text)), + }); } /// Add a u32 field pub fn add_u32(&mut self, field: Field, value: u32) { - self.add(FieldValue::U32(field.clone(), value)); + self.add(FieldValue { + field: field, + value: Value::U32(value), + }); } pub fn add(&mut self, field_value: FieldValue) { self.field_values.push(field_value); } - + pub fn get_fields(&self) -> &Vec { &self.field_values } @@ -62,17 +68,19 @@ impl Document { sorted_fields } - pub fn get_all<'a>(&'a self, field: Field) -> Vec<&'a FieldValue> { + pub fn get_all<'a>(&'a self, field: Field) -> Vec<&'a Value> { self.field_values .iter() .filter(|field_value| field_value.field() == field) + .map(|field_value| field_value.value()) .collect() } - pub fn get_first<'a>(&'a self, field: Field) -> Option<&'a FieldValue> { + pub fn get_first<'a>(&'a self, field: Field) -> Option<&'a Value> { self.field_values .iter() .filter(|field_value| field_value.field() == field) + .map(|field_value| field_value.value()) .next() } } diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 9e5cf14f0..fdc66dee5 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -19,6 +19,58 @@ pub struct FieldEntry { field_type: FieldType, } +impl FieldEntry { + + pub fn new_text(field_name: String, field_type: TextOptions) -> FieldEntry { + FieldEntry { + name: field_name, 
+ field_type: FieldType::Text(field_type), + } + } + + pub fn new_u32(field_name: String, field_type: U32Options) -> FieldEntry { + FieldEntry { + name: field_name, + field_type: FieldType::U32(field_type), + } + } + + pub fn name(&self,) -> &String { + &self.name + } + + pub fn field_type(&self,) -> &FieldType { + &self.field_type + } + + pub fn is_indexed(&self,) -> bool { + match self.field_type { + FieldType::Text(ref options) => options.get_indexing_options().is_indexed(), + _ => false, // TODO handle u32 indexed + } + } + + pub fn is_u32_fast(&self,) -> bool { + match self.field_type { + FieldType::U32(ref options) => options.is_fast(), + _ => false, + } + } + + pub fn is_stored(&self,) -> bool { + match self.field_type { + FieldType::U32(ref options) => { + options.is_stored() + } + FieldType::Text(ref options) => { + options.is_stored() + } + } + } +} + + + impl Encodable for FieldEntry { fn encode(&self, s: &mut S) -> Result<(), S::Error> { s.emit_struct("field_entry", 3, |s| { @@ -77,57 +129,6 @@ impl Decodable for FieldEntry { } } -impl FieldEntry { - - pub fn new_text(field_name: String, field_type: TextOptions) -> FieldEntry { - FieldEntry { - name: field_name, - field_type: FieldType::Text(field_type), - } - } - - pub fn new_u32(field_name: String, field_type: U32Options) -> FieldEntry { - FieldEntry { - name: field_name, - field_type: FieldType::U32(field_type), - } - } - - pub fn name(&self,) -> &String { - &self.name - } - - pub fn field_type(&self,) -> &FieldType { - &self.field_type - } - - pub fn is_indexed(&self,) -> bool { - match self.field_type { - FieldType::Text(ref options) => options.get_indexing_options().is_indexed(), - _ => false, // TODO handle u32 indexed - } - } - - pub fn is_u32_fast(&self,) -> bool { - match self.field_type { - FieldType::U32(ref options) => options.is_fast(), - _ => false, - } - } - - pub fn is_stored(&self,) -> bool { - match self.field_type { - FieldType::U32(ref options) => { - options.is_stored() - } - 
FieldType::Text(ref options) => { - options.is_stored() - } - } - } -} - -// TODO implement a nicer JSON format #[cfg(test)] mod tests { diff --git a/src/schema/field_value.rs b/src/schema/field_value.rs index 2be318f67..70e9c9e84 100644 --- a/src/schema/field_value.rs +++ b/src/schema/field_value.rs @@ -3,85 +3,41 @@ use common::BinarySerializable; use std::io::Read; use std::io::Write; use schema::Field; +use schema::Value; -const TEXT_CODE: u8 = 0; -const U32_CODE: u8 = 1; #[derive(Debug)] -pub enum FieldValue { - Text(Field, String), - U32(Field, u32), +pub struct FieldValue { + pub field: Field, + pub value: Value, } impl FieldValue { pub fn field(&self) -> Field { - match self { - &FieldValue::Text(field, _) => { - field - }, - &FieldValue::U32(field, _) => { - field - } - } + self.field + } + + pub fn value(&self,) -> &Value { + &self.value } - pub fn text(&self) -> &str { - match self { - &FieldValue::Text(_, ref text) => { - text - } - _ => { - panic!("This is not a text field.") - } - } - } - - pub fn u32_value(&self) -> u32 { - match self { - &FieldValue::U32(_, value) => { - value - } - _ => { - panic!("This is not a text field.") - } - } - } } impl BinarySerializable for FieldValue { fn serialize(&self, writer: &mut Write) -> io::Result { let mut written_size = 0; - match self { - &FieldValue::Text(ref field, ref text) => { - written_size += try!(TEXT_CODE.serialize(writer)); - written_size += try!(field.serialize(writer)); - written_size += try!(text.serialize(writer)); - }, - &FieldValue::U32(ref field, ref val) => { - written_size += try!(U32_CODE.serialize(writer)); - written_size += try!(field.serialize(writer)); - written_size += try!(val.serialize(writer)); - }, - } + written_size += try!(self.field.serialize(writer)); + written_size += try!(self.value.serialize(writer)); Ok(written_size) } + fn deserialize(reader: &mut Read) -> io::Result { - let type_code = try!(u8::deserialize(reader)); - match type_code { - TEXT_CODE => { - let field = 
try!(Field::deserialize(reader)); - let text = try!(String::deserialize(reader)); - Ok(FieldValue::Text(field, text)) - } - U32_CODE => { - let field = try!(Field::deserialize(reader)); - let value = try!(u32::deserialize(reader)); - Ok(FieldValue::U32(field, value)) - } - _ => { - Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code))) - } - } + let field = try!(Field::deserialize(reader)); + let value = try!(Value::deserialize(reader)); + Ok(FieldValue { + field: field, + value: value, + }) } } diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 44040a53a..98905f3e6 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -102,10 +102,16 @@ mod field_value; mod text_options; mod u32_options; mod field; +mod value; +mod named_field_document; + +pub use self::named_field_document::NamedFieldDocument; pub use self::schema::Schema; +pub use self::value::Value; pub use self::schema::DocParsingError; + pub use self::document::Document; pub use self::field::Field; pub use self::term::Term; diff --git a/src/schema/named_field_document.rs b/src/schema/named_field_document.rs new file mode 100644 index 000000000..e9b740bc3 --- /dev/null +++ b/src/schema/named_field_document.rs @@ -0,0 +1,60 @@ +use std::collections::BTreeMap; +use schema::Value; +use rustc_serialize::Encodable; +use rustc_serialize::Encoder; + +/// Document keyed by field name; each name maps to every value stored for that field. +pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>); + + +impl Encodable for NamedFieldDocument { + /// Encodes the document as one struct field per field name, each holding a sequence of its values. + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_struct("named_field_document", self.0.len(), |s| { + for (i, (name, vals)) in self.0.iter().enumerate() { + try!(s.emit_struct_field(name, i, |s| { + // Open the sequence once per field: opening it per value emitted one sequence per value and nothing at all for an empty field. + s.emit_seq(vals.len(), |s| { + for (j, val) in vals.iter().enumerate() { + try!(s.emit_seq_elt(j, |s| { + match *val { + Value::Str(ref text) => { + s.emit_str(text) + }, + Value::U32(ref val) => { + s.emit_u32(*val) + } + } + })); + } + Ok(()) + }) + + })); + } + // try!(s.emit_struct_field("name", 0, 
|s| { + // self.name.encode(s) + // })); + // match self.field_type { + // FieldType::Text(ref options) => { + // try!(s.emit_struct_field("type", 1, |s| { + // s.emit_str("text") + // })); + // try!(s.emit_struct_field("options", 2, |s| { + // options.encode(s) + // })); + // } + // FieldType::U32(ref options) => { + // try!(s.emit_struct_field("type", 1, |s| { + // s.emit_str("u32") + // })); + // try!(s.emit_struct_field("options", 2, |s| { + // options.encode(s) + // })); + // } + // } + + Ok(()) + }) + } +} diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 10b9807ef..23846fa8c 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -6,6 +6,7 @@ use rustc_serialize::Decoder; use rustc_serialize::Encoder; use rustc_serialize::json; use rustc_serialize::json::Json; +use std::collections::BTreeMap; use super::*; @@ -34,6 +35,7 @@ pub struct Schema { fields_map: HashMap, // transient } + impl Decodable for Schema { fn decode(d: &mut D) -> Result { let mut schema = Schema::new(); @@ -75,6 +77,10 @@ impl Schema { &self.fields[field.0 as usize] } + pub fn get_field_name(&self, field: Field) -> &String { + self.get_field_entry(field).name() + } + pub fn fields(&self,) -> &Vec { &self.fields } @@ -123,6 +129,20 @@ impl Schema { field } + + pub fn to_named_doc(&self, doc: &Document) -> NamedFieldDocument { + let mut field_map = BTreeMap::new(); + for (field, field_values) in doc.get_sorted_fields() { + let field_name = self.get_field_name(field); + let values: Vec = field_values + .into_iter() + .map(|field_val| field_val.value() ) + .cloned() + .collect(); + field_map.insert(field_name.clone(), values); + } + NamedFieldDocument(field_map) + } /// Build a document object from a json-object. 
pub fn parse_document(&self, doc_json: &str) -> Result { let json_node = try!(Json::from_str(doc_json)); diff --git a/src/schema/value.rs b/src/schema/value.rs new file mode 100644 index 000000000..119f0505c --- /dev/null +++ b/src/schema/value.rs @@ -0,0 +1,89 @@ + +use common::BinarySerializable; +use std::io; +use std::io::Write; +use std::io::Read; + +/// A single value held by a document field: either text or an unsigned 32-bit integer. +#[derive(Debug, Clone)] +pub enum Value { + Str(String), + U32(u32), +} + +impl Value { + /// Returns the text of a `Str` value; panics for any other variant. + pub fn text(&self) -> &str { + match *self { + Value::Str(ref text) => { + text + } + _ => { + panic!("This is not a text field.") + } + } + } + + /// Returns the integer of a `U32` value; panics for any other variant. + pub fn u32_value(&self) -> u32 { + match *self { + Value::U32(ref value) => { + *value + } + _ => { + panic!("This is not a u32 field.") + } + } + } +} + +impl From<String> for Value { + fn from(s: String) -> Value { + Value::Str(s) + } +} + + +impl From<u32> for Value { + fn from(v: u32) -> Value { + Value::U32(v) + } +} + + +const TEXT_CODE: u8 = 0; +const U32_CODE: u8 = 1; + + +impl BinarySerializable for Value { + fn serialize(&self, writer: &mut Write) -> io::Result<usize> { + let mut written_size = 0; + match *self { + Value::Str(ref text) => { + written_size += try!(TEXT_CODE.serialize(writer)); + written_size += try!(text.serialize(writer)); + }, + Value::U32(ref val) => { + written_size += try!(U32_CODE.serialize(writer)); + written_size += try!(val.serialize(writer)); + }, + } + Ok(written_size) + } + fn deserialize(reader: &mut Read) -> io::Result<Self> { + let type_code = try!(u8::deserialize(reader)); + match type_code { + TEXT_CODE => { + let text = try!(String::deserialize(reader)); + Ok(Value::Str(text)) + } + U32_CODE => { + let value = try!(u32::deserialize(reader)); + Ok(Value::U32(value)) + } + _ => { + Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code))) + } + } + } +} \ No newline at end of file diff --git a/src/store/mod.rs b/src/store/mod.rs index 64ad695e8..8a395b38c 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ 
-29,12 +29,18 @@ mod tests { for i in 0..1000 { let mut fields: Vec = Vec::new(); { - let field_value = FieldValue::Text(field_body.clone(), lorem.clone()); + let field_value = FieldValue { + field: field_body, + value: From::from(lorem.clone()) + }; fields.push(field_value); } { let title_text = format!("Doc {}", i); - let field_value = FieldValue::Text(field_title.clone(), title_text); + let field_value = FieldValue { + field: field_title, + value: From::from(title_text) + }; fields.push(field_value); } let fields_refs: Vec<&FieldValue> = fields.iter().collect();