Refactoring, and making the server work with a dynamic schema

This commit is contained in:
Paul Masurel
2016-08-14 14:28:36 +09:00
parent 1807e0710c
commit e847de1ebb
11 changed files with 280 additions and 141 deletions

View File

@@ -6,6 +6,7 @@ use iron::typemap::Key;
use mount::Mount;
use persistent::Read;
use rustc_serialize::json::as_pretty_json;
use rustc_serialize::json::Json;
use staticfile::Static;
use std::convert::From;
use std::path::Path;
@@ -20,8 +21,9 @@ use tantivy::query::Explanation;
use tantivy::query::Query;
use tantivy::query::QueryParser;
use tantivy::Result;
use tantivy::schema::Field;
use tantivy::schema::{Field, Schema};
use tantivy::Score;
use tantivy::schema::NamedFieldDocument;
use urlencoded::UrlEncodedQuery;
@@ -34,7 +36,7 @@ pub fn run_serve_cli(matches: &ArgMatches) -> tantivy::Result<()> {
}
#[derive(RustcDecodable, RustcEncodable)]
#[derive(RustcEncodable)]
struct Serp {
q: String,
num_hits: usize,
@@ -42,15 +44,14 @@ struct Serp {
timings: Vec<Timing>,
}
#[derive(RustcDecodable, RustcEncodable)]
#[derive(RustcEncodable)]
struct Hit {
title: String,
body: String,
doc: NamedFieldDocument,
explain: String,
score: Score,
}
#[derive(RustcDecodable, RustcEncodable)]
#[derive(RustcEncodable)]
struct Timing {
name: String,
duration: i64,
@@ -59,8 +60,7 @@ struct Timing {
struct IndexServer {
index: Index,
query_parser: QueryParser,
body_field: Field,
title_field: Field,
schema: Schema,
}
impl IndexServer {
@@ -70,19 +70,17 @@ impl IndexServer {
let schema = index.schema();
let body_field = schema.get_field("body").unwrap();
let title_field = schema.get_field("title").unwrap();
let query_parser = QueryParser::new(schema, vec!(body_field, title_field));
let query_parser = QueryParser::new(schema.clone(), vec!(body_field, title_field));
IndexServer {
index: index,
query_parser: query_parser,
title_field: title_field,
body_field: body_field,
schema: schema,
}
}
fn create_hit(&self, doc: &Document, explain: Explanation) -> Hit {
Hit {
title: String::from(doc.get_first(self.title_field).unwrap().text()),
body: String::from(doc.get_first(self.body_field).unwrap().text().clone()),
doc: self.index.schema().to_named_doc(&doc),
explain: format!("{:?}", explain),
score: explain.val(),
}

View File

@@ -112,7 +112,7 @@ impl SegmentWriter {
let mut num_tokens: usize = 0;
for field_value in field_values {
if text_options.get_indexing_options().is_tokenized() {
let mut tokens = self.tokenizer.tokenize(field_value.text());
let mut tokens = self.tokenizer.tokenize(field_value.value().text());
// right now num_tokens and pos are redundant, but it should
// change when we get proper analyzers
let field = field_value.field();
@@ -129,7 +129,7 @@ impl SegmentWriter {
}
}
else {
let term = Term::from_field_text(field, field_value.text());
let term = Term::from_field_text(field, field_value.value().text());
field_posting_writers.suscribe(doc_id, 0, term);
}
pos += 1;
@@ -145,7 +145,7 @@ impl SegmentWriter {
FieldType::U32(ref u32_options) => {
if u32_options.is_indexed() {
for field_value in field_values {
let term = Term::from_field_u32(field_value.field(), field_value.u32_value());
let term = Term::from_field_u32(field_value.field(), field_value.value().u32_value());
field_posting_writers.suscribe(doc_id, 0, term);
}
}

View File

@@ -1,6 +1,7 @@
use schema::{Schema, FieldValue, Field, Document};
use schema::{Schema, Field, Document};
use fastfield::FastFieldSerializer;
use std::io;
use schema::Value;
use DocId;
pub struct U32FastFieldsWriter {
@@ -91,10 +92,10 @@ impl U32FastFieldWriter {
fn extract_val(&self, doc: &Document) -> u32 {
match doc.get_first(self.field) {
Some(field_value) => {
match field_value {
&FieldValue::U32(_, val) => { return val; }
_ => { panic!("Expected a u32field, got {:?} ", field_value) }
Some(v) => {
match *v {
Value::U32(ref val) => { return *val; }
_ => { panic!("Expected a u32field, got {:?} ", v) }
}
},
None => {

View File

@@ -32,18 +32,24 @@ impl Document {
/// Add a text field.
pub fn add_text(&mut self, field: Field, text: &str) {
self.add(FieldValue::Text(field.clone(), String::from(text)));
self.add(FieldValue {
field: field,
value: Value::Str(String::from(text)),
});
}
/// Add a u32 field
pub fn add_u32(&mut self, field: Field, value: u32) {
self.add(FieldValue::U32(field.clone(), value));
self.add(FieldValue {
field: field,
value: Value::U32(value),
});
}
/// Appends `field_value` to the end of this document's list of field values.
pub fn add(&mut self, field_value: FieldValue) {
self.field_values.push(field_value);
}
/// Returns a reference to the field values held by this document.
pub fn get_fields(&self) -> &Vec<FieldValue> {
&self.field_values
}
@@ -62,17 +68,19 @@ impl Document {
sorted_fields
}
pub fn get_all<'a>(&'a self, field: Field) -> Vec<&'a FieldValue> {
pub fn get_all<'a>(&'a self, field: Field) -> Vec<&'a Value> {
self.field_values
.iter()
.filter(|field_value| field_value.field() == field)
.map(|field_value| field_value.value())
.collect()
}
pub fn get_first<'a>(&'a self, field: Field) -> Option<&'a FieldValue> {
pub fn get_first<'a>(&'a self, field: Field) -> Option<&'a Value> {
self.field_values
.iter()
.filter(|field_value| field_value.field() == field)
.map(|field_value| field_value.value())
.next()
}
}

View File

@@ -19,6 +19,58 @@ pub struct FieldEntry {
field_type: FieldType,
}
impl FieldEntry {
    /// Builds an entry named `field_name` whose type is text, configured by `field_type`.
    pub fn new_text(field_name: String, field_type: TextOptions) -> FieldEntry {
        let text_type = FieldType::Text(field_type);
        FieldEntry { name: field_name, field_type: text_type }
    }

    /// Builds an entry named `field_name` whose type is u32, configured by `field_type`.
    pub fn new_u32(field_name: String, field_type: U32Options) -> FieldEntry {
        let u32_type = FieldType::U32(field_type);
        FieldEntry { name: field_name, field_type: u32_type }
    }

    /// Returns the name of the field.
    pub fn name(&self) -> &String {
        &self.name
    }

    /// Returns the type (and options) of the field.
    pub fn field_type(&self) -> &FieldType {
        &self.field_type
    }

    /// Returns what the text indexing options report for a text field.
    ///
    /// Any other field type currently reports `false`.
    pub fn is_indexed(&self) -> bool {
        if let FieldType::Text(ref options) = self.field_type {
            options.get_indexing_options().is_indexed()
        } else {
            // TODO handle u32 indexed
            false
        }
    }

    /// Returns what the u32 options report for `is_fast`; `false` for any other field type.
    pub fn is_u32_fast(&self) -> bool {
        if let FieldType::U32(ref options) = self.field_type {
            options.is_fast()
        } else {
            false
        }
    }

    /// Returns whether the field's options mark it as stored.
    pub fn is_stored(&self) -> bool {
        match self.field_type {
            FieldType::Text(ref options) => options.is_stored(),
            FieldType::U32(ref options) => options.is_stored(),
        }
    }
}
impl Encodable for FieldEntry {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_struct("field_entry", 3, |s| {
@@ -77,57 +129,6 @@ impl Decodable for FieldEntry {
}
}
impl FieldEntry {
/// Builds an entry named `field_name` whose type is text, configured by `field_type`.
pub fn new_text(field_name: String, field_type: TextOptions) -> FieldEntry {
FieldEntry {
name: field_name,
field_type: FieldType::Text(field_type),
}
}
/// Builds an entry named `field_name` whose type is u32, configured by `field_type`.
pub fn new_u32(field_name: String, field_type: U32Options) -> FieldEntry {
FieldEntry {
name: field_name,
field_type: FieldType::U32(field_type),
}
}
/// Returns the name of the field.
pub fn name(&self,) -> &String {
&self.name
}
/// Returns the type (and options) of the field.
pub fn field_type(&self,) -> &FieldType {
&self.field_type
}
/// Returns what the text indexing options report for a text field;
/// any other field type currently reports `false`.
pub fn is_indexed(&self,) -> bool {
match self.field_type {
FieldType::Text(ref options) => options.get_indexing_options().is_indexed(),
_ => false, // TODO handle u32 indexed
}
}
/// Returns what the u32 options report for `is_fast`; `false` for any other field type.
pub fn is_u32_fast(&self,) -> bool {
match self.field_type {
FieldType::U32(ref options) => options.is_fast(),
_ => false,
}
}
/// Returns whether the field's options mark it as stored.
pub fn is_stored(&self,) -> bool {
match self.field_type {
FieldType::U32(ref options) => {
options.is_stored()
}
FieldType::Text(ref options) => {
options.is_stored()
}
}
}
}
// TODO implement a nicer JSON format
#[cfg(test)]
mod tests {

View File

@@ -3,85 +3,41 @@ use common::BinarySerializable;
use std::io::Read;
use std::io::Write;
use schema::Field;
use schema::Value;
const TEXT_CODE: u8 = 0;
const U32_CODE: u8 = 1;
#[derive(Debug)]
pub enum FieldValue {
Text(Field, String),
U32(Field, u32),
pub struct FieldValue {
pub field: Field,
pub value: Value,
}
impl FieldValue {
pub fn field(&self) -> Field {
match self {
&FieldValue::Text(field, _) => {
field
},
&FieldValue::U32(field, _) => {
field
}
}
self.field
}
pub fn value(&self,) -> &Value {
&self.value
}
pub fn text(&self) -> &str {
match self {
&FieldValue::Text(_, ref text) => {
text
}
_ => {
panic!("This is not a text field.")
}
}
}
pub fn u32_value(&self) -> u32 {
match self {
&FieldValue::U32(_, value) => {
value
}
_ => {
panic!("This is not a text field.")
}
}
}
}
impl BinarySerializable for FieldValue {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let mut written_size = 0;
match self {
&FieldValue::Text(ref field, ref text) => {
written_size += try!(TEXT_CODE.serialize(writer));
written_size += try!(field.serialize(writer));
written_size += try!(text.serialize(writer));
},
&FieldValue::U32(ref field, ref val) => {
written_size += try!(U32_CODE.serialize(writer));
written_size += try!(field.serialize(writer));
written_size += try!(val.serialize(writer));
},
}
written_size += try!(self.field.serialize(writer));
written_size += try!(self.value.serialize(writer));
Ok(written_size)
}
fn deserialize(reader: &mut Read) -> io::Result<Self> {
let type_code = try!(u8::deserialize(reader));
match type_code {
TEXT_CODE => {
let field = try!(Field::deserialize(reader));
let text = try!(String::deserialize(reader));
Ok(FieldValue::Text(field, text))
}
U32_CODE => {
let field = try!(Field::deserialize(reader));
let value = try!(u32::deserialize(reader));
Ok(FieldValue::U32(field, value))
}
_ => {
Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
}
}
let field = try!(Field::deserialize(reader));
let value = try!(Value::deserialize(reader));
Ok(FieldValue {
field: field,
value: value,
})
}
}

View File

@@ -102,10 +102,16 @@ mod field_value;
mod text_options;
mod u32_options;
mod field;
mod value;
mod named_field_document;
pub use self::named_field_document::NamedFieldDocument;
pub use self::schema::Schema;
pub use self::value::Value;
pub use self::schema::DocParsingError;
pub use self::document::Document;
pub use self::field::Field;
pub use self::term::Term;

View File

@@ -0,0 +1,57 @@
use std::collections::BTreeMap;
use schema::Value;
use rustc_serialize::Encodable;
use rustc_serialize::Encoder;
/// A document represented as a sorted map from a `String` key to the list of
/// `Value`s associated with that key.
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
impl Encodable for NamedFieldDocument {
    /// Encodes the document as a struct with one struct field per map entry.
    ///
    /// Each struct field is named after the map key and holds a single
    /// sequence containing every value stored under that key, in order.
    /// `Value::Str` elements are emitted as strings and `Value::U32`
    /// elements as `u32`s.
    ///
    /// Any error reported by the encoder is returned as-is.
    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
        s.emit_struct("named_field_document", self.0.len(), |s| {
            for (i, (name, vals)) in self.0.iter().enumerate() {
                try!(s.emit_struct_field(name, i, |s| {
                    // The sequence must be opened exactly once per field and
                    // wrap the loop over values: opening it per value would
                    // emit several sequences for one field, and none at all
                    // for a field with no values.
                    s.emit_seq(vals.len(), |s| {
                        for (j, val) in vals.iter().enumerate() {
                            try!(s.emit_seq_elt(j, |s| match *val {
                                Value::Str(ref text) => s.emit_str(text),
                                Value::U32(ref num) => s.emit_u32(*num),
                            }));
                        }
                        Ok(())
                    })
                }));
            }
            Ok(())
        })
    }
}

View File

@@ -6,6 +6,7 @@ use rustc_serialize::Decoder;
use rustc_serialize::Encoder;
use rustc_serialize::json;
use rustc_serialize::json::Json;
use std::collections::BTreeMap;
use super::*;
@@ -34,6 +35,7 @@ pub struct Schema {
fields_map: HashMap<String, Field>, // transient
}
impl Decodable for Schema {
fn decode<D: Decoder>(d: &mut D) -> Result <Self, D::Error> {
let mut schema = Schema::new();
@@ -75,6 +77,10 @@ impl Schema {
&self.fields[field.0 as usize]
}
/// Returns the name of `field`, read from the field's entry in this schema.
pub fn get_field_name(&self, field: Field) -> &String {
self.get_field_entry(field).name()
}
/// Returns a reference to the schema's field entries.
pub fn fields(&self,) -> &Vec<FieldEntry> {
&self.fields
}
@@ -123,6 +129,20 @@ impl Schema {
field
}
/// Converts `doc` into a `NamedFieldDocument`.
///
/// For every `(field, field values)` group returned by
/// `doc.get_sorted_fields()`, the field's name is looked up in this schema
/// and mapped to clones of the values held by that group.
pub fn to_named_doc(&self, doc: &Document) -> NamedFieldDocument {
    let mut named_values = BTreeMap::new();
    for (field, field_values) in doc.get_sorted_fields() {
        let values: Vec<Value> = field_values
            .into_iter()
            .map(|field_val| field_val.value().clone())
            .collect();
        let field_name = self.get_field_name(field).clone();
        named_values.insert(field_name, values);
    }
    NamedFieldDocument(named_values)
}
/// Build a document object from a json-object.
pub fn parse_document(&self, doc_json: &str) -> Result<Document, DocParsingError> {
let json_node = try!(Json::from_str(doc_json));

86
src/schema/value.rs Normal file
View File

@@ -0,0 +1,86 @@
use common::BinarySerializable;
use std::io;
use std::io::Write;
use std::io::Read;
/// A value that can be held by a field: either a string or a `u32`.
#[derive(Debug, Clone)]
pub enum Value {
    /// A text value.
    Str(String),
    /// An unsigned 32-bit integer value.
    U32(u32),
}

impl Value {
    /// Returns the text of a `Value::Str`.
    ///
    /// # Panics
    ///
    /// Panics if the value is not a `Value::Str`.
    pub fn text(&self) -> &str {
        match *self {
            Value::Str(ref text) => text,
            Value::U32(_) => panic!("This is not a text field."),
        }
    }

    /// Returns the integer of a `Value::U32`.
    ///
    /// # Panics
    ///
    /// Panics if the value is not a `Value::U32`.
    pub fn u32_value(&self) -> u32 {
        match *self {
            Value::U32(value) => value,
            // The message names the type that was expected here (u32),
            // not text, so the panic points at the real mismatch.
            Value::Str(_) => panic!("This is not a u32 field."),
        }
    }
}
/// Wraps an owned `String` into a `Value::Str`.
impl From<String> for Value {
fn from(s: String) -> Value {
Value::Str(s)
}
}
/// Wraps a `u32` into a `Value::U32`.
impl From<u32> for Value {
fn from(v: u32) -> Value {
Value::U32(v)
}
}
/// Type code written before the payload of a `Value::Str`.
const TEXT_CODE: u8 = 0;
/// Type code written before the payload of a `Value::U32`.
const U32_CODE: u8 = 1;

impl BinarySerializable for Value {
    /// Writes the variant's type code followed by its payload.
    ///
    /// Returns the sum of the sizes reported by the two underlying
    /// `serialize` calls, or the first I/O error encountered.
    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
        match *self {
            Value::Str(ref text) => {
                let code_size = try!(TEXT_CODE.serialize(writer));
                let payload_size = try!(text.serialize(writer));
                Ok(code_size + payload_size)
            }
            Value::U32(ref val) => {
                let code_size = try!(U32_CODE.serialize(writer));
                let payload_size = try!(val.serialize(writer));
                Ok(code_size + payload_size)
            }
        }
    }

    /// Reads a type code, then the payload matching that code.
    ///
    /// Returns an `InvalidData` error when the type code is neither
    /// `TEXT_CODE` nor `U32_CODE`; other I/O errors are propagated.
    fn deserialize(reader: &mut Read) -> io::Result<Self> {
        let type_code = try!(u8::deserialize(reader));
        match type_code {
            TEXT_CODE => String::deserialize(reader).map(Value::Str),
            U32_CODE => u32::deserialize(reader).map(Value::U32),
            _ => {
                let message = format!("No field type is associated with code {:?}", type_code);
                Err(io::Error::new(io::ErrorKind::InvalidData, message))
            }
        }
    }
}

View File

@@ -29,12 +29,18 @@ mod tests {
for i in 0..1000 {
let mut fields: Vec<FieldValue> = Vec::new();
{
let field_value = FieldValue::Text(field_body.clone(), lorem.clone());
let field_value = FieldValue {
field: field_body,
value: From::from(lorem.clone())
};
fields.push(field_value);
}
{
let title_text = format!("Doc {}", i);
let field_value = FieldValue::Text(field_title.clone(), title_text);
let field_value = FieldValue {
field: field_title,
value: From::from(title_text)
};
fields.push(field_value);
}
let fields_refs: Vec<&FieldValue> = fields.iter().collect();