mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 07:30:39 +00:00
Refactoring, and making server working with a dynamic schema
This commit is contained in:
@@ -6,6 +6,7 @@ use iron::typemap::Key;
|
||||
use mount::Mount;
|
||||
use persistent::Read;
|
||||
use rustc_serialize::json::as_pretty_json;
|
||||
use rustc_serialize::json::Json;
|
||||
use staticfile::Static;
|
||||
use std::convert::From;
|
||||
use std::path::Path;
|
||||
@@ -20,8 +21,9 @@ use tantivy::query::Explanation;
|
||||
use tantivy::query::Query;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::Result;
|
||||
use tantivy::schema::Field;
|
||||
use tantivy::schema::{Field, Schema};
|
||||
use tantivy::Score;
|
||||
use tantivy::schema::NamedFieldDocument;
|
||||
use urlencoded::UrlEncodedQuery;
|
||||
|
||||
|
||||
@@ -34,7 +36,7 @@ pub fn run_serve_cli(matches: &ArgMatches) -> tantivy::Result<()> {
|
||||
}
|
||||
|
||||
|
||||
#[derive(RustcDecodable, RustcEncodable)]
|
||||
#[derive(RustcEncodable)]
|
||||
struct Serp {
|
||||
q: String,
|
||||
num_hits: usize,
|
||||
@@ -42,15 +44,14 @@ struct Serp {
|
||||
timings: Vec<Timing>,
|
||||
}
|
||||
|
||||
#[derive(RustcDecodable, RustcEncodable)]
|
||||
#[derive(RustcEncodable)]
|
||||
struct Hit {
|
||||
title: String,
|
||||
body: String,
|
||||
doc: NamedFieldDocument,
|
||||
explain: String,
|
||||
score: Score,
|
||||
}
|
||||
|
||||
#[derive(RustcDecodable, RustcEncodable)]
|
||||
#[derive(RustcEncodable)]
|
||||
struct Timing {
|
||||
name: String,
|
||||
duration: i64,
|
||||
@@ -59,8 +60,7 @@ struct Timing {
|
||||
struct IndexServer {
|
||||
index: Index,
|
||||
query_parser: QueryParser,
|
||||
body_field: Field,
|
||||
title_field: Field,
|
||||
schema: Schema,
|
||||
}
|
||||
|
||||
impl IndexServer {
|
||||
@@ -70,19 +70,17 @@ impl IndexServer {
|
||||
let schema = index.schema();
|
||||
let body_field = schema.get_field("body").unwrap();
|
||||
let title_field = schema.get_field("title").unwrap();
|
||||
let query_parser = QueryParser::new(schema, vec!(body_field, title_field));
|
||||
let query_parser = QueryParser::new(schema.clone(), vec!(body_field, title_field));
|
||||
IndexServer {
|
||||
index: index,
|
||||
query_parser: query_parser,
|
||||
title_field: title_field,
|
||||
body_field: body_field,
|
||||
schema: schema,
|
||||
}
|
||||
}
|
||||
|
||||
fn create_hit(&self, doc: &Document, explain: Explanation) -> Hit {
|
||||
Hit {
|
||||
title: String::from(doc.get_first(self.title_field).unwrap().text()),
|
||||
body: String::from(doc.get_first(self.body_field).unwrap().text().clone()),
|
||||
doc: self.index.schema().to_named_doc(&doc),
|
||||
explain: format!("{:?}", explain),
|
||||
score: explain.val(),
|
||||
}
|
||||
|
||||
@@ -112,7 +112,7 @@ impl SegmentWriter {
|
||||
let mut num_tokens: usize = 0;
|
||||
for field_value in field_values {
|
||||
if text_options.get_indexing_options().is_tokenized() {
|
||||
let mut tokens = self.tokenizer.tokenize(field_value.text());
|
||||
let mut tokens = self.tokenizer.tokenize(field_value.value().text());
|
||||
// right now num_tokens and pos are redundant, but it should
|
||||
// change when we get proper analyzers
|
||||
let field = field_value.field();
|
||||
@@ -129,7 +129,7 @@ impl SegmentWriter {
|
||||
}
|
||||
}
|
||||
else {
|
||||
let term = Term::from_field_text(field, field_value.text());
|
||||
let term = Term::from_field_text(field, field_value.value().text());
|
||||
field_posting_writers.suscribe(doc_id, 0, term);
|
||||
}
|
||||
pos += 1;
|
||||
@@ -145,7 +145,7 @@ impl SegmentWriter {
|
||||
FieldType::U32(ref u32_options) => {
|
||||
if u32_options.is_indexed() {
|
||||
for field_value in field_values {
|
||||
let term = Term::from_field_u32(field_value.field(), field_value.u32_value());
|
||||
let term = Term::from_field_u32(field_value.field(), field_value.value().u32_value());
|
||||
field_posting_writers.suscribe(doc_id, 0, term);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use schema::{Schema, FieldValue, Field, Document};
|
||||
use schema::{Schema, Field, Document};
|
||||
use fastfield::FastFieldSerializer;
|
||||
use std::io;
|
||||
use schema::Value;
|
||||
use DocId;
|
||||
|
||||
pub struct U32FastFieldsWriter {
|
||||
@@ -91,10 +92,10 @@ impl U32FastFieldWriter {
|
||||
|
||||
fn extract_val(&self, doc: &Document) -> u32 {
|
||||
match doc.get_first(self.field) {
|
||||
Some(field_value) => {
|
||||
match field_value {
|
||||
&FieldValue::U32(_, val) => { return val; }
|
||||
_ => { panic!("Expected a u32field, got {:?} ", field_value) }
|
||||
Some(v) => {
|
||||
match *v {
|
||||
Value::U32(ref val) => { return *val; }
|
||||
_ => { panic!("Expected a u32field, got {:?} ", v) }
|
||||
}
|
||||
},
|
||||
None => {
|
||||
|
||||
@@ -32,18 +32,24 @@ impl Document {
|
||||
|
||||
/// Add a text field.
|
||||
pub fn add_text(&mut self, field: Field, text: &str) {
|
||||
self.add(FieldValue::Text(field.clone(), String::from(text)));
|
||||
self.add(FieldValue {
|
||||
field: field,
|
||||
value: Value::Str(String::from(text)),
|
||||
});
|
||||
}
|
||||
|
||||
/// Add a u32 field
|
||||
pub fn add_u32(&mut self, field: Field, value: u32) {
|
||||
self.add(FieldValue::U32(field.clone(), value));
|
||||
self.add(FieldValue {
|
||||
field: field,
|
||||
value: Value::U32(value),
|
||||
});
|
||||
}
|
||||
|
||||
pub fn add(&mut self, field_value: FieldValue) {
|
||||
self.field_values.push(field_value);
|
||||
}
|
||||
|
||||
|
||||
pub fn get_fields(&self) -> &Vec<FieldValue> {
|
||||
&self.field_values
|
||||
}
|
||||
@@ -62,17 +68,19 @@ impl Document {
|
||||
sorted_fields
|
||||
}
|
||||
|
||||
pub fn get_all<'a>(&'a self, field: Field) -> Vec<&'a FieldValue> {
|
||||
pub fn get_all<'a>(&'a self, field: Field) -> Vec<&'a Value> {
|
||||
self.field_values
|
||||
.iter()
|
||||
.filter(|field_value| field_value.field() == field)
|
||||
.map(|field_value| field_value.value())
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn get_first<'a>(&'a self, field: Field) -> Option<&'a FieldValue> {
|
||||
pub fn get_first<'a>(&'a self, field: Field) -> Option<&'a Value> {
|
||||
self.field_values
|
||||
.iter()
|
||||
.filter(|field_value| field_value.field() == field)
|
||||
.map(|field_value| field_value.value())
|
||||
.next()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,6 +19,58 @@ pub struct FieldEntry {
|
||||
field_type: FieldType,
|
||||
}
|
||||
|
||||
impl FieldEntry {
|
||||
|
||||
pub fn new_text(field_name: String, field_type: TextOptions) -> FieldEntry {
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::Text(field_type),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_u32(field_name: String, field_type: U32Options) -> FieldEntry {
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::U32(field_type),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self,) -> &String {
|
||||
&self.name
|
||||
}
|
||||
|
||||
pub fn field_type(&self,) -> &FieldType {
|
||||
&self.field_type
|
||||
}
|
||||
|
||||
pub fn is_indexed(&self,) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::Text(ref options) => options.get_indexing_options().is_indexed(),
|
||||
_ => false, // TODO handle u32 indexed
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_u32_fast(&self,) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::U32(ref options) => options.is_fast(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_stored(&self,) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::U32(ref options) => {
|
||||
options.is_stored()
|
||||
}
|
||||
FieldType::Text(ref options) => {
|
||||
options.is_stored()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
impl Encodable for FieldEntry {
|
||||
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
|
||||
s.emit_struct("field_entry", 3, |s| {
|
||||
@@ -77,57 +129,6 @@ impl Decodable for FieldEntry {
|
||||
}
|
||||
}
|
||||
|
||||
impl FieldEntry {
|
||||
|
||||
pub fn new_text(field_name: String, field_type: TextOptions) -> FieldEntry {
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::Text(field_type),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_u32(field_name: String, field_type: U32Options) -> FieldEntry {
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::U32(field_type),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self,) -> &String {
|
||||
&self.name
|
||||
}
|
||||
|
||||
pub fn field_type(&self,) -> &FieldType {
|
||||
&self.field_type
|
||||
}
|
||||
|
||||
pub fn is_indexed(&self,) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::Text(ref options) => options.get_indexing_options().is_indexed(),
|
||||
_ => false, // TODO handle u32 indexed
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_u32_fast(&self,) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::U32(ref options) => options.is_fast(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_stored(&self,) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::U32(ref options) => {
|
||||
options.is_stored()
|
||||
}
|
||||
FieldType::Text(ref options) => {
|
||||
options.is_stored()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO implement a nicer JSON format
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -3,85 +3,41 @@ use common::BinarySerializable;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
use schema::Field;
|
||||
use schema::Value;
|
||||
|
||||
const TEXT_CODE: u8 = 0;
|
||||
const U32_CODE: u8 = 1;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum FieldValue {
|
||||
Text(Field, String),
|
||||
U32(Field, u32),
|
||||
pub struct FieldValue {
|
||||
pub field: Field,
|
||||
pub value: Value,
|
||||
}
|
||||
|
||||
impl FieldValue {
|
||||
pub fn field(&self) -> Field {
|
||||
match self {
|
||||
&FieldValue::Text(field, _) => {
|
||||
field
|
||||
},
|
||||
&FieldValue::U32(field, _) => {
|
||||
field
|
||||
}
|
||||
}
|
||||
self.field
|
||||
}
|
||||
|
||||
pub fn value(&self,) -> &Value {
|
||||
&self.value
|
||||
}
|
||||
|
||||
pub fn text(&self) -> &str {
|
||||
match self {
|
||||
&FieldValue::Text(_, ref text) => {
|
||||
text
|
||||
}
|
||||
_ => {
|
||||
panic!("This is not a text field.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn u32_value(&self) -> u32 {
|
||||
match self {
|
||||
&FieldValue::U32(_, value) => {
|
||||
value
|
||||
}
|
||||
_ => {
|
||||
panic!("This is not a text field.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BinarySerializable for FieldValue {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let mut written_size = 0;
|
||||
match self {
|
||||
&FieldValue::Text(ref field, ref text) => {
|
||||
written_size += try!(TEXT_CODE.serialize(writer));
|
||||
written_size += try!(field.serialize(writer));
|
||||
written_size += try!(text.serialize(writer));
|
||||
},
|
||||
&FieldValue::U32(ref field, ref val) => {
|
||||
written_size += try!(U32_CODE.serialize(writer));
|
||||
written_size += try!(field.serialize(writer));
|
||||
written_size += try!(val.serialize(writer));
|
||||
},
|
||||
}
|
||||
written_size += try!(self.field.serialize(writer));
|
||||
written_size += try!(self.value.serialize(writer));
|
||||
Ok(written_size)
|
||||
}
|
||||
|
||||
fn deserialize(reader: &mut Read) -> io::Result<Self> {
|
||||
let type_code = try!(u8::deserialize(reader));
|
||||
match type_code {
|
||||
TEXT_CODE => {
|
||||
let field = try!(Field::deserialize(reader));
|
||||
let text = try!(String::deserialize(reader));
|
||||
Ok(FieldValue::Text(field, text))
|
||||
}
|
||||
U32_CODE => {
|
||||
let field = try!(Field::deserialize(reader));
|
||||
let value = try!(u32::deserialize(reader));
|
||||
Ok(FieldValue::U32(field, value))
|
||||
}
|
||||
_ => {
|
||||
Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
|
||||
}
|
||||
}
|
||||
let field = try!(Field::deserialize(reader));
|
||||
let value = try!(Value::deserialize(reader));
|
||||
Ok(FieldValue {
|
||||
field: field,
|
||||
value: value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -102,10 +102,16 @@ mod field_value;
|
||||
mod text_options;
|
||||
mod u32_options;
|
||||
mod field;
|
||||
mod value;
|
||||
mod named_field_document;
|
||||
|
||||
|
||||
|
||||
pub use self::named_field_document::NamedFieldDocument;
|
||||
pub use self::schema::Schema;
|
||||
pub use self::value::Value;
|
||||
pub use self::schema::DocParsingError;
|
||||
|
||||
pub use self::document::Document;
|
||||
pub use self::field::Field;
|
||||
pub use self::term::Term;
|
||||
|
||||
57
src/schema/named_field_document.rs
Normal file
57
src/schema/named_field_document.rs
Normal file
@@ -0,0 +1,57 @@
|
||||
use std::collections::BTreeMap;
|
||||
use schema::Value;
|
||||
use rustc_serialize::Encodable;
|
||||
use rustc_serialize::Encoder;
|
||||
|
||||
/// A document whose values are grouped by field name.
///
/// Wraps a sorted map from a field name to the list of values held
/// under that name.
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
|
||||
|
||||
|
||||
impl Encodable for NamedFieldDocument {
|
||||
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
|
||||
s.emit_struct("named_field_document", self.0.len(), |s| {
|
||||
for (i, (name, vals)) in self.0.iter().enumerate() {
|
||||
try!(s.emit_struct_field(name, i, |s| {
|
||||
for (j, val) in vals.iter().enumerate() {
|
||||
try!(s.emit_seq(vals.len(), |s| {
|
||||
s.emit_seq_elt(j, |s| {
|
||||
match *val {
|
||||
Value::Str(ref text) => {
|
||||
s.emit_str(text)
|
||||
},
|
||||
Value::U32(ref val) => {
|
||||
s.emit_u32(*val)
|
||||
}
|
||||
}
|
||||
})
|
||||
}));
|
||||
}
|
||||
Ok(())
|
||||
|
||||
}));
|
||||
}
|
||||
// try!(s.emit_struct_field("name", 0, |s| {
|
||||
// self.name.encode(s)
|
||||
// }));
|
||||
// match self.field_type {
|
||||
// FieldType::Text(ref options) => {
|
||||
// try!(s.emit_struct_field("type", 1, |s| {
|
||||
// s.emit_str("text")
|
||||
// }));
|
||||
// try!(s.emit_struct_field("options", 2, |s| {
|
||||
// options.encode(s)
|
||||
// }));
|
||||
// }
|
||||
// FieldType::U32(ref options) => {
|
||||
// try!(s.emit_struct_field("type", 1, |s| {
|
||||
// s.emit_str("u32")
|
||||
// }));
|
||||
// try!(s.emit_struct_field("options", 2, |s| {
|
||||
// options.encode(s)
|
||||
// }));
|
||||
// }
|
||||
// }
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ use rustc_serialize::Decoder;
|
||||
use rustc_serialize::Encoder;
|
||||
use rustc_serialize::json;
|
||||
use rustc_serialize::json::Json;
|
||||
use std::collections::BTreeMap;
|
||||
use super::*;
|
||||
|
||||
|
||||
@@ -34,6 +35,7 @@ pub struct Schema {
|
||||
fields_map: HashMap<String, Field>, // transient
|
||||
}
|
||||
|
||||
|
||||
impl Decodable for Schema {
|
||||
fn decode<D: Decoder>(d: &mut D) -> Result <Self, D::Error> {
|
||||
let mut schema = Schema::new();
|
||||
@@ -75,6 +77,10 @@ impl Schema {
|
||||
&self.fields[field.0 as usize]
|
||||
}
|
||||
|
||||
/// Returns the name of the schema entry associated with `field`.
pub fn get_field_name(&self, field: Field) -> &String {
    let entry = self.get_field_entry(field);
    entry.name()
}
|
||||
|
||||
pub fn fields(&self,) -> &Vec<FieldEntry> {
|
||||
&self.fields
|
||||
}
|
||||
@@ -123,6 +129,20 @@ impl Schema {
|
||||
field
|
||||
}
|
||||
|
||||
|
||||
pub fn to_named_doc(&self, doc: &Document) -> NamedFieldDocument {
|
||||
let mut field_map = BTreeMap::new();
|
||||
for (field, field_values) in doc.get_sorted_fields() {
|
||||
let field_name = self.get_field_name(field);
|
||||
let values: Vec<Value> = field_values
|
||||
.into_iter()
|
||||
.map(|field_val| field_val.value() )
|
||||
.cloned()
|
||||
.collect();
|
||||
field_map.insert(field_name.clone(), values);
|
||||
}
|
||||
NamedFieldDocument(field_map)
|
||||
}
|
||||
/// Build a document object from a json-object.
|
||||
pub fn parse_document(&self, doc_json: &str) -> Result<Document, DocParsingError> {
|
||||
let json_node = try!(Json::from_str(doc_json));
|
||||
|
||||
86
src/schema/value.rs
Normal file
86
src/schema/value.rs
Normal file
@@ -0,0 +1,86 @@
|
||||
|
||||
use common::BinarySerializable;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::io::Read;
|
||||
|
||||
/// A single value held by a document field: either a text
/// or an unsigned 32-bit integer.
#[derive(Debug, Clone)]
pub enum Value {
    /// A text value.
    Str(String),
    /// An unsigned 32-bit integer value.
    U32(u32),
}

impl Value {
    /// Returns the text of a `Value::Str`.
    ///
    /// # Panics
    ///
    /// Panics if the value is not a `Value::Str`.
    pub fn text(&self) -> &str {
        match *self {
            Value::Str(ref text) => text,
            _ => panic!("This is not a text field."),
        }
    }

    /// Returns the integer of a `Value::U32`.
    ///
    /// # Panics
    ///
    /// Panics if the value is not a `Value::U32`.
    pub fn u32_value(&self) -> u32 {
        match *self {
            Value::U32(value) => value,
            // The message names the type that was expected here (u32),
            // not the one expected by `text()`.
            _ => panic!("This is not a u32 field."),
        }
    }
}
|
||||
|
||||
impl From<String> for Value {
|
||||
fn from(s: String) -> Value {
|
||||
Value::Str(s)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl From<u32> for Value {
|
||||
fn from(v: u32) -> Value {
|
||||
Value::U32(v)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const TEXT_CODE: u8 = 0;
|
||||
const U32_CODE: u8 = 1;
|
||||
|
||||
|
||||
impl BinarySerializable for Value {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let mut written_size = 0;
|
||||
match *self {
|
||||
Value::Str(ref text) => {
|
||||
written_size += try!(TEXT_CODE.serialize(writer));
|
||||
written_size += try!(text.serialize(writer));
|
||||
},
|
||||
Value::U32(ref val) => {
|
||||
written_size += try!(U32_CODE.serialize(writer));
|
||||
written_size += try!(val.serialize(writer));
|
||||
},
|
||||
}
|
||||
Ok(written_size)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> io::Result<Self> {
|
||||
let type_code = try!(u8::deserialize(reader));
|
||||
match type_code {
|
||||
TEXT_CODE => {
|
||||
let text = try!(String::deserialize(reader));
|
||||
Ok(Value::Str(text))
|
||||
}
|
||||
U32_CODE => {
|
||||
let value = try!(u32::deserialize(reader));
|
||||
Ok(Value::U32(value))
|
||||
}
|
||||
_ => {
|
||||
Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -29,12 +29,18 @@ mod tests {
|
||||
for i in 0..1000 {
|
||||
let mut fields: Vec<FieldValue> = Vec::new();
|
||||
{
|
||||
let field_value = FieldValue::Text(field_body.clone(), lorem.clone());
|
||||
let field_value = FieldValue {
|
||||
field: field_body,
|
||||
value: From::from(lorem.clone())
|
||||
};
|
||||
fields.push(field_value);
|
||||
}
|
||||
{
|
||||
let title_text = format!("Doc {}", i);
|
||||
let field_value = FieldValue::Text(field_title.clone(), title_text);
|
||||
let field_value = FieldValue {
|
||||
field: field_title,
|
||||
value: From::from(title_text)
|
||||
};
|
||||
fields.push(field_value);
|
||||
}
|
||||
let fields_refs: Vec<&FieldValue> = fields.iter().collect();
|
||||
|
||||
Reference in New Issue
Block a user