Merge branch 'master' into issue/indexing-refactoring

This commit is contained in:
Paul Masurel
2017-05-09 16:43:33 +09:00
26 changed files with 415 additions and 366 deletions

View File

@@ -2,6 +2,7 @@ Tantivy 0.4.0
==========================
- Removed u32 fields. They are replaced by u64 and i64 fields (#65)
- Replacing rustc_serialize by serde. Kudos to @KodrAus and @lnicola
- QueryParser:
- Explicit error returned when searched for a term that is not indexed
- Searching for a int term via the query parser was broken `(age:1)`

View File

@@ -20,18 +20,20 @@ regex = "0.2"
fst = "0.1.37"
atomicwrites = "0.1.3"
tempfile = "2.1"
rustc-serialize = "0.3"
log = "0.3.6"
combine = "2.2"
tempdir = "0.3"
bincode = "0.5"
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"
bincode = "0.7.0-alpha7"
libc = {version = "0.2.20", optional=true}
num_cpus = "1.2"
itertools = "0.5.9"
lz4 = "1.20"
bit-set = "0.4.0"
time = "0.1"
uuid = { version = "0.4", features = ["v4", "rustc-serialize"] }
uuid = { version = "0.5", features = ["v4", "serde"] }
chan = "0.1"
version = "2"
crossbeam = "0.2"

View File

@@ -1,4 +1,3 @@
extern crate rustc_serialize;
extern crate tantivy;
extern crate tempdir;

View File

@@ -33,7 +33,7 @@ impl<'a> Drop for OpenTimer<'a> {
}
/// Timing recording
#[derive(Debug, RustcEncodable)]
#[derive(Debug, Serialize)]
pub struct Timing {
name: &'static str,
duration: i64,
@@ -41,7 +41,7 @@ pub struct Timing {
}
/// Timer tree
#[derive(Debug, RustcEncodable)]
#[derive(Debug, Serialize)]
pub struct TimerTree {
timings: Vec<Timing>,
}

View File

@@ -1,10 +1,10 @@
use Result;
use Error;
use serde_json;
use schema::Schema;
use std::sync::Arc;
use std::borrow::BorrowMut;
use std::fmt;
use rustc_serialize::json;
use core::SegmentId;
use directory::{Directory, MmapDirectory, RAMDirectory};
use indexer::index_writer::open_index_writer;
@@ -29,7 +29,7 @@ const NUM_SEARCHERS: usize = 12;
fn load_metas(directory: &Directory) -> Result<IndexMeta> {
let meta_data = directory.atomic_read(&META_FILEPATH)?;
let meta_string = String::from_utf8_lossy(&meta_data);
json::decode(&meta_string)
serde_json::from_str(&meta_string)
.map_err(|e| Error::CorruptedFile(META_FILEPATH.clone(), Box::new(e)))
}

View File

@@ -9,7 +9,7 @@ use core::SegmentMeta;
/// * the index docstamp
/// * the schema
///
#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
#[derive(Clone,Debug,Serialize, Deserialize)]
pub struct IndexMeta {
pub segments: Vec<SegmentMeta>,
pub schema: Schema,

View File

@@ -1,6 +1,5 @@
use uuid::Uuid;
use std::fmt;
use rustc_serialize::{Encoder, Decoder, Encodable, Decodable};
use std::cmp::{Ordering, Ord};
#[cfg(test)]
@@ -14,7 +13,7 @@ use std::sync::atomic;
///
In unit tests, for reproducibility, SegmentIds are
simply generated in an auto-increment fashion.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SegmentId(Uuid);
@@ -65,18 +64,6 @@ impl SegmentId {
}
}
impl Encodable for SegmentId {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
self.0.encode(s)
}
}
impl Decodable for SegmentId {
fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
Uuid::decode(d).map(SegmentId)
}
}
impl fmt::Debug for SegmentId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Seg({:?})", self.short_uuid_string())

View File

@@ -3,7 +3,7 @@ use super::SegmentComponent;
use std::path::PathBuf;
use std::collections::HashSet;
#[derive(Clone, Debug, RustcDecodable,RustcEncodable)]
#[derive(Clone, Debug, Serialize, Deserialize)]
struct DeleteMeta {
num_deleted_docs: u32,
opstamp: u64,
@@ -13,7 +13,7 @@ struct DeleteMeta {
///
/// For instance the number of docs it contains,
/// how many are deleted, etc.
#[derive(Clone, Debug, RustcDecodable,RustcEncodable)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SegmentMeta {
segment_id: SegmentId,
max_doc: u32,

View File

@@ -1,4 +1,5 @@
use std::path::{Path, PathBuf};
use serde_json;
use directory::error::{OpenReadError, DeleteError, OpenWriteError};
use directory::{ReadOnlySource, WritePtr};
use std::result;
@@ -7,7 +8,6 @@ use Directory;
use std::sync::{Arc, RwLock};
use std::collections::HashSet;
use std::io::Write;
use rustc_serialize::json;
use core::MANAGED_FILEPATH;
use std::collections::HashMap;
use std::fmt;
@@ -74,7 +74,7 @@ impl ManagedDirectory {
match directory.atomic_read(&MANAGED_FILEPATH) {
Ok(data) => {
let managed_files_json = String::from_utf8_lossy(&data);
let managed_files: HashSet<PathBuf> = json::decode(&managed_files_json)
let managed_files: HashSet<PathBuf> = serde_json::from_str(&managed_files_json)
.map_err(|e| Error::CorruptedFile(MANAGED_FILEPATH.clone(), Box::new(e)))?;
Ok(ManagedDirectory {
directory: box directory,
@@ -204,8 +204,8 @@ impl ManagedDirectory {
.expect("Managed file lock poisoned");
managed_paths = meta_informations_rlock.managed_paths.clone();
}
let mut w = vec!();
try!(write!(&mut w, "{}\n", json::as_pretty_json(&managed_paths)));
let mut w = try!(serde_json::to_vec(&managed_paths));
try!(write!(&mut w, "\n"));
self.directory.atomic_write(&MANAGED_FILEPATH, &w[..])?;
Ok(())
}

View File

@@ -53,7 +53,7 @@ fn open_mmap(full_path: &PathBuf) -> result::Result<Option<Arc<Mmap>>, OpenReadE
}
#[derive(Default,Clone,Debug,RustcDecodable,RustcEncodable)]
#[derive(Default,Clone,Debug,Serialize,Deserialize)]
pub struct CacheCounters {
// Number of times the cache prevented a call to `mmap`
pub hit: usize,
@@ -65,7 +65,7 @@ pub struct CacheCounters {
pub miss_weak: usize,
}
#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
#[derive(Clone,Debug,Serialize,Deserialize)]
pub struct CacheInfo {
pub counters: CacheCounters,
pub mmapped: Vec<PathBuf>,

View File

@@ -11,7 +11,7 @@ use directory::error::{OpenReadError, OpenWriteError, OpenDirectoryError};
use query;
use schema;
use fastfield::FastFieldNotAvailableError;
use serde_json;
/// Generic tantivy error.
@@ -101,3 +101,9 @@ impl From<OpenDirectoryError> for Error {
}
}
}
impl From<serde_json::Error> for Error {
fn from(error: serde_json::Error) -> Error {
Error::IOError(error.into())
}
}

View File

@@ -440,6 +440,8 @@ mod tests {
let score_field = schema_builder.add_u64_field("score", score_fieldtype);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
let empty_vec = Vec::<u64>::new();
{ // a first commit
index_writer.add_document(
@@ -502,11 +504,11 @@ mod tests {
assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
assert_eq!(searcher.segment_readers()[1].num_docs(), 2);
assert_eq!(searcher.segment_readers()[1].max_doc(), 4);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!(3));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!(3));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
@@ -529,11 +531,11 @@ mod tests {
assert_eq!(searcher.num_docs(), 3);
assert_eq!(searcher.segment_readers()[0].num_docs(), 3);
assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!(3));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!(3));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();
@@ -551,11 +553,11 @@ mod tests {
assert_eq!(searcher.num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();
@@ -574,11 +576,11 @@ mod tests {
assert_eq!(searcher.num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].max_doc(), 2);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();

View File

@@ -23,7 +23,7 @@ use indexer::SegmentEntry;
use indexer::SegmentSerializer;
use Result;
use futures_cpupool::CpuFuture;
use rustc_serialize::json;
use serde_json;
use indexer::delete_queue::DeleteCursor;
use schema::Schema;
use std::borrow::BorrowMut;
@@ -77,10 +77,10 @@ pub fn save_metas(segment_metas: Vec<SegmentMeta>,
schema: schema,
opstamp: opstamp,
};
let mut w = vec!();
try!(write!(&mut w, "{}\n", json::as_pretty_json(&metas)));
let mut w = try!(serde_json::to_vec(&metas));
try!(write!(&mut w, "\n"));
let res = directory.atomic_write(&META_FILEPATH, &w[..])?;
debug!("Saved metas {}", json::as_pretty_json(&metas));
debug!("Saved metas {:?}", serde_json::to_string_pretty(&metas));
Ok(res)
}

View File

@@ -25,6 +25,9 @@
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate serde_derive;
#[macro_use]
extern crate log;
@@ -35,10 +38,11 @@ extern crate byteorder;
extern crate memmap;
extern crate regex;
extern crate tempfile;
extern crate rustc_serialize;
extern crate atomicwrites;
extern crate tempdir;
extern crate serde;
extern crate bincode;
extern crate serde_json;
extern crate time;
extern crate lz4;
extern crate uuid;

View File

@@ -102,7 +102,7 @@ mod tests {
}
{
let boolean_query = BooleanQuery::from(vec![(Occur::MustNot, make_term_query("d")),]);
assert_eq!(matching_docs(&boolean_query), Vec::new());
assert_eq!(matching_docs(&boolean_query), Vec::<u32>::new());
}
}

View File

@@ -60,11 +60,14 @@ mod tests {
searcher.search(&phrase_query, &mut test_collector).expect("search should succeed");
test_collector.docs()
};
let empty_vec = Vec::<u32>::new();
assert_eq!(test_query(vec!("a", "b", "c")), vec!(2, 4));
assert_eq!(test_query(vec!("a", "b")), vec!(1, 2, 3, 4));
assert_eq!(test_query(vec!("b", "b")), vec!(0, 1));
assert_eq!(test_query(vec!("g", "ewrwer")), vec!());
assert_eq!(test_query(vec!("g", "a")), vec!());
assert_eq!(test_query(vec!("g", "ewrwer")), empty_vec);
assert_eq!(test_query(vec!("g", "a")), empty_vec);
}
}

View File

@@ -11,7 +11,7 @@ use itertools::Itertools;
/// Documents are really just a list of `(field, value)` couples.
/// In this list, one field may appear more than once.
#[derive(Debug, RustcEncodable, RustcDecodable, Default)]
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Document {
field_values: Vec<FieldValue>,
}

View File

@@ -10,7 +10,7 @@ use common::BinarySerializable;
///
/// Because the field id is a `u8`, tantivy can only have at most `255` fields.
/// Value 255 is reserved.
#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, RustcEncodable, RustcDecodable)]
#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
impl BinarySerializable for Field {

View File

@@ -1,10 +1,10 @@
use schema::TextOptions;
use schema::IntOptions;
use rustc_serialize::Decodable;
use rustc_serialize::Decoder;
use rustc_serialize::Encodable;
use rustc_serialize::Encoder;
use std::fmt;
use serde::{Serialize, Deserialize, Serializer, Deserializer};
use serde::ser::SerializeStruct;
use serde::de::{self, Visitor, MapAccess};
use schema::FieldType;
/// A `FieldEntry` represents a field and its configuration.
@@ -94,75 +94,99 @@ impl FieldEntry {
}
}
impl Serialize for FieldEntry {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer
{
let mut s = serializer.serialize_struct("field_entry", 3)?;
s.serialize_field("name", &self.name)?;
impl Encodable for FieldEntry {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_struct("field_entry", 3, |s| {
try!(s.emit_struct_field("name", 0, |s| {
self.name.encode(s)
}));
match self.field_type {
FieldType::Str(ref options) => {
s.emit_struct_field("type", 1, |s| {
s.emit_str("text")
})?;
s.emit_struct_field("options", 2, |s| {
options.encode(s)
})?;
}
FieldType::U64(ref options) => {
s.emit_struct_field("type", 1, |s| {
s.emit_str("u64")
})?;
s.emit_struct_field("options", 2, |s| {
options.encode(s)
})?;
}
FieldType::I64(ref options) => {
s.emit_struct_field("type", 1, |s| {
s.emit_str("i64")
})?;
s.emit_struct_field("options", 2, |s| {
options.encode(s)
})?;
}
match self.field_type {
FieldType::Str(ref options) => {
s.serialize_field("type", "text")?;
s.serialize_field("options", options)?;
},
FieldType::U64(ref options) => {
s.serialize_field("type", "u64")?;
s.serialize_field("options", options)?;
},
FieldType::I64(ref options) => {
s.serialize_field("type", "i64")?;
s.serialize_field("options", options)?;
}
Ok(())
})
}
s.end()
}
}
impl Decodable for FieldEntry {
fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
d.read_struct("field_entry", 3, |d| {
let name = try!(d.read_struct_field("name", 0, |d| {
d.read_str()
}));
let field_type: String = try!(d.read_struct_field("type", 1, |d| {
d.read_str()
}));
d.read_struct_field("options", 2, |d| {
match field_type.as_ref() {
"u64" => {
let int_options = try!(IntOptions::decode(d));
Ok(FieldEntry::new_u64(name, int_options))
}
"i64" => {
let int_options = try!(IntOptions::decode(d));
Ok(FieldEntry::new_i64(name, int_options))
}
"text" => {
let text_options = try!(TextOptions::decode(d));
Ok(FieldEntry::new_text(name, text_options))
}
_ => {
Err(d.error(&format!("Field type {:?} unknown", field_type)))
impl<'de> Deserialize<'de> for FieldEntry {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>
{
#[derive(Deserialize)]
#[serde(field_identifier, rename_all = "lowercase")]
enum Field { Name, Type, Options };
const FIELDS: &'static [&'static str] = &["name", "type", "options"];
struct FieldEntryVisitor;
impl<'de> Visitor<'de> for FieldEntryVisitor {
type Value = FieldEntry;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct FieldEntry")
}
fn visit_map<V>(self, mut map: V) -> Result<FieldEntry, V::Error>
where V: MapAccess<'de>
{
let mut name = None;
let mut ty = None;
let mut field_type = None;
while let Some(key) = map.next_key()? {
match key {
Field::Name => {
if name.is_some() {
return Err(de::Error::duplicate_field("name"));
}
name = Some(map.next_value()?);
}
Field::Type => {
if ty.is_some() {
return Err(de::Error::duplicate_field("type"));
}
ty = Some(map.next_value()?);
}
Field::Options => {
match ty {
None => return Err(de::Error::custom("The `type` field must be specified before `options`")),
Some(ty) => {
match ty {
"text" => field_type = Some(FieldType::Str(map.next_value()?)),
"u64" => field_type = Some(FieldType::U64(map.next_value()?)),
"i64" => field_type = Some(FieldType::I64(map.next_value()?)),
_ => return Err(de::Error::custom(format!("Unrecognised type {}", ty)))
}
}
}
}
}
}
})
})
let name = name.ok_or_else(|| de::Error::missing_field("name"))?;
ty.ok_or_else(|| de::Error::missing_field("ty"))?;
let field_type = field_type.ok_or_else(|| de::Error::missing_field("options"))?;
Ok(FieldEntry {
name: name,
field_type: field_type,
})
}
}
deserializer.deserialize_struct("field_entry", FIELDS, FieldEntryVisitor)
}
}
@@ -172,18 +196,31 @@ mod tests {
use super::*;
use schema::TEXT;
use rustc_serialize::json;
use serde_json;
#[test]
fn test_json_serialization() {
let field_value = FieldEntry::new_text(String::from("title"), TEXT);
assert_eq!(format!("{}", json::as_pretty_json(&field_value)), r#"{
let expected = r#"{
"name": "title",
"type": "text",
"options": {
"indexing": "position",
"stored": false
}
}"#);
}"#;
let field_value_json = serde_json::to_string_pretty(&field_value).unwrap();
assert_eq!(expected, &field_value_json);
let field_value: FieldEntry = serde_json::from_str(expected).unwrap();
assert_eq!("title", field_value.name);
match field_value.field_type {
FieldType::Str(_) => assert!(true),
_ => panic!("expected FieldType::Str")
}
}
}

View File

@@ -1,7 +1,6 @@
use schema::TextOptions;
use schema::IntOptions;
use schema::{TextOptions, IntOptions};
use rustc_serialize::json::Json;
use serde_json::Value as JsonValue;
use schema::Value;
@@ -19,7 +18,7 @@ pub enum ValueParsingError {
/// A `FieldType` describes the type (text, u64) of a field as well as
/// how it should be handled by tantivy.
#[derive(Clone, Debug, RustcDecodable, RustcEncodable)]
#[derive(Clone, Debug)]
pub enum FieldType {
/// String field type configuration
Str(TextOptions),
@@ -30,7 +29,7 @@ pub enum FieldType {
}
impl FieldType {
/// returns true iff the field is indexed.
pub fn is_indexed(&self) -> bool {
match self {
@@ -51,9 +50,9 @@ impl FieldType {
/// Tantivy will not try to cast values.
/// For instance, If the json value is the integer `3` and the
/// target field is a `Str`, this method will return an Error.
pub fn value_from_json(&self, json: &Json) -> Result<Value, ValueParsingError> {
pub fn value_from_json(&self, json: &JsonValue) -> Result<Value, ValueParsingError> {
match *json {
Json::String(ref field_text) => {
JsonValue::String(ref field_text) => {
match *self {
FieldType::Str(_) => {
Ok(Value::Str(field_text.clone()))
@@ -63,31 +62,23 @@ impl FieldType {
}
}
}
Json::U64(ref field_val_u64) => {
JsonValue::Number(ref field_val_num) => {
match *self {
FieldType::I64(_) => {
if *field_val_u64 > (i64::max_value() as u64) {
Err(ValueParsingError::OverflowError(format!("Value {:?} is too high for a i64.", field_val_u64)))
if let Some(field_val_i64) = field_val_num.as_i64() {
Ok(Value::I64(field_val_i64))
}
else {
Ok(Value::I64(*field_val_u64 as i64))
Err(ValueParsingError::OverflowError(format!("Expected an i64 int, got {:?}", json)))
}
}
FieldType::U64(_) => {
Ok(Value::U64(*field_val_u64))
}
_ => {
Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json)))
}
}
},
Json::I64(ref field_val_i64) => {
match *self {
FieldType::I64(_) => {
Ok(Value::I64(* field_val_i64))
}
FieldType::U64(_) => {
Err(ValueParsingError::TypeError(format!("Expected a positive integer, got {:?}", json)))
if let Some(field_val_u64) = field_val_num.as_u64() {
Ok(Value::U64(field_val_u64))
}
else {
Err(ValueParsingError::OverflowError(format!("Expected an u64 int, got {:?}", json)))
}
}
FieldType::Str(_) => {
Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json)))

View File

@@ -7,7 +7,7 @@ use schema::Value;
/// `FieldValue` holds together a `Field` and its `Value`.
#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, RustcEncodable, RustcDecodable)]
#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, Serialize, Deserialize)]
pub struct FieldValue {
field: Field,
value: Value,

View File

@@ -1,7 +1,7 @@
use std::ops::BitOr;
/// Define how a u64 field should be handled by tantivy.
#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)]
/// Define how an int field should be handled by tantivy.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct IntOptions {
indexed: bool,
fast: bool,

View File

@@ -1,7 +1,5 @@
use std::collections::BTreeMap;
use schema::Value;
use rustc_serialize::Encodable;
use rustc_serialize::Encoder;
@@ -11,36 +9,5 @@ use rustc_serialize::Encoder;
/// A `NamedFieldDocument` is a simple representation of a document
/// as a `BTreeMap<String, Vec<Value>>`.
///
#[derive(Serialize)]
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
impl Encodable for NamedFieldDocument {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_struct("named_field_document", self.0.len(), |s| {
for (i, (name, vals)) in self.0.iter().enumerate() {
s.emit_struct_field(name, i, |s| {
for (j, val) in vals.iter().enumerate() {
s.emit_seq(vals.len(), |s| {
s.emit_seq_elt(j, |s| {
match *val {
Value::Str(ref text) => {
s.emit_str(text)
},
Value::U64(ref val) => {
s.emit_u64(*val)
}
Value::I64(ref val) => {
s.emit_i64(*val)
}
}
})
})?;
}
Ok(())
})?;
}
Ok(())
})
}
}

View File

@@ -1,14 +1,12 @@
use std::collections::HashMap;
use rustc_serialize::Decodable;
use rustc_serialize::Encodable;
use rustc_serialize::Decoder;
use rustc_serialize::Encoder;
use rustc_serialize::json;
use rustc_serialize::json::Json;
use std::collections::BTreeMap;
use schema::field_type::ValueParsingError;
use std::sync::Arc;
use serde_json::{self, Value as JsonValue, Map as JsonObject};
use serde::{Serialize, Serializer, Deserialize, Deserializer};
use serde::ser::SerializeSeq;
use serde::de::{Visitor, SeqAccess};
use super::*;
use std::fmt;
@@ -215,14 +213,12 @@ impl Schema {
///
/// Encoding a document cannot fail.
pub fn to_json(&self, doc: &Document) -> String {
json::encode(&self.to_named_doc(doc)).unwrap()
serde_json::to_string(&self.to_named_doc(doc)).expect("doc encoding failed. This is a bug")
}
/// Build a document object from a json-object.
pub fn parse_document(&self, doc_json: &str) -> Result<Document, DocParsingError> {
let json_node = try!(Json::from_str(doc_json));
let some_json_obj = json_node.as_object();
if !some_json_obj.is_some() {
let json_obj: JsonObject<String, JsonValue> = serde_json::from_str(doc_json).map_err(|_| {
let doc_json_sample: String =
if doc_json.len() < 20 {
String::from(doc_json)
@@ -230,9 +226,9 @@ impl Schema {
else {
format!("{:?}...", &doc_json[0..20])
};
return Err(DocParsingError::NotJSONObject(doc_json_sample))
}
let json_obj = some_json_obj.unwrap();
DocParsingError::NotJSON(doc_json_sample)
})?;
let mut doc = Document::default();
for (field_name, json_value) in json_obj.iter() {
match self.get_field(field_name) {
@@ -240,7 +236,7 @@ impl Schema {
let field_entry = self.get_field_entry(field);
let field_type = field_entry.field_type();
match *json_value {
Json::Array(ref json_items) => {
JsonValue::Array(ref json_items) => {
for json_item in json_items {
let value = try!(
field_type
@@ -276,30 +272,50 @@ impl fmt::Debug for Schema {
}
}
impl Decodable for Schema {
fn decode<D: Decoder>(d: &mut D) -> Result <Self, D::Error> {
let mut schema_builder = SchemaBuilder::default();
try!(d.read_seq(|d, num_fields| {
for _ in 0..num_fields {
let field_entry = try!(FieldEntry::decode(d));
schema_builder.add_field(field_entry);
}
Ok(())
}));
Ok(schema_builder.build())
impl Serialize for Schema {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer
{
let mut seq = serializer.serialize_seq(Some(self.0.fields.len()))?;
for e in &self.0.fields {
seq.serialize_element(e)?;
}
seq.end()
}
}
impl Encodable for Schema {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
try!(s.emit_seq(self.0.fields.len(),
|mut e| {
for (ord, field) in self.0.fields.iter().enumerate() {
try!(e.emit_seq_elt(ord, |e| field.encode(e)));
impl<'de> Deserialize<'de> for Schema
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>
{
struct SchemaVisitor;
impl<'de> Visitor<'de> for SchemaVisitor
{
type Value = Schema;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct Schema")
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where A: SeqAccess<'de>
{
let mut schema = SchemaBuilder {
fields: Vec::with_capacity(seq.size_hint().unwrap_or(0)),
fields_map: HashMap::with_capacity(seq.size_hint().unwrap_or(0)),
};
while let Some(value) = seq.next_element()? {
schema.add_field(value);
}
Ok(())
}));
Ok(())
Ok(schema.build())
}
}
deserializer.deserialize_map(SchemaVisitor)
}
}
@@ -319,28 +335,19 @@ impl From<SchemaBuilder> for Schema {
#[derive(Debug)]
pub enum DocParsingError {
/// The payload given is not valid JSON.
NotJSON(json::ParserError),
/// The payload given is not a JSON Object (`{...}`).
NotJSONObject(String),
NotJSON(String),
/// One of the value node could not be parsed.
ValueError(String, ValueParsingError),
/// The json-document contains a field that is not declared in the schema.
NoSuchFieldInSchema(String),
}
impl From<json::ParserError> for DocParsingError {
fn from(err: json::ParserError) -> DocParsingError {
DocParsingError::NotJSON(err)
}
}
#[cfg(test)]
mod tests {
use schema::*;
use rustc_serialize::json;
use serde_json;
use schema::field_type::ValueParsingError;
use schema::schema::DocParsingError::NotJSON;
@@ -348,11 +355,13 @@ mod tests {
pub fn test_schema_serialization() {
let mut schema_builder = SchemaBuilder::default();
let count_options = IntOptions::default().set_stored().set_fast();
let popularity_options = IntOptions::default().set_stored().set_fast();
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field("author", STRING);
schema_builder.add_u64_field("count", count_options);
schema_builder.add_i64_field("popularity", popularity_options);
let schema = schema_builder.build();
let schema_json: String = format!("{}", json::as_pretty_json(&schema));
let schema_json = serde_json::to_string_pretty(&schema).unwrap();
let expected = r#"[
{
"name": "title",
@@ -378,10 +387,29 @@ mod tests {
"fast": true,
"stored": true
}
},
{
"name": "popularity",
"type": "i64",
"options": {
"indexed": false,
"fast": true,
"stored": true
}
}
]"#;
println!("{}", schema_json);
println!("{}", expected);
assert_eq!(schema_json, expected);
let schema: Schema = serde_json::from_str(expected).unwrap();
let mut fields = schema.fields().iter();
assert_eq!("title", fields.next().unwrap().name());
assert_eq!("author", fields.next().unwrap().name());
assert_eq!("count", fields.next().unwrap().name());
assert_eq!("popularity", fields.next().unwrap().name());
}
@@ -400,6 +428,7 @@ mod tests {
"count": 4
}"#;
let doc = schema.parse_document(doc_json).unwrap();
let doc_serdeser = schema.parse_document(&schema.to_json(&doc)).unwrap();
assert_eq!(doc, doc_serdeser);
}
@@ -408,9 +437,11 @@ mod tests {
pub fn test_parse_document() {
let mut schema_builder = SchemaBuilder::default();
let count_options = IntOptions::default().set_stored().set_fast();
let popularity_options = IntOptions::default().set_stored().set_fast();
let title_field = schema_builder.add_text_field("title", TEXT);
let author_field = schema_builder.add_text_field("author", STRING);
let count_field = schema_builder.add_u64_field("count", count_options);
let popularity_field = schema_builder.add_i64_field("popularity", popularity_options);
let schema = schema_builder.build();
{
let doc = schema.parse_document("{}").unwrap();
@@ -420,32 +451,20 @@ mod tests {
let doc = schema.parse_document(r#"{
"title": "my title",
"author": "fulmicoton",
"count": 4
"count": 4,
"popularity": 10
}"#).unwrap();
assert_eq!(doc.get_first(title_field).unwrap().text(), "my title");
assert_eq!(doc.get_first(author_field).unwrap().text(), "fulmicoton");
assert_eq!(doc.get_first(count_field).unwrap().u64_value(), 4);
}
{
let json_err = schema.parse_document(r#"{
"title": "my title",
"author": "fulmicoton"
"count": 4
}"#);
match json_err {
Err(DocParsingError::NotJSON(__)) => {
assert!(true);
}
_ => {
assert!(false);
}
}
assert_eq!(doc.get_first(popularity_field).unwrap().i64_value(), 10);
}
{
let json_err = schema.parse_document(r#"{
"title": "my title",
"author": "fulmicoton",
"count": 4,
"popularity": 10,
"jambon": "bayonne"
}"#);
match json_err {
@@ -453,7 +472,7 @@ mod tests {
assert_eq!(field_name, "jambon");
}
_ => {
assert!(false);
panic!("expected additional field 'jambon' to fail but didn't");
}
}
}
@@ -462,6 +481,7 @@ mod tests {
"title": "my title",
"author": "fulmicoton",
"count": "5",
"popularity": "10",
"jambon": "bayonne"
}"#);
match json_err {
@@ -469,7 +489,7 @@ mod tests {
assert!(true);
}
_ => {
assert!(false);
panic!("expected string of 5 to fail but didn't");
}
}
}
@@ -477,26 +497,28 @@ mod tests {
let json_err = schema.parse_document(r#"{
"title": "my title",
"author": "fulmicoton",
"count": -5
}"#);
match json_err {
Err(DocParsingError::ValueError(_, ValueParsingError::TypeError(_))) => {
assert!(true);
}
_ => {
assert!(false);
}
}
}
{
let json_err = schema.parse_document(r#"{
"title": "my title",
"author": "fulmicoton",
"count": 5000000000
"count": -5,
"popularity": 10
}"#);
match json_err {
Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
assert!(false);
assert!(true);
}
_ => {
panic!("expected -5 to fail but didn't");
}
}
}
{
let json_err = schema.parse_document(r#"{
"title": "my title",
"author": "fulmicoton",
"count": 9223372036854775808,
"popularity": 10
}"#);
match json_err {
Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
panic!("expected 9223372036854775808 to fit into u64, but it didn't");
}
_ => {
assert!(true);
@@ -507,14 +529,30 @@ mod tests {
let json_err = schema.parse_document(r#"{
"title": "my title",
"author": "fulmicoton",
"count": 50000000000000000000
"count": 50,
"popularity": 9223372036854775808
}"#);
match json_err {
Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
assert!(true);
},
_ => {
panic!("expected 9223372036854775808 to overflow i64, but it didn't");
}
}
}
{
let json_err = schema.parse_document(r#"{
"title": "my title",
"author": "fulmicoton",
"count": 50,
}"#);
match json_err {
Err(NotJSON(_)) => {
assert!(true);
}
},
_ => {
assert!(false)
panic!("expected invalid JSON to fail parsing, but it didn't");
}
}
}

View File

@@ -1,12 +1,8 @@
use std::ops::BitOr;
use rustc_serialize::Decodable;
use rustc_serialize::Decoder;
use rustc_serialize::Encodable;
use rustc_serialize::Encoder;
/// Define how a text field should be handled by tantivy.
#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)]
#[derive(Clone,Debug,PartialEq,Eq, Serialize, Deserialize)]
pub struct TextOptions {
indexing: TextIndexingOptions,
stored: bool,
@@ -51,9 +47,10 @@ impl Default for TextOptions {
/// Describe how a field should be indexed
#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash)]
#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash, Serialize, Deserialize)]
pub enum TextIndexingOptions {
/// Unindexed fields will not generate any postings. They will not be searchable either.
#[serde(rename="unindexed")]
Unindexed,
/// Untokenized means that the field text will not be split into tokens before being indexed.
/// A field with the value "Hello world", will have the document suscribe to one single
@@ -61,62 +58,26 @@ pub enum TextIndexingOptions {
///
/// It will **not** be searchable if the user enter "hello" for instance.
/// This can be useful for tags, or ids for instance.
#[serde(rename="untokenized")]
Untokenized,
/// TokenizedNoFreq will tokenize the field value, and append the document doc id
/// to the posting lists associated to all of the tokens.
/// The frequence of appearance of the term in the document however will be lost.
/// The term frequency used in the TfIdf formula will always be 1.
#[serde(rename="tokenize")]
TokenizedNoFreq,
/// TokenizedWithFreq will tokenize the field value, and encode
/// both the docid and the term frequency in the posting lists associated to all
#[serde(rename="freq")]
    /// of the tokens.
TokenizedWithFreq,
/// Like TokenizedWithFreq, but also encodes the positions of the
/// terms in a separate file. This option is required for phrase queries.
/// Don't use this if you are certain you won't need it, the term positions file can be very big.
#[serde(rename="position")]
TokenizedWithFreqAndPosition,
}
impl Encodable for TextIndexingOptions {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
let name = match *self {
TextIndexingOptions::Unindexed => {
"unindexed"
}
TextIndexingOptions::Untokenized => {
"untokenized"
}
TextIndexingOptions::TokenizedNoFreq => {
"tokenize"
}
TextIndexingOptions::TokenizedWithFreq => {
"freq"
}
TextIndexingOptions::TokenizedWithFreqAndPosition => {
"position"
}
};
s.emit_str(name)
}
}
impl Decodable for TextIndexingOptions {
    /// Decodes an indexing option from its canonical string name.
    ///
    /// Accepts exactly the names produced by `Encodable::encode`;
    /// any other string yields a decoding error.
    fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
        use self::TextIndexingOptions::*;
        let option_name: String = try!(d.read_str());
        match option_name.as_ref() {
            "unindexed" => Ok(Unindexed),
            "untokenized" => Ok(Untokenized),
            "tokenize" => Ok(TokenizedNoFreq),
            "freq" => Ok(TokenizedWithFreq),
            "position" => Ok(TokenizedWithFreqAndPosition),
            _ => Err(d.error(&format!("Encoding option {:?} unknown", option_name))),
        }
    }
}
impl TextIndexingOptions {
/// Returns true iff the term frequency will be encoded.

View File

@@ -1,12 +1,10 @@
use common::BinarySerializable;
use std::io;
use std::io::Write;
use std::io::Read;
use std::fmt;
use serde::{Serialize, Serializer, Deserialize, Deserializer};
use serde::de::Visitor;
/// Value represents the value of a any field.
/// It is an enum over all over all of the possible field type.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable)]
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub enum Value {
/// The str type is used for any text information.
Str(String),
@@ -16,6 +14,54 @@ pub enum Value {
I64(i64)
}
/// Serializes a `Value` using the serializer's native representation
/// for each variant: `Str` as a string, `U64`/`I64` as integers.
impl Serialize for Value {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where S: Serializer
    {
        match *self {
            Value::Str(ref text) => serializer.serialize_str(text),
            Value::U64(val) => serializer.serialize_u64(val),
            Value::I64(val) => serializer.serialize_i64(val),
        }
    }
}
impl<'de> Deserialize<'de> for Value
{
    /// Deserializes a `Value` from a string, a `u64` or an `i64`.
    ///
    /// Uses `deserialize_any` so self-describing formats (e.g. JSON)
    /// pick the matching `visit_*` method based on the input token.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where D: Deserializer<'de>
    {
        struct ValueVisitor;

        impl<'de> Visitor<'de> for ValueVisitor
        {
            type Value = Value;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                // Message shown in deserialization errors; it must list the
                // types the visitor actually accepts (previously it wrongly
                // claimed "a string or u32").
                formatter.write_str("a string, a u64 or an i64")
            }

            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E> {
                Ok(Value::U64(v))
            }

            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E> {
                Ok(Value::I64(v))
            }

            // Borrowed string: must copy into an owned String.
            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
                Ok(Value::Str(v.to_owned()))
            }

            // Owned string: take ownership directly, no extra allocation.
            fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
                Ok(Value::Str(v))
            }
        }

        deserializer.deserialize_any(ValueVisitor)
    }
}
impl Value {
/// Returns the text value, provided the value is of the `Str` type.
///
@@ -88,48 +134,53 @@ impl<'a> From<&'a str> for Value {
}
}
const TEXT_CODE: u8 = 0;
const U64_CODE: u8 = 1;
const I64_CODE: u8 = 2;
mod binary_serialize {
use common::BinarySerializable;
use std::io::{self, Read, Write};
use super::Value;
const TEXT_CODE: u8 = 0;
const U64_CODE: u8 = 1;
const I64_CODE: u8 = 2;
impl BinarySerializable for Value {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let mut written_size = 0;
match *self {
Value::Str(ref text) => {
written_size += try!(TEXT_CODE.serialize(writer));
written_size += try!(text.serialize(writer));
},
Value::U64(ref val) => {
written_size += try!(U64_CODE.serialize(writer));
written_size += try!(val.serialize(writer));
},
Value::I64(ref val) => {
written_size += try!(I64_CODE.serialize(writer));
written_size += try!(val.serialize(writer));
},
impl BinarySerializable for Value {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let mut written_size = 0;
match *self {
Value::Str(ref text) => {
written_size += try!(TEXT_CODE.serialize(writer));
written_size += try!(text.serialize(writer));
},
Value::U64(ref val) => {
written_size += try!(U64_CODE.serialize(writer));
written_size += try!(val.serialize(writer));
},
Value::I64(ref val) => {
written_size += try!(I64_CODE.serialize(writer));
written_size += try!(val.serialize(writer));
},
}
Ok(written_size)
}
fn deserialize(reader: &mut Read) -> io::Result<Self> {
let type_code = try!(u8::deserialize(reader));
match type_code {
TEXT_CODE => {
let text = try!(String::deserialize(reader));
Ok(Value::Str(text))
}
U64_CODE => {
let value = try!(u64::deserialize(reader));
Ok(Value::U64(value))
}
I64_CODE => {
let value = try!(i64::deserialize(reader));
Ok(Value::I64(value))
}
_ => {
Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
}
}
}
Ok(written_size)
}
fn deserialize(reader: &mut Read) -> io::Result<Self> {
let type_code = try!(u8::deserialize(reader));
match type_code {
TEXT_CODE => {
let text = try!(String::deserialize(reader));
Ok(Value::Str(text))
}
U64_CODE => {
let value = try!(u64::deserialize(reader));
Ok(Value::U64(value))
}
I64_CODE => {
let value = try!(i64::deserialize(reader));
Ok(Value::I64(value))
}
_ => {
Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
}
}
}
}