diff --git a/CHANGELOG.md b/CHANGELOG.md index de66f2d23..3478e57ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ Tantivy 0.4.0 ========================== - Removed u32 fields. They are replaced by u64 and i64 fields (#65) +- Replacing rustc_serialize by serde. Kudos to @KodrAus and @lnicola - QueryParser: - Explicit error returned when searched for a term that is not indexed - Searching for a int term via the query parser was broken `(age:1)` diff --git a/Cargo.toml b/Cargo.toml index 274c442b8..281f0fda1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,18 +20,20 @@ regex = "0.2" fst = "0.1.37" atomicwrites = "0.1.3" tempfile = "2.1" -rustc-serialize = "0.3" log = "0.3.6" combine = "2.2" tempdir = "0.3" -bincode = "0.5" +serde = "1.0" +serde_derive = "1.0" +serde_json = "1.0" +bincode = "0.7.0-alpha7" libc = {version = "0.2.20", optional=true} num_cpus = "1.2" itertools = "0.5.9" lz4 = "1.20" bit-set = "0.4.0" time = "0.1" -uuid = { version = "0.4", features = ["v4", "rustc-serialize"] } +uuid = { version = "0.5", features = ["v4", "serde"] } chan = "0.1" version = "2" crossbeam = "0.2" diff --git a/examples/simple_search.rs b/examples/simple_search.rs index 430d7abf0..821462afd 100644 --- a/examples/simple_search.rs +++ b/examples/simple_search.rs @@ -1,4 +1,3 @@ -extern crate rustc_serialize; extern crate tantivy; extern crate tempdir; diff --git a/src/common/timer.rs b/src/common/timer.rs index 3f3950422..e28d1af6b 100644 --- a/src/common/timer.rs +++ b/src/common/timer.rs @@ -33,7 +33,7 @@ impl<'a> Drop for OpenTimer<'a> { } /// Timing recording -#[derive(Debug, RustcEncodable)] +#[derive(Debug, Serialize)] pub struct Timing { name: &'static str, duration: i64, @@ -41,7 +41,7 @@ pub struct Timing { } /// Timer tree -#[derive(Debug, RustcEncodable)] +#[derive(Debug, Serialize)] pub struct TimerTree { timings: Vec, } diff --git a/src/core/index.rs b/src/core/index.rs index cb97ba569..0210f5ad0 100644 --- a/src/core/index.rs +++ b/src/core/index.rs 
@@ -1,10 +1,10 @@ use Result; use Error; +use serde_json; use schema::Schema; use std::sync::Arc; use std::borrow::BorrowMut; use std::fmt; -use rustc_serialize::json; use core::SegmentId; use directory::{Directory, MmapDirectory, RAMDirectory}; use indexer::index_writer::open_index_writer; @@ -29,7 +29,7 @@ const NUM_SEARCHERS: usize = 12; fn load_metas(directory: &Directory) -> Result { let meta_data = directory.atomic_read(&META_FILEPATH)?; let meta_string = String::from_utf8_lossy(&meta_data); - json::decode(&meta_string) + serde_json::from_str(&meta_string) .map_err(|e| Error::CorruptedFile(META_FILEPATH.clone(), Box::new(e))) } diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs index 8a0274b4e..849d21041 100644 --- a/src/core/index_meta.rs +++ b/src/core/index_meta.rs @@ -9,7 +9,7 @@ use core::SegmentMeta; /// * the index docstamp /// * the schema /// -#[derive(Clone,Debug,RustcDecodable,RustcEncodable)] +#[derive(Clone,Debug,Serialize, Deserialize)] pub struct IndexMeta { pub segments: Vec, pub schema: Schema, diff --git a/src/core/segment_id.rs b/src/core/segment_id.rs index 9e3a75d3d..a0914ceb4 100644 --- a/src/core/segment_id.rs +++ b/src/core/segment_id.rs @@ -1,6 +1,5 @@ use uuid::Uuid; use std::fmt; -use rustc_serialize::{Encoder, Decoder, Encodable, Decodable}; use std::cmp::{Ordering, Ord}; #[cfg(test)] @@ -14,7 +13,7 @@ use std::sync::atomic; /// /// In unit test, for reproducability, the SegmentId are /// simply generated in an autoincrement fashion. 
-#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct SegmentId(Uuid); @@ -65,18 +64,6 @@ impl SegmentId { } } -impl Encodable for SegmentId { - fn encode(&self, s: &mut S) -> Result<(), S::Error> { - self.0.encode(s) - } -} - -impl Decodable for SegmentId { - fn decode(d: &mut D) -> Result { - Uuid::decode(d).map(SegmentId) - } -} - impl fmt::Debug for SegmentId { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "Seg({:?})", self.short_uuid_string()) diff --git a/src/core/segment_meta.rs b/src/core/segment_meta.rs index 9716c348b..1a91123ca 100644 --- a/src/core/segment_meta.rs +++ b/src/core/segment_meta.rs @@ -3,7 +3,7 @@ use super::SegmentComponent; use std::path::PathBuf; use std::collections::HashSet; -#[derive(Clone, Debug, RustcDecodable,RustcEncodable)] +#[derive(Clone, Debug, Serialize, Deserialize)] struct DeleteMeta { num_deleted_docs: u32, opstamp: u64, @@ -13,7 +13,7 @@ struct DeleteMeta { /// /// For instance the number of docs it contains, /// how many are deleted, etc. 
-#[derive(Clone, Debug, RustcDecodable,RustcEncodable)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct SegmentMeta { segment_id: SegmentId, max_doc: u32, diff --git a/src/directory/managed_directory.rs b/src/directory/managed_directory.rs index 49c41fa49..18631e3a7 100644 --- a/src/directory/managed_directory.rs +++ b/src/directory/managed_directory.rs @@ -1,4 +1,5 @@ use std::path::{Path, PathBuf}; +use serde_json; use directory::error::{OpenReadError, DeleteError, OpenWriteError}; use directory::{ReadOnlySource, WritePtr}; use std::result; @@ -7,7 +8,6 @@ use Directory; use std::sync::{Arc, RwLock}; use std::collections::HashSet; use std::io::Write; -use rustc_serialize::json; use core::MANAGED_FILEPATH; use std::collections::HashMap; use std::fmt; @@ -74,7 +74,7 @@ impl ManagedDirectory { match directory.atomic_read(&MANAGED_FILEPATH) { Ok(data) => { let managed_files_json = String::from_utf8_lossy(&data); - let managed_files: HashSet = json::decode(&managed_files_json) + let managed_files: HashSet = serde_json::from_str(&managed_files_json) .map_err(|e| Error::CorruptedFile(MANAGED_FILEPATH.clone(), Box::new(e)))?; Ok(ManagedDirectory { directory: box directory, @@ -204,8 +204,8 @@ impl ManagedDirectory { .expect("Managed file lock poisoned"); managed_paths = meta_informations_rlock.managed_paths.clone(); } - let mut w = vec!(); - try!(write!(&mut w, "{}\n", json::as_pretty_json(&managed_paths))); + let mut w = try!(serde_json::to_vec(&managed_paths)); + try!(write!(&mut w, "\n")); self.directory.atomic_write(&MANAGED_FILEPATH, &w[..])?; Ok(()) } diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index 2437ceb35..25c76a0e9 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -53,7 +53,7 @@ fn open_mmap(full_path: &PathBuf) -> result::Result>, OpenReadE } -#[derive(Default,Clone,Debug,RustcDecodable,RustcEncodable)] +#[derive(Default,Clone,Debug,Serialize,Deserialize)] pub struct 
CacheCounters { // Number of time the cache prevents to call `mmap` pub hit: usize, @@ -65,7 +65,7 @@ pub struct CacheCounters { pub miss_weak: usize, } -#[derive(Clone,Debug,RustcDecodable,RustcEncodable)] +#[derive(Clone,Debug,Serialize,Deserialize)] pub struct CacheInfo { pub counters: CacheCounters, pub mmapped: Vec, diff --git a/src/error.rs b/src/error.rs index b897cb262..aacc14653 100644 --- a/src/error.rs +++ b/src/error.rs @@ -11,7 +11,7 @@ use directory::error::{OpenReadError, OpenWriteError, OpenDirectoryError}; use query; use schema; use fastfield::FastFieldNotAvailableError; - +use serde_json; /// Generic tantivy error. @@ -101,3 +101,9 @@ impl From for Error { } } } + +impl From for Error { + fn from(error: serde_json::Error) -> Error { + Error::IOError(error.into()) + } +} \ No newline at end of file diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index e638067c2..e4307bba2 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -440,6 +440,8 @@ mod tests { let score_field = schema_builder.add_u64_field("score", score_fieldtype); let index = Index::create_in_ram(schema_builder.build()); let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + + let empty_vec = Vec::::new(); { // a first commit index_writer.add_document( @@ -502,11 +504,11 @@ mod tests { assert_eq!(searcher.segment_readers()[0].max_doc(), 3); assert_eq!(searcher.segment_readers()[1].num_docs(), 2); assert_eq!(searcher.segment_readers()[1].max_doc(), 4); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!(3)); assert_eq!(search_term(&searcher, 
Term::from_field_text(text_field, "d")), vec!(3)); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000)); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000)); @@ -529,11 +531,11 @@ mod tests { assert_eq!(searcher.num_docs(), 3); assert_eq!(searcher.segment_readers()[0].num_docs(), 3); assert_eq!(searcher.segment_readers()[0].max_doc(), 3); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!(3)); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!(3)); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000)); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000)); let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap(); @@ -551,11 +553,11 @@ mod tests { assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.segment_readers()[0].num_docs(), 2); assert_eq!(searcher.segment_readers()[0].max_doc(), 3); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!()); - assert_eq!(search_term(&searcher, 
Term::from_field_text(text_field, "c")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000)); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000)); let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap(); @@ -574,11 +576,11 @@ mod tests { assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.segment_readers()[0].num_docs(), 2); assert_eq!(searcher.segment_readers()[0].max_doc(), 2); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!()); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), empty_vec); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), empty_vec); + assert_eq!(search_term(&searcher, 
Term::from_field_text(text_field, "e")), empty_vec); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000)); assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000)); let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap(); diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index ea68a5664..3ec06ff3f 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -23,7 +23,7 @@ use indexer::SegmentEntry; use indexer::SegmentSerializer; use Result; use futures_cpupool::CpuFuture; -use rustc_serialize::json; +use serde_json; use indexer::delete_queue::DeleteCursor; use schema::Schema; use std::borrow::BorrowMut; @@ -77,10 +77,10 @@ pub fn save_metas(segment_metas: Vec, schema: schema, opstamp: opstamp, }; - let mut w = vec!(); - try!(write!(&mut w, "{}\n", json::as_pretty_json(&metas))); + let mut w = try!(serde_json::to_vec(&metas)); + try!(write!(&mut w, "\n")); let res = directory.atomic_write(&META_FILEPATH, &w[..])?; - debug!("Saved metas {}", json::as_pretty_json(&metas)); + debug!("Saved metas {:?}", serde_json::to_string_pretty(&metas)); Ok(res) } diff --git a/src/lib.rs b/src/lib.rs index c11397465..7f889691d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,9 @@ #[macro_use] extern crate lazy_static; +#[macro_use] +extern crate serde_derive; + #[macro_use] extern crate log; @@ -35,10 +38,11 @@ extern crate byteorder; extern crate memmap; extern crate regex; extern crate tempfile; -extern crate rustc_serialize; extern crate atomicwrites; extern crate tempdir; +extern crate serde; extern crate bincode; +extern crate serde_json; extern crate time; extern crate lz4; extern crate uuid; diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index fc1a33821..c03c0fbc4 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -102,7 +102,7 
@@ mod tests { } { let boolean_query = BooleanQuery::from(vec![(Occur::MustNot, make_term_query("d")),]); - assert_eq!(matching_docs(&boolean_query), Vec::new()); + assert_eq!(matching_docs(&boolean_query), Vec::::new()); } } diff --git a/src/query/phrase_query/mod.rs b/src/query/phrase_query/mod.rs index 4c9d1dca0..05765149b 100644 --- a/src/query/phrase_query/mod.rs +++ b/src/query/phrase_query/mod.rs @@ -60,11 +60,14 @@ mod tests { searcher.search(&phrase_query, &mut test_collector).expect("search should succeed"); test_collector.docs() }; + + let empty_vec = Vec::::new(); + assert_eq!(test_query(vec!("a", "b", "c")), vec!(2, 4)); assert_eq!(test_query(vec!("a", "b")), vec!(1, 2, 3, 4)); assert_eq!(test_query(vec!("b", "b")), vec!(0, 1)); - assert_eq!(test_query(vec!("g", "ewrwer")), vec!()); - assert_eq!(test_query(vec!("g", "a")), vec!()); + assert_eq!(test_query(vec!("g", "ewrwer")), empty_vec); + assert_eq!(test_query(vec!("g", "a")), empty_vec); } } diff --git a/src/schema/document.rs b/src/schema/document.rs index fa8a65397..6496a1889 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -11,7 +11,7 @@ use itertools::Itertools; /// Documents are really just a list of couple `(field, value)`. /// In this list, one field may appear more than once. -#[derive(Debug, RustcEncodable, RustcDecodable, Default)] +#[derive(Debug, Serialize, Deserialize, Default)] pub struct Document { field_values: Vec, } diff --git a/src/schema/field.rs b/src/schema/field.rs index 26d8da366..650ac0fd2 100644 --- a/src/schema/field.rs +++ b/src/schema/field.rs @@ -10,7 +10,7 @@ use common::BinarySerializable; /// /// Because the field id is a `u8`, tantivy can only have at most `255` fields. /// Value 255 is reserved. 
-#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, RustcEncodable, RustcDecodable)] +#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, Serialize, Deserialize)] pub struct Field(pub u32); impl BinarySerializable for Field { diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index ad2292248..45e34489c 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -1,10 +1,10 @@ use schema::TextOptions; use schema::IntOptions; -use rustc_serialize::Decodable; -use rustc_serialize::Decoder; -use rustc_serialize::Encodable; -use rustc_serialize::Encoder; +use std::fmt; +use serde::{Serialize, Deserialize, Serializer, Deserializer}; +use serde::ser::SerializeStruct; +use serde::de::{self, Visitor, MapAccess}; use schema::FieldType; /// A `FieldEntry` represents a field and its configuration. @@ -94,75 +94,99 @@ impl FieldEntry { } } +impl Serialize for FieldEntry { + fn serialize(&self, serializer: S) -> Result + where S: Serializer + { + let mut s = serializer.serialize_struct("field_entry", 3)?; + s.serialize_field("name", &self.name)?; - -impl Encodable for FieldEntry { - fn encode(&self, s: &mut S) -> Result<(), S::Error> { - s.emit_struct("field_entry", 3, |s| { - try!(s.emit_struct_field("name", 0, |s| { - self.name.encode(s) - })); - match self.field_type { - FieldType::Str(ref options) => { - s.emit_struct_field("type", 1, |s| { - s.emit_str("text") - })?; - s.emit_struct_field("options", 2, |s| { - options.encode(s) - })?; - } - FieldType::U64(ref options) => { - s.emit_struct_field("type", 1, |s| { - s.emit_str("u64") - })?; - s.emit_struct_field("options", 2, |s| { - options.encode(s) - })?; - } - FieldType::I64(ref options) => { - s.emit_struct_field("type", 1, |s| { - s.emit_str("i64") - })?; - s.emit_struct_field("options", 2, |s| { - options.encode(s) - })?; - } + match self.field_type { + FieldType::Str(ref options) => { + s.serialize_field("type", "text")?; + s.serialize_field("options", options)?; + 
}, + FieldType::U64(ref options) => { + s.serialize_field("type", "u64")?; + s.serialize_field("options", options)?; + }, + FieldType::I64(ref options) => { + s.serialize_field("type", "i64")?; + s.serialize_field("options", options)?; } - - Ok(()) - }) + } + + s.end() } } -impl Decodable for FieldEntry { - fn decode(d: &mut D) -> Result { - d.read_struct("field_entry", 3, |d| { - let name = try!(d.read_struct_field("name", 0, |d| { - d.read_str() - })); - let field_type: String = try!(d.read_struct_field("type", 1, |d| { - d.read_str() - })); - d.read_struct_field("options", 2, |d| { - match field_type.as_ref() { - "u64" => { - let int_options = try!(IntOptions::decode(d)); - Ok(FieldEntry::new_u64(name, int_options)) - } - "i64" => { - let int_options = try!(IntOptions::decode(d)); - Ok(FieldEntry::new_i64(name, int_options)) - } - "text" => { - let text_options = try!(TextOptions::decode(d)); - Ok(FieldEntry::new_text(name, text_options)) - } - _ => { - Err(d.error(&format!("Field type {:?} unknown", field_type))) +impl<'de> Deserialize<'de> for FieldEntry { + fn deserialize(deserializer: D) -> Result + where D: Deserializer<'de> + { + #[derive(Deserialize)] + #[serde(field_identifier, rename_all = "lowercase")] + enum Field { Name, Type, Options }; + + const FIELDS: &'static [&'static str] = &["name", "type", "options"]; + + struct FieldEntryVisitor; + + impl<'de> Visitor<'de> for FieldEntryVisitor { + type Value = FieldEntry; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("struct FieldEntry") + } + + fn visit_map(self, mut map: V) -> Result + where V: MapAccess<'de> + { + let mut name = None; + let mut ty = None; + let mut field_type = None; + while let Some(key) = map.next_key()? 
{ + match key { + Field::Name => { + if name.is_some() { + return Err(de::Error::duplicate_field("name")); + } + name = Some(map.next_value()?); + } + Field::Type => { + if ty.is_some() { + return Err(de::Error::duplicate_field("type")); + } + ty = Some(map.next_value()?); + } + Field::Options => { + match ty { + None => return Err(de::Error::custom("The `type` field must be specified before `options`")), + Some(ty) => { + match ty { + "text" => field_type = Some(FieldType::Str(map.next_value()?)), + "u64" => field_type = Some(FieldType::U64(map.next_value()?)), + "i64" => field_type = Some(FieldType::I64(map.next_value()?)), + _ => return Err(de::Error::custom(format!("Unrecognised type {}", ty))) + } + } + } + } } } - }) - }) + + let name = name.ok_or_else(|| de::Error::missing_field("name"))?; + ty.ok_or_else(|| de::Error::missing_field("ty"))?; + let field_type = field_type.ok_or_else(|| de::Error::missing_field("options"))?; + + Ok(FieldEntry { + name: name, + field_type: field_type, + }) + } + } + + deserializer.deserialize_struct("field_entry", FIELDS, FieldEntryVisitor) } } @@ -172,18 +196,31 @@ mod tests { use super::*; use schema::TEXT; - use rustc_serialize::json; + use serde_json; #[test] fn test_json_serialization() { let field_value = FieldEntry::new_text(String::from("title"), TEXT); - assert_eq!(format!("{}", json::as_pretty_json(&field_value)), r#"{ + + let expected = r#"{ "name": "title", "type": "text", "options": { "indexing": "position", "stored": false } -}"#); +}"#; + let field_value_json = serde_json::to_string_pretty(&field_value).unwrap(); + + assert_eq!(expected, &field_value_json); + + let field_value: FieldEntry = serde_json::from_str(expected).unwrap(); + + assert_eq!("title", field_value.name); + + match field_value.field_type { + FieldType::Str(_) => assert!(true), + _ => panic!("expected FieldType::Str") + } } } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index de477975c..70f6509f8 100644 --- 
a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -1,7 +1,6 @@ -use schema::TextOptions; -use schema::IntOptions; +use schema::{TextOptions, IntOptions}; -use rustc_serialize::json::Json; +use serde_json::Value as JsonValue; use schema::Value; @@ -19,7 +18,7 @@ pub enum ValueParsingError { /// A `FieldType` describes the type (text, u64) of a field as well as /// how it should be handled by tantivy. -#[derive(Clone, Debug, RustcDecodable, RustcEncodable)] +#[derive(Clone, Debug)] pub enum FieldType { /// String field type configuration Str(TextOptions), @@ -30,7 +29,7 @@ pub enum FieldType { } impl FieldType { - + /// returns true iff the field is indexed. pub fn is_indexed(&self) -> bool { match self { @@ -51,9 +50,9 @@ impl FieldType { /// Tantivy will not try to cast values. /// For instance, If the json value is the integer `3` and the /// target field is a `Str`, this method will return an Error. - pub fn value_from_json(&self, json: &Json) -> Result { + pub fn value_from_json(&self, json: &JsonValue) -> Result { match *json { - Json::String(ref field_text) => { + JsonValue::String(ref field_text) => { match *self { FieldType::Str(_) => { Ok(Value::Str(field_text.clone())) @@ -63,31 +62,23 @@ impl FieldType { } } } - Json::U64(ref field_val_u64) => { + JsonValue::Number(ref field_val_num) => { match *self { FieldType::I64(_) => { - if *field_val_u64 > (i64::max_value() as u64) { - Err(ValueParsingError::OverflowError(format!("Value {:?} is too high for a i64.", field_val_u64))) + if let Some(field_val_i64) = field_val_num.as_i64() { + Ok(Value::I64(field_val_i64)) } else { - Ok(Value::I64(*field_val_u64 as i64)) + Err(ValueParsingError::OverflowError(format!("Expected an i64 int, got {:?}", json))) } } FieldType::U64(_) => { - Ok(Value::U64(*field_val_u64)) - } - _ => { - Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json))) - } - } - }, - Json::I64(ref field_val_i64) => { - match *self { - FieldType::I64(_) => { - 
Ok(Value::I64(* field_val_i64)) - } - FieldType::U64(_) => { - Err(ValueParsingError::TypeError(format!("Expected a positive integer, got {:?}", json))) + if let Some(field_val_u64) = field_val_num.as_u64() { + Ok(Value::U64(field_val_u64)) + } + else { + Err(ValueParsingError::OverflowError(format!("Expected an u64 int, got {:?}", json))) + } } FieldType::Str(_) => { Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json))) diff --git a/src/schema/field_value.rs b/src/schema/field_value.rs index 353c44ecf..594172daf 100644 --- a/src/schema/field_value.rs +++ b/src/schema/field_value.rs @@ -7,7 +7,7 @@ use schema::Value; /// `FieldValue` holds together a `Field` and its `Value`. -#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, RustcEncodable, RustcDecodable)] +#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, Serialize, Deserialize)] pub struct FieldValue { field: Field, value: Value, diff --git a/src/schema/int_options.rs b/src/schema/int_options.rs index a599c89a3..2f4812e13 100644 --- a/src/schema/int_options.rs +++ b/src/schema/int_options.rs @@ -1,7 +1,7 @@ use std::ops::BitOr; -/// Define how a u64 field should be handled by tantivy. -#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)] +/// Define how an int field should be handled by tantivy. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct IntOptions { indexed: bool, fast: bool, diff --git a/src/schema/named_field_document.rs b/src/schema/named_field_document.rs index 49d6b0c03..3a28be243 100644 --- a/src/schema/named_field_document.rs +++ b/src/schema/named_field_document.rs @@ -1,7 +1,5 @@ use std::collections::BTreeMap; use schema::Value; -use rustc_serialize::Encodable; -use rustc_serialize::Encoder; @@ -11,36 +9,5 @@ use rustc_serialize::Encoder; /// A `NamedFieldDocument` is a simple representation of a document /// as a `BTreeMap>`. 
/// +#[derive(Serialize)] pub struct NamedFieldDocument(pub BTreeMap>); - - -impl Encodable for NamedFieldDocument { - fn encode(&self, s: &mut S) -> Result<(), S::Error> { - s.emit_struct("named_field_document", self.0.len(), |s| { - for (i, (name, vals)) in self.0.iter().enumerate() { - s.emit_struct_field(name, i, |s| { - for (j, val) in vals.iter().enumerate() { - s.emit_seq(vals.len(), |s| { - s.emit_seq_elt(j, |s| { - match *val { - Value::Str(ref text) => { - s.emit_str(text) - }, - Value::U64(ref val) => { - s.emit_u64(*val) - } - Value::I64(ref val) => { - s.emit_i64(*val) - } - } - }) - })?; - } - Ok(()) - - })?; - } - Ok(()) - }) - } -} diff --git a/src/schema/schema.rs b/src/schema/schema.rs index b1b146c50..3bfd803e0 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -1,14 +1,12 @@ use std::collections::HashMap; - -use rustc_serialize::Decodable; -use rustc_serialize::Encodable; -use rustc_serialize::Decoder; -use rustc_serialize::Encoder; -use rustc_serialize::json; -use rustc_serialize::json::Json; use std::collections::BTreeMap; use schema::field_type::ValueParsingError; use std::sync::Arc; + +use serde_json::{self, Value as JsonValue, Map as JsonObject}; +use serde::{Serialize, Serializer, Deserialize, Deserializer}; +use serde::ser::SerializeSeq; +use serde::de::{Visitor, SeqAccess}; use super::*; use std::fmt; @@ -215,14 +213,12 @@ impl Schema { /// /// Encoding a document cannot fail. pub fn to_json(&self, doc: &Document) -> String { - json::encode(&self.to_named_doc(doc)).unwrap() + serde_json::to_string(&self.to_named_doc(doc)).expect("doc encoding failed. This is a bug") } /// Build a document object from a json-object. 
pub fn parse_document(&self, doc_json: &str) -> Result { - let json_node = try!(Json::from_str(doc_json)); - let some_json_obj = json_node.as_object(); - if !some_json_obj.is_some() { + let json_obj: JsonObject = serde_json::from_str(doc_json).map_err(|_| { let doc_json_sample: String = if doc_json.len() < 20 { String::from(doc_json) @@ -230,9 +226,9 @@ impl Schema { else { format!("{:?}...", &doc_json[0..20]) }; - return Err(DocParsingError::NotJSONObject(doc_json_sample)) - } - let json_obj = some_json_obj.unwrap(); + DocParsingError::NotJSON(doc_json_sample) + })?; + let mut doc = Document::default(); for (field_name, json_value) in json_obj.iter() { match self.get_field(field_name) { @@ -240,7 +236,7 @@ impl Schema { let field_entry = self.get_field_entry(field); let field_type = field_entry.field_type(); match *json_value { - Json::Array(ref json_items) => { + JsonValue::Array(ref json_items) => { for json_item in json_items { let value = try!( field_type @@ -276,30 +272,50 @@ impl fmt::Debug for Schema { } } -impl Decodable for Schema { - fn decode(d: &mut D) -> Result { - let mut schema_builder = SchemaBuilder::default(); - try!(d.read_seq(|d, num_fields| { - for _ in 0..num_fields { - let field_entry = try!(FieldEntry::decode(d)); - schema_builder.add_field(field_entry); - } - Ok(()) - })); - Ok(schema_builder.build()) +impl Serialize for Schema { + fn serialize(&self, serializer: S) -> Result + where S: Serializer + { + let mut seq = serializer.serialize_seq(Some(self.0.fields.len()))?; + for e in &self.0.fields { + seq.serialize_element(e)?; + } + seq.end() } } -impl Encodable for Schema { - fn encode(&self, s: &mut S) -> Result<(), S::Error> { - try!(s.emit_seq(self.0.fields.len(), - |mut e| { - for (ord, field) in self.0.fields.iter().enumerate() { - try!(e.emit_seq_elt(ord, |e| field.encode(e))); +impl<'de> Deserialize<'de> for Schema +{ + fn deserialize(deserializer: D) -> Result + where D: Deserializer<'de> + { + struct SchemaVisitor; + + impl<'de> 
Visitor<'de> for SchemaVisitor + { + type Value = Schema; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("struct Schema") + } + + fn visit_seq(self, mut seq: A) -> Result + where A: SeqAccess<'de> + { + let mut schema = SchemaBuilder { + fields: Vec::with_capacity(seq.size_hint().unwrap_or(0)), + fields_map: HashMap::with_capacity(seq.size_hint().unwrap_or(0)), + }; + + while let Some(value) = seq.next_element()? { + schema.add_field(value); } - Ok(()) - })); - Ok(()) + + Ok(schema.build()) + } + } + + deserializer.deserialize_map(SchemaVisitor) } } @@ -319,28 +335,19 @@ impl From for Schema { #[derive(Debug)] pub enum DocParsingError { /// The payload given is not valid JSON. - NotJSON(json::ParserError), - /// The payload given is not a JSON Object (`{...}`). - NotJSONObject(String), + NotJSON(String), /// One of the value node could not be parsed. ValueError(String, ValueParsingError), /// The json-document contains a field that is not declared in the schema. 
NoSuchFieldInSchema(String), } -impl From for DocParsingError { - fn from(err: json::ParserError) -> DocParsingError { - DocParsingError::NotJSON(err) - } -} - - #[cfg(test)] mod tests { use schema::*; - use rustc_serialize::json; + use serde_json; use schema::field_type::ValueParsingError; use schema::schema::DocParsingError::NotJSON; @@ -348,11 +355,13 @@ mod tests { pub fn test_schema_serialization() { let mut schema_builder = SchemaBuilder::default(); let count_options = IntOptions::default().set_stored().set_fast(); + let popularity_options = IntOptions::default().set_stored().set_fast(); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field("author", STRING); schema_builder.add_u64_field("count", count_options); + schema_builder.add_i64_field("popularity", popularity_options); let schema = schema_builder.build(); - let schema_json: String = format!("{}", json::as_pretty_json(&schema)); + let schema_json = serde_json::to_string_pretty(&schema).unwrap(); let expected = r#"[ { "name": "title", @@ -378,10 +387,29 @@ mod tests { "fast": true, "stored": true } + }, + { + "name": "popularity", + "type": "i64", + "options": { + "indexed": false, + "fast": true, + "stored": true + } } ]"#; + println!("{}", schema_json); + println!("{}", expected); assert_eq!(schema_json, expected); + let schema: Schema = serde_json::from_str(expected).unwrap(); + + let mut fields = schema.fields().iter(); + + assert_eq!("title", fields.next().unwrap().name()); + assert_eq!("author", fields.next().unwrap().name()); + assert_eq!("count", fields.next().unwrap().name()); + assert_eq!("popularity", fields.next().unwrap().name()); } @@ -400,6 +428,7 @@ mod tests { "count": 4 }"#; let doc = schema.parse_document(doc_json).unwrap(); + let doc_serdeser = schema.parse_document(&schema.to_json(&doc)).unwrap(); assert_eq!(doc, doc_serdeser); } @@ -408,9 +437,11 @@ mod tests { pub fn test_parse_document() { let mut schema_builder = SchemaBuilder::default(); let count_options 
= IntOptions::default().set_stored().set_fast(); + let popularity_options = IntOptions::default().set_stored().set_fast(); let title_field = schema_builder.add_text_field("title", TEXT); let author_field = schema_builder.add_text_field("author", STRING); let count_field = schema_builder.add_u64_field("count", count_options); + let popularity_field = schema_builder.add_i64_field("popularity", popularity_options); let schema = schema_builder.build(); { let doc = schema.parse_document("{}").unwrap(); @@ -420,32 +451,20 @@ mod tests { let doc = schema.parse_document(r#"{ "title": "my title", "author": "fulmicoton", - "count": 4 + "count": 4, + "popularity": 10 }"#).unwrap(); assert_eq!(doc.get_first(title_field).unwrap().text(), "my title"); assert_eq!(doc.get_first(author_field).unwrap().text(), "fulmicoton"); assert_eq!(doc.get_first(count_field).unwrap().u64_value(), 4); - } - { - let json_err = schema.parse_document(r#"{ - "title": "my title", - "author": "fulmicoton" - "count": 4 - }"#); - match json_err { - Err(DocParsingError::NotJSON(__)) => { - assert!(true); - } - _ => { - assert!(false); - } - } + assert_eq!(doc.get_first(popularity_field).unwrap().i64_value(), 10); } { let json_err = schema.parse_document(r#"{ "title": "my title", "author": "fulmicoton", "count": 4, + "popularity": 10, "jambon": "bayonne" }"#); match json_err { @@ -453,7 +472,7 @@ mod tests { assert_eq!(field_name, "jambon"); } _ => { - assert!(false); + panic!("expected additional field 'jambon' to fail but didn't"); } } } @@ -462,6 +481,7 @@ mod tests { "title": "my title", "author": "fulmicoton", "count": "5", + "popularity": "10", "jambon": "bayonne" }"#); match json_err { @@ -469,7 +489,7 @@ mod tests { assert!(true); } _ => { - assert!(false); + panic!("expected string of 5 to fail but didn't"); } } } @@ -477,26 +497,28 @@ mod tests { let json_err = schema.parse_document(r#"{ "title": "my title", "author": "fulmicoton", - "count": -5 - }"#); - match json_err { - 
Err(DocParsingError::ValueError(_, ValueParsingError::TypeError(_))) => { - assert!(true); - } - _ => { - assert!(false); - } - } - } - { - let json_err = schema.parse_document(r#"{ - "title": "my title", - "author": "fulmicoton", - "count": 5000000000 + "count": -5, + "popularity": 10 }"#); match json_err { Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => { - assert!(false); + assert!(true); + } + _ => { + panic!("expected -5 to fail but didn't"); + } + } + } + { + let json_err = schema.parse_document(r#"{ + "title": "my title", + "author": "fulmicoton", + "count": 9223372036854775808, + "popularity": 10 + }"#); + match json_err { + Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => { + panic!("expected 9223372036854775808 to fit into u64, but it didn't"); } _ => { assert!(true); @@ -507,14 +529,30 @@ mod tests { let json_err = schema.parse_document(r#"{ "title": "my title", "author": "fulmicoton", - "count": 50000000000000000000 + "count": 50, + "popularity": 9223372036854775808 + }"#); + match json_err { + Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => { + assert!(true); + }, + _ => { + panic!("expected 9223372036854775808 to overflow i64, but it didn't"); + } + } + } + { + let json_err = schema.parse_document(r#"{ + "title": "my title", + "author": "fulmicoton", + "count": 50, }"#); match json_err { Err(NotJSON(_)) => { assert!(true); - } + }, _ => { - assert!(false) + panic!("expected invalid JSON to fail parsing, but it didn't"); } } } diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs index c718e2a87..31550cd7f 100644 --- a/src/schema/text_options.rs +++ b/src/schema/text_options.rs @@ -1,12 +1,8 @@ use std::ops::BitOr; -use rustc_serialize::Decodable; -use rustc_serialize::Decoder; -use rustc_serialize::Encodable; -use rustc_serialize::Encoder; /// Define how a text field should be handled by tantivy. 
-#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)] +#[derive(Clone,Debug,PartialEq,Eq, Serialize, Deserialize)] pub struct TextOptions { indexing: TextIndexingOptions, stored: bool, @@ -51,9 +47,10 @@ impl Default for TextOptions { /// Describe how a field should be indexed -#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash)] +#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash, Serialize, Deserialize)] pub enum TextIndexingOptions { /// Unindexed fields will not generate any postings. They will not be searchable either. + #[serde(rename="unindexed")] Unindexed, /// Untokenized means that the field text will not be split into tokens before being indexed. /// A field with the value "Hello world", will have the document suscribe to one single
@@ -61,62 +58,26 @@ pub enum TextIndexingOptions {
     ///
     /// It will **not** be searchable if the user enter "hello" for instance.
     /// This can be useful for tags, or ids for instance.
+    #[serde(rename="untokenized")]
     Untokenized,
     /// TokenizedNoFreq will tokenize the field value, and append the document doc id
     /// to the posting lists associated to all of the tokens.
     /// The frequence of appearance of the term in the document however will be lost.
     /// The term frequency used in the TfIdf formula will always be 1.
+    #[serde(rename="tokenize")]
     TokenizedNoFreq,
     /// TokenizedWithFreq will tokenize the field value, and encode
     /// both the docid and the term frequency in the posting lists associated to all
-    // of the tokens.
+    /// of the tokens.
+    #[serde(rename="freq")]
     TokenizedWithFreq,
     /// Like TokenizedWithFreq, but also encodes the positions of the
     /// terms in a separate file. This option is required for phrase queries.
     /// Don't use this if you are certain you won't need it, the term positions file can be very big.
+    #[serde(rename="position")]
     TokenizedWithFreqAndPosition,
 }
 
-impl Encodable for TextIndexingOptions {
-    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
-        let name = match *self {
-            TextIndexingOptions::Unindexed => {
-                "unindexed"
-            }
-            TextIndexingOptions::Untokenized => {
-                "untokenized"
-            }
-            TextIndexingOptions::TokenizedNoFreq => {
-                "tokenize"
-            }
-            TextIndexingOptions::TokenizedWithFreq => {
-                "freq"
-            }
-            TextIndexingOptions::TokenizedWithFreqAndPosition => {
-                "position"
-            }
-        };
-        s.emit_str(name)
-    }
-}
-
-impl Decodable for TextIndexingOptions {
-    fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
-        use self::TextIndexingOptions::*;
-        let option_name: String = try!(d.read_str());
-        Ok(match option_name.as_ref() {
-            "unindexed" => Unindexed,
-            "untokenized" => Untokenized,
-            "tokenize" => TokenizedNoFreq,
-            "freq" => TokenizedWithFreq,
-            "position" => TokenizedWithFreqAndPosition,
-            _ => {
-                return Err(d.error(&format!("Encoding option {:?} unknown", option_name)));
-            }
-        })
-    }
-}
-
 impl TextIndexingOptions {
 
     /// Returns true iff the term frequency will be encoded.
diff --git a/src/schema/value.rs b/src/schema/value.rs index 3dc678e04..bf75b9405 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -1,12 +1,10 @@ - -use common::BinarySerializable; -use std::io; -use std::io::Write; -use std::io::Read; +use std::fmt; +use serde::{Serialize, Serializer, Deserialize, Deserializer}; +use serde::de::Visitor; /// Value represents the value of a any field. /// It is an enum over all over all of the possible field type. -#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable)] +#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)] pub enum Value { /// The str type is used for any text information.
Str(String), @@ -16,6 +14,62 @@ pub enum Value { I64(i64) }
 
+/// Serializes a `Value` as its bare inner value: `Str` goes through
+/// `serialize_str`, `U64` through `serialize_u64` and `I64` through
+/// `serialize_i64`. No variant tag is written.
+impl Serialize for Value {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+        where S: Serializer
+    {
+        match *self {
+            Value::Str(ref text) => serializer.serialize_str(text),
+            Value::U64(val) => serializer.serialize_u64(val),
+            Value::I64(val) => serializer.serialize_i64(val),
+        }
+    }
+}
+
+/// Deserializes a `Value` through `deserialize_any`: the visitor maps
+/// `visit_u64` to `U64`, `visit_i64` to `I64`, and both `visit_str` and
+/// `visit_string` to `Str`.
+impl<'de> Deserialize<'de> for Value
+{
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where D: Deserializer<'de>
+    {
+        struct ValueVisitor;
+
+        impl<'de> Visitor<'de> for ValueVisitor
+        {
+            type Value = Value;
+
+            // Describes what this visitor accepts; the wording mirrors the
+            // `visit_*` methods implemented below (strings, u64 and i64).
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("a string, u64 or i64")
+            }
+
+            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E> {
+                Ok(Value::U64(v))
+            }
+
+            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E> {
+                Ok(Value::I64(v))
+            }
+
+            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
+                Ok(Value::Str(v.to_owned()))
+            }
+
+            fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
+                Ok(Value::Str(v))
+            }
+        }
+
+        deserializer.deserialize_any(ValueVisitor)
+    }
+}
+
 impl Value {
     /// Returns the text value, provided the value is of the `Str` type.
/// @@ -88,48 +134,57 @@ impl<'a> From<&'a str> for Value { } }
 
-const TEXT_CODE: u8 = 0;
-const U64_CODE: u8 = 1;
-const I64_CODE: u8 = 2;
+/// `BinarySerializable` implementation for `Value`: a `u8` type code is
+/// written first, followed by the payload of the matching variant.
+mod binary_serialize {
+    use common::BinarySerializable;
+    use std::io::{self, Read, Write};
+    use super::Value;
 
+    const TEXT_CODE: u8 = 0;
+    const U64_CODE: u8 = 1;
+    const I64_CODE: u8 = 2;
 
-impl BinarySerializable for Value {
-    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
-        let mut written_size = 0;
-        match *self {
-            Value::Str(ref text) => {
-                written_size += try!(TEXT_CODE.serialize(writer));
-                written_size += try!(text.serialize(writer));
-            },
-            Value::U64(ref val) => {
-                written_size += try!(U64_CODE.serialize(writer));
-                written_size += try!(val.serialize(writer));
-            },
-            Value::I64(ref val) => {
-                written_size += try!(I64_CODE.serialize(writer));
-                written_size += try!(val.serialize(writer));
-            },
+    impl BinarySerializable for Value {
+        /// Writes the type code, then the payload, and returns the sum of the
+        /// sizes reported by both writes.
+        fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
+            let mut written_size = 0;
+            match *self {
+                Value::Str(ref text) => {
+                    written_size += try!(TEXT_CODE.serialize(writer));
+                    written_size += try!(text.serialize(writer));
+                },
+                Value::U64(ref val) => {
+                    written_size += try!(U64_CODE.serialize(writer));
+                    written_size += try!(val.serialize(writer));
+                },
+                Value::I64(ref val) => {
+                    written_size += try!(I64_CODE.serialize(writer));
+                    written_size += try!(val.serialize(writer));
+                },
+            }
+            Ok(written_size)
+        }
+        /// Reads the type code, then the payload of the matching type; any
+        /// other code yields an `io::ErrorKind::InvalidData` error.
+        fn deserialize(reader: &mut Read) -> io::Result<Self> {
+            let type_code = try!(u8::deserialize(reader));
+            match type_code {
+                TEXT_CODE => {
+                    let text = try!(String::deserialize(reader));
+                    Ok(Value::Str(text))
+                }
+                U64_CODE => {
+                    let value = try!(u64::deserialize(reader));
+                    Ok(Value::U64(value))
+                }
+                I64_CODE => {
+                    let value = try!(i64::deserialize(reader));
+                    Ok(Value::I64(value))
+                }
+                _ => {
+                    Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
+                }
+            }
         }
-        Ok(written_size)
-    }
-    fn deserialize(reader: &mut Read) -> io::Result<Self> {
-        let type_code = try!(u8::deserialize(reader));
-        match type_code {
-            TEXT_CODE => {
-                let text = try!(String::deserialize(reader));
-                Ok(Value::Str(text))
-            }
-            U64_CODE => {
-                let value = try!(u64::deserialize(reader));
-                Ok(Value::U64(value))
-            }
-            I64_CODE => {
-                let value = try!(i64::deserialize(reader));
-                Ok(Value::I64(value))
-            }
-            _ => {
-                Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
-            }
-        }
     }
 }