Merge branch 'master' into issue/indexing-refactoring
@@ -2,6 +2,7 @@ Tantivy 0.4.0
 ==========================
 - Removed u32 fields. They are replaced by u64 and i64 fields (#65)
+- Replacing rustc_serialize by serde. Kudos to @KodrAus and @lnicola
 - QueryParser:
   - Explicit error returned when searching for a term that is not indexed
   - Searching for an int term via the query parser was broken `(age:1)`

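The theme of this merge is the changelog's second bullet: every `RustcEncodable`/`RustcDecodable` derive below becomes a serde `Serialize`/`Deserialize` derive, and JSON encoding goes through `serde_json`. A minimal sketch of the pattern, borrowing the `Timing` struct from the timer hunk below (`name` is widened to `String` here so the round-trip compiles; the real struct uses `&'static str`):

```rust
#[macro_use]
extern crate serde_derive;
extern crate serde_json;

// Before: #[derive(Debug, RustcEncodable)] + rustc_serialize::json::encode.
// After: serde derives + serde_json for the actual JSON encoding.
#[derive(Debug, Serialize, Deserialize)]
pub struct Timing {
    name: String,
    duration: i64,
}

fn main() {
    let timing = Timing { name: "merge".into(), duration: 42 };
    let json = serde_json::to_string(&timing).unwrap();      // was json::encode(&timing)
    let back: Timing = serde_json::from_str(&json).unwrap(); // was json::decode(&json)
    println!("{} took {}ms", back.name, back.duration);
}
```
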
@@ -20,18 +20,20 @@ regex = "0.2"
 fst = "0.1.37"
 atomicwrites = "0.1.3"
 tempfile = "2.1"
-rustc-serialize = "0.3"
 log = "0.3.6"
 combine = "2.2"
 tempdir = "0.3"
-bincode = "0.5"
+serde = "1.0"
+serde_derive = "1.0"
+serde_json = "1.0"
+bincode = "0.7.0-alpha7"
 libc = {version = "0.2.20", optional=true}
 num_cpus = "1.2"
 itertools = "0.5.9"
 lz4 = "1.20"
 bit-set = "0.4.0"
 time = "0.1"
-uuid = { version = "0.4", features = ["v4", "rustc-serialize"] }
+uuid = { version = "0.5", features = ["v4", "serde"] }
 chan = "0.1"
 version = "2"
 crossbeam = "0.2"

@@ -1,4 +1,3 @@
-extern crate rustc_serialize;
 extern crate tantivy;
 extern crate tempdir;

@@ -33,7 +33,7 @@ impl<'a> Drop for OpenTimer<'a> {
 }
 
 /// Timing recording
-#[derive(Debug, RustcEncodable)]
+#[derive(Debug, Serialize)]
 pub struct Timing {
     name: &'static str,
     duration: i64,

@@ -41,7 +41,7 @@ pub struct Timing {
 }
 
 /// Timer tree
-#[derive(Debug, RustcEncodable)]
+#[derive(Debug, Serialize)]
 pub struct TimerTree {
     timings: Vec<Timing>,
 }

@@ -1,10 +1,10 @@
 use Result;
 use Error;
+use serde_json;
 use schema::Schema;
 use std::sync::Arc;
 use std::borrow::BorrowMut;
 use std::fmt;
-use rustc_serialize::json;
 use core::SegmentId;
 use directory::{Directory, MmapDirectory, RAMDirectory};
 use indexer::index_writer::open_index_writer;

@@ -29,7 +29,7 @@ const NUM_SEARCHERS: usize = 12;
 fn load_metas(directory: &Directory) -> Result<IndexMeta> {
     let meta_data = directory.atomic_read(&META_FILEPATH)?;
     let meta_string = String::from_utf8_lossy(&meta_data);
-    json::decode(&meta_string)
+    serde_json::from_str(&meta_string)
         .map_err(|e| Error::CorruptedFile(META_FILEPATH.clone(), Box::new(e)))
 }

@@ -9,7 +9,7 @@ use core::SegmentMeta;
 /// * the index docstamp
 /// * the schema
 ///
-#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
+#[derive(Clone,Debug,Serialize, Deserialize)]
 pub struct IndexMeta {
     pub segments: Vec<SegmentMeta>,
     pub schema: Schema,

@@ -1,6 +1,5 @@
 use uuid::Uuid;
 use std::fmt;
-use rustc_serialize::{Encoder, Decoder, Encodable, Decodable};
 use std::cmp::{Ordering, Ord};
 
 #[cfg(test)]

@@ -14,7 +13,7 @@ use std::sync::atomic;
 ///
 /// In unit test, for reproducibility, the SegmentId are
 /// simply generated in an autoincrement fashion.
-#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub struct SegmentId(Uuid);

@@ -65,18 +64,6 @@ impl SegmentId {
     }
 }
 
-impl Encodable for SegmentId {
-    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
-        self.0.encode(s)
-    }
-}
-
-impl Decodable for SegmentId {
-    fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
-        Uuid::decode(d).map(SegmentId)
-    }
-}
-
 impl fmt::Debug for SegmentId {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "Seg({:?})", self.short_uuid_string())

@@ -3,7 +3,7 @@ use super::SegmentComponent;
 use std::path::PathBuf;
 use std::collections::HashSet;
 
-#[derive(Clone, Debug, RustcDecodable,RustcEncodable)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 struct DeleteMeta {
     num_deleted_docs: u32,
     opstamp: u64,

@@ -13,7 +13,7 @@ struct DeleteMeta {
 ///
 /// For instance the number of docs it contains,
 /// how many are deleted, etc.
-#[derive(Clone, Debug, RustcDecodable,RustcEncodable)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct SegmentMeta {
     segment_id: SegmentId,
     max_doc: u32,

@@ -1,4 +1,5 @@
 use std::path::{Path, PathBuf};
+use serde_json;
 use directory::error::{OpenReadError, DeleteError, OpenWriteError};
 use directory::{ReadOnlySource, WritePtr};
 use std::result;

@@ -7,7 +8,6 @@ use Directory;
 use std::sync::{Arc, RwLock};
 use std::collections::HashSet;
 use std::io::Write;
-use rustc_serialize::json;
 use core::MANAGED_FILEPATH;
 use std::collections::HashMap;
 use std::fmt;

@@ -74,7 +74,7 @@ impl ManagedDirectory {
         match directory.atomic_read(&MANAGED_FILEPATH) {
             Ok(data) => {
                 let managed_files_json = String::from_utf8_lossy(&data);
-                let managed_files: HashSet<PathBuf> = json::decode(&managed_files_json)
+                let managed_files: HashSet<PathBuf> = serde_json::from_str(&managed_files_json)
                     .map_err(|e| Error::CorruptedFile(MANAGED_FILEPATH.clone(), Box::new(e)))?;
                 Ok(ManagedDirectory {
                     directory: box directory,

@@ -204,8 +204,8 @@ impl ManagedDirectory {
                 .expect("Managed file lock poisoned");
             managed_paths = meta_informations_rlock.managed_paths.clone();
         }
-        let mut w = vec!();
-        try!(write!(&mut w, "{}\n", json::as_pretty_json(&managed_paths)));
+        let mut w = try!(serde_json::to_vec(&managed_paths));
+        try!(write!(&mut w, "\n"));
         self.directory.atomic_write(&MANAGED_FILEPATH, &w[..])?;
         Ok(())
     }

@@ -53,7 +53,7 @@ fn open_mmap(full_path: &PathBuf) -> result::Result<Option<Arc<Mmap>>, OpenReadError> {
 }
 
-#[derive(Default,Clone,Debug,RustcDecodable,RustcEncodable)]
+#[derive(Default,Clone,Debug,Serialize,Deserialize)]
 pub struct CacheCounters {
     // Number of times the cache prevented a call to `mmap`
     pub hit: usize,

@@ -65,7 +65,7 @@ pub struct CacheCounters {
     pub miss_weak: usize,
 }
 
-#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
+#[derive(Clone,Debug,Serialize,Deserialize)]
 pub struct CacheInfo {
     pub counters: CacheCounters,
     pub mmapped: Vec<PathBuf>,

@@ -11,7 +11,7 @@ use directory::error::{OpenReadError, OpenWriteError, OpenDirectoryError};
 use query;
 use schema;
 use fastfield::FastFieldNotAvailableError;
 
+use serde_json;
 
 /// Generic tantivy error.

@@ -101,3 +101,9 @@ impl From<OpenDirectoryError> for Error {
         }
     }
 }
+
+impl From<serde_json::Error> for Error {
+    fn from(error: serde_json::Error) -> Error {
+        Error::IOError(error.into())
+    }
+}

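This `From` impl is what lets calling code use `?` directly on `serde_json` results. A hypothetical illustration (the `load_meta` helper is not part of the codebase):

```rust
// Hypothetical helper: `?` goes through From<serde_json::Error> for Error,
// converting a JSON parse failure into tantivy's error type automatically.
fn load_meta(meta_json: &str) -> Result<IndexMeta> {
    let meta: IndexMeta = serde_json::from_str(meta_json)?;
    Ok(meta)
}
```
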
@@ -440,6 +440,8 @@ mod tests {
         let score_field = schema_builder.add_u64_field("score", score_fieldtype);
         let index = Index::create_in_ram(schema_builder.build());
         let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
+
+        let empty_vec = Vec::<u64>::new();
 
         { // a first commit
             index_writer.add_document(

@@ -502,11 +504,11 @@ mod tests {
         assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
         assert_eq!(searcher.segment_readers()[1].num_docs(), 2);
         assert_eq!(searcher.segment_readers()[1].max_doc(), 4);
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec);
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!(3));
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!(3));
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec);
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));

@@ -529,11 +531,11 @@ mod tests {
         assert_eq!(searcher.num_docs(), 3);
         assert_eq!(searcher.segment_readers()[0].num_docs(), 3);
         assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec);
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!(3));
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!(3));
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec);
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
         let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();

@@ -551,11 +553,11 @@ mod tests {
         assert_eq!(searcher.num_docs(), 2);
         assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
         assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec);
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
         let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();

@@ -574,11 +576,11 @@ mod tests {
         assert_eq!(searcher.num_docs(), 2);
         assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
         assert_eq!(searcher.segment_readers()[0].max_doc(), 2);
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!());
-        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), empty_vec);
+        assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), empty_vec);
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
         assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
         let score_field_reader: U64FastFieldReader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();

@@ -23,7 +23,7 @@ use indexer::SegmentEntry;
 use indexer::SegmentSerializer;
 use Result;
 use futures_cpupool::CpuFuture;
-use rustc_serialize::json;
+use serde_json;
 use indexer::delete_queue::DeleteCursor;
 use schema::Schema;
 use std::borrow::BorrowMut;

@@ -77,10 +77,10 @@ pub fn save_metas(segment_metas: Vec<SegmentMeta>,
         schema: schema,
         opstamp: opstamp,
     };
-    let mut w = vec!();
-    try!(write!(&mut w, "{}\n", json::as_pretty_json(&metas)));
+    let mut w = try!(serde_json::to_vec(&metas));
+    try!(write!(&mut w, "\n"));
     let res = directory.atomic_write(&META_FILEPATH, &w[..])?;
-    debug!("Saved metas {}", json::as_pretty_json(&metas));
+    debug!("Saved metas {:?}", serde_json::to_string_pretty(&metas));
     Ok(res)
 }

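One behavioural change hides in this hunk: `json::as_pretty_json` wrote pretty-printed JSON, while `serde_json::to_vec` is compact, and the trailing newline must now be appended by hand. A sketch of the new write path for any `Serialize` payload (the helper name is illustrative):

```rust
fn encode_with_newline<T: serde::Serialize>(payload: &T) -> serde_json::Result<Vec<u8>> {
    // serde_json::to_vec emits compact JSON without a trailing newline,
    // so the newline the old pretty printer produced is pushed explicitly.
    let mut w = serde_json::to_vec(payload)?;
    w.push(b'\n');
    Ok(w)
}
```
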
@@ -25,6 +25,9 @@
 #[macro_use]
 extern crate lazy_static;
 
+#[macro_use]
+extern crate serde_derive;
+
 #[macro_use]
 extern crate log;
 

@@ -35,10 +38,11 @@ extern crate byteorder;
 extern crate memmap;
 extern crate regex;
 extern crate tempfile;
-extern crate rustc_serialize;
 extern crate atomicwrites;
 extern crate tempdir;
+extern crate serde;
 extern crate bincode;
+extern crate serde_json;
 extern crate time;
 extern crate lz4;
 extern crate uuid;

@@ -102,7 +102,7 @@ mod tests {
         }
         {
             let boolean_query = BooleanQuery::from(vec![(Occur::MustNot, make_term_query("d")),]);
-            assert_eq!(matching_docs(&boolean_query), Vec::new());
+            assert_eq!(matching_docs(&boolean_query), Vec::<u32>::new());
         }
     }

@@ -60,11 +60,14 @@ mod tests {
             searcher.search(&phrase_query, &mut test_collector).expect("search should succeed");
             test_collector.docs()
         };
+
+        let empty_vec = Vec::<u32>::new();
+
         assert_eq!(test_query(vec!("a", "b", "c")), vec!(2, 4));
         assert_eq!(test_query(vec!("a", "b")), vec!(1, 2, 3, 4));
         assert_eq!(test_query(vec!("b", "b")), vec!(0, 1));
-        assert_eq!(test_query(vec!("g", "ewrwer")), vec!());
-        assert_eq!(test_query(vec!("g", "a")), vec!());
+        assert_eq!(test_query(vec!("g", "ewrwer")), empty_vec);
+        assert_eq!(test_query(vec!("g", "a")), empty_vec);
     }
 
 }

@@ -11,7 +11,7 @@ use itertools::Itertools;
 
 /// Documents are really just a list of `(field, value)` pairs.
 /// In this list, one field may appear more than once.
-#[derive(Debug, RustcEncodable, RustcDecodable, Default)]
+#[derive(Debug, Serialize, Deserialize, Default)]
 pub struct Document {
     field_values: Vec<FieldValue>,
 }

@@ -10,7 +10,7 @@ use common::BinarySerializable;
 ///
 /// Because the field id is a `u8`, tantivy can only have at most `255` fields.
 /// Value 255 is reserved.
-#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, RustcEncodable, RustcDecodable)]
+#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, Serialize, Deserialize)]
 pub struct Field(pub u32);
 
 impl BinarySerializable for Field {

@@ -1,10 +1,10 @@
 use schema::TextOptions;
 use schema::IntOptions;
 
-use rustc_serialize::Decodable;
-use rustc_serialize::Decoder;
-use rustc_serialize::Encodable;
-use rustc_serialize::Encoder;
+use std::fmt;
+use serde::{Serialize, Deserialize, Serializer, Deserializer};
+use serde::ser::SerializeStruct;
+use serde::de::{self, Visitor, MapAccess};
 use schema::FieldType;
 
 /// A `FieldEntry` represents a field and its configuration.

@@ -94,75 +94,99 @@ impl FieldEntry {
     }
 }
 
-impl Encodable for FieldEntry {
-    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
-        s.emit_struct("field_entry", 3, |s| {
-            try!(s.emit_struct_field("name", 0, |s| {
-                self.name.encode(s)
-            }));
-            match self.field_type {
-                FieldType::Str(ref options) => {
-                    s.emit_struct_field("type", 1, |s| {
-                        s.emit_str("text")
-                    })?;
-                    s.emit_struct_field("options", 2, |s| {
-                        options.encode(s)
-                    })?;
-                }
-                FieldType::U64(ref options) => {
-                    s.emit_struct_field("type", 1, |s| {
-                        s.emit_str("u64")
-                    })?;
-                    s.emit_struct_field("options", 2, |s| {
-                        options.encode(s)
-                    })?;
-                }
-                FieldType::I64(ref options) => {
-                    s.emit_struct_field("type", 1, |s| {
-                        s.emit_str("i64")
-                    })?;
-                    s.emit_struct_field("options", 2, |s| {
-                        options.encode(s)
-                    })?;
-                }
-            }
-            Ok(())
-        })
-    }
+impl Serialize for FieldEntry {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+        where S: Serializer
+    {
+        let mut s = serializer.serialize_struct("field_entry", 3)?;
+        s.serialize_field("name", &self.name)?;
+
+        match self.field_type {
+            FieldType::Str(ref options) => {
+                s.serialize_field("type", "text")?;
+                s.serialize_field("options", options)?;
+            },
+            FieldType::U64(ref options) => {
+                s.serialize_field("type", "u64")?;
+                s.serialize_field("options", options)?;
+            },
+            FieldType::I64(ref options) => {
+                s.serialize_field("type", "i64")?;
+                s.serialize_field("options", options)?;
+            }
+        }
+
+        s.end()
+    }
 }
 
-impl Decodable for FieldEntry {
-    fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
-        d.read_struct("field_entry", 3, |d| {
-            let name = try!(d.read_struct_field("name", 0, |d| {
-                d.read_str()
-            }));
-            let field_type: String = try!(d.read_struct_field("type", 1, |d| {
-                d.read_str()
-            }));
-            d.read_struct_field("options", 2, |d| {
-                match field_type.as_ref() {
-                    "u64" => {
-                        let int_options = try!(IntOptions::decode(d));
-                        Ok(FieldEntry::new_u64(name, int_options))
-                    }
-                    "i64" => {
-                        let int_options = try!(IntOptions::decode(d));
-                        Ok(FieldEntry::new_i64(name, int_options))
-                    }
-                    "text" => {
-                        let text_options = try!(TextOptions::decode(d));
-                        Ok(FieldEntry::new_text(name, text_options))
-                    }
-                    _ => {
-                        Err(d.error(&format!("Field type {:?} unknown", field_type)))
-                    }
-                }
-            })
-        })
-    }
+impl<'de> Deserialize<'de> for FieldEntry {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where D: Deserializer<'de>
+    {
+        #[derive(Deserialize)]
+        #[serde(field_identifier, rename_all = "lowercase")]
+        enum Field { Name, Type, Options };
+
+        const FIELDS: &'static [&'static str] = &["name", "type", "options"];
+
+        struct FieldEntryVisitor;
+
+        impl<'de> Visitor<'de> for FieldEntryVisitor {
+            type Value = FieldEntry;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("struct FieldEntry")
+            }
+
+            fn visit_map<V>(self, mut map: V) -> Result<FieldEntry, V::Error>
+                where V: MapAccess<'de>
+            {
+                let mut name = None;
+                let mut ty = None;
+                let mut field_type = None;
+                while let Some(key) = map.next_key()? {
+                    match key {
+                        Field::Name => {
+                            if name.is_some() {
+                                return Err(de::Error::duplicate_field("name"));
+                            }
+                            name = Some(map.next_value()?);
+                        }
+                        Field::Type => {
+                            if ty.is_some() {
+                                return Err(de::Error::duplicate_field("type"));
+                            }
+                            ty = Some(map.next_value()?);
+                        }
+                        Field::Options => {
+                            match ty {
+                                None => return Err(de::Error::custom("The `type` field must be specified before `options`")),
+                                Some(ty) => {
+                                    match ty {
+                                        "text" => field_type = Some(FieldType::Str(map.next_value()?)),
+                                        "u64" => field_type = Some(FieldType::U64(map.next_value()?)),
+                                        "i64" => field_type = Some(FieldType::I64(map.next_value()?)),
+                                        _ => return Err(de::Error::custom(format!("Unrecognised type {}", ty)))
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+
+                let name = name.ok_or_else(|| de::Error::missing_field("name"))?;
+                ty.ok_or_else(|| de::Error::missing_field("ty"))?;
+                let field_type = field_type.ok_or_else(|| de::Error::missing_field("options"))?;
+
+                Ok(FieldEntry {
+                    name: name,
+                    field_type: field_type,
+                })
+            }
+        }
+
+        deserializer.deserialize_struct("field_entry", FIELDS, FieldEntryVisitor)
+    }
 }

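A consequence of the hand-written visitor above: the `options` payload can only be decoded once the `type` tag is known, so key order in the JSON matters. Illustrative round-trips, assuming the `FieldEntry` type from this file:

```rust
// `type` precedes `options`: the visitor knows how to decode the options.
let ok = r#"{"name": "title", "type": "text", "options": {"indexing": "position", "stored": false}}"#;
assert!(serde_json::from_str::<FieldEntry>(ok).is_ok());

// `options` precedes `type`: fails with
// "The `type` field must be specified before `options`".
let bad = r#"{"name": "title", "options": {"indexing": "position", "stored": false}, "type": "text"}"#;
assert!(serde_json::from_str::<FieldEntry>(bad).is_err());
```
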
@@ -172,18 +196,31 @@ mod tests {
 
     use super::*;
     use schema::TEXT;
-    use rustc_serialize::json;
+    use serde_json;
 
     #[test]
     fn test_json_serialization() {
         let field_value = FieldEntry::new_text(String::from("title"), TEXT);
-        assert_eq!(format!("{}", json::as_pretty_json(&field_value)), r#"{
+
+        let expected = r#"{
   "name": "title",
   "type": "text",
   "options": {
     "indexing": "position",
     "stored": false
   }
-}"#);
+}"#;
+        let field_value_json = serde_json::to_string_pretty(&field_value).unwrap();
+
+        assert_eq!(expected, &field_value_json);
+
+        let field_value: FieldEntry = serde_json::from_str(expected).unwrap();
+
+        assert_eq!("title", field_value.name);
+
+        match field_value.field_type {
+            FieldType::Str(_) => assert!(true),
+            _ => panic!("expected FieldType::Str")
+        }
     }
 }

@@ -1,7 +1,6 @@
-use schema::TextOptions;
-use schema::IntOptions;
+use schema::{TextOptions, IntOptions};
 
-use rustc_serialize::json::Json;
+use serde_json::Value as JsonValue;
 use schema::Value;
 

@@ -19,7 +18,7 @@ pub enum ValueParsingError {
 
 /// A `FieldType` describes the type (text, u64) of a field as well as
 /// how it should be handled by tantivy.
-#[derive(Clone, Debug, RustcDecodable, RustcEncodable)]
+#[derive(Clone, Debug)]
 pub enum FieldType {
     /// String field type configuration
     Str(TextOptions),

@@ -30,7 +29,7 @@ pub enum FieldType {
 }
 
 impl FieldType {
-
 
     /// returns true iff the field is indexed.
     pub fn is_indexed(&self) -> bool {
         match self {

@@ -51,9 +50,9 @@ impl FieldType {
     /// Tantivy will not try to cast values.
     /// For instance, if the JSON value is the integer `3` and the
     /// target field is a `Str`, this method will return an Error.
-    pub fn value_from_json(&self, json: &Json) -> Result<Value, ValueParsingError> {
+    pub fn value_from_json(&self, json: &JsonValue) -> Result<Value, ValueParsingError> {
         match *json {
-            Json::String(ref field_text) => {
+            JsonValue::String(ref field_text) => {
                 match *self {
                     FieldType::Str(_) => {
                         Ok(Value::Str(field_text.clone()))

@@ -63,31 +62,23 @@ impl FieldType {
                     }
                 }
             }
-            Json::U64(ref field_val_u64) => {
+            JsonValue::Number(ref field_val_num) => {
                 match *self {
                     FieldType::I64(_) => {
-                        if *field_val_u64 > (i64::max_value() as u64) {
-                            Err(ValueParsingError::OverflowError(format!("Value {:?} is too high for a i64.", field_val_u64)))
+                        if let Some(field_val_i64) = field_val_num.as_i64() {
+                            Ok(Value::I64(field_val_i64))
                         }
                         else {
-                            Ok(Value::I64(*field_val_u64 as i64))
+                            Err(ValueParsingError::OverflowError(format!("Expected an i64 int, got {:?}", json)))
                         }
                     }
                     FieldType::U64(_) => {
-                        Ok(Value::U64(*field_val_u64))
-                    }
-                    _ => {
-                        Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json)))
-                    }
-                }
-            },
-            Json::I64(ref field_val_i64) => {
-                match *self {
-                    FieldType::I64(_) => {
-                        Ok(Value::I64(* field_val_i64))
-                    }
-                    FieldType::U64(_) => {
-                        Err(ValueParsingError::TypeError(format!("Expected a positive integer, got {:?}", json)))
+                        if let Some(field_val_u64) = field_val_num.as_u64() {
+                            Ok(Value::U64(field_val_u64))
+                        }
+                        else {
+                            Err(ValueParsingError::OverflowError(format!("Expected an u64 int, got {:?}", json)))
+                        }
                     }
                     FieldType::Str(_) => {
                         Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json)))

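The rewrite above collapses the old `Json::U64`/`Json::I64` branches into a single `JsonValue::Number` arm, relying on `serde_json`'s checked accessors. How `as_i64`/`as_u64` behave at the boundaries (the values mirror the schema tests later in this diff):

```rust
extern crate serde_json;

fn main() {
    // 2^63 fits in a u64 but overflows an i64, so an I64 field reports OverflowError.
    let big: serde_json::Value = serde_json::from_str("9223372036854775808").unwrap();
    assert_eq!(big.as_i64(), None);
    assert_eq!(big.as_u64(), Some(9223372036854775808));

    // Negative numbers are the mirror image: fine as i64, None as u64.
    let neg: serde_json::Value = serde_json::from_str("-5").unwrap();
    assert_eq!(neg.as_i64(), Some(-5));
    assert_eq!(neg.as_u64(), None);
}
```
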
@@ -7,7 +7,7 @@ use schema::Value;
 
 
 /// `FieldValue` holds together a `Field` and its `Value`.
-#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, RustcEncodable, RustcDecodable)]
+#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, Serialize, Deserialize)]
 pub struct FieldValue {
     field: Field,
     value: Value,

@@ -1,7 +1,7 @@
 use std::ops::BitOr;
 
-/// Define how a u64 field should be handled by tantivy.
-#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)]
+/// Define how an int field should be handled by tantivy.
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
 pub struct IntOptions {
     indexed: bool,
     fast: bool,

@@ -1,7 +1,5 @@
 use std::collections::BTreeMap;
 use schema::Value;
-use rustc_serialize::Encodable;
-use rustc_serialize::Encoder;
 
 

@@ -11,36 +9,5 @@ use rustc_serialize::Encoder;
 /// A `NamedFieldDocument` is a simple representation of a document
 /// as a `BTreeMap<String, Vec<Value>>`.
 ///
+#[derive(Serialize)]
 pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
-
-impl Encodable for NamedFieldDocument {
-    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
-        s.emit_struct("named_field_document", self.0.len(), |s| {
-            for (i, (name, vals)) in self.0.iter().enumerate() {
-                s.emit_struct_field(name, i, |s| {
-                    s.emit_seq(vals.len(), |s| {
-                        for (j, val) in vals.iter().enumerate() {
-                            s.emit_seq_elt(j, |s| {
-                                match *val {
-                                    Value::Str(ref text) => {
-                                        s.emit_str(text)
-                                    },
-                                    Value::U64(ref val) => {
-                                        s.emit_u64(*val)
-                                    }
-                                    Value::I64(ref val) => {
-                                        s.emit_i64(*val)
-                                    }
-                                }
-                            })?;
-                        }
-                        Ok(())
-                    })
-                })?;
-            }
-            Ok(())
-        })
-    }
-}

@@ -1,14 +1,12 @@
 use std::collections::HashMap;
 
-use rustc_serialize::Decodable;
-use rustc_serialize::Encodable;
-use rustc_serialize::Decoder;
-use rustc_serialize::Encoder;
-use rustc_serialize::json;
-use rustc_serialize::json::Json;
 use std::collections::BTreeMap;
 use schema::field_type::ValueParsingError;
 use std::sync::Arc;
 
+use serde_json::{self, Value as JsonValue, Map as JsonObject};
+use serde::{Serialize, Serializer, Deserialize, Deserializer};
+use serde::ser::SerializeSeq;
+use serde::de::{Visitor, SeqAccess};
 use super::*;
 use std::fmt;

@@ -215,14 +213,12 @@ impl Schema {
     ///
     /// Encoding a document cannot fail.
     pub fn to_json(&self, doc: &Document) -> String {
-        json::encode(&self.to_named_doc(doc)).unwrap()
+        serde_json::to_string(&self.to_named_doc(doc)).expect("doc encoding failed. This is a bug")
     }
 
     /// Build a document object from a json-object.
     pub fn parse_document(&self, doc_json: &str) -> Result<Document, DocParsingError> {
-        let json_node = try!(Json::from_str(doc_json));
-        let some_json_obj = json_node.as_object();
-        if !some_json_obj.is_some() {
+        let json_obj: JsonObject<String, JsonValue> = serde_json::from_str(doc_json).map_err(|_| {
             let doc_json_sample: String =
                 if doc_json.len() < 20 {
                     String::from(doc_json)

@@ -230,9 +226,9 @@ impl Schema {
                 else {
                     format!("{:?}...", &doc_json[0..20])
                 };
-            return Err(DocParsingError::NotJSONObject(doc_json_sample))
-        }
-        let json_obj = some_json_obj.unwrap();
+            DocParsingError::NotJSON(doc_json_sample)
+        })?;
+
         let mut doc = Document::default();
         for (field_name, json_value) in json_obj.iter() {
             match self.get_field(field_name) {

@@ -240,7 +236,7 @@ impl Schema {
                     let field_entry = self.get_field_entry(field);
                     let field_type = field_entry.field_type();
                     match *json_value {
-                        Json::Array(ref json_items) => {
+                        JsonValue::Array(ref json_items) => {
                             for json_item in json_items {
                                 let value = try!(
                                     field_type

@@ -276,30 +272,50 @@ impl fmt::Debug for Schema {
     }
 }
 
-impl Decodable for Schema {
-    fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
-        let mut schema_builder = SchemaBuilder::default();
-        try!(d.read_seq(|d, num_fields| {
-            for _ in 0..num_fields {
-                let field_entry = try!(FieldEntry::decode(d));
-                schema_builder.add_field(field_entry);
-            }
-            Ok(())
-        }));
-        Ok(schema_builder.build())
+impl Serialize for Schema {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+        where S: Serializer
+    {
+        let mut seq = serializer.serialize_seq(Some(self.0.fields.len()))?;
+        for e in &self.0.fields {
+            seq.serialize_element(e)?;
+        }
+        seq.end()
     }
 }
 
-impl Encodable for Schema {
-    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
-        try!(s.emit_seq(self.0.fields.len(),
-                        |mut e| {
-                            for (ord, field) in self.0.fields.iter().enumerate() {
-                                try!(e.emit_seq_elt(ord, |e| field.encode(e)));
-                            }
-                            Ok(())
-                        }));
-        Ok(())
+impl<'de> Deserialize<'de> for Schema
+{
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where D: Deserializer<'de>
+    {
+        struct SchemaVisitor;
+
+        impl<'de> Visitor<'de> for SchemaVisitor
+        {
+            type Value = Schema;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("struct Schema")
+            }
+
+            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
+                where A: SeqAccess<'de>
+            {
+                let mut schema = SchemaBuilder {
+                    fields: Vec::with_capacity(seq.size_hint().unwrap_or(0)),
+                    fields_map: HashMap::with_capacity(seq.size_hint().unwrap_or(0)),
+                };
+
+                while let Some(value) = seq.next_element()? {
+                    schema.add_field(value);
+                }
+
+                Ok(schema.build())
+            }
+        }
+
+        deserializer.deserialize_map(SchemaVisitor)
     }
 }

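Net effect: a `Schema` now round-trips as a plain JSON array of field entries, rebuilt through a `SchemaBuilder` on the way in (the `expected` string in the tests below shows the exact shape). A compact usage sketch:

```rust
// Serializes to something like: [{"name": "title", "type": "text", "options": {...}}, ...]
let schema_json = serde_json::to_string(&schema).unwrap();
let restored: Schema = serde_json::from_str(&schema_json).unwrap();
```
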
@@ -319,28 +335,19 @@ impl From<SchemaBuilder> for Schema {
 #[derive(Debug)]
 pub enum DocParsingError {
     /// The payload given is not valid JSON.
-    NotJSON(json::ParserError),
-    /// The payload given is not a JSON Object (`{...}`).
-    NotJSONObject(String),
+    NotJSON(String),
     /// One of the value nodes could not be parsed.
     ValueError(String, ValueParsingError),
     /// The json-document contains a field that is not declared in the schema.
     NoSuchFieldInSchema(String),
 }
 
-impl From<json::ParserError> for DocParsingError {
-    fn from(err: json::ParserError) -> DocParsingError {
-        DocParsingError::NotJSON(err)
-    }
-}
-
 
 #[cfg(test)]
 mod tests {
 
     use schema::*;
-    use rustc_serialize::json;
+    use serde_json;
     use schema::field_type::ValueParsingError;
     use schema::schema::DocParsingError::NotJSON;

@@ -348,11 +355,13 @@ mod tests {
     pub fn test_schema_serialization() {
         let mut schema_builder = SchemaBuilder::default();
         let count_options = IntOptions::default().set_stored().set_fast();
+        let popularity_options = IntOptions::default().set_stored().set_fast();
         schema_builder.add_text_field("title", TEXT);
         schema_builder.add_text_field("author", STRING);
         schema_builder.add_u64_field("count", count_options);
+        schema_builder.add_i64_field("popularity", popularity_options);
         let schema = schema_builder.build();
-        let schema_json: String = format!("{}", json::as_pretty_json(&schema));
+        let schema_json = serde_json::to_string_pretty(&schema).unwrap();
         let expected = r#"[
   {
     "name": "title",

@@ -378,10 +387,29 @@ mod tests {
       "fast": true,
       "stored": true
     }
-  }
+  },
+  {
+    "name": "popularity",
+    "type": "i64",
+    "options": {
+      "indexed": false,
+      "fast": true,
+      "stored": true
+    }
+  }
 ]"#;
+        println!("{}", schema_json);
+        println!("{}", expected);
         assert_eq!(schema_json, expected);
+
+        let schema: Schema = serde_json::from_str(expected).unwrap();
+
+        let mut fields = schema.fields().iter();
+
+        assert_eq!("title", fields.next().unwrap().name());
+        assert_eq!("author", fields.next().unwrap().name());
+        assert_eq!("count", fields.next().unwrap().name());
+        assert_eq!("popularity", fields.next().unwrap().name());
     }
 

@@ -400,6 +428,7 @@ mod tests {
             "count": 4
         }"#;
         let doc = schema.parse_document(doc_json).unwrap();
+
         let doc_serdeser = schema.parse_document(&schema.to_json(&doc)).unwrap();
         assert_eq!(doc, doc_serdeser);
     }

@@ -408,9 +437,11 @@ mod tests {
     pub fn test_parse_document() {
         let mut schema_builder = SchemaBuilder::default();
         let count_options = IntOptions::default().set_stored().set_fast();
+        let popularity_options = IntOptions::default().set_stored().set_fast();
        let title_field = schema_builder.add_text_field("title", TEXT);
         let author_field = schema_builder.add_text_field("author", STRING);
         let count_field = schema_builder.add_u64_field("count", count_options);
+        let popularity_field = schema_builder.add_i64_field("popularity", popularity_options);
         let schema = schema_builder.build();
         {
             let doc = schema.parse_document("{}").unwrap();

@@ -420,32 +451,20 @@ mod tests {
             let doc = schema.parse_document(r#"{
                 "title": "my title",
                 "author": "fulmicoton",
-                "count": 4
+                "count": 4,
+                "popularity": 10
             }"#).unwrap();
             assert_eq!(doc.get_first(title_field).unwrap().text(), "my title");
             assert_eq!(doc.get_first(author_field).unwrap().text(), "fulmicoton");
             assert_eq!(doc.get_first(count_field).unwrap().u64_value(), 4);
-        }
-        {
-            let json_err = schema.parse_document(r#"{
-                "title": "my title",
-                "author": "fulmicoton"
-                "count": 4
-            }"#);
-            match json_err {
-                Err(DocParsingError::NotJSON(__)) => {
-                    assert!(true);
-                }
-                _ => {
-                    assert!(false);
-                }
-            }
+            assert_eq!(doc.get_first(popularity_field).unwrap().i64_value(), 10);
         }
         {
             let json_err = schema.parse_document(r#"{
                 "title": "my title",
                 "author": "fulmicoton",
-                "count": 4
+                "count": 4,
+                "popularity": 10,
                 "jambon": "bayonne"
             }"#);
             match json_err {

@@ -453,7 +472,7 @@ mod tests {
                     assert_eq!(field_name, "jambon");
                 }
                 _ => {
-                    assert!(false);
+                    panic!("expected additional field 'jambon' to fail but didn't");
                 }
             }
         }

@@ -462,6 +481,7 @@ mod tests {
                 "title": "my title",
                 "author": "fulmicoton",
                 "count": "5",
+                "popularity": "10",
                 "jambon": "bayonne"
             }"#);
             match json_err {

@@ -469,7 +489,7 @@ mod tests {
                     assert!(true);
                 }
                 _ => {
-                    assert!(false);
+                    panic!("expected string of 5 to fail but didn't");
                 }
             }
         }

@@ -477,26 +497,28 @@ mod tests {
             let json_err = schema.parse_document(r#"{
                 "title": "my title",
                 "author": "fulmicoton",
-                "count": -5
+                "count": -5,
+                "popularity": 10
             }"#);
             match json_err {
-                Err(DocParsingError::ValueError(_, ValueParsingError::TypeError(_))) => {
+                Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
                     assert!(true);
                 }
                 _ => {
-                    assert!(false);
+                    panic!("expected -5 to fail but didn't");
                 }
             }
         }
         {
             let json_err = schema.parse_document(r#"{
                 "title": "my title",
                 "author": "fulmicoton",
-                "count": 5000000000
+                "count": 9223372036854775808,
+                "popularity": 10
             }"#);
             match json_err {
                 Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
-                    assert!(false);
+                    panic!("expected 9223372036854775808 to fit into u64, but it didn't");
                 }
                 _ => {
                     assert!(true);

@@ -507,14 +529,30 @@ mod tests {
             let json_err = schema.parse_document(r#"{
                 "title": "my title",
                 "author": "fulmicoton",
-                "count": 50000000000000000000
+                "count": 50,
+                "popularity": 9223372036854775808
             }"#);
             match json_err {
                 Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
                     assert!(true);
                 },
                 _ => {
-                    assert!(false)
+                    panic!("expected 9223372036854775808 to overflow i64, but it didn't");
                 }
             }
         }
+        {
+            let json_err = schema.parse_document(r#"{
+                "title": "my title",
+                "author": "fulmicoton",
+                "count": 50,
+            }"#);
+            match json_err {
+                Err(NotJSON(_)) => {
+                    assert!(true);
+                }
+                _ => {
+                    panic!("expected invalid JSON to fail parsing, but it didn't");
+                }
+            }
+        }

@@ -1,12 +1,8 @@
 use std::ops::BitOr;
-use rustc_serialize::Decodable;
-use rustc_serialize::Decoder;
-use rustc_serialize::Encodable;
-use rustc_serialize::Encoder;
 
 
 /// Define how a text field should be handled by tantivy.
-#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)]
+#[derive(Clone,Debug,PartialEq,Eq, Serialize, Deserialize)]
 pub struct TextOptions {
     indexing: TextIndexingOptions,
     stored: bool,

@@ -51,9 +47,10 @@ impl Default for TextOptions {
 
 
 /// Describe how a field should be indexed
-#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash)]
+#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash, Serialize, Deserialize)]
 pub enum TextIndexingOptions {
     /// Unindexed fields will not generate any postings. They will not be searchable either.
+    #[serde(rename="unindexed")]
     Unindexed,
     /// Untokenized means that the field text will not be split into tokens before being indexed.
     /// A field with the value "Hello world" will have the document subscribe to one single

@@ -61,62 +58,26 @@ pub enum TextIndexingOptions {
     ///
     /// It will **not** be searchable if the user enters "hello" for instance.
     /// This can be useful for tags or ids, for instance.
+    #[serde(rename="untokenized")]
     Untokenized,
     /// TokenizedNoFreq will tokenize the field value, and append the document doc id
     /// to the posting lists associated to all of the tokens.
     /// The frequency of appearance of the term in the document however will be lost.
     /// The term frequency used in the TfIdf formula will always be 1.
+    #[serde(rename="tokenize")]
     TokenizedNoFreq,
     /// TokenizedWithFreq will tokenize the field value, and encode
     /// both the docid and the term frequency in the posting lists associated to all
    /// of the tokens.
+    #[serde(rename="freq")]
     TokenizedWithFreq,
     /// Like TokenizedWithFreq, but also encodes the positions of the
     /// terms in a separate file. This option is required for phrase queries.
     /// Don't use this if you are certain you won't need it, the term positions file can be very big.
+    #[serde(rename="position")]
     TokenizedWithFreqAndPosition,
 }
 
-impl Encodable for TextIndexingOptions {
-    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
-        let name = match *self {
-            TextIndexingOptions::Unindexed => {
-                "unindexed"
-            }
-            TextIndexingOptions::Untokenized => {
-                "untokenized"
-            }
-            TextIndexingOptions::TokenizedNoFreq => {
-                "tokenize"
-            }
-            TextIndexingOptions::TokenizedWithFreq => {
-                "freq"
-            }
-            TextIndexingOptions::TokenizedWithFreqAndPosition => {
-                "position"
-            }
-        };
-        s.emit_str(name)
-    }
-}
-
-impl Decodable for TextIndexingOptions {
-    fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
-        use self::TextIndexingOptions::*;
-        let option_name: String = try!(d.read_str());
-        Ok(match option_name.as_ref() {
-            "unindexed" => Unindexed,
-            "untokenized" => Untokenized,
-            "tokenize" => TokenizedNoFreq,
-            "freq" => TokenizedWithFreq,
-            "position" => TokenizedWithFreqAndPosition,
-            _ => {
-                return Err(d.error(&format!("Encoding option {:?} unknown", option_name)));
-            }
-        })
-    }
-}
-
 impl TextIndexingOptions {
 
     /// Returns true iff the term frequency will be encoded.

@@ -1,12 +1,10 @@
 
-use common::BinarySerializable;
-use std::io;
-use std::io::Write;
-use std::io::Read;
+use std::fmt;
+use serde::{Serialize, Serializer, Deserialize, Deserializer};
+use serde::de::Visitor;
 
 /// Value represents the value of any field.
 /// It is an enum over all of the possible field types.
-#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable)]
+#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
 pub enum Value {
     /// The str type is used for any text information.
     Str(String),

@@ -16,6 +14,54 @@ pub enum Value {
     I64(i64)
 }
 
+impl Serialize for Value {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+        where S: Serializer
+    {
+        match *self {
+            Value::Str(ref v) => serializer.serialize_str(v),
+            Value::U64(u) => serializer.serialize_u64(u),
+            Value::I64(u) => serializer.serialize_i64(u),
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for Value
+{
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where D: Deserializer<'de>
+    {
+        struct ValueVisitor;
+
+        impl<'de> Visitor<'de> for ValueVisitor
+        {
+            type Value = Value;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("a string or u32")
+            }
+
+            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E> {
+                Ok(Value::U64(v))
+            }
+
+            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E> {
+                Ok(Value::I64(v))
+            }
+
+            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
+                Ok(Value::Str(v.to_owned()))
+            }
+
+            fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
+                Ok(Value::Str(v))
+            }
+        }
+
+        deserializer.deserialize_any(ValueVisitor)
+    }
+}
+
 impl Value {
     /// Returns the text value, provided the value is of the `Str` type.
     ///

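Because `Serialize` writes a bare JSON scalar and the visitor decodes via `deserialize_any`, `Value` is untagged in JSON: the variant is picked from the JSON type itself. A quick illustration (note that non-negative integers reach `visit_u64` first, so they come back as `Value::U64`):

```rust
#[test]
fn value_json_is_untagged() {
    // A JSON string becomes Value::Str, with no enum tag in the payload.
    let v: Value = serde_json::from_str("\"hello\"").unwrap();
    assert_eq!(v, Value::Str("hello".to_string()));

    // A non-negative JSON integer becomes Value::U64.
    let n: Value = serde_json::from_str("7").unwrap();
    assert_eq!(n, Value::U64(7));
}
```
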
@@ -88,48 +134,53 @@ impl<'a> From<&'a str> for Value {
     }
 }
 
-const TEXT_CODE: u8 = 0;
-const U64_CODE: u8 = 1;
-const I64_CODE: u8 = 2;
+mod binary_serialize {
+    use common::BinarySerializable;
+    use std::io::{self, Read, Write};
+    use super::Value;
 
-impl BinarySerializable for Value {
-    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
-        let mut written_size = 0;
-        match *self {
-            Value::Str(ref text) => {
-                written_size += try!(TEXT_CODE.serialize(writer));
-                written_size += try!(text.serialize(writer));
-            },
-            Value::U64(ref val) => {
-                written_size += try!(U64_CODE.serialize(writer));
-                written_size += try!(val.serialize(writer));
-            },
-            Value::I64(ref val) => {
-                written_size += try!(I64_CODE.serialize(writer));
-                written_size += try!(val.serialize(writer));
-            },
-        }
-        Ok(written_size)
-    }
-    fn deserialize(reader: &mut Read) -> io::Result<Self> {
-        let type_code = try!(u8::deserialize(reader));
-        match type_code {
-            TEXT_CODE => {
-                let text = try!(String::deserialize(reader));
-                Ok(Value::Str(text))
-            }
-            U64_CODE => {
-                let value = try!(u64::deserialize(reader));
-                Ok(Value::U64(value))
-            }
-            I64_CODE => {
-                let value = try!(i64::deserialize(reader));
-                Ok(Value::I64(value))
-            }
-            _ => {
-                Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
-            }
-        }
-    }
-}
+    const TEXT_CODE: u8 = 0;
+    const U64_CODE: u8 = 1;
+    const I64_CODE: u8 = 2;
+
+    impl BinarySerializable for Value {
+        fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
+            let mut written_size = 0;
+            match *self {
+                Value::Str(ref text) => {
+                    written_size += try!(TEXT_CODE.serialize(writer));
+                    written_size += try!(text.serialize(writer));
+                },
+                Value::U64(ref val) => {
+                    written_size += try!(U64_CODE.serialize(writer));
+                    written_size += try!(val.serialize(writer));
+                },
+                Value::I64(ref val) => {
+                    written_size += try!(I64_CODE.serialize(writer));
+                    written_size += try!(val.serialize(writer));
+                },
+            }
+            Ok(written_size)
+        }
+        fn deserialize(reader: &mut Read) -> io::Result<Self> {
+            let type_code = try!(u8::deserialize(reader));
+            match type_code {
+                TEXT_CODE => {
+                    let text = try!(String::deserialize(reader));
+                    Ok(Value::Str(text))
+                }
+                U64_CODE => {
+                    let value = try!(u64::deserialize(reader));
+                    Ok(Value::U64(value))
+                }
+                I64_CODE => {
+                    let value = try!(i64::deserialize(reader));
+                    Ok(Value::I64(value))
+                }
+                _ => {
+                    Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
+                }
+            }
+        }
+    }
+}