mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 23:50:41 +00:00
Merge branch 'master' into issue/indexing-refactoring
This commit is contained in:
@@ -11,7 +11,7 @@ use itertools::Itertools;
|
||||
|
||||
/// Documents are really just a list of `(field, value)` pairs.
|
||||
/// In this list, one field may appear more than once.
|
||||
#[derive(Debug, RustcEncodable, RustcDecodable, Default)]
|
||||
#[derive(Debug, Serialize, Deserialize, Default)]
|
||||
pub struct Document {
|
||||
field_values: Vec<FieldValue>,
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ use common::BinarySerializable;
|
||||
///
|
||||
/// Because the field id is a `u8`, tantivy can only have at most `255` fields.
|
||||
/// Value 255 is reserved.
|
||||
#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, RustcEncodable, RustcDecodable)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, Serialize, Deserialize)]
|
||||
pub struct Field(pub u32);
|
||||
|
||||
impl BinarySerializable for Field {
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
use schema::TextOptions;
|
||||
use schema::IntOptions;
|
||||
|
||||
use rustc_serialize::Decodable;
|
||||
use rustc_serialize::Decoder;
|
||||
use rustc_serialize::Encodable;
|
||||
use rustc_serialize::Encoder;
|
||||
use std::fmt;
|
||||
use serde::{Serialize, Deserialize, Serializer, Deserializer};
|
||||
use serde::ser::SerializeStruct;
|
||||
use serde::de::{self, Visitor, MapAccess};
|
||||
use schema::FieldType;
|
||||
|
||||
/// A `FieldEntry` represents a field and its configuration.
|
||||
@@ -94,75 +94,99 @@ impl FieldEntry {
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for FieldEntry {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where S: Serializer
|
||||
{
|
||||
let mut s = serializer.serialize_struct("field_entry", 3)?;
|
||||
s.serialize_field("name", &self.name)?;
|
||||
|
||||
|
||||
impl Encodable for FieldEntry {
|
||||
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
|
||||
s.emit_struct("field_entry", 3, |s| {
|
||||
try!(s.emit_struct_field("name", 0, |s| {
|
||||
self.name.encode(s)
|
||||
}));
|
||||
match self.field_type {
|
||||
FieldType::Str(ref options) => {
|
||||
s.emit_struct_field("type", 1, |s| {
|
||||
s.emit_str("text")
|
||||
})?;
|
||||
s.emit_struct_field("options", 2, |s| {
|
||||
options.encode(s)
|
||||
})?;
|
||||
}
|
||||
FieldType::U64(ref options) => {
|
||||
s.emit_struct_field("type", 1, |s| {
|
||||
s.emit_str("u64")
|
||||
})?;
|
||||
s.emit_struct_field("options", 2, |s| {
|
||||
options.encode(s)
|
||||
})?;
|
||||
}
|
||||
FieldType::I64(ref options) => {
|
||||
s.emit_struct_field("type", 1, |s| {
|
||||
s.emit_str("i64")
|
||||
})?;
|
||||
s.emit_struct_field("options", 2, |s| {
|
||||
options.encode(s)
|
||||
})?;
|
||||
}
|
||||
match self.field_type {
|
||||
FieldType::Str(ref options) => {
|
||||
s.serialize_field("type", "text")?;
|
||||
s.serialize_field("options", options)?;
|
||||
},
|
||||
FieldType::U64(ref options) => {
|
||||
s.serialize_field("type", "u64")?;
|
||||
s.serialize_field("options", options)?;
|
||||
},
|
||||
FieldType::I64(ref options) => {
|
||||
s.serialize_field("type", "i64")?;
|
||||
s.serialize_field("options", options)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
s.end()
|
||||
}
|
||||
}
|
||||
|
||||
impl Decodable for FieldEntry {
|
||||
fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
|
||||
d.read_struct("field_entry", 3, |d| {
|
||||
let name = try!(d.read_struct_field("name", 0, |d| {
|
||||
d.read_str()
|
||||
}));
|
||||
let field_type: String = try!(d.read_struct_field("type", 1, |d| {
|
||||
d.read_str()
|
||||
}));
|
||||
d.read_struct_field("options", 2, |d| {
|
||||
match field_type.as_ref() {
|
||||
"u64" => {
|
||||
let int_options = try!(IntOptions::decode(d));
|
||||
Ok(FieldEntry::new_u64(name, int_options))
|
||||
}
|
||||
"i64" => {
|
||||
let int_options = try!(IntOptions::decode(d));
|
||||
Ok(FieldEntry::new_i64(name, int_options))
|
||||
}
|
||||
"text" => {
|
||||
let text_options = try!(TextOptions::decode(d));
|
||||
Ok(FieldEntry::new_text(name, text_options))
|
||||
}
|
||||
_ => {
|
||||
Err(d.error(&format!("Field type {:?} unknown", field_type)))
|
||||
impl<'de> Deserialize<'de> for FieldEntry {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where D: Deserializer<'de>
|
||||
{
|
||||
#[derive(Deserialize)]
|
||||
#[serde(field_identifier, rename_all = "lowercase")]
|
||||
enum Field { Name, Type, Options };
|
||||
|
||||
const FIELDS: &'static [&'static str] = &["name", "type", "options"];
|
||||
|
||||
struct FieldEntryVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for FieldEntryVisitor {
|
||||
type Value = FieldEntry;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("struct FieldEntry")
|
||||
}
|
||||
|
||||
fn visit_map<V>(self, mut map: V) -> Result<FieldEntry, V::Error>
|
||||
where V: MapAccess<'de>
|
||||
{
|
||||
let mut name = None;
|
||||
let mut ty = None;
|
||||
let mut field_type = None;
|
||||
while let Some(key) = map.next_key()? {
|
||||
match key {
|
||||
Field::Name => {
|
||||
if name.is_some() {
|
||||
return Err(de::Error::duplicate_field("name"));
|
||||
}
|
||||
name = Some(map.next_value()?);
|
||||
}
|
||||
Field::Type => {
|
||||
if ty.is_some() {
|
||||
return Err(de::Error::duplicate_field("type"));
|
||||
}
|
||||
ty = Some(map.next_value()?);
|
||||
}
|
||||
Field::Options => {
|
||||
match ty {
|
||||
None => return Err(de::Error::custom("The `type` field must be specified before `options`")),
|
||||
Some(ty) => {
|
||||
match ty {
|
||||
"text" => field_type = Some(FieldType::Str(map.next_value()?)),
|
||||
"u64" => field_type = Some(FieldType::U64(map.next_value()?)),
|
||||
"i64" => field_type = Some(FieldType::I64(map.next_value()?)),
|
||||
_ => return Err(de::Error::custom(format!("Unrecognised type {}", ty)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
let name = name.ok_or_else(|| de::Error::missing_field("name"))?;
|
||||
ty.ok_or_else(|| de::Error::missing_field("ty"))?;
|
||||
let field_type = field_type.ok_or_else(|| de::Error::missing_field("options"))?;
|
||||
|
||||
Ok(FieldEntry {
|
||||
name: name,
|
||||
field_type: field_type,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_struct("field_entry", FIELDS, FieldEntryVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -172,18 +196,31 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use schema::TEXT;
|
||||
use rustc_serialize::json;
|
||||
use serde_json;
|
||||
|
||||
#[test]
|
||||
fn test_json_serialization() {
|
||||
let field_value = FieldEntry::new_text(String::from("title"), TEXT);
|
||||
assert_eq!(format!("{}", json::as_pretty_json(&field_value)), r#"{
|
||||
|
||||
let expected = r#"{
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"options": {
|
||||
"indexing": "position",
|
||||
"stored": false
|
||||
}
|
||||
}"#);
|
||||
}"#;
|
||||
let field_value_json = serde_json::to_string_pretty(&field_value).unwrap();
|
||||
|
||||
assert_eq!(expected, &field_value_json);
|
||||
|
||||
let field_value: FieldEntry = serde_json::from_str(expected).unwrap();
|
||||
|
||||
assert_eq!("title", field_value.name);
|
||||
|
||||
match field_value.field_type {
|
||||
FieldType::Str(_) => assert!(true),
|
||||
_ => panic!("expected FieldType::Str")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use schema::TextOptions;
|
||||
use schema::IntOptions;
|
||||
use schema::{TextOptions, IntOptions};
|
||||
|
||||
use rustc_serialize::json::Json;
|
||||
use serde_json::Value as JsonValue;
|
||||
use schema::Value;
|
||||
|
||||
|
||||
@@ -19,7 +18,7 @@ pub enum ValueParsingError {
|
||||
|
||||
/// A `FieldType` describes the type (text, u64) of a field as well as
|
||||
/// how it should be handled by tantivy.
|
||||
#[derive(Clone, Debug, RustcDecodable, RustcEncodable)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum FieldType {
|
||||
/// String field type configuration
|
||||
Str(TextOptions),
|
||||
@@ -30,7 +29,7 @@ pub enum FieldType {
|
||||
}
|
||||
|
||||
impl FieldType {
|
||||
|
||||
|
||||
/// returns true iff the field is indexed.
|
||||
pub fn is_indexed(&self) -> bool {
|
||||
match self {
|
||||
@@ -51,9 +50,9 @@ impl FieldType {
|
||||
/// Tantivy will not try to cast values.
|
||||
/// For instance, if the json value is the integer `3` and the
|
||||
/// target field is a `Str`, this method will return an Error.
|
||||
pub fn value_from_json(&self, json: &Json) -> Result<Value, ValueParsingError> {
|
||||
pub fn value_from_json(&self, json: &JsonValue) -> Result<Value, ValueParsingError> {
|
||||
match *json {
|
||||
Json::String(ref field_text) => {
|
||||
JsonValue::String(ref field_text) => {
|
||||
match *self {
|
||||
FieldType::Str(_) => {
|
||||
Ok(Value::Str(field_text.clone()))
|
||||
@@ -63,31 +62,23 @@ impl FieldType {
|
||||
}
|
||||
}
|
||||
}
|
||||
Json::U64(ref field_val_u64) => {
|
||||
JsonValue::Number(ref field_val_num) => {
|
||||
match *self {
|
||||
FieldType::I64(_) => {
|
||||
if *field_val_u64 > (i64::max_value() as u64) {
|
||||
Err(ValueParsingError::OverflowError(format!("Value {:?} is too high for a i64.", field_val_u64)))
|
||||
if let Some(field_val_i64) = field_val_num.as_i64() {
|
||||
Ok(Value::I64(field_val_i64))
|
||||
}
|
||||
else {
|
||||
Ok(Value::I64(*field_val_u64 as i64))
|
||||
Err(ValueParsingError::OverflowError(format!("Expected an i64 int, got {:?}", json)))
|
||||
}
|
||||
}
|
||||
FieldType::U64(_) => {
|
||||
Ok(Value::U64(*field_val_u64))
|
||||
}
|
||||
_ => {
|
||||
Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json)))
|
||||
}
|
||||
}
|
||||
},
|
||||
Json::I64(ref field_val_i64) => {
|
||||
match *self {
|
||||
FieldType::I64(_) => {
|
||||
Ok(Value::I64(* field_val_i64))
|
||||
}
|
||||
FieldType::U64(_) => {
|
||||
Err(ValueParsingError::TypeError(format!("Expected a positive integer, got {:?}", json)))
|
||||
if let Some(field_val_u64) = field_val_num.as_u64() {
|
||||
Ok(Value::U64(field_val_u64))
|
||||
}
|
||||
else {
|
||||
Err(ValueParsingError::OverflowError(format!("Expected an u64 int, got {:?}", json)))
|
||||
}
|
||||
}
|
||||
FieldType::Str(_) => {
|
||||
Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json)))
|
||||
|
||||
@@ -7,7 +7,7 @@ use schema::Value;
|
||||
|
||||
|
||||
/// `FieldValue` holds together a `Field` and its `Value`.
|
||||
#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, RustcEncodable, RustcDecodable)]
|
||||
#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, Serialize, Deserialize)]
|
||||
pub struct FieldValue {
|
||||
field: Field,
|
||||
value: Value,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::ops::BitOr;
|
||||
|
||||
/// Define how a u64 field should be handled by tantivy.
|
||||
#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)]
|
||||
/// Define how an int field should be handled by tantivy.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct IntOptions {
|
||||
indexed: bool,
|
||||
fast: bool,
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
use schema::Value;
|
||||
use rustc_serialize::Encodable;
|
||||
use rustc_serialize::Encoder;
|
||||
|
||||
|
||||
|
||||
@@ -11,36 +9,5 @@ use rustc_serialize::Encoder;
|
||||
/// A `NamedFieldDocument` is a simple representation of a document
|
||||
/// as a `BTreeMap<String, Vec<Value>>`.
|
||||
///
|
||||
#[derive(Serialize)]
|
||||
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
|
||||
|
||||
|
||||
impl Encodable for NamedFieldDocument {
|
||||
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
|
||||
s.emit_struct("named_field_document", self.0.len(), |s| {
|
||||
for (i, (name, vals)) in self.0.iter().enumerate() {
|
||||
s.emit_struct_field(name, i, |s| {
|
||||
for (j, val) in vals.iter().enumerate() {
|
||||
s.emit_seq(vals.len(), |s| {
|
||||
s.emit_seq_elt(j, |s| {
|
||||
match *val {
|
||||
Value::Str(ref text) => {
|
||||
s.emit_str(text)
|
||||
},
|
||||
Value::U64(ref val) => {
|
||||
s.emit_u64(*val)
|
||||
}
|
||||
Value::I64(ref val) => {
|
||||
s.emit_i64(*val)
|
||||
}
|
||||
}
|
||||
})
|
||||
})?;
|
||||
}
|
||||
Ok(())
|
||||
|
||||
})?;
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use rustc_serialize::Decodable;
|
||||
use rustc_serialize::Encodable;
|
||||
use rustc_serialize::Decoder;
|
||||
use rustc_serialize::Encoder;
|
||||
use rustc_serialize::json;
|
||||
use rustc_serialize::json::Json;
|
||||
use std::collections::BTreeMap;
|
||||
use schema::field_type::ValueParsingError;
|
||||
use std::sync::Arc;
|
||||
|
||||
use serde_json::{self, Value as JsonValue, Map as JsonObject};
|
||||
use serde::{Serialize, Serializer, Deserialize, Deserializer};
|
||||
use serde::ser::SerializeSeq;
|
||||
use serde::de::{Visitor, SeqAccess};
|
||||
use super::*;
|
||||
use std::fmt;
|
||||
|
||||
@@ -215,14 +213,12 @@ impl Schema {
|
||||
///
|
||||
/// Encoding a document cannot fail.
|
||||
pub fn to_json(&self, doc: &Document) -> String {
|
||||
json::encode(&self.to_named_doc(doc)).unwrap()
|
||||
serde_json::to_string(&self.to_named_doc(doc)).expect("doc encoding failed. This is a bug")
|
||||
}
|
||||
|
||||
/// Build a document object from a json-object.
|
||||
pub fn parse_document(&self, doc_json: &str) -> Result<Document, DocParsingError> {
|
||||
let json_node = try!(Json::from_str(doc_json));
|
||||
let some_json_obj = json_node.as_object();
|
||||
if !some_json_obj.is_some() {
|
||||
let json_obj: JsonObject<String, JsonValue> = serde_json::from_str(doc_json).map_err(|_| {
|
||||
let doc_json_sample: String =
|
||||
if doc_json.len() < 20 {
|
||||
String::from(doc_json)
|
||||
@@ -230,9 +226,9 @@ impl Schema {
|
||||
else {
|
||||
format!("{:?}...", &doc_json[0..20])
|
||||
};
|
||||
return Err(DocParsingError::NotJSONObject(doc_json_sample))
|
||||
}
|
||||
let json_obj = some_json_obj.unwrap();
|
||||
DocParsingError::NotJSON(doc_json_sample)
|
||||
})?;
|
||||
|
||||
let mut doc = Document::default();
|
||||
for (field_name, json_value) in json_obj.iter() {
|
||||
match self.get_field(field_name) {
|
||||
@@ -240,7 +236,7 @@ impl Schema {
|
||||
let field_entry = self.get_field_entry(field);
|
||||
let field_type = field_entry.field_type();
|
||||
match *json_value {
|
||||
Json::Array(ref json_items) => {
|
||||
JsonValue::Array(ref json_items) => {
|
||||
for json_item in json_items {
|
||||
let value = try!(
|
||||
field_type
|
||||
@@ -276,30 +272,50 @@ impl fmt::Debug for Schema {
|
||||
}
|
||||
}
|
||||
|
||||
impl Decodable for Schema {
|
||||
fn decode<D: Decoder>(d: &mut D) -> Result <Self, D::Error> {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
try!(d.read_seq(|d, num_fields| {
|
||||
for _ in 0..num_fields {
|
||||
let field_entry = try!(FieldEntry::decode(d));
|
||||
schema_builder.add_field(field_entry);
|
||||
}
|
||||
Ok(())
|
||||
}));
|
||||
Ok(schema_builder.build())
|
||||
impl Serialize for Schema {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where S: Serializer
|
||||
{
|
||||
let mut seq = serializer.serialize_seq(Some(self.0.fields.len()))?;
|
||||
for e in &self.0.fields {
|
||||
seq.serialize_element(e)?;
|
||||
}
|
||||
seq.end()
|
||||
}
|
||||
}
|
||||
|
||||
impl Encodable for Schema {
|
||||
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
|
||||
try!(s.emit_seq(self.0.fields.len(),
|
||||
|mut e| {
|
||||
for (ord, field) in self.0.fields.iter().enumerate() {
|
||||
try!(e.emit_seq_elt(ord, |e| field.encode(e)));
|
||||
impl<'de> Deserialize<'de> for Schema
|
||||
{
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where D: Deserializer<'de>
|
||||
{
|
||||
struct SchemaVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for SchemaVisitor
|
||||
{
|
||||
type Value = Schema;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("struct Schema")
|
||||
}
|
||||
|
||||
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
|
||||
where A: SeqAccess<'de>
|
||||
{
|
||||
let mut schema = SchemaBuilder {
|
||||
fields: Vec::with_capacity(seq.size_hint().unwrap_or(0)),
|
||||
fields_map: HashMap::with_capacity(seq.size_hint().unwrap_or(0)),
|
||||
};
|
||||
|
||||
while let Some(value) = seq.next_element()? {
|
||||
schema.add_field(value);
|
||||
}
|
||||
Ok(())
|
||||
}));
|
||||
Ok(())
|
||||
|
||||
Ok(schema.build())
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_map(SchemaVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -319,28 +335,19 @@ impl From<SchemaBuilder> for Schema {
|
||||
#[derive(Debug)]
|
||||
pub enum DocParsingError {
|
||||
/// The payload given is not valid JSON.
|
||||
NotJSON(json::ParserError),
|
||||
/// The payload given is not a JSON Object (`{...}`).
|
||||
NotJSONObject(String),
|
||||
NotJSON(String),
|
||||
/// One of the value nodes could not be parsed.
|
||||
ValueError(String, ValueParsingError),
|
||||
/// The json-document contains a field that is not declared in the schema.
|
||||
NoSuchFieldInSchema(String),
|
||||
}
|
||||
|
||||
impl From<json::ParserError> for DocParsingError {
|
||||
fn from(err: json::ParserError) -> DocParsingError {
|
||||
DocParsingError::NotJSON(err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use schema::*;
|
||||
use rustc_serialize::json;
|
||||
use serde_json;
|
||||
use schema::field_type::ValueParsingError;
|
||||
use schema::schema::DocParsingError::NotJSON;
|
||||
|
||||
@@ -348,11 +355,13 @@ mod tests {
|
||||
pub fn test_schema_serialization() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let count_options = IntOptions::default().set_stored().set_fast();
|
||||
let popularity_options = IntOptions::default().set_stored().set_fast();
|
||||
schema_builder.add_text_field("title", TEXT);
|
||||
schema_builder.add_text_field("author", STRING);
|
||||
schema_builder.add_u64_field("count", count_options);
|
||||
schema_builder.add_i64_field("popularity", popularity_options);
|
||||
let schema = schema_builder.build();
|
||||
let schema_json: String = format!("{}", json::as_pretty_json(&schema));
|
||||
let schema_json = serde_json::to_string_pretty(&schema).unwrap();
|
||||
let expected = r#"[
|
||||
{
|
||||
"name": "title",
|
||||
@@ -378,10 +387,29 @@ mod tests {
|
||||
"fast": true,
|
||||
"stored": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "popularity",
|
||||
"type": "i64",
|
||||
"options": {
|
||||
"indexed": false,
|
||||
"fast": true,
|
||||
"stored": true
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
println!("{}", schema_json);
|
||||
println!("{}", expected);
|
||||
assert_eq!(schema_json, expected);
|
||||
|
||||
let schema: Schema = serde_json::from_str(expected).unwrap();
|
||||
|
||||
let mut fields = schema.fields().iter();
|
||||
|
||||
assert_eq!("title", fields.next().unwrap().name());
|
||||
assert_eq!("author", fields.next().unwrap().name());
|
||||
assert_eq!("count", fields.next().unwrap().name());
|
||||
assert_eq!("popularity", fields.next().unwrap().name());
|
||||
}
|
||||
|
||||
|
||||
@@ -400,6 +428,7 @@ mod tests {
|
||||
"count": 4
|
||||
}"#;
|
||||
let doc = schema.parse_document(doc_json).unwrap();
|
||||
|
||||
let doc_serdeser = schema.parse_document(&schema.to_json(&doc)).unwrap();
|
||||
assert_eq!(doc, doc_serdeser);
|
||||
}
|
||||
@@ -408,9 +437,11 @@ mod tests {
|
||||
pub fn test_parse_document() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let count_options = IntOptions::default().set_stored().set_fast();
|
||||
let popularity_options = IntOptions::default().set_stored().set_fast();
|
||||
let title_field = schema_builder.add_text_field("title", TEXT);
|
||||
let author_field = schema_builder.add_text_field("author", STRING);
|
||||
let count_field = schema_builder.add_u64_field("count", count_options);
|
||||
let popularity_field = schema_builder.add_i64_field("popularity", popularity_options);
|
||||
let schema = schema_builder.build();
|
||||
{
|
||||
let doc = schema.parse_document("{}").unwrap();
|
||||
@@ -420,32 +451,20 @@ mod tests {
|
||||
let doc = schema.parse_document(r#"{
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 4
|
||||
"count": 4,
|
||||
"popularity": 10
|
||||
}"#).unwrap();
|
||||
assert_eq!(doc.get_first(title_field).unwrap().text(), "my title");
|
||||
assert_eq!(doc.get_first(author_field).unwrap().text(), "fulmicoton");
|
||||
assert_eq!(doc.get_first(count_field).unwrap().u64_value(), 4);
|
||||
}
|
||||
{
|
||||
let json_err = schema.parse_document(r#"{
|
||||
"title": "my title",
|
||||
"author": "fulmicoton"
|
||||
"count": 4
|
||||
}"#);
|
||||
match json_err {
|
||||
Err(DocParsingError::NotJSON(__)) => {
|
||||
assert!(true);
|
||||
}
|
||||
_ => {
|
||||
assert!(false);
|
||||
}
|
||||
}
|
||||
assert_eq!(doc.get_first(popularity_field).unwrap().i64_value(), 10);
|
||||
}
|
||||
{
|
||||
let json_err = schema.parse_document(r#"{
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 4,
|
||||
"popularity": 10,
|
||||
"jambon": "bayonne"
|
||||
}"#);
|
||||
match json_err {
|
||||
@@ -453,7 +472,7 @@ mod tests {
|
||||
assert_eq!(field_name, "jambon");
|
||||
}
|
||||
_ => {
|
||||
assert!(false);
|
||||
panic!("expected additional field 'jambon' to fail but didn't");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -462,6 +481,7 @@ mod tests {
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": "5",
|
||||
"popularity": "10",
|
||||
"jambon": "bayonne"
|
||||
}"#);
|
||||
match json_err {
|
||||
@@ -469,7 +489,7 @@ mod tests {
|
||||
assert!(true);
|
||||
}
|
||||
_ => {
|
||||
assert!(false);
|
||||
panic!("expected string of 5 to fail but didn't");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -477,26 +497,28 @@ mod tests {
|
||||
let json_err = schema.parse_document(r#"{
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": -5
|
||||
}"#);
|
||||
match json_err {
|
||||
Err(DocParsingError::ValueError(_, ValueParsingError::TypeError(_))) => {
|
||||
assert!(true);
|
||||
}
|
||||
_ => {
|
||||
assert!(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
let json_err = schema.parse_document(r#"{
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 5000000000
|
||||
"count": -5,
|
||||
"popularity": 10
|
||||
}"#);
|
||||
match json_err {
|
||||
Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
|
||||
assert!(false);
|
||||
assert!(true);
|
||||
}
|
||||
_ => {
|
||||
panic!("expected -5 to fail but didn't");
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
let json_err = schema.parse_document(r#"{
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 9223372036854775808,
|
||||
"popularity": 10
|
||||
}"#);
|
||||
match json_err {
|
||||
Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
|
||||
panic!("expected 9223372036854775808 to fit into u64, but it didn't");
|
||||
}
|
||||
_ => {
|
||||
assert!(true);
|
||||
@@ -507,14 +529,30 @@ mod tests {
|
||||
let json_err = schema.parse_document(r#"{
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 50000000000000000000
|
||||
"count": 50,
|
||||
"popularity": 9223372036854775808
|
||||
}"#);
|
||||
match json_err {
|
||||
Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => {
|
||||
assert!(true);
|
||||
},
|
||||
_ => {
|
||||
panic!("expected 9223372036854775808 to overflow i64, but it didn't");
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
let json_err = schema.parse_document(r#"{
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 50,
|
||||
}"#);
|
||||
match json_err {
|
||||
Err(NotJSON(_)) => {
|
||||
assert!(true);
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
assert!(false)
|
||||
panic!("expected invalid JSON to fail parsing, but it didn't");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
use std::ops::BitOr;
|
||||
use rustc_serialize::Decodable;
|
||||
use rustc_serialize::Decoder;
|
||||
use rustc_serialize::Encodable;
|
||||
use rustc_serialize::Encoder;
|
||||
|
||||
|
||||
/// Define how a text field should be handled by tantivy.
|
||||
#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)]
|
||||
#[derive(Clone,Debug,PartialEq,Eq, Serialize, Deserialize)]
|
||||
pub struct TextOptions {
|
||||
indexing: TextIndexingOptions,
|
||||
stored: bool,
|
||||
@@ -51,9 +47,10 @@ impl Default for TextOptions {
|
||||
|
||||
|
||||
/// Describe how a field should be indexed
|
||||
#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash)]
|
||||
#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash, Serialize, Deserialize)]
|
||||
pub enum TextIndexingOptions {
|
||||
/// Unindexed fields will not generate any postings. They will not be searchable either.
|
||||
#[serde(rename="unindexed")]
|
||||
Unindexed,
|
||||
/// Untokenized means that the field text will not be split into tokens before being indexed.
|
||||
/// A field with the value "Hello world" will have the document subscribe to one single
|
||||
@@ -61,62 +58,26 @@ pub enum TextIndexingOptions {
|
||||
///
|
||||
/// It will **not** be searchable if the user enter "hello" for instance.
|
||||
/// This can be useful for tags, or ids for instance.
|
||||
#[serde(rename="untokenized")]
|
||||
Untokenized,
|
||||
/// TokenizedNoFreq will tokenize the field value, and append the document doc id
|
||||
/// to the posting lists associated to all of the tokens.
|
||||
/// The frequency of appearance of the term in the document however will be lost.
|
||||
/// The term frequency used in the TfIdf formula will always be 1.
|
||||
#[serde(rename="tokenize")]
|
||||
TokenizedNoFreq,
|
||||
/// TokenizedWithFreq will tokenize the field value, and encode
|
||||
/// both the docid and the term frequency in the posting lists associated to all
|
||||
#[serde(rename="freq")]
|
||||
/// of the tokens.
|
||||
TokenizedWithFreq,
|
||||
/// Like TokenizedWithFreq, but also encodes the positions of the
|
||||
/// terms in a separate file. This option is required for phrase queries.
|
||||
/// Don't use this if you are certain you won't need it, the term positions file can be very big.
|
||||
#[serde(rename="position")]
|
||||
TokenizedWithFreqAndPosition,
|
||||
}
|
||||
|
||||
impl Encodable for TextIndexingOptions {
|
||||
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
|
||||
let name = match *self {
|
||||
TextIndexingOptions::Unindexed => {
|
||||
"unindexed"
|
||||
}
|
||||
TextIndexingOptions::Untokenized => {
|
||||
"untokenized"
|
||||
}
|
||||
TextIndexingOptions::TokenizedNoFreq => {
|
||||
"tokenize"
|
||||
}
|
||||
TextIndexingOptions::TokenizedWithFreq => {
|
||||
"freq"
|
||||
}
|
||||
TextIndexingOptions::TokenizedWithFreqAndPosition => {
|
||||
"position"
|
||||
}
|
||||
};
|
||||
s.emit_str(name)
|
||||
}
|
||||
}
|
||||
|
||||
impl Decodable for TextIndexingOptions {
|
||||
fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
|
||||
use self::TextIndexingOptions::*;
|
||||
let option_name: String = try!(d.read_str());
|
||||
Ok(match option_name.as_ref() {
|
||||
"unindexed" => Unindexed,
|
||||
"untokenized" => Untokenized,
|
||||
"tokenize" => TokenizedNoFreq,
|
||||
"freq" => TokenizedWithFreq,
|
||||
"position" => TokenizedWithFreqAndPosition,
|
||||
_ => {
|
||||
return Err(d.error(&format!("Encoding option {:?} unknown", option_name)));
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TextIndexingOptions {
|
||||
|
||||
/// Returns true iff the term frequency will be encoded.
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
|
||||
use common::BinarySerializable;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::io::Read;
|
||||
use std::fmt;
|
||||
use serde::{Serialize, Serializer, Deserialize, Deserializer};
|
||||
use serde::de::Visitor;
|
||||
|
||||
/// Value represents the value of any field.
|
||||
/// It is an enum over all of the possible field types.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable)]
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
|
||||
pub enum Value {
|
||||
/// The str type is used for any text information.
|
||||
Str(String),
|
||||
@@ -16,6 +14,54 @@ pub enum Value {
|
||||
I64(i64)
|
||||
}
|
||||
|
||||
impl Serialize for Value {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where S: Serializer
|
||||
{
|
||||
match *self {
|
||||
Value::Str(ref v) => serializer.serialize_str(v),
|
||||
Value::U64(u) => serializer.serialize_u64(u),
|
||||
Value::I64(u) => serializer.serialize_i64(u),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Value
|
||||
{
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where D: Deserializer<'de>
|
||||
{
|
||||
struct ValueVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for ValueVisitor
|
||||
{
|
||||
type Value = Value;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("a string or u32")
|
||||
}
|
||||
|
||||
fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E> {
|
||||
Ok(Value::U64(v))
|
||||
}
|
||||
|
||||
fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E> {
|
||||
Ok(Value::I64(v))
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
|
||||
Ok(Value::Str(v.to_owned()))
|
||||
}
|
||||
|
||||
fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
|
||||
Ok(Value::Str(v))
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_any(ValueVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
impl Value {
|
||||
/// Returns the text value, provided the value is of the `Str` type.
|
||||
///
|
||||
@@ -88,48 +134,53 @@ impl<'a> From<&'a str> for Value {
|
||||
}
|
||||
}
|
||||
|
||||
const TEXT_CODE: u8 = 0;
|
||||
const U64_CODE: u8 = 1;
|
||||
const I64_CODE: u8 = 2;
|
||||
mod binary_serialize {
|
||||
use common::BinarySerializable;
|
||||
use std::io::{self, Read, Write};
|
||||
use super::Value;
|
||||
|
||||
const TEXT_CODE: u8 = 0;
|
||||
const U64_CODE: u8 = 1;
|
||||
const I64_CODE: u8 = 2;
|
||||
|
||||
impl BinarySerializable for Value {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let mut written_size = 0;
|
||||
match *self {
|
||||
Value::Str(ref text) => {
|
||||
written_size += try!(TEXT_CODE.serialize(writer));
|
||||
written_size += try!(text.serialize(writer));
|
||||
},
|
||||
Value::U64(ref val) => {
|
||||
written_size += try!(U64_CODE.serialize(writer));
|
||||
written_size += try!(val.serialize(writer));
|
||||
},
|
||||
Value::I64(ref val) => {
|
||||
written_size += try!(I64_CODE.serialize(writer));
|
||||
written_size += try!(val.serialize(writer));
|
||||
},
|
||||
impl BinarySerializable for Value {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let mut written_size = 0;
|
||||
match *self {
|
||||
Value::Str(ref text) => {
|
||||
written_size += try!(TEXT_CODE.serialize(writer));
|
||||
written_size += try!(text.serialize(writer));
|
||||
},
|
||||
Value::U64(ref val) => {
|
||||
written_size += try!(U64_CODE.serialize(writer));
|
||||
written_size += try!(val.serialize(writer));
|
||||
},
|
||||
Value::I64(ref val) => {
|
||||
written_size += try!(I64_CODE.serialize(writer));
|
||||
written_size += try!(val.serialize(writer));
|
||||
},
|
||||
}
|
||||
Ok(written_size)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> io::Result<Self> {
|
||||
let type_code = try!(u8::deserialize(reader));
|
||||
match type_code {
|
||||
TEXT_CODE => {
|
||||
let text = try!(String::deserialize(reader));
|
||||
Ok(Value::Str(text))
|
||||
}
|
||||
U64_CODE => {
|
||||
let value = try!(u64::deserialize(reader));
|
||||
Ok(Value::U64(value))
|
||||
}
|
||||
I64_CODE => {
|
||||
let value = try!(i64::deserialize(reader));
|
||||
Ok(Value::I64(value))
|
||||
}
|
||||
_ => {
|
||||
Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(written_size)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> io::Result<Self> {
|
||||
let type_code = try!(u8::deserialize(reader));
|
||||
match type_code {
|
||||
TEXT_CODE => {
|
||||
let text = try!(String::deserialize(reader));
|
||||
Ok(Value::Str(text))
|
||||
}
|
||||
U64_CODE => {
|
||||
let value = try!(u64::deserialize(reader));
|
||||
Ok(Value::U64(value))
|
||||
}
|
||||
I64_CODE => {
|
||||
let value = try!(i64::deserialize(reader));
|
||||
Ok(Value::I64(value))
|
||||
}
|
||||
_ => {
|
||||
Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user