Using u32 for field ids

This commit is contained in:
Paul Masurel
2017-04-15 13:04:33 +09:00
parent 44c684af5c
commit 8a28d1643d
9 changed files with 48 additions and 46 deletions

View File

@@ -75,7 +75,7 @@ mod tests {
}
let source = directory.open_read(&path).unwrap();
{
assert_eq!(source.len(), 20 as usize);
assert_eq!(source.len(), 23 as usize);
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
@@ -108,7 +108,7 @@ mod tests {
}
let source = directory.open_read(&path).unwrap();
{
assert_eq!(source.len(), 45 as usize);
assert_eq!(source.len(), 48 as usize);
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
@@ -143,7 +143,7 @@ mod tests {
}
let source = directory.open_read(&path).unwrap();
{
assert_eq!(source.len(), 18 as usize);
assert_eq!(source.len(), 21 as usize);
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();

View File

@@ -15,7 +15,7 @@ impl U32FastFieldsWriter {
.iter()
.enumerate()
.filter(|&(_, field_entry)| field_entry.is_u32_fast())
.map(|(field_id, _)| Field(field_id as u8))
.map(|(field_id, _)| Field(field_id as u32))
.collect();
U32FastFieldsWriter::new(u32_fields)
}

View File

@@ -280,7 +280,7 @@ mod tests {
let delete_queue = DeleteQueue::new();
let make_op = |i: usize| {
let field = Field(1u8);
let field = Field(1u32);
DeleteOperation {
opstamp: i as u64,
term: Term::from_field_u32(field, i as u32)

View File

@@ -103,7 +103,7 @@ impl IndexMerger {
.iter()
.enumerate()
.filter(|&(_, field_entry)| field_entry.is_indexed())
.map(|(field_id, _)| Field(field_id as u8))
.map(|(field_id, _)| Field(field_id as u32))
.collect();
self.generic_write_fast_field(fieldnorm_fastfields, &extract_fieldnorm_reader, fast_field_serializer)
}
@@ -114,7 +114,7 @@ impl IndexMerger {
.iter()
.enumerate()
.filter(|&(_, field_entry)| field_entry.is_u32_fast())
.map(|(field_id, _)| Field(field_id as u8))
.map(|(field_id, _)| Field(field_id as u32))
.collect();
self.generic_write_fast_field(fast_fields, &extract_fast_field_reader, fast_field_serializer)
}

View File

@@ -41,7 +41,7 @@ fn create_fieldnorms_writer(schema: &Schema) -> U32FastFieldsWriter {
.iter()
.enumerate()
.filter(|&(_, field_entry)| field_entry.is_indexed())
.map(|(field_id, _)| Field(field_id as u8))
.map(|(field_id, _)| Field(field_id as u32))
.collect();
U32FastFieldsWriter::new(u32_fields)
}

View File

@@ -315,42 +315,42 @@ mod test {
#[test]
pub fn test_parse_query_to_ast_disjunction() {
test_parse_query_to_logical_ast_helper("title:toto",
"Term([0, 116, 111, 116, 111])",
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
false);
test_parse_query_to_logical_ast_helper("+title:toto",
"Term([0, 116, 111, 116, 111])",
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
false);
test_parse_query_to_logical_ast_helper("+title:toto -titi",
"(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \
105, 116, 105]) Term([1, 116, 105, 116, 105])))",
"(+Term([0, 0, 0, 0, 116, 111, 116, 111]) -(Term([0, 0, 0, 0, 116, \
105, 116, 105]) Term([0, 0, 0, 1, 116, 105, 116, 105])))",
false);
assert_eq!(parse_query_to_logical_ast("-title:toto", false).err().unwrap(),
QueryParserError::AllButQueryForbidden);
test_parse_query_to_logical_ast_helper("title:a b",
"(Term([0, 97]) (Term([0, 98]) Term([1, 98])))",
"(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) Term([0, 0, 0, 1, 98])))",
false);
test_parse_query_to_logical_ast_helper("title:\"a b\"",
"\"[Term([0, 97]), Term([0, 98])]\"",
"\"[Term([0, 0, 0, 0, 97]), Term([0, 0, 0, 0, 98])]\"",
false);
}
#[test]
pub fn test_parse_query_to_ast_conjunction() {
test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", true);
test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 0, 0, 0, 116, 111, 116, 111])", true);
test_parse_query_to_logical_ast_helper("+title:toto",
"Term([0, 116, 111, 116, 111])",
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
true);
test_parse_query_to_logical_ast_helper("+title:toto -titi",
"(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \
105, 116, 105]) Term([1, 116, 105, 116, 105])))",
"(+Term([0, 0, 0, 0, 116, 111, 116, 111]) -(Term([0, 0, 0, 0, 116, \
105, 116, 105]) Term([0, 0, 0, 1, 116, 105, 116, 105])))",
true);
assert_eq!(parse_query_to_logical_ast("-title:toto", true).err().unwrap(),
QueryParserError::AllButQueryForbidden);
test_parse_query_to_logical_ast_helper("title:a b",
"(+Term([0, 97]) +(Term([0, 98]) Term([1, 98])))",
"(+Term([0, 0, 0, 0, 97]) +(Term([0, 0, 0, 0, 98]) Term([0, 0, 0, 1, 98])))",
true);
test_parse_query_to_logical_ast_helper("title:\"a b\"",
"\"[Term([0, 97]), Term([0, 98])]\"",
"\"[Term([0, 0, 0, 0, 97]), Term([0, 0, 0, 0, 98])]\"",
true);
}
}

View File

@@ -11,7 +11,7 @@ use common::BinarySerializable;
/// The field id is a `u32`. (It used to be a `u8`, which limited tantivy to at most `255` fields.)
/// Value 255 is reserved. NOTE(review): confirm whether this reservation still applies with `u32` ids.
#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, RustcEncodable, RustcDecodable)]
pub struct Field(pub u8);
pub struct Field(pub u32);
impl BinarySerializable for Field {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
@@ -19,7 +19,7 @@ impl BinarySerializable for Field {
}
fn deserialize(reader: &mut Read) -> io::Result<Field> {
u8::deserialize(reader).map(Field)
u32::deserialize(reader).map(Field)
}
}

View File

@@ -89,7 +89,7 @@ impl SchemaBuilder {
/// Adds a field entry to the schema in build.
fn add_field(&mut self, field_entry: FieldEntry) -> Field {
let field = Field(self.fields.len() as u8);
let field = Field(self.fields.len() as u32);
let field_name = field_entry.name().clone();
self.fields.push(field_entry);
self.fields_map.insert(field_name, field);

View File

@@ -24,13 +24,14 @@ impl Term {
/// Set the content of the term.
pub fn set_content(&mut self, content: &[u8]) {
assert!(content.len() >= 4);
self.0.resize(content.len(), 0u8);
(&mut self.0[..]).clone_from_slice(content);
}
/// Returns the field id.
fn field_id(&self,) -> u8 {
self.0[0]
fn field_id(&self,) -> u32 {
BigEndian::read_u32(&self.0[..4])
}
/// Returns the field.
@@ -41,15 +42,17 @@ impl Term {
/// Builds a term given a field, and a u32-value
///
/// Assuming the term has a field id of 1, and a u32 value of 3234,
/// the Term will have 5 bytes.
/// The first byte is `1`, and the 4 following bytes are that of the u32.
/// the Term will have 8 bytes.
///
/// The first four bytes are dedicated to storing the field id as a u32.
/// The 4 following bytes are encoding the u32 value.
pub fn from_field_u32(field: Field, val: u32) -> Term {
const U32_TERM_LEN: usize = 1 + 4;
const U32_TERM_LEN: usize = 4 + 4;
let mut buffer = allocate_vec(U32_TERM_LEN);
buffer[0] = field.0;
// we want BigEndian here to have lexicographic order
// match the natural order of vals.
BigEndian::write_u32(&mut buffer[1..5], val);
// match the natural order of `(field, val)`
BigEndian::write_u32(&mut buffer[0..4], field.0);
BigEndian::write_u32(&mut buffer[4..], val);
Term(buffer)
}
@@ -60,10 +63,9 @@ impl Term {
/// The first four bytes encode the field id `2` as a big-endian u32, and the
/// three following bytes are the utf-8 representation of "abc".
pub fn from_field_text(field: Field, text: &str) -> Term {
let mut buffer = Vec::with_capacity(1 + text.len());
buffer.clear();
field.serialize(&mut buffer).unwrap();
buffer.extend(text.as_bytes());
let mut buffer = allocate_vec(4 + text.len());
BigEndian::write_u32(&mut buffer[0..4], field.0);
buffer[4..].clone_from_slice(text.as_bytes());
Term(buffer)
}
@@ -71,7 +73,7 @@ impl Term {
///
/// Panics if the term is not a u32 field.
pub fn get_u32(&self) -> u32 {
BigEndian::read_u32(&self.0[1..])
BigEndian::read_u32(&self.0[4..])
}
/// Builds a term from its byte representation.
@@ -89,7 +91,7 @@ impl Term {
/// If the term is a u32, its value is encoded according
/// to `byteorder::BigEndian`.
pub fn value(&self) -> &[u8] {
&self.0[1..]
&self.0[4..]
}
/// Returns the text associated with the term.
@@ -104,7 +106,7 @@ impl Term {
/// Sets the text only, keeping the field untouched.
pub fn set_text(&mut self, text: &str) {
self.0.resize(1, 0u8);
self.0.resize(4, 0u8);
self.0.extend(text.as_bytes());
}
@@ -141,18 +143,18 @@ mod tests {
{
let term = Term::from_field_text(title_field, "test");
assert_eq!(term.field(), title_field);
assert_eq!(term.as_slice()[0], 1u8);
assert_eq!(&term.as_slice()[1..], "test".as_bytes());
assert_eq!(&term.as_slice()[0..4], &[0u8,0u8,0u8,1u8]);
assert_eq!(&term.as_slice()[4..], "test".as_bytes());
}
{
let term = Term::from_field_u32(count_field, 983u32);
assert_eq!(term.field(), count_field);
assert_eq!(term.as_slice()[0], 2u8);
assert_eq!(term.as_slice().len(), 5);
assert_eq!(term.as_slice()[1], 0u8);
assert_eq!(term.as_slice()[2], 0u8);
assert_eq!(term.as_slice()[3], (933u32 / 256u32) as u8);
assert_eq!(term.as_slice()[4], (983u32 % 256u32) as u8);
assert_eq!(&term.as_slice()[0..4], &[0u8, 0u8, 0u8, 2u8]);
assert_eq!(term.as_slice().len(), 8);
assert_eq!(term.as_slice()[4], 0u8);
assert_eq!(term.as_slice()[5], 0u8);
assert_eq!(term.as_slice()[6], (933u32 / 256u32) as u8);
assert_eq!(term.as_slice()[7], (983u32 % 256u32) as u8);
}
}