From 8a28d1643d00099fdf3127c0d45b5e06f93ce6e3 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sat, 15 Apr 2017 13:04:33 +0900 Subject: [PATCH] Using u32 for field ids --- src/fastfield/mod.rs | 6 ++-- src/fastfield/writer.rs | 2 +- src/indexer/delete_queue.rs | 2 +- src/indexer/merger.rs | 4 +-- src/indexer/segment_writer.rs | 2 +- src/query/query_parser/query_parser.rs | 24 ++++++------- src/schema/field.rs | 4 +-- src/schema/schema.rs | 2 +- src/schema/term.rs | 48 ++++++++++++++------------ 9 files changed, 48 insertions(+), 46 deletions(-) diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index ae3d83f88..cb03faa87 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -75,7 +75,7 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - assert_eq!(source.len(), 20 as usize); + assert_eq!(source.len(), 23 as usize); } { let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); @@ -108,7 +108,7 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - assert_eq!(source.len(), 45 as usize); + assert_eq!(source.len(), 48 as usize); } { let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); @@ -143,7 +143,7 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - assert_eq!(source.len(), 18 as usize); + assert_eq!(source.len(), 21 as usize); } { let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 715182f36..c3d5c7b88 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -15,7 +15,7 @@ impl U32FastFieldsWriter { .iter() .enumerate() .filter(|&(_, field_entry)| field_entry.is_u32_fast()) - .map(|(field_id, _)| Field(field_id as u8)) + .map(|(field_id, _)| Field(field_id as u32)) .collect(); U32FastFieldsWriter::new(u32_fields) } diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs index a031c63d3..dbc6a7a7c 100644 --- a/src/indexer/delete_queue.rs +++ 
b/src/indexer/delete_queue.rs @@ -280,7 +280,7 @@ mod tests { let delete_queue = DeleteQueue::new(); let make_op = |i: usize| { - let field = Field(1u8); + let field = Field(1u32); DeleteOperation { opstamp: i as u64, term: Term::from_field_u32(field, i as u32) diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index bb0bb7ff7..8feecc145 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -103,7 +103,7 @@ impl IndexMerger { .iter() .enumerate() .filter(|&(_, field_entry)| field_entry.is_indexed()) - .map(|(field_id, _)| Field(field_id as u8)) + .map(|(field_id, _)| Field(field_id as u32)) .collect(); self.generic_write_fast_field(fieldnorm_fastfields, &extract_fieldnorm_reader, fast_field_serializer) } @@ -114,7 +114,7 @@ impl IndexMerger { .iter() .enumerate() .filter(|&(_, field_entry)| field_entry.is_u32_fast()) - .map(|(field_id, _)| Field(field_id as u8)) + .map(|(field_id, _)| Field(field_id as u32)) .collect(); self.generic_write_fast_field(fast_fields, &extract_fast_field_reader, fast_field_serializer) } diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 63842f9f1..04faaa62c 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -41,7 +41,7 @@ fn create_fieldnorms_writer(schema: &Schema) -> U32FastFieldsWriter { .iter() .enumerate() .filter(|&(_, field_entry)| field_entry.is_indexed()) - .map(|(field_id, _)| Field(field_id as u8)) + .map(|(field_id, _)| Field(field_id as u32)) .collect(); U32FastFieldsWriter::new(u32_fields) } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 90b7f23be..427e86844 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -315,42 +315,42 @@ mod test { #[test] pub fn test_parse_query_to_ast_disjunction() { test_parse_query_to_logical_ast_helper("title:toto", - "Term([0, 116, 111, 116, 111])", + "Term([0, 0, 0, 0, 116, 111, 116, 111])", false); 
test_parse_query_to_logical_ast_helper("+title:toto", - "Term([0, 116, 111, 116, 111])", + "Term([0, 0, 0, 0, 116, 111, 116, 111])", false); test_parse_query_to_logical_ast_helper("+title:toto -titi", - "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \ - 105, 116, 105]) Term([1, 116, 105, 116, 105])))", + "(+Term([0, 0, 0, 0, 116, 111, 116, 111]) -(Term([0, 0, 0, 0, 116, \ + 105, 116, 105]) Term([0, 0, 0, 1, 116, 105, 116, 105])))", false); assert_eq!(parse_query_to_logical_ast("-title:toto", false).err().unwrap(), QueryParserError::AllButQueryForbidden); test_parse_query_to_logical_ast_helper("title:a b", - "(Term([0, 97]) (Term([0, 98]) Term([1, 98])))", + "(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) Term([0, 0, 0, 1, 98])))", false); test_parse_query_to_logical_ast_helper("title:\"a b\"", - "\"[Term([0, 97]), Term([0, 98])]\"", + "\"[Term([0, 0, 0, 0, 97]), Term([0, 0, 0, 0, 98])]\"", false); } #[test] pub fn test_parse_query_to_ast_conjunction() { - test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", true); + test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 0, 0, 0, 116, 111, 116, 111])", true); test_parse_query_to_logical_ast_helper("+title:toto", - "Term([0, 116, 111, 116, 111])", + "Term([0, 0, 0, 0, 116, 111, 116, 111])", true); test_parse_query_to_logical_ast_helper("+title:toto -titi", - "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \ - 105, 116, 105]) Term([1, 116, 105, 116, 105])))", + "(+Term([0, 0, 0, 0, 116, 111, 116, 111]) -(Term([0, 0, 0, 0, 116, \ + 105, 116, 105]) Term([0, 0, 0, 1, 116, 105, 116, 105])))", true); assert_eq!(parse_query_to_logical_ast("-title:toto", true).err().unwrap(), QueryParserError::AllButQueryForbidden); test_parse_query_to_logical_ast_helper("title:a b", - "(+Term([0, 97]) +(Term([0, 98]) Term([1, 98])))", + "(+Term([0, 0, 0, 0, 97]) +(Term([0, 0, 0, 0, 98]) Term([0, 0, 0, 1, 98])))", true); test_parse_query_to_logical_ast_helper("title:\"a b\"", - "\"[Term([0, 97]), 
Term([0, 98])]\"", + "\"[Term([0, 0, 0, 0, 97]), Term([0, 0, 0, 0, 98])]\"", true); } } diff --git a/src/schema/field.rs b/src/schema/field.rs index 792b5b658..26d8da366 100644 --- a/src/schema/field.rs +++ b/src/schema/field.rs @@ -11,7 +11,7 @@ use common::BinarySerializable; /// Because the field id is a `u8`, tantivy can only have at most `255` fields. /// Value 255 is reserved. #[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, RustcEncodable, RustcDecodable)] -pub struct Field(pub u8); +pub struct Field(pub u32); impl BinarySerializable for Field { fn serialize(&self, writer: &mut Write) -> io::Result { @@ -19,7 +19,7 @@ impl BinarySerializable for Field { } fn deserialize(reader: &mut Read) -> io::Result { - u8::deserialize(reader).map(Field) + u32::deserialize(reader).map(Field) } } diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 3ed8b6c9d..bdc28cf10 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -89,7 +89,7 @@ impl SchemaBuilder { /// Adds a field entry to the schema in build. fn add_field(&mut self, field_entry: FieldEntry) -> Field { - let field = Field(self.fields.len() as u8); + let field = Field(self.fields.len() as u32); let field_name = field_entry.name().clone(); self.fields.push(field_entry); self.fields_map.insert(field_name, field); diff --git a/src/schema/term.rs b/src/schema/term.rs index f5502e00e..294c2d346 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -24,13 +24,14 @@ impl Term { /// Set the content of the term. pub fn set_content(&mut self, content: &[u8]) { + assert!(content.len() >= 4); self.0.resize(content.len(), 0u8); (&mut self.0[..]).clone_from_slice(content); } /// Returns the field id. - fn field_id(&self,) -> u8 { - self.0[0] + fn field_id(&self,) -> u32 { + BigEndian::read_u32(&self.0[..4]) } /// Returns the field. 
@@ -41,15 +42,17 @@ impl Term { /// Builds a term given a field, and a u32-value /// /// Assuming the term has a field id of 1, and a u32 value of 3234, - /// the Term will have 5 bytes. - /// The first byte is `1`, and the 4 following bytes are that of the u32. + /// the Term will have 8 bytes. + /// + /// The first four bytes are dedicated to storing the field id as a u32. + /// The 4 following bytes are encoding the u32 value. pub fn from_field_u32(field: Field, val: u32) -> Term { - const U32_TERM_LEN: usize = 1 + 4; + const U32_TERM_LEN: usize = 4 + 4; let mut buffer = allocate_vec(U32_TERM_LEN); - buffer[0] = field.0; // we want BigEndian here to have lexicographic order - // match the natural order of vals. - BigEndian::write_u32(&mut buffer[1..5], val); + // match the natural order of `(field, val)` + BigEndian::write_u32(&mut buffer[0..4], field.0); + BigEndian::write_u32(&mut buffer[4..], val); Term(buffer) } @@ -60,10 +63,9 @@ - /// The first byte is 2, and the three following bytes are the utf-8 - /// representation of "abc". + /// The first four bytes are the field id, and the following bytes are the utf-8 + /// representation of "abc". pub fn from_field_text(field: Field, text: &str) -> Term { - let mut buffer = Vec::with_capacity(1 + text.len()); - buffer.clear(); - field.serialize(&mut buffer).unwrap(); - buffer.extend(text.as_bytes()); + let mut buffer = allocate_vec(4 + text.len()); + BigEndian::write_u32(&mut buffer[0..4], field.0); + buffer[4..].clone_from_slice(text.as_bytes()); Term(buffer) } @@ -71,7 +73,7 @@ /// /// Panics if the term is not a u32 field. pub fn get_u32(&self) -> u32 { - BigEndian::read_u32(&self.0[1..]) + BigEndian::read_u32(&self.0[4..]) } /// Builds a term from its byte representation. @@ -89,7 +91,7 @@ /// If the term is a u32, its value is encoded according - /// to `byteorder::LittleEndian`. + /// to `byteorder::BigEndian`. pub fn value(&self) -> &[u8] { - &self.0[1..] + &self.0[4..] } /// Returns the text associated with the term. @@ -104,7 +106,7 @@ /// Set the texts only, keeping the field untouched. 
pub fn set_text(&mut self, text: &str) { - self.0.resize(1, 0u8); + self.0.resize(4, 0u8); self.0.extend(text.as_bytes()); } @@ -141,18 +143,18 @@ mod tests { { let term = Term::from_field_text(title_field, "test"); assert_eq!(term.field(), title_field); - assert_eq!(term.as_slice()[0], 1u8); - assert_eq!(&term.as_slice()[1..], "test".as_bytes()); + assert_eq!(&term.as_slice()[0..4], &[0u8,0u8,0u8,1u8]); + assert_eq!(&term.as_slice()[4..], "test".as_bytes()); } { let term = Term::from_field_u32(count_field, 983u32); assert_eq!(term.field(), count_field); - assert_eq!(term.as_slice()[0], 2u8); - assert_eq!(term.as_slice().len(), 5); - assert_eq!(term.as_slice()[1], 0u8); - assert_eq!(term.as_slice()[2], 0u8); - assert_eq!(term.as_slice()[3], (933u32 / 256u32) as u8); - assert_eq!(term.as_slice()[4], (983u32 % 256u32) as u8); + assert_eq!(&term.as_slice()[0..4], &[0u8, 0u8, 0u8, 2u8]); + assert_eq!(term.as_slice().len(), 8); + assert_eq!(term.as_slice()[4], 0u8); + assert_eq!(term.as_slice()[5], 0u8); + assert_eq!(term.as_slice()[6], (983u32 / 256u32) as u8); + assert_eq!(term.as_slice()[7], (983u32 % 256u32) as u8); } }