mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
Using u32 for field ids
This commit is contained in:
@@ -75,7 +75,7 @@ mod tests {
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
assert_eq!(source.len(), 20 as usize);
|
||||
assert_eq!(source.len(), 23 as usize);
|
||||
}
|
||||
{
|
||||
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
||||
@@ -108,7 +108,7 @@ mod tests {
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
assert_eq!(source.len(), 45 as usize);
|
||||
assert_eq!(source.len(), 48 as usize);
|
||||
}
|
||||
{
|
||||
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
||||
@@ -143,7 +143,7 @@ mod tests {
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
assert_eq!(source.len(), 18 as usize);
|
||||
assert_eq!(source.len(), 21 as usize);
|
||||
}
|
||||
{
|
||||
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
||||
|
||||
@@ -15,7 +15,7 @@ impl U32FastFieldsWriter {
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|&(_, field_entry)| field_entry.is_u32_fast())
|
||||
.map(|(field_id, _)| Field(field_id as u8))
|
||||
.map(|(field_id, _)| Field(field_id as u32))
|
||||
.collect();
|
||||
U32FastFieldsWriter::new(u32_fields)
|
||||
}
|
||||
|
||||
@@ -280,7 +280,7 @@ mod tests {
|
||||
let delete_queue = DeleteQueue::new();
|
||||
|
||||
let make_op = |i: usize| {
|
||||
let field = Field(1u8);
|
||||
let field = Field(1u32);
|
||||
DeleteOperation {
|
||||
opstamp: i as u64,
|
||||
term: Term::from_field_u32(field, i as u32)
|
||||
|
||||
@@ -103,7 +103,7 @@ impl IndexMerger {
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|&(_, field_entry)| field_entry.is_indexed())
|
||||
.map(|(field_id, _)| Field(field_id as u8))
|
||||
.map(|(field_id, _)| Field(field_id as u32))
|
||||
.collect();
|
||||
self.generic_write_fast_field(fieldnorm_fastfields, &extract_fieldnorm_reader, fast_field_serializer)
|
||||
}
|
||||
@@ -114,7 +114,7 @@ impl IndexMerger {
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|&(_, field_entry)| field_entry.is_u32_fast())
|
||||
.map(|(field_id, _)| Field(field_id as u8))
|
||||
.map(|(field_id, _)| Field(field_id as u32))
|
||||
.collect();
|
||||
self.generic_write_fast_field(fast_fields, &extract_fast_field_reader, fast_field_serializer)
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ fn create_fieldnorms_writer(schema: &Schema) -> U32FastFieldsWriter {
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|&(_, field_entry)| field_entry.is_indexed())
|
||||
.map(|(field_id, _)| Field(field_id as u8))
|
||||
.map(|(field_id, _)| Field(field_id as u32))
|
||||
.collect();
|
||||
U32FastFieldsWriter::new(u32_fields)
|
||||
}
|
||||
|
||||
@@ -315,42 +315,42 @@ mod test {
|
||||
#[test]
|
||||
pub fn test_parse_query_to_ast_disjunction() {
|
||||
test_parse_query_to_logical_ast_helper("title:toto",
|
||||
"Term([0, 116, 111, 116, 111])",
|
||||
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
|
||||
false);
|
||||
test_parse_query_to_logical_ast_helper("+title:toto",
|
||||
"Term([0, 116, 111, 116, 111])",
|
||||
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
|
||||
false);
|
||||
test_parse_query_to_logical_ast_helper("+title:toto -titi",
|
||||
"(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \
|
||||
105, 116, 105]) Term([1, 116, 105, 116, 105])))",
|
||||
"(+Term([0, 0, 0, 0, 116, 111, 116, 111]) -(Term([0, 0, 0, 0, 116, \
|
||||
105, 116, 105]) Term([0, 0, 0, 1, 116, 105, 116, 105])))",
|
||||
false);
|
||||
assert_eq!(parse_query_to_logical_ast("-title:toto", false).err().unwrap(),
|
||||
QueryParserError::AllButQueryForbidden);
|
||||
test_parse_query_to_logical_ast_helper("title:a b",
|
||||
"(Term([0, 97]) (Term([0, 98]) Term([1, 98])))",
|
||||
"(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) Term([0, 0, 0, 1, 98])))",
|
||||
false);
|
||||
test_parse_query_to_logical_ast_helper("title:\"a b\"",
|
||||
"\"[Term([0, 97]), Term([0, 98])]\"",
|
||||
"\"[Term([0, 0, 0, 0, 97]), Term([0, 0, 0, 0, 98])]\"",
|
||||
false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_parse_query_to_ast_conjunction() {
|
||||
test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", true);
|
||||
test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 0, 0, 0, 116, 111, 116, 111])", true);
|
||||
test_parse_query_to_logical_ast_helper("+title:toto",
|
||||
"Term([0, 116, 111, 116, 111])",
|
||||
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
|
||||
true);
|
||||
test_parse_query_to_logical_ast_helper("+title:toto -titi",
|
||||
"(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \
|
||||
105, 116, 105]) Term([1, 116, 105, 116, 105])))",
|
||||
"(+Term([0, 0, 0, 0, 116, 111, 116, 111]) -(Term([0, 0, 0, 0, 116, \
|
||||
105, 116, 105]) Term([0, 0, 0, 1, 116, 105, 116, 105])))",
|
||||
true);
|
||||
assert_eq!(parse_query_to_logical_ast("-title:toto", true).err().unwrap(),
|
||||
QueryParserError::AllButQueryForbidden);
|
||||
test_parse_query_to_logical_ast_helper("title:a b",
|
||||
"(+Term([0, 97]) +(Term([0, 98]) Term([1, 98])))",
|
||||
"(+Term([0, 0, 0, 0, 97]) +(Term([0, 0, 0, 0, 98]) Term([0, 0, 0, 1, 98])))",
|
||||
true);
|
||||
test_parse_query_to_logical_ast_helper("title:\"a b\"",
|
||||
"\"[Term([0, 97]), Term([0, 98])]\"",
|
||||
"\"[Term([0, 0, 0, 0, 97]), Term([0, 0, 0, 0, 98])]\"",
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ use common::BinarySerializable;
|
||||
/// Because the field id is a `u32`, tantivy can have up to `u32::MAX` fields.
|
||||
/// The maximum value is presumably reserved (it was `255` for the old `u8` id) — confirm.
|
||||
#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, RustcEncodable, RustcDecodable)]
|
||||
pub struct Field(pub u8);
|
||||
pub struct Field(pub u32);
|
||||
|
||||
impl BinarySerializable for Field {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
@@ -19,7 +19,7 @@ impl BinarySerializable for Field {
|
||||
}
|
||||
|
||||
fn deserialize(reader: &mut Read) -> io::Result<Field> {
|
||||
u8::deserialize(reader).map(Field)
|
||||
u32::deserialize(reader).map(Field)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -89,7 +89,7 @@ impl SchemaBuilder {
|
||||
|
||||
/// Adds a field entry to the schema in build.
|
||||
fn add_field(&mut self, field_entry: FieldEntry) -> Field {
|
||||
let field = Field(self.fields.len() as u8);
|
||||
let field = Field(self.fields.len() as u32);
|
||||
let field_name = field_entry.name().clone();
|
||||
self.fields.push(field_entry);
|
||||
self.fields_map.insert(field_name, field);
|
||||
|
||||
@@ -24,13 +24,14 @@ impl Term {
|
||||
|
||||
/// Set the content of the term.
|
||||
pub fn set_content(&mut self, content: &[u8]) {
|
||||
assert!(content.len() >= 4);
|
||||
self.0.resize(content.len(), 0u8);
|
||||
(&mut self.0[..]).clone_from_slice(content);
|
||||
}
|
||||
|
||||
/// Returns the field id.
|
||||
fn field_id(&self,) -> u8 {
|
||||
self.0[0]
|
||||
fn field_id(&self,) -> u32 {
|
||||
BigEndian::read_u32(&self.0[..4])
|
||||
}
|
||||
|
||||
/// Returns the field.
|
||||
@@ -41,15 +42,17 @@ impl Term {
|
||||
/// Builds a term given a field, and a u32-value
|
||||
///
|
||||
/// Assuming the term has a field id of 1, and a u32 value of 3234,
|
||||
/// the Term will have 5 bytes.
|
||||
/// The first byte is `1`, and the 4 following bytes are that of the u32.
|
||||
/// the Term will have 8 bytes.
|
||||
///
|
||||
/// The first four byte are dedicated to storing the field id as a u32.
|
||||
/// The 4 following bytes are encoding the u32 value.
|
||||
pub fn from_field_u32(field: Field, val: u32) -> Term {
|
||||
const U32_TERM_LEN: usize = 1 + 4;
|
||||
const U32_TERM_LEN: usize = 4 + 4;
|
||||
let mut buffer = allocate_vec(U32_TERM_LEN);
|
||||
buffer[0] = field.0;
|
||||
// we want BigEndian here to have lexicographic order
|
||||
// match the natural order of vals.
|
||||
BigEndian::write_u32(&mut buffer[1..5], val);
|
||||
// match the natural order of `(field, val)`
|
||||
BigEndian::write_u32(&mut buffer[0..4], field.0);
|
||||
BigEndian::write_u32(&mut buffer[4..], val);
|
||||
Term(buffer)
|
||||
}
|
||||
|
||||
@@ -60,10 +63,9 @@ impl Term {
|
||||
/// The first four bytes encode the field id `2` as a big-endian u32; the
|
||||
/// following bytes are the utf-8 representation of "abc".
|
||||
pub fn from_field_text(field: Field, text: &str) -> Term {
|
||||
let mut buffer = Vec::with_capacity(1 + text.len());
|
||||
buffer.clear();
|
||||
field.serialize(&mut buffer).unwrap();
|
||||
buffer.extend(text.as_bytes());
|
||||
let mut buffer = allocate_vec(4 + text.len());
|
||||
BigEndian::write_u32(&mut buffer[0..4], field.0);
|
||||
buffer[4..].clone_from_slice(text.as_bytes());
|
||||
Term(buffer)
|
||||
}
|
||||
|
||||
@@ -71,7 +73,7 @@ impl Term {
|
||||
///
|
||||
/// Panics if the term is not a u32 field.
|
||||
pub fn get_u32(&self) -> u32 {
|
||||
BigEndian::read_u32(&self.0[1..])
|
||||
BigEndian::read_u32(&self.0[4..])
|
||||
}
|
||||
|
||||
/// Builds a term from its byte representation.
|
||||
@@ -89,7 +91,7 @@ impl Term {
|
||||
/// If the term is a u32, its value is encoded according
|
||||
/// to `byteorder::BigEndian`.
|
||||
pub fn value(&self) -> &[u8] {
|
||||
&self.0[1..]
|
||||
&self.0[4..]
|
||||
}
|
||||
|
||||
/// Returns the text associated with the term.
|
||||
@@ -104,7 +106,7 @@ impl Term {
|
||||
|
||||
/// Set the texts only, keeping the field untouched.
|
||||
pub fn set_text(&mut self, text: &str) {
|
||||
self.0.resize(1, 0u8);
|
||||
self.0.resize(4, 0u8);
|
||||
self.0.extend(text.as_bytes());
|
||||
}
|
||||
|
||||
@@ -141,18 +143,18 @@ mod tests {
|
||||
{
|
||||
let term = Term::from_field_text(title_field, "test");
|
||||
assert_eq!(term.field(), title_field);
|
||||
assert_eq!(term.as_slice()[0], 1u8);
|
||||
assert_eq!(&term.as_slice()[1..], "test".as_bytes());
|
||||
assert_eq!(&term.as_slice()[0..4], &[0u8,0u8,0u8,1u8]);
|
||||
assert_eq!(&term.as_slice()[4..], "test".as_bytes());
|
||||
}
|
||||
{
|
||||
let term = Term::from_field_u32(count_field, 983u32);
|
||||
assert_eq!(term.field(), count_field);
|
||||
assert_eq!(term.as_slice()[0], 2u8);
|
||||
assert_eq!(term.as_slice().len(), 5);
|
||||
assert_eq!(term.as_slice()[1], 0u8);
|
||||
assert_eq!(term.as_slice()[2], 0u8);
|
||||
assert_eq!(term.as_slice()[3], (933u32 / 256u32) as u8);
|
||||
assert_eq!(term.as_slice()[4], (983u32 % 256u32) as u8);
|
||||
assert_eq!(&term.as_slice()[0..4], &[0u8, 0u8, 0u8, 2u8]);
|
||||
assert_eq!(term.as_slice().len(), 8);
|
||||
assert_eq!(term.as_slice()[4], 0u8);
|
||||
assert_eq!(term.as_slice()[5], 0u8);
|
||||
assert_eq!(term.as_slice()[6], (933u32 / 256u32) as u8);
|
||||
assert_eq!(term.as_slice()[7], (983u32 % 256u32) as u8);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user