Merge branch 'master' of github.com:tantivy-search/tantivy

Paul Masurel
2018-05-08 00:08:17 -07:00
3 changed files with 28 additions and 2 deletions

View File

@@ -4,6 +4,8 @@ use schema::Field;
use std::io;
use std::io::Write;
/// The fieldnorms serializer is in charge of
/// the serialization of field norms for all fields.
pub struct FieldNormsSerializer {
composite_write: CompositeWrite,
}
@@ -16,6 +18,7 @@ impl FieldNormsSerializer {
Ok(FieldNormsSerializer { composite_write })
}
/// Serialize the fieldnorm data for the given field
pub fn serialize_field(&mut self, field: Field, fieldnorms_data: &[u8]) -> io::Result<()> {
let write = self.composite_write.for_field(field);
write.write_all(fieldnorms_data)?;
@@ -23,6 +26,7 @@ impl FieldNormsSerializer {
Ok(())
}
/// Flush and close the underlying composite write
pub fn close(self) -> io::Result<()> {
self.composite_write.close()?;
Ok(())

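To make the pattern described by the new doc comments concrete, here is a small standalone sketch of the serializer's write path: each field's fieldnorm bytes are handed to a per-field writer, and everything is flushed when the serializer is closed. ToyCompositeWrite and its methods are hypothetical stand-ins for illustration only, not tantivy's CompositeWrite API.

use std::collections::BTreeMap;
use std::io::{self, Write};

// Hypothetical stand-in for `CompositeWrite`: one byte buffer per field id,
// flushed to the underlying sink in field order when closed.
struct ToyCompositeWrite {
    per_field: BTreeMap<u32, Vec<u8>>,
    sink: Box<dyn Write>,
}

impl ToyCompositeWrite {
    // Analogous to `for_field`: hand back the write buffer for one field.
    fn for_field(&mut self, field: u32) -> &mut Vec<u8> {
        self.per_field.entry(field).or_default()
    }

    // Analogous to `close`: flush every per-field buffer to the sink.
    fn close(mut self) -> io::Result<()> {
        for bytes in self.per_field.values() {
            self.sink.write_all(bytes)?;
        }
        self.sink.flush()
    }
}

fn main() -> io::Result<()> {
    let mut composite = ToyCompositeWrite {
        per_field: BTreeMap::new(),
        sink: Box::new(io::sink()),
    };
    // Like `serialize_field`: write one fieldnorm byte per document for each field.
    composite.for_field(0).write_all(&[3u8, 0, 7])?;
    composite.for_field(1).write_all(&[1u8, 1, 1])?;
    // Like `close`: flush everything and consume the writer.
    composite.close()
}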
View File

@@ -6,13 +6,20 @@ use schema::Field;
use schema::Schema;
use std::io;
/// The `FieldNormsWriter` is in charge of tracking the fieldnorm byte
/// of each document for each field with field norms.
///
/// `FieldNormsWriter` stores a `Vec<u8>` for each tracked field, using a
/// byte per document per field.
pub struct FieldNormsWriter {
fields: Vec<Field>,
fieldnorms_buffer: Vec<Vec<u8>>,
}
impl FieldNormsWriter {
pub fn fields_with_fieldnorm(schema: &Schema) -> Vec<Field> {
/// Returns the fields that should have field norms computed
/// according to the given schema.
pub(crate) fn fields_with_fieldnorm(schema: &Schema) -> Vec<Field> {
schema
.fields()
.iter()
@@ -22,6 +29,8 @@ impl FieldNormsWriter {
.collect::<Vec<Field>>()
}
/// Initialize the writer with state for tracking each field of the
/// schema that has field norms.
pub fn for_schema(schema: &Schema) -> FieldNormsWriter {
let fields = FieldNormsWriter::fields_with_fieldnorm(schema);
let max_field = fields
@@ -36,12 +45,24 @@ impl FieldNormsWriter {
}
}
/// Ensure that all documents in 0..max_doc have a byte associated with them
/// in each of the fieldnorm vectors.
///
/// Will extend with 0-bytes for documents that have not been seen.
pub fn fill_up_to_max_doc(&mut self, max_doc: DocId) {
for &field in self.fields.iter() {
self.fieldnorms_buffer[field.0 as usize].resize(max_doc as usize, 0u8);
}
}
/// Set the fieldnorm byte for the given document and field.
///
/// Will internally convert the u32 `fieldnorm` value to the appropriate byte
/// to approximate the field norm in less space.
///
/// * doc - the document id
/// * field - the field being set
/// * fieldnorm - the number of terms present in document `doc` in field `field`
pub fn record(&mut self, doc: DocId, field: Field, fieldnorm: u32) {
let fieldnorm_buffer: &mut Vec<u8> = &mut self.fieldnorms_buffer[field.0 as usize];
assert!(
@@ -53,6 +74,7 @@ impl FieldNormsWriter {
fieldnorm_buffer[doc as usize] = fieldnorm_to_id(fieldnorm);
}
/// Serialize the seen fieldnorm values to the serializer for all fields.
pub fn serialize(&self, fieldnorms_serializer: &mut FieldNormsSerializer) -> io::Result<()> {
for &field in self.fields.iter() {
let fieldnorm_values: &[u8] = &self.fieldnorms_buffer[field.0 as usize][..];

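A similarly hedged sketch of the writer side described above: one Vec<u8> per tracked field, one byte per document, unseen documents padded with 0, and a lossy u32-to-u8 mapping applied when recording. ToyFieldNormsWriter and toy_fieldnorm_to_id are illustrative stand-ins; in particular, the quantization below is not tantivy's actual fieldnorm_to_id table.

// Toy quantization: clamp the term count into one byte. Tantivy's real
// `fieldnorm_to_id` uses a dedicated lookup table to approximate large counts.
fn toy_fieldnorm_to_id(fieldnorm: u32) -> u8 {
    fieldnorm.min(255) as u8
}

// Minimal stand-in for `FieldNormsWriter`: one byte buffer per field id.
struct ToyFieldNormsWriter {
    fieldnorms_buffer: Vec<Vec<u8>>,
}

impl ToyFieldNormsWriter {
    fn new(num_fields: usize) -> ToyFieldNormsWriter {
        ToyFieldNormsWriter {
            fieldnorms_buffer: vec![Vec::new(); num_fields],
        }
    }

    // Analogous to `record`: doc ids must arrive in increasing order;
    // skipped documents in between keep a 0 fieldnorm byte.
    fn record(&mut self, doc: u32, field: usize, fieldnorm: u32) {
        let buffer = &mut self.fieldnorms_buffer[field];
        assert!(buffer.len() <= doc as usize, "cannot record a doc twice");
        buffer.resize(doc as usize + 1, 0u8);
        buffer[doc as usize] = toy_fieldnorm_to_id(fieldnorm);
    }

    // Analogous to `fill_up_to_max_doc`: pad every field out to `max_doc` bytes.
    fn fill_up_to_max_doc(&mut self, max_doc: u32) {
        for buffer in &mut self.fieldnorms_buffer {
            buffer.resize(max_doc as usize, 0u8);
        }
    }
}

fn main() {
    let mut writer = ToyFieldNormsWriter::new(2);
    writer.record(0, 0, 12);      // doc 0 has 12 terms in field 0
    writer.record(2, 0, 300);     // doc 1 was skipped and stays 0; 300 saturates to 255 here
    writer.fill_up_to_max_doc(4); // doc 3, and all of field 1, are padded with 0
    assert_eq!(writer.fieldnorms_buffer[0], vec![12, 0, 255, 0]);
    assert_eq!(writer.fieldnorms_buffer[1], vec![0, 0, 0, 0]);
}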
View File

@@ -194,7 +194,7 @@ pub mod tokenizer;
pub mod collector;
pub mod directory;
pub mod fastfield;
pub(crate) mod fieldnorm;
pub mod fieldnorm;
pub mod postings;
pub mod query;
pub mod schema;
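The last hunk flips the visibility of the fieldnorm module between pub(crate) mod and pub mod. A generic, self-contained illustration of what that difference means (toy module names, nothing tantivy-specific):

// Toy illustration of `pub(crate) mod` vs `pub mod`, not tantivy code.
pub(crate) mod internal {
    // `pub(crate)` on the module caps visibility at the crate boundary:
    // downstream crates cannot even name `internal`.
    pub fn helper() -> u32 { 1 }
}

pub mod exported {
    // `pub mod` makes the module part of the crate's public API, so a
    // downstream crate could `use this_crate::exported::api;` if this were a library.
    pub fn api() -> u32 { 2 }
}

fn main() {
    // Inside the defining crate, both paths are reachable.
    assert_eq!(internal::helper() + exported::api(), 3);
}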