diff --git a/src/fieldnorm/serializer.rs b/src/fieldnorm/serializer.rs index 5308d9f6a..af6bcd3dc 100644 --- a/src/fieldnorm/serializer.rs +++ b/src/fieldnorm/serializer.rs @@ -4,6 +4,8 @@ use schema::Field; use std::io; use std::io::Write; +/// The fieldnorms serializer is in charge of +/// the serialization of field norms for all fields. pub struct FieldNormsSerializer { composite_write: CompositeWrite, } @@ -16,6 +18,7 @@ impl FieldNormsSerializer { Ok(FieldNormsSerializer { composite_write }) } + /// Writes `fieldnorms_data` to the composite write's output for `field`. pub fn serialize_field(&mut self, field: Field, fieldnorms_data: &[u8]) -> io::Result<()> { let write = self.composite_write.for_field(field); write.write_all(fieldnorms_data)?; @@ -23,6 +26,7 @@ impl FieldNormsSerializer { Ok(()) } + /// Consumes the serializer and closes the underlying composite write. pub fn close(self) -> io::Result<()> { self.composite_write.close()?; Ok(()) diff --git a/src/fieldnorm/writer.rs b/src/fieldnorm/writer.rs index c36ea99fa..5302f2856 100644 --- a/src/fieldnorm/writer.rs +++ b/src/fieldnorm/writer.rs @@ -6,13 +6,20 @@ use schema::Field; use schema::Schema; use std::io; +/// The `FieldNormsWriter` is in charge of tracking the fieldnorm byte +/// of each document for each field with field norms. +/// +/// `FieldNormsWriter` stores a Vec for each tracked field, using a +/// byte per document per field. pub struct FieldNormsWriter { fields: Vec, fieldnorms_buffer: Vec>, } impl FieldNormsWriter { - pub fn fields_with_fieldnorm(schema: &Schema) -> Vec { + /// Returns the fields that should have field norms computed + /// according to the given schema. + pub(crate) fn fields_with_fieldnorm(schema: &Schema) -> Vec { schema .fields() .iter() @@ -22,6 +29,8 @@ impl FieldNormsWriter { .collect::>() } + /// Creates a `FieldNormsWriter` with state for tracking the field norm fields + /// specified in the schema. 
pub fn for_schema(schema: &Schema) -> FieldNormsWriter { let fields = FieldNormsWriter::fields_with_fieldnorm(schema); let max_field = fields @@ -36,12 +45,24 @@ impl FieldNormsWriter { } } + /// Ensure that all documents in `0..max_doc` have a byte associated with them + /// in each of the fieldnorm vectors. + /// + /// Resizes each tracked field's buffer to `max_doc` entries, padding with 0-bytes for documents that have not been seen. pub fn fill_up_to_max_doc(&mut self, max_doc: DocId) { for &field in self.fields.iter() { self.fieldnorms_buffer[field.0 as usize].resize(max_doc as usize, 0u8); } } + /// Set the fieldnorm byte for the given document for the given field. + /// + /// Converts the `u32` `fieldnorm` value to a single byte with `fieldnorm_to_id` + /// to approximate the field norm in less space. + /// + /// * `doc` - the document id + /// * `field` - the field being set + /// * `fieldnorm` - the number of terms present in document `doc` in field `field` pub fn record(&mut self, doc: DocId, field: Field, fieldnorm: u32) { let fieldnorm_buffer: &mut Vec = &mut self.fieldnorms_buffer[field.0 as usize]; assert!( @@ -53,6 +74,7 @@ impl FieldNormsWriter { fieldnorm_buffer[doc as usize] = fieldnorm_to_id(fieldnorm); } + /// Serializes the recorded fieldnorm bytes of every tracked field to `fieldnorms_serializer`. pub fn serialize(&self, fieldnorms_serializer: &mut FieldNormsSerializer) -> io::Result<()> { for &field in self.fields.iter() { let fieldnorm_values: &[u8] = &self.fieldnorms_buffer[field.0 as usize][..]; diff --git a/src/lib.rs b/src/lib.rs index f1e59f2c9..29e3eb89b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -194,7 +194,7 @@ pub mod tokenizer; pub mod collector; pub mod directory; pub mod fastfield; -pub(crate) mod fieldnorm; +pub mod fieldnorm; pub mod postings; pub mod query; pub mod schema;