mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 16:52:55 +00:00
Merge branch 'master' of github.com:tantivy-search/tantivy
This commit is contained in:
@@ -4,6 +4,8 @@ use schema::Field;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
|
||||
/// The fieldnorms serializer is in charge of
|
||||
/// the serialization of field norms for all fields.
|
||||
pub struct FieldNormsSerializer {
|
||||
// Underlying writer: `serialize_field` obtains a per-field writer from it
// through `for_field`, and `close` closes it.
composite_write: CompositeWrite,
|
||||
}
|
||||
@@ -16,6 +18,7 @@ impl FieldNormsSerializer {
|
||||
Ok(FieldNormsSerializer { composite_write })
|
||||
}
|
||||
|
||||
/// Serialize the given field
|
||||
pub fn serialize_field(&mut self, field: Field, fieldnorms_data: &[u8]) -> io::Result<()> {
|
||||
let write = self.composite_write.for_field(field);
|
||||
write.write_all(fieldnorms_data)?;
|
||||
@@ -23,6 +26,7 @@ impl FieldNormsSerializer {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Clean up / flush / close
|
||||
pub fn close(self) -> io::Result<()> {
|
||||
self.composite_write.close()?;
|
||||
Ok(())
|
||||
|
||||
@@ -6,13 +6,20 @@ use schema::Field;
|
||||
use schema::Schema;
|
||||
use std::io;
|
||||
|
||||
/// The `FieldNormsWriter` is in charge of tracking the fieldnorm byte
|
||||
/// of each document for each field with field norms.
|
||||
///
|
||||
/// `FieldNormsWriter` stores a `Vec<u8>` for each tracked field, using a
|
||||
/// byte per document per field.
|
||||
pub struct FieldNormsWriter {
|
||||
// Fields whose norms are tracked; this is the list iterated by
// `fill_up_to_max_doc` and `serialize`.
fields: Vec<Field>,
|
||||
// Outer vector is indexed by `field.0 as usize`; each inner vector is
// indexed by document id and holds one fieldnorm byte per document.
fieldnorms_buffer: Vec<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl FieldNormsWriter {
|
||||
pub fn fields_with_fieldnorm(schema: &Schema) -> Vec<Field> {
|
||||
/// Returns the fields that should have field norms computed
|
||||
/// according to the given schema.
|
||||
pub(crate) fn fields_with_fieldnorm(schema: &Schema) -> Vec<Field> {
|
||||
schema
|
||||
.fields()
|
||||
.iter()
|
||||
@@ -22,6 +29,8 @@ impl FieldNormsWriter {
|
||||
.collect::<Vec<Field>>()
|
||||
}
|
||||
|
||||
/// Initialize with state for tracking the field norm fields
|
||||
/// specified in the schema.
|
||||
pub fn for_schema(schema: &Schema) -> FieldNormsWriter {
|
||||
let fields = FieldNormsWriter::fields_with_fieldnorm(schema);
|
||||
let max_field = fields
|
||||
@@ -36,12 +45,24 @@ impl FieldNormsWriter {
|
||||
}
|
||||
}
|
||||
|
||||
/// Ensure that all documents in `0..max_doc` have a byte associated with them
|
||||
/// in each of the fieldnorm vectors.
|
||||
///
|
||||
/// Will extend with 0-bytes for documents that have not been seen.
|
||||
pub fn fill_up_to_max_doc(&mut self, max_doc: DocId) {
|
||||
// Only the buffers of tracked fields are resized.
for &field in self.fields.iter() {
|
||||
// NOTE(review): `Vec::resize` pads with `0u8` when the buffer is shorter
// than `max_doc`, but it also truncates when the buffer is already
// longer; presumably callers never pass a smaller `max_doc`; confirm.
self.fieldnorms_buffer[field.0 as usize].resize(max_doc as usize, 0u8);
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the fieldnorm byte for the given document for the given field.
|
||||
///
|
||||
/// Will internally convert the u32 `fieldnorm` value to the appropriate byte
|
||||
/// to approximate the field norm in less space.
|
||||
///
|
||||
/// * doc - the document id
|
||||
/// * field - the field being set
|
||||
/// * fieldnorm - the number of terms present in document `doc` in field `field`
|
||||
pub fn record(&mut self, doc: DocId, field: Field, fieldnorm: u32) {
|
||||
let fieldnorm_buffer: &mut Vec<u8> = &mut self.fieldnorms_buffer[field.0 as usize];
|
||||
assert!(
|
||||
@@ -53,6 +74,7 @@ impl FieldNormsWriter {
|
||||
fieldnorm_buffer[doc as usize] = fieldnorm_to_id(fieldnorm);
|
||||
}
|
||||
|
||||
/// Serialize the seen fieldnorm values to the serializer for all fields.
|
||||
pub fn serialize(&self, fieldnorms_serializer: &mut FieldNormsSerializer) -> io::Result<()> {
|
||||
for &field in self.fields.iter() {
|
||||
let fieldnorm_values: &[u8] = &self.fieldnorms_buffer[field.0 as usize][..];
|
||||
|
||||
@@ -194,7 +194,7 @@ pub mod tokenizer;
|
||||
pub mod collector;
|
||||
pub mod directory;
|
||||
pub mod fastfield;
|
||||
pub(crate) mod fieldnorm;
|
||||
pub mod fieldnorm;
|
||||
pub mod postings;
|
||||
pub mod query;
|
||||
pub mod schema;
|
||||
|
||||
Reference in New Issue
Block a user