From cae34ffe47253b6325f04da561c0a4f94cbcab94 Mon Sep 17 00:00:00 2001 From: Saroh <325288+saroh@users.noreply.github.com> Date: Wed, 2 Mar 2022 15:40:47 +0100 Subject: [PATCH 1/4] update fastfield doc --- fastfield_codecs/src/lib.rs | 1 + src/fastfield/alive_bitset.rs | 8 ++++---- src/fastfield/bytes/writer.rs | 2 +- src/fastfield/mod.rs | 6 +++--- src/fastfield/multivalued/writer.rs | 9 +++++---- src/fastfield/serializer/mod.rs | 2 +- src/fastfield/writer.rs | 2 +- 7 files changed, 16 insertions(+), 14 deletions(-) diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 1204dd6b0..78b5322b4 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -63,6 +63,7 @@ pub trait FastFieldDataAccess { } #[derive(Debug, Clone)] +/// Contains statistics used for compression in field writing. pub struct FastFieldStats { pub min_value: u64, pub max_value: u64, diff --git a/src/fastfield/alive_bitset.rs b/src/fastfield/alive_bitset.rs index bea0120fd..cc2a93643 100644 --- a/src/fastfield/alive_bitset.rs +++ b/src/fastfield/alive_bitset.rs @@ -7,7 +7,7 @@ use ownedbytes::OwnedBytes; use crate::space_usage::ByteCount; use crate::DocId; -/// Write a alive `BitSet` +/// Write an alive `BitSet` /// /// where `alive_bitset` is the set of alive `DocId`. /// Warning: this function does not call terminate. The caller is in charge of @@ -55,7 +55,7 @@ impl AliveBitSet { AliveBitSet::from(readonly_bitset) } - /// Opens a delete bitset given its file. + /// Opens an alive bitset given its file. pub fn open(bytes: OwnedBytes) -> AliveBitSet { let bitset = ReadOnlyBitSet::open(bytes); AliveBitSet::from(bitset) @@ -79,13 +79,13 @@ impl AliveBitSet { self.bitset.iter() } - /// Get underlying bitset + /// Get underlying bitset. #[inline] pub fn bitset(&self) -> &ReadOnlyBitSet { &self.bitset } - /// The number of deleted docs + /// The number of alive documents. 
pub fn num_alive_docs(&self) -> usize { self.num_alive_docs } diff --git a/src/fastfield/bytes/writer.rs b/src/fastfield/bytes/writer.rs index 91f74adf9..2a98c1c08 100644 --- a/src/fastfield/bytes/writer.rs +++ b/src/fastfield/bytes/writer.rs @@ -7,7 +7,7 @@ use crate::DocId; /// Writer for byte array (as in, any number of bytes per document) fast fields /// -/// This `BytesFastFieldWriter` is only useful for advanced user. +/// This `BytesFastFieldWriter` is only useful for advanced users. /// The normal way to get your associated bytes in your index /// is to /// - declare your field with fast set to `Cardinality::SingleValue` diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index f864e8d4e..9c780c893 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -2,7 +2,7 @@ //! //! It is the equivalent of `Lucene`'s `DocValues`. //! -//! Fast fields is a column-oriented fashion storage of `tantivy`. +//! A fast field is a column-oriented fashion storage for `tantivy`. //! //! It is designed for the fast random access of some document //! fields given a document id. @@ -12,8 +12,8 @@ //! //! //! Fields have to be declared as `FAST` in the schema. -//! Currently only 64-bits integers (signed or unsigned) are -//! supported. +//! Currently supported fields are: u64, i64 and f64. +//! u8 are supported for advanced usage. //! //! They are stored in a bit-packed fashion so that their //! memory usage is directly linear with the amplitude of the diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index d94a0bf6f..a848ee65d 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -14,7 +14,7 @@ use crate::DocId; /// Writer for multi-valued (as in, more than one value per document) /// int fast field. /// -/// This `Writer` is only useful for advanced user. +/// This `Writer` is only useful for advanced users. 
/// The normal way to get your multivalued int in your index /// is to /// - declare your field with fast set to `Cardinality::MultiValues` @@ -23,10 +23,11 @@ use crate::DocId; /// /// The `MultiValuedFastFieldWriter` can be acquired from the /// fastfield writer, by calling -/// [`.get_multivalue_writer(...)`](./struct.FastFieldsWriter.html#method.get_multivalue_writer). +/// [`.get_multivalue_writer_mut(...)`]( +/// ./struct.FastFieldsWriter.html#method.get_multivalue_writer_mut). /// -/// Once acquired, writing is done by calling calls to -/// `.add_document_vals(&[u64])` once per document. +/// Once acquired, writing is done by calling +/// [`.add_document_vals(&[u64])`](MultiValuedFastFieldWriter::add_document_vals) once per document. /// /// The serializer makes it possible to remap all of the values /// that were pushed to the writer using a mapping. diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index b138689d9..cc5329730 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -197,7 +197,7 @@ impl CompositeFastFieldSerializer { /// Closes the serializer /// - /// After this call the data must be persistently save on disk. + /// After this call the data must be persistently saved on disk. pub fn close(self) -> io::Result<()> { self.composite_write.close() } diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 34bc9099d..6d19ed5f8 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -14,7 +14,7 @@ use crate::postings::UnorderedTermId; use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema}; use crate::termdict::TermOrdinal; -/// The fastfieldswriter regroup all of the fast field writers. +/// The `FastFieldsWriter` regroups all of the fast field writers. 
pub struct FastFieldsWriter { single_value_writers: Vec, multi_values_writers: Vec, From a41d3d51a47210c64a914ed39bba5cf1ae6cc1b1 Mon Sep 17 00:00:00 2001 From: Antoine G <325288+saroh@users.noreply.github.com> Date: Thu, 3 Mar 2022 11:25:06 +0100 Subject: [PATCH 2/4] Update fastfield_codecs/src/lib.rs --- fastfield_codecs/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 78b5322b4..9285321ea 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -63,7 +63,7 @@ pub trait FastFieldDataAccess { } #[derive(Debug, Clone)] -/// Contains statistics used for compression in field writing. +/// Statistics are used in codec detection and stored in the fast field footer. pub struct FastFieldStats { pub min_value: u64, pub max_value: u64, From 8771b2673f841c6e03c74b1cf4fbc544a89ad799 Mon Sep 17 00:00:00 2001 From: Antoine G <325288+saroh@users.noreply.github.com> Date: Thu, 3 Mar 2022 11:25:24 +0100 Subject: [PATCH 3/4] Update src/fastfield/writer.rs Co-authored-by: PSeitz --- src/fastfield/writer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 6d19ed5f8..a05cf1629 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -14,7 +14,7 @@ use crate::postings::UnorderedTermId; use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema}; use crate::termdict::TermOrdinal; -/// The `FastFieldsWriter` regroups all of the fast field writers. +/// The `FastFieldsWriter` groups all of the fast field writers. 
pub struct FastFieldsWriter { single_value_writers: Vec, multi_values_writers: Vec, From d36e0a954986c35565e545886dd500ae4d4d19cd Mon Sep 17 00:00:00 2001 From: saroh <325288+saroh@users.noreply.github.com> Date: Thu, 3 Mar 2022 17:43:18 +0100 Subject: [PATCH 4/4] fix fastfield doc --- src/fastfield/mod.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 9c780c893..be5285d39 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -12,11 +12,10 @@ //! //! //! Fields have to be declared as `FAST` in the schema. -//! Currently supported fields are: u64, i64 and f64. -//! u8 are supported for advanced usage. +//! Currently supported fields are: u64, i64, f64 and bytes. //! -//! They are stored in a bit-packed fashion so that their -//! memory usage is directly linear with the amplitude of the +//! u64, i64 and f64 fields are stored in a bit-packed fashion so that +//! their memory usage is directly linear with the amplitude of the //! values stored. //! //! Read access performance is comparable to that of an array lookup.