mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-28 14:10:42 +00:00
more minor doc text changes
This commit is contained in:
@@ -20,16 +20,16 @@ pub use self::chained_collector::chain;
|
||||
///
|
||||
///
|
||||
/// For instance,
|
||||
/// - keeping track of the top 10 best documents
|
||||
/// - computing a break down over a fast field
|
||||
/// - computing the number of documents matching the query
|
||||
///
|
||||
/// - keeping track of the top 10 best documents
|
||||
/// - computing a breakdown over a fast field
|
||||
/// - computing the number of documents matching the query
|
||||
///
|
||||
/// Queries are in charge of pushing the `DocSet` to the collector.
|
||||
///
|
||||
/// As they work on multiple segment, they first inform
|
||||
/// the collector of a change in segment and then
|
||||
/// call the collect method to push document to the collector.
|
||||
/// As they work on multiple segments, they first inform
|
||||
/// the collector of a change in a segment and then
|
||||
/// call the collect method to push the document to the collector.
|
||||
///
|
||||
/// Temporally, our collector will receive calls
|
||||
/// - `.set_segment(0, segment_reader_0)`
|
||||
@@ -45,10 +45,10 @@ pub use self::chained_collector::chain;
|
||||
///
|
||||
/// Segments are not guaranteed to be visited in any specific order.
|
||||
pub trait Collector {
|
||||
/// `set_segment` is called before starting enumerating
|
||||
/// `set_segment` is called before beginning to enumerate
|
||||
/// on this segment.
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()>;
|
||||
/// The query pushes scored document to the collector via this method.
|
||||
/// The query pushes the scored document to the collector via this method.
|
||||
fn collect(&mut self, scored_doc: ScoredDoc);
|
||||
}
|
||||
|
||||
@@ -57,7 +57,7 @@ impl<'a, C: Collector> Collector for &'a mut C {
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
|
||||
(*self).set_segment(segment_local_id, segment)
|
||||
}
|
||||
/// The query pushes scored document to the collector via this method.
|
||||
/// The query pushes the scored document to the collector via this method.
|
||||
fn collect(&mut self, scored_doc: ScoredDoc) {
|
||||
(*self).collect(scored_doc);
|
||||
}
|
||||
@@ -120,10 +120,10 @@ pub mod tests {
|
||||
|
||||
|
||||
|
||||
/// Collects in order all of the fast field for all of the
|
||||
/// doc of the `DocSet`
|
||||
/// Collects in order all of the fast fields for all of the
|
||||
/// docs in the `DocSet`
|
||||
///
|
||||
/// This collector is essentially useful for tests.
|
||||
/// This collector is mainly useful for tests.
|
||||
pub struct FastFieldTestCollector {
|
||||
vals: Vec<u32>,
|
||||
field: Field,
|
||||
|
||||
@@ -5,7 +5,7 @@ use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
|
||||
|
||||
/// Multicollector makes it possible to collect on more than one collector
|
||||
/// Multicollector makes it possible to collect on more than one collector.
|
||||
/// It should only be used for use cases where the Collector type is unknown
|
||||
/// at compile time.
|
||||
/// If the type of the collectors is known, you should prefer to use `ChainedCollector`.
|
||||
@@ -60,4 +60,4 @@ mod tests {
|
||||
assert_eq!(count_collector.count(), 3);
|
||||
assert!(top_collector.at_capacity());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ pub struct TopCollector {
|
||||
|
||||
impl TopCollector {
|
||||
|
||||
/// Creates a top collector, with a number of document of "limit"
|
||||
/// Creates a top collector, with a number of documents equal to "limit".
|
||||
///
|
||||
/// # Panics
|
||||
/// The method panics if limit is 0
|
||||
@@ -65,9 +65,9 @@ impl TopCollector {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the decreasingly sorted K-best documents.
|
||||
/// Returns the K best documents sorted in decreasing order.
|
||||
///
|
||||
/// Calling this method will triggers the sort.
|
||||
/// Calling this method triggers the sort.
|
||||
/// The result of the sort is not cached.
|
||||
pub fn docs(&self) -> Vec<DocAddress> {
|
||||
self.score_docs()
|
||||
@@ -76,9 +76,9 @@ impl TopCollector {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns the decreasingly sorted K-best ScoredDocument.
|
||||
/// Returns K best ScoredDocument sorted in decreasing order.
|
||||
///
|
||||
/// Calling this method will triggers the sort.
|
||||
/// Calling this method triggers the sort.
|
||||
/// The result of the sort is not cached.
|
||||
pub fn score_docs(&self) -> Vec<(Score, DocAddress)> {
|
||||
let mut scored_docs: Vec<GlobalScoredDoc> = self.heap
|
||||
@@ -90,9 +90,9 @@ impl TopCollector {
|
||||
.map(|GlobalScoredDoc(score, doc_address)| (score, doc_address))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Return true iff at least K document have gone through
|
||||
/// the collector.
|
||||
|
||||
/// Return true iff at least K documents have gone through
|
||||
/// the collector.
|
||||
#[inline]
|
||||
pub fn at_capacity(&self, ) -> bool {
|
||||
self.heap.len() >= self.limit
|
||||
@@ -176,8 +176,8 @@ mod tests {
|
||||
.collect();
|
||||
assert_eq!(docs, vec!(7, 1, 5, 3));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -185,4 +185,4 @@ mod tests {
|
||||
fn test_top_0() {
|
||||
TopCollector::with_limit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::marker::Sync;
|
||||
|
||||
/// Write-once read many (WORM) abstraction for where tantivy's index should be stored.
|
||||
///
|
||||
/// There is currently two implementations of `Directory`
|
||||
/// There are currently two implementations of `Directory`
|
||||
///
|
||||
/// - The [`MmapDirectory`](struct.MmapDirectory.html), this
|
||||
/// should be your default choice.
|
||||
@@ -20,19 +20,19 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
|
||||
|
||||
/// Opens a virtual file for read.
|
||||
///
|
||||
/// Once a virtualfile is open, its data may not
|
||||
/// Once a virtual file is open, its data may not
|
||||
/// change.
|
||||
///
|
||||
/// Specifically, subsequent write or flush should
|
||||
/// have no effect the returned `ReadOnlySource` object.
|
||||
/// Specifically, subsequent writes or flushes should
|
||||
/// have no effect on the returned `ReadOnlySource` object.
|
||||
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, FileError>;
|
||||
|
||||
/// Removes a file
|
||||
///
|
||||
/// Removing a file will not affect eventual
|
||||
/// Removing a file will not affect an eventual
|
||||
/// existing ReadOnlySource pointing to it.
|
||||
///
|
||||
/// Removing a non existing files, yields a
|
||||
/// Removing a nonexistent file yields a
|
||||
/// `FileError::DoesNotExist`.
|
||||
fn delete(&self, path: &Path) -> result::Result<(), FileError>;
|
||||
|
||||
@@ -44,28 +44,28 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
|
||||
/// same path should return a `ReadOnlySource`.
|
||||
///
|
||||
/// Write operations may be aggressively buffered.
|
||||
/// The client of this trait is in charge to call flush
|
||||
/// The client of this trait is responsible for calling flush
|
||||
/// to ensure that subsequent `read` operations
|
||||
/// will take in account preceding `write` operations.
|
||||
/// will take into account preceding `write` operations.
|
||||
///
|
||||
/// Flush operation should also be persistent.
|
||||
///
|
||||
/// User shall not rely on `Drop` triggering `flush`.
|
||||
/// The user shall not rely on `Drop` triggering `flush`.
|
||||
/// Note that `RAMDirectory` will panic! if `flush`
|
||||
/// was not called.
|
||||
///
|
||||
/// The file may not previously exists.
|
||||
/// The file may not previously exist.
|
||||
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError>;
|
||||
|
||||
/// Atomically replace the content of a file by data.
|
||||
/// Atomically replace the content of a file with data.
|
||||
///
|
||||
/// This call ensures that reads can never *observe*
|
||||
/// a partially written file.
|
||||
///
|
||||
/// The file may or may not previously exists.
|
||||
/// The file may or may not previously exist.
|
||||
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()>;
|
||||
|
||||
/// Clone the directory and boxes the clone
|
||||
/// Clones the directory and boxes the clone
|
||||
fn box_clone(&self) -> Box<Directory>;
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ impl MmapDirectory {
|
||||
/// Creates a new MmapDirectory in a temporary directory.
|
||||
///
|
||||
/// This is mostly useful to test the MmapDirectory itself.
|
||||
/// For your unit test, prefer the RAMDirectory.
|
||||
/// For your unit tests, prefer the RAMDirectory.
|
||||
pub fn create_from_tempdir() -> io::Result<MmapDirectory> {
|
||||
let tempdir = try!(TempDir::new("index"));
|
||||
let tempdir_path = PathBuf::from(tempdir.path());
|
||||
@@ -81,7 +81,7 @@ impl MmapDirectory {
|
||||
}
|
||||
|
||||
/// Joins a relative_path to the directory `root_path`
|
||||
/// to create proper complete `filepath`.
|
||||
/// to create a proper complete `filepath`.
|
||||
fn resolve_path(&self, relative_path: &Path) -> PathBuf {
|
||||
self.root_path.join(relative_path)
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ use directory::error::{OpenWriteError, FileError};
|
||||
use directory::WritePtr;
|
||||
use super::shared_vec_slice::SharedVecSlice;
|
||||
|
||||
/// Writer associated to the `RAMDirectory`
|
||||
/// Writer associated with the `RAMDirectory`
|
||||
///
|
||||
/// The Writer just writes a buffer.
|
||||
///
|
||||
@@ -133,9 +133,9 @@ impl fmt::Debug for RAMDirectory {
|
||||
}
|
||||
|
||||
|
||||
/// Directory storing everything in anonymous memory.
|
||||
/// A Directory storing everything in anonymous memory.
|
||||
///
|
||||
/// It's main purpose is unit test.
|
||||
/// It is mainly meant for unit testing.
|
||||
/// Writes are only made visible upon flushing.
|
||||
///
|
||||
#[derive(Clone)]
|
||||
@@ -161,7 +161,7 @@ impl Directory for RAMDirectory {
|
||||
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
|
||||
let path_buf = PathBuf::from(path);
|
||||
let vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone());
|
||||
// force the creation of the file to mimick the MMap directory.
|
||||
// force the creation of the file to mimic the MMap directory.
|
||||
if try!(self.fs.write(path_buf.clone(), &Vec::new())) {
|
||||
Err(OpenWriteError::FileAlreadyExists(path_buf))
|
||||
}
|
||||
|
||||
@@ -24,8 +24,8 @@ pub trait DocSet {
|
||||
/// element.
|
||||
fn advance(&mut self,) -> bool;
|
||||
|
||||
/// After skipping, position the iterator in such a way `.doc()`
|
||||
/// will return a value greater or equal to target.
|
||||
/// After skipping, position the iterator in such a way that `.doc()`
|
||||
/// will return a value greater than or equal to target.
|
||||
///
|
||||
/// SkipResult expresses whether the `target value` was reached, overstepped,
|
||||
/// or if the `DocSet` was entirely consumed without finding any value
|
||||
|
||||
@@ -12,8 +12,8 @@ use common::HasLen;
|
||||
/// as well as the list of term positions.
|
||||
///
|
||||
/// Its main implementation is `SegmentPostings`,
|
||||
/// but other implementations mocking SegmentPostings exists,
|
||||
/// in order to help merging segments or for testing.
|
||||
/// but other implementations mocking SegmentPostings exist,
|
||||
/// in order to help when merging segments or for testing.
|
||||
pub trait Postings: DocSet {
|
||||
/// Returns the term frequency
|
||||
fn term_freq(&self,) -> u32;
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# Schema definition
|
||||
|
||||
Tantivy has a very strict schema.
|
||||
The schema defines information about the fields your index contains, that is for each field :
|
||||
The schema defines information about the fields your index contains, that is, for each field :
|
||||
|
||||
* the field name (may only contain letters `[a-zA-Z]`, digits `[0-9]`, and `_`)
|
||||
* the type of the field (currently only `text` and `u32` are supported)
|
||||
@@ -37,20 +37,20 @@ let schema = schema_builder.build();
|
||||
|
||||
We can split the problem of generating a search result page into two phases :
|
||||
|
||||
* identifying the list of 10 or so document to be displayed (Conceptually `query -> doc_ids[]`)
|
||||
* identifying the list of 10 or so documents to be displayed (Conceptually `query -> doc_ids[]`)
|
||||
* for each of these documents, retrieving the information required to generate the serp page. (`doc_ids[] -> Document[]`)
|
||||
|
||||
In the first phase, the hability to search for documents by the given field, is determined by the [`TextIndexingOptions`](enum.TextIndexingOptions.html) of our
|
||||
In the first phase, the ability to search for documents by the given field is determined by the [`TextIndexingOptions`](enum.TextIndexingOptions.html) of our
|
||||
[`TextOptions`](struct.TextOptions.html).
|
||||
|
||||
The effect of each possible settings is described more in detail [`TextIndexingOptions`](enum.TextIndexingOptions.html).
|
||||
The effect of each possible setting is described in more detail in [`TextIndexingOptions`](enum.TextIndexingOptions.html).
|
||||
|
||||
On the other hand, setting the field as stored or not determines whether the field should be returned when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc)
|
||||
is called.
|
||||
|
||||
### Shortcuts
|
||||
|
||||
For convenience, a few special value of `TextOptions` for your convenience.
|
||||
For convenience, a few special values of `TextOptions` are provided.
|
||||
They can be composed using the `|` operator.
|
||||
The example can be rewritten :
|
||||
|
||||
@@ -82,7 +82,7 @@ Just like for Text fields (see above),
|
||||
setting the field as stored defines whether the field will be
|
||||
returned when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called,
|
||||
and setting the field as indexed means that we will be able to perform queries such as `num_stars:10`.
|
||||
Note that contrary to text fields, u32 can only be indexed in one way for the moment.
|
||||
Note that unlike text fields, u32 can only be indexed in one way for the moment.
|
||||
This may change when we start supporting range queries.
|
||||
|
||||
The `fast` option on the other hand is specific to u32 fields, and is only relevant
|
||||
|
||||
@@ -15,7 +15,7 @@ use std::fmt;
|
||||
|
||||
|
||||
/// Tantivy has a very strict schema.
|
||||
/// You need to specify in advance, whether a field is indexed or not,
|
||||
/// You need to specify in advance whether a field is indexed or not,
|
||||
/// stored or not, and RAM-based or not.
|
||||
///
|
||||
/// This is done by creating a schema object, and
|
||||
@@ -483,4 +483,4 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user