From d85cfab7a1d2208773c465267abb5a1725b17ae4 Mon Sep 17 00:00:00 2001 From: "Michael J. Curry" Date: Thu, 29 Sep 2016 16:28:10 -0400 Subject: [PATCH 1/7] minor fixes to grammar and usage in example file --- examples/simple_search.rs | 46 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/examples/simple_search.rs b/examples/simple_search.rs index 851968576..39efabe13 100644 --- a/examples/simple_search.rs +++ b/examples/simple_search.rs @@ -25,7 +25,7 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { // # Defining the schema // - // Tantivy index require to have a very strict schema. + // The Tantivy index requires a very strict schema. // The schema declares which fields are in the index, // and for each field, its type and "the way it should // be indexed". @@ -47,7 +47,7 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { // `STORED` means that the field will also be saved // in a compressed, row-oriented key-value store. // This store is useful to reconstruct the - // document that were selected during the search phase. + // documents that were selected during the search phase. schema_builder.add_text_field("title", TEXT | STORED); // Our first field is body. @@ -64,29 +64,29 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { // Let's create a brand new index. // // This will actually just save a meta.json - // with our schema the directory. + // with our schema in the directory. let index = try!(Index::create(index_path, schema.clone())); // To insert document we need an index writer. - // There shall be only one writer at a time. - // Besides, this single `IndexWriter` is already + // There must be only one writer at a time. + // This single `IndexWriter` is already // multithreaded. // - // Here we used a buffer of 1 GB. Using a bigger + // Here we use a buffer of 1 GB. Using a bigger // heap for the indexer can increase its throughput. 
// This buffer will be split between the indexing // threads. let mut index_writer = try!(index.writer(1_000_000_000)); - // Let's now index our documents! + // Let's index our documents! // We first need a handle on the title and the body field. // ### Create a document "manually". // - // We can create a document manually, by setting adding the fields + // We can create a document manually, by setting the fields // one by one in a Document object. let title = schema.get_field("title").unwrap(); let body = schema.get_field("body").unwrap(); @@ -122,7 +122,7 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { // This is an example, so we will only index 3 documents // here. You can check out tantivy's tutorial to index // the English wikipedia. Tantivy's indexing is rather fast. - // Indexing 5 millions articles of the English wikipedia takes + // Indexing 5 million articles of the English wikipedia takes // around 4 minutes on my computer! @@ -131,56 +131,56 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { // At this point our documents are not searchable. // // - // We need to call .commit() explicitely to force the + // We need to call .commit() explicitly to force the // index_writer to finish processing the documents in the queue, - // flush the current index on the disk, and advertise + // flush the current index to the disk, and advertise // the existence of new documents. // // This call is blocking. try!(index_writer.commit()); // If `.commit()` returns correctly, then all of the - // documents have been added before are guaranteed to be + // documents that have been added are guaranteed to be // persistently indexed. // // In the scenario of a crash or a power failure, - // tantivy behaves as if it rollbacked to its last + // tantivy behaves as if it has rolled back to its last // commit. // # Searching // - // Let's search our index. This starts + // Let's search our index. We start // by creating a searcher. 
There can be more // than one searcher at a time. // - // You are supposed to acquire a search + // You should create a searcher // every time you start a "search query". let searcher = index.searcher(); // The query parser can interpret human queries. // Here, if the user does not specify which - // field he wants to search, tantivy will search + // field they want to search, tantivy will search // in both title and body. let query_parser = QueryParser::new(index.schema(), vec!(title, body)); // QueryParser may fail if the query is not in the right // format. For user facing applications, this can be a problem. - // A ticket has been filled regarding this problem. + // A ticket has been opened regarding this problem. let query = try!(query_parser.parse_query("sea whale")); // A query defines a set of documents, as // well as the way they should be scored. // - // Query created by the query parser are scoring according + // A query created by the query parser is scored according // to a metric called Tf-Idf, and will consider // any document matching at least one of our terms. // ### Collectors // - // We are not interested in all of the document but - // only in the top 10. Keep track of our top 10 best documents + // We are not interested in all of the documents but + // only in the top 10. Keeping track of our top 10 best documents // is the role of the TopCollector. let mut top_collector = TopCollector::with_limit(10); @@ -188,14 +188,14 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { // We can now perform our query. try!(query.search(&searcher, &mut top_collector)); - // Our top collector now contains are 10 + // Our top collector now contains the 10 // most relevant doc ids... let doc_addresses = top_collector.docs(); // The actual documents still need to be // retrieved from Tantivy's store. 
// - // Since body was not configured as stored, + // Since the body field was not configured as stored, // the document returned will only contain // a title. @@ -205,4 +205,4 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { } Ok(()) -} \ No newline at end of file +} From 9c1d08c4891b3659adea7a76474fae17e5061d37 Mon Sep 17 00:00:00 2001 From: "Michael J. Curry" Date: Thu, 29 Sep 2016 16:31:50 -0400 Subject: [PATCH 2/7] literally one character change to README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 395acd5a4..7a5f414b6 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ It will walk you through getting a wikipedia search engine up and running in a f Tantivy has a git submodule called `simdcomp`. After cloning the repository, you will need to initialize and update -the submodules. The project can then be build using `cargo`. +the submodules. The project can then be built using `cargo`. git clone git@github.com:fulmicoton/tantivy.git git submodule init From b9ef5909ad1a82a8b7b7d33d3fedaee0ccfd9ec9 Mon Sep 17 00:00:00 2001 From: "Michael J. Curry" Date: Thu, 29 Sep 2016 16:47:02 -0400 Subject: [PATCH 3/7] small changes to doc comments --- src/collector/count_collector.rs | 4 ++-- src/lib.rs | 2 +- src/postings/docset.rs | 6 +++--- src/postings/postings.rs | 4 ++-- src/query/query_parser.rs | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index 9eaf74165..dc1d85580 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -5,13 +5,13 @@ use SegmentReader; use SegmentLocalId; /// `CountCollector` collector only counts how many -/// document are matching the query. +/// documents match the query. 
pub struct CountCollector { count: usize, } impl CountCollector { - /// Returns the count of document that where + /// Returns the count of documents that were /// collected. pub fn count(&self,) -> usize { self.count diff --git a/src/lib.rs b/src/lib.rs index 0f335c425..15378e025 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,7 +96,7 @@ pub use postings::SegmentPostingsOption; /// u32 identifying a document within a segment. -/// Document gets their doc id assigned incrementally, +/// Documents have their doc id assigned incrementally, /// as they are added in the segment. pub type DocId = u32; diff --git a/src/postings/docset.rs b/src/postings/docset.rs index e7fb59315..e84c56380 100644 --- a/src/postings/docset.rs +++ b/src/postings/docset.rs @@ -4,7 +4,7 @@ use std::borrow::BorrowMut; use std::cmp::Ordering; -/// Expressed the outcome of a call to `DocSet`'s `.skip_next(...)`. +/// Expresses the outcome of a call to `DocSet`'s `.skip_next(...)`. #[derive(PartialEq, Eq, Debug)] pub enum SkipResult { /// target was in the docset @@ -24,7 +24,7 @@ pub trait DocSet { /// element. fn advance(&mut self,) -> bool; - /// After skipping position, the iterator in such a way `.doc()` + /// After skipping, position the iterator in such a way `.doc()` /// will return a value greater or equal to target. /// /// SkipResult expresses whether the `target value` was reached, overstepped, @@ -97,4 +97,4 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet { } } - \ No newline at end of file + diff --git a/src/postings/postings.rs b/src/postings/postings.rs index e3323ae19..bffbeb876 100644 --- a/src/postings/postings.rs +++ b/src/postings/postings.rs @@ -12,8 +12,8 @@ use common::HasLen; /// as well as the list of term positions. /// /// Its main implementation is `SegmentPostings`, -/// but some other implementation mocking SegmentPostings exists, -/// in order to help merging segment or for testing. 
+/// but other implementations mocking SegmentPostings exists, +/// in order to help merging segments or for testing. pub trait Postings: DocSet { /// Returns the term frequency fn term_freq(&self,) -> u32; diff --git a/src/query/query_parser.rs b/src/query/query_parser.rs index f15196a06..89876825d 100644 --- a/src/query/query_parser.rs +++ b/src/query/query_parser.rs @@ -29,7 +29,7 @@ pub enum ParsingError { /// Tantivy's Query parser /// -/// The language covered by the current is extremely simple. +/// The language covered by the current parser is extremely simple. /// /// * simple terms: "e.g.: `Barack Obama` are simply analyzed using /// tantivy's `StandardTokenizer`, hence becoming `["barack", "obama"]`. @@ -44,7 +44,7 @@ pub enum ParsingError { /// /// This behavior is slower, but is not a bad idea if the user is sorting /// by relevance : The user typically just scans through the first few -/// documents in order of decreasing relevance and will stop when the document +/// documents in order of decreasing relevance and will stop when the documents /// are not relevant anymore. /// Making it possible to make this behavior customizable is tracked in /// [issue #27](https://github.com/fulmicoton/tantivy/issues/27). @@ -135,9 +135,9 @@ impl QueryParser { /// Parse a query /// /// Note that `parse_query` returns an error if the input - /// not a valid query. + /// is not a valid query. /// - /// There is currently no lenient mode for the query parse + /// There is currently no lenient mode for the query parser /// which makes it a bad choice for a public/broad user search engine. /// /// Implementing a lenient mode for this query parser is tracked From 54437105445e9cfe0d6021612b00a4267be0cce0 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Curry" Date: Thu, 29 Sep 2016 21:51:12 -0400 Subject: [PATCH 4/7] more minor doc text changes --- src/collector/mod.rs | 24 ++++++++++++------------ src/collector/multi_collector.rs | 4 ++-- src/collector/top_collector.rs | 22 +++++++++++----------- src/directory/directory.rs | 26 +++++++++++++------------- src/directory/mmap_directory.rs | 4 ++-- src/directory/ram_directory.rs | 8 ++++---- src/postings/docset.rs | 4 ++-- src/postings/postings.rs | 4 ++-- src/schema/mod.rs | 12 ++++++------ src/schema/schema.rs | 4 ++-- 10 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/collector/mod.rs b/src/collector/mod.rs index de4092738..81bdc330f 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -20,16 +20,16 @@ pub use self::chained_collector::chain; /// /// /// For instance, -/// - keeping track of the top 10 best documents -/// - computing a break down over a fast field -/// - computing the number of documents matching the query /// +/// - keeping track of the top 10 best documents +/// - computing a breakdown over a fast field +/// - computing the number of documents matching the query /// /// Queries are in charge of pushing the `DocSet` to the collector. /// -/// As they work on multiple segment, they first inform -/// the collector of a change in segment and then -/// call the collect method to push document to the collector. +/// As they work on multiple segments, they first inform +/// the collector of a change in a segment and then +/// call the collect method to push the document to the collector. /// /// Temporally, our collector will receive calls /// - `.set_segment(0, segment_reader_0)` @@ -45,10 +45,10 @@ pub use self::chained_collector::chain; /// /// Segments are not guaranteed to be visited in any specific order. pub trait Collector { - /// `set_segment` is called before starting enumerating + /// `set_segment` is called before beginning to enumerate /// on this segment. 
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()>; - /// The query pushes scored document to the collector via this method. + /// The query pushes the scored document to the collector via this method. fn collect(&mut self, scored_doc: ScoredDoc); } @@ -57,7 +57,7 @@ impl<'a, C: Collector> Collector for &'a mut C { fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> { (*self).set_segment(segment_local_id, segment) } - /// The query pushes scored document to the collector via this method. + /// The query pushes the scored document to the collector via this method. fn collect(&mut self, scored_doc: ScoredDoc) { (*self).collect(scored_doc); } @@ -120,10 +120,10 @@ pub mod tests { - /// Collects in order all of the fast field for all of the - /// doc of the `DocSet` + /// Collects in order all of the fast fields for all of the + /// doc in the `DocSet` /// - /// This collector is essentially useful for tests. + /// This collector is mainly useful for tests. pub struct FastFieldTestCollector { vals: Vec, field: Field, diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index 092547c78..92958018d 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -5,7 +5,7 @@ use SegmentReader; use SegmentLocalId; -/// Multicollector makes it possible to collect on more than one collector +/// Multicollector makes it possible to collect on more than one collector. /// It should only be used for use cases where the Collector types is unknown /// at compile time. /// If the type of the collectors is known, you should prefer to use `ChainedCollector`. 
@@ -60,4 +60,4 @@ mod tests { assert_eq!(count_collector.count(), 3); assert!(top_collector.at_capacity()); } -} \ No newline at end of file +} diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index c04c82774..98641594d 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -50,7 +50,7 @@ pub struct TopCollector { impl TopCollector { - /// Creates a top collector, with a number of document of "limit" + /// Creates a top collector, with a number of documents equal to "limit". /// /// # Panics /// The method panics if limit is 0 @@ -65,9 +65,9 @@ impl TopCollector { } } - /// Returns the decreasingly sorted K-best documents. + /// Returns K best documents sorted in decreasing order. /// - /// Calling this method will triggers the sort. + /// Calling this method triggers the sort. /// The result of the sort is not cached. pub fn docs(&self) -> Vec { self.score_docs() @@ -76,9 +76,9 @@ impl TopCollector { .collect() } - /// Returns the decreasingly sorted K-best ScoredDocument. + /// Returns K best ScoredDocument sorted in decreasing order. /// - /// Calling this method will triggers the sort. + /// Calling this method triggers the sort. /// The result of the sort is not cached. pub fn score_docs(&self) -> Vec<(Score, DocAddress)> { let mut scored_docs: Vec = self.heap @@ -90,9 +90,9 @@ impl TopCollector { .map(|GlobalScoredDoc(score, doc_address)| (score, doc_address)) .collect() } - - /// Return true iff at least K document have gone through - /// the collector. + + /// Return true iff at least K documents have gone through + /// the collector. 
#[inline] pub fn at_capacity(&self, ) -> bool { self.heap.len() >= self.limit @@ -176,8 +176,8 @@ mod tests { .collect(); assert_eq!(docs, vec!(7, 1, 5, 3)); } - - + + } #[test] @@ -185,4 +185,4 @@ mod tests { fn test_top_0() { TopCollector::with_limit(0); } -} \ No newline at end of file +} diff --git a/src/directory/directory.rs b/src/directory/directory.rs index 08f602251..2a77825c2 100644 --- a/src/directory/directory.rs +++ b/src/directory/directory.rs @@ -9,7 +9,7 @@ use std::marker::Sync; /// Write-once read many (WORM) abstraction for where tantivy's index should be stored. /// -/// There is currently two implementations of `Directory` +/// There are currently two implementations of `Directory` /// /// - The [`MMapDirectory`](struct.MmapDirectory.html), this /// should be your default choice. @@ -20,19 +20,19 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static { /// Opens a virtual file for read. /// - /// Once a virtualfile is open, its data may not + /// Once a virtual file is open, its data may not /// change. /// - /// Specifically, subsequent write or flush should - /// have no effect the returned `ReadOnlySource` object. + /// Specifically, subsequent writes or flushes should + /// have no effect on the returned `ReadOnlySource` object. fn open_read(&self, path: &Path) -> result::Result; /// Removes a file /// - /// Removing a file will not affect eventual + /// Removing a file will not affect an eventual /// existing ReadOnlySource pointing to it. /// - /// Removing a non existing files, yields a + /// Removing a nonexistent file, yields a /// `FileError::DoesNotExist`. fn delete(&self, path: &Path) -> result::Result<(), FileError>; @@ -44,28 +44,28 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static { /// same path should return a `ReadOnlySource`. /// /// Write operations may be aggressively buffered. 
- /// The client of this trait is in charge to call flush + /// The client of this trait is responsible for calling flush /// to ensure that subsequent `read` operations - /// will take in account preceding `write` operations. + /// will take into account preceding `write` operations. /// /// Flush operation should also be persistent. /// - /// User shall not rely on `Drop` triggering `flush`. + /// The user shall not rely on `Drop` triggering `flush`. /// Note that `RAMDirectory` will panic! if `flush` /// was not called. /// - /// The file may not previously exists. + /// The file may not previously exist. fn open_write(&mut self, path: &Path) -> Result; - /// Atomically replace the content of a file by data. + /// Atomically replace the content of a file with data. /// /// This calls ensure that reads can never *observe* /// a partially written file. /// - /// The file may or may not previously exists. + /// The file may or may not previously exist. fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()>; - /// Clone the directory and boxes the clone + /// Clones the directory and boxes the clone fn box_clone(&self) -> Box; } diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index eefa760b1..4117494c0 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -47,7 +47,7 @@ impl MmapDirectory { /// Creates a new MmapDirectory in a temporary directory. /// /// This is mostly useful to test the MmapDirectory itself. - /// For your unit test, prefer the RAMDirectory. + /// For your unit tests, prefer the RAMDirectory. pub fn create_from_tempdir() -> io::Result { let tempdir = try!(TempDir::new("index")); let tempdir_path = PathBuf::from(tempdir.path()); @@ -81,7 +81,7 @@ impl MmapDirectory { } /// Joins a relative_path to the directory `root_path` - /// to create proper complete `filepath`. + /// to create a proper complete `filepath`. 
fn resolve_path(&self, relative_path: &Path) -> PathBuf { self.root_path.join(relative_path) } diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 8a002a088..d3b408985 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -11,7 +11,7 @@ use directory::error::{OpenWriteError, FileError}; use directory::WritePtr; use super::shared_vec_slice::SharedVecSlice; -/// Writer associated to the `RAMDirectory` +/// Writer associated with the `RAMDirectory` /// /// The Writer just writes a buffer. /// @@ -133,9 +133,9 @@ impl fmt::Debug for RAMDirectory { } -/// Directory storing everything in anonymous memory. +/// A Directory storing everything in anonymous memory. /// -/// It's main purpose is unit test. +/// It is mainly meant for unit testing. /// Writes are only made visible upon flushing. /// #[derive(Clone)] @@ -161,7 +161,7 @@ impl Directory for RAMDirectory { fn open_write(&mut self, path: &Path) -> Result { let path_buf = PathBuf::from(path); let vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone()); - // force the creation of the file to mimick the MMap directory. + // force the creation of the file to mimic the MMap directory. if try!(self.fs.write(path_buf.clone(), &Vec::new())) { Err(OpenWriteError::FileAlreadyExists(path_buf)) } diff --git a/src/postings/docset.rs b/src/postings/docset.rs index e84c56380..db40db619 100644 --- a/src/postings/docset.rs +++ b/src/postings/docset.rs @@ -24,8 +24,8 @@ pub trait DocSet { /// element. fn advance(&mut self,) -> bool; - /// After skipping, position the iterator in such a way `.doc()` - /// will return a value greater or equal to target. + /// After skipping, position the iterator in such a way that `.doc()` + /// will return a value greater than or equal to target. 
/// /// SkipResult expresses whether the `target value` was reached, overstepped, /// or if the `DocSet` was entirely consumed without finding any value diff --git a/src/postings/postings.rs b/src/postings/postings.rs index bffbeb876..071068c95 100644 --- a/src/postings/postings.rs +++ b/src/postings/postings.rs @@ -12,8 +12,8 @@ use common::HasLen; /// as well as the list of term positions. /// /// Its main implementation is `SegmentPostings`, -/// but other implementations mocking SegmentPostings exists, -/// in order to help merging segments or for testing. +/// but other implementations mocking SegmentPostings exist, +/// in order to help when merging segments or for testing. pub trait Postings: DocSet { /// Returns the term frequency fn term_freq(&self,) -> u32; diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 2abde20ba..5be6bc512 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -4,7 +4,7 @@ # Schema definition Tantivy has a very strict schema. -The schema defines information about the fields your index contains, that is for each field : +The schema defines information about the fields your index contains, that is, for each field : * the field name (may only contain letters `[a-zA-Z]`, number `[0-9]`, and `_`) * the type of the field (currently only `text` and `u32` are supported) @@ -37,20 +37,20 @@ let schema = schema_builder.build(); We can split the problem of generating a search result page into two phases : -* identifying the list of 10 or so document to be displayed (Conceptually `query -> doc_ids[]`) +* identifying the list of 10 or so documents to be displayed (Conceptually `query -> doc_ids[]`) * for each of these documents, retrieving the information required to generate the serp page. 
(`doc_ids[] -> Document[]`) -In the first phase, the hability to search for documents by the given field, is determined by the [`TextIndexingOptions`](enum.TextIndexingOptions.html) of our +In the first phase, the ability to search for documents by the given field is determined by the [`TextIndexingOptions`](enum.TextIndexingOptions.html) of our [`TextOptions`](struct.TextOptions.html). -The effect of each possible settings is described more in detail [`TextIndexingOptions`](enum.TextIndexingOptions.html). +The effect of each possible setting is described in more detail in [`TextIndexingOptions`](enum.TextIndexingOptions.html). On the other hand setting the field as stored or not determines whether the field should be returned when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called. ### Shortcuts -For convenience, a few special value of `TextOptions` for your convenience. +For convenience, a few special values of `TextOptions` are provided. They can be composed using the `|` operator. The example can be rewritten : @@ -82,7 +82,7 @@ Just like for Text fields (see above), setting the field as stored defines whether the field will be returned when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called, and setting the field as indexed means that we will be able perform queries such as `num_stars:10`. -Note that contrary to text fields, u32 can only be indexed in one way for the moment. +Note that unlike text fields, u32 can only be indexed in one way for the moment. This may change when we will start supporting range queries. The `fast` option on the other hand is specific to u32 fields, and is only relevant diff --git a/src/schema/schema.rs b/src/schema/schema.rs index f41d5572f..8d6e3bedd 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -15,7 +15,7 @@ use std::fmt; /// Tantivy has a very strict schema. 
-/// You need to specify in advance, whether a field is indexed or not, +/// You need to specify in advance whether a field is indexed or not, /// stored or not, and RAM-based or not. /// /// This is done by creating a schema object, and @@ -483,4 +483,4 @@ mod tests { } } } -} \ No newline at end of file +} From 784043b8eb3cb269cacb414bb1c3c0e6d20c4c3c Mon Sep 17 00:00:00 2001 From: "Michael J. Curry" Date: Fri, 30 Sep 2016 11:16:56 -0400 Subject: [PATCH 5/7] more small changes --- src/collector/count_collector.rs | 3 +-- src/collector/mod.rs | 2 +- src/core/index.rs | 16 ++++++++-------- src/core/segment_reader.rs | 10 +++++----- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index dc1d85580..44d547ec3 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -20,8 +20,7 @@ impl CountCollector { impl Default for CountCollector { fn default() -> CountCollector { - CountCollector { - count: 0, + CountCollector {count: 0, } } } diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 81bdc330f..683b7eb1c 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -29,7 +29,7 @@ pub use self::chained_collector::chain; /// /// As they work on multiple segments, they first inform /// the collector of a change in a segment and then -/// call the collect method to push the document to the collector. +/// call the `collect` method to push the document to the collector. /// /// Temporally, our collector will receive calls /// - `.set_segment(0, segment_reader_0)` diff --git a/src/core/index.rs b/src/core/index.rs index ac2b287cf..86b620319 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -90,7 +90,7 @@ impl Index { /// Creates a new index in a temp directory. /// /// The index will use the `MMapDirectory` in a newly created directory. 
- /// The temp directory will be destroyed automatically when the Index object + /// The temp directory will be destroyed automatically when the `Index` object /// is destroyed. /// /// The temp directory is only used for testing the `MmapDirectory`. @@ -100,7 +100,7 @@ impl Index { Index::from_directory(directory, schema) } - /// Creates a new index given a directory and an IndexMeta. + /// Creates a new index given a directory and an `IndexMeta`. fn create_from_metas(directory: Box, metas: IndexMeta) -> Result { let schema = metas.schema.clone(); let index = Index { @@ -160,7 +160,7 @@ impl Index { /// Marks the segment as published. // TODO find a rusty way to hide that, while keeping - // it visible for IndexWriters. + // it visible for `IndexWriter`s. pub fn publish_segments(&mut self, segment_ids: &[SegmentId], docstamp: u64) -> Result<()> { @@ -204,7 +204,7 @@ impl Index { } - /// Return a segment object given a segment_id + /// Return a segment object given a `segment_id` /// /// The segment may or may not exist. fn segment(&self, segment_id: SegmentId) -> Segment { @@ -246,7 +246,7 @@ impl Index { /// Either // - it fails, in which case an error is returned, /// and the `meta.json` remains untouched, - /// - it success, and `meta.json` is written + /// - it succeeds, and `meta.json` is written /// and flushed. pub fn save_metas(&mut self,) -> Result<()> { let mut w = Vec::new(); @@ -286,9 +286,9 @@ impl Index { /// /// This method should be called every single time a search /// query is performed. - /// The searcher are taken from a pool of `NUM_SEARCHERS` searchers. + /// The searchers are taken from a pool of `NUM_SEARCHERS` searchers. /// If no searcher is available - /// it may block. + /// this may block. /// /// The same searcher must be used for a given query, as it ensures /// the use of a consistent segment set. 
@@ -313,4 +313,4 @@ impl Clone for Index { searcher_pool: self.searcher_pool.clone(), } } -} \ No newline at end of file +} diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index be1e46ec2..148e34ab2 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -25,7 +25,7 @@ use schema::TextIndexingOptions; use error::Error; -/// Entrypoint to access all of the datastructures of the `Segment` +/// Entry point to access all of the datastructures of the `Segment` /// /// - term dictionary /// - postings @@ -34,8 +34,8 @@ use error::Error; /// - field norm reader /// /// The segment reader has a very low memory footprint, -/// as close to all of the memory data is in Mmapped. -/// +/// as close to all of the memory data is mmapped. +/// pub struct SegmentReader { segment_info: SegmentInfo, segment_id: SegmentId, @@ -51,7 +51,7 @@ pub struct SegmentReader { impl SegmentReader { /// Returns the highest document id ever attributed in /// this segment + 1. - /// Today, `tantivy` does not handle deletes so, it happens + /// Today, `tantivy` does not handle deletes, so it happens /// to also be the number of documents in the index. pub fn max_doc(&self) -> DocId { self.segment_info.max_doc @@ -233,7 +233,7 @@ impl SegmentReader { self.read_postings(term, segment_posting_option) } - /// Returns the term info of associated with the term. + /// Returns the term info associated with the term. pub fn get_term_info(&self, term: &Term) -> Option { self.term_infos.get(term.as_slice()) } From a2cc0ff58c6fc6054697eedbc441151c0342a66f Mon Sep 17 00:00:00 2001 From: "Michael J. Curry" Date: Fri, 30 Sep 2016 11:17:47 -0400 Subject: [PATCH 6/7] readme updates --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7a5f414b6..75b0b13a6 100644 --- a/README.md +++ b/README.md @@ -13,10 +13,10 @@ It is strongly inspired by Lucene's design. 
# Features - configurable indexing (optional term frequency and position indexing) -- Tf-Idf scoring +- tf-idf scoring - Basic query language - Incremental indexing -- Multithreaded indexing (indexing en wikipedia takes 4mn on my desktop) +- Multithreaded indexing (indexing english Wikipedia takes 4 minutes on my desktop) - Mmap based - SIMD integer compression - u32 fast fields (equivalent of doc values in Lucene) From ac9aa5cb5e11031ffdddbd34f0ce06583c3456c9 Mon Sep 17 00:00:00 2001 From: "Michael J. Curry" Date: Fri, 30 Sep 2016 11:19:00 -0400 Subject: [PATCH 7/7] literally one character changed --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 75b0b13a6..f62295e2b 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ It is strongly inspired by Lucene's design. - tf-idf scoring - Basic query language - Incremental indexing -- Multithreaded indexing (indexing english Wikipedia takes 4 minutes on my desktop) -- Mmap based +- Multithreaded indexing (indexing English Wikipedia takes 4 minutes on my desktop) +- mmap based - SIMD integer compression - u32 fast fields (equivalent of doc values in Lucene) - LZ4 compressed document store