SegmentUpdater.add_segment does not need to return true

replace scoped_pool (#685)
Added handling of pre-tokenized text fields (#642). (#669)
2026-01-03 15:52:55 +00:00 · 2019-11-09 20:30:40 +09:00 · 2019-11-07 10:26:08 +09:00 · 2019-11-07 10:10:56 +09:00 · 2019-11-07 09:55:33 +09:00 · 2019-11-05 16:26:12 +09:00
9 changed files with 66 additions and 45 deletions
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -0,0 +1,12 @@
+# These are supported funding model platforms
+
+github: fulmicoton
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -50,10 +50,10 @@ owned-read = "0.4"
 failure = "0.1"
 htmlescape = "0.3.1"
 fail = "0.3"
-scoped-pool = "1.0"
 murmurhash32 = "0.2"
 chrono = "0.4"
-smallvec = "0.6"
+smallvec = "1.0"
+rayon = "1"

 [target.'cfg(windows)'.dependencies]
 winapi = "0.3"
@@ -64,6 +64,10 @@ maplit = "1"
 matches = "0.1.8"
 time = "0.1.42"

+[dev-dependencies.fail]
+version = "0.3"
+features = ["failpoints"]
+
 [profile.release]
 opt-level = 3
 debug = false
@@ -87,10 +91,6 @@ members = ["query-grammar"]
 [badges]
 travis-ci = { repository = "tantivy-search/tantivy" }

-[dev-dependencies.fail]
-version = "0.3"
-features = ["failpoints"]
-
 # Following the "fail" crate best practises, we isolate
 # tests that define specific behavior in fail check points
 # in a different binary.
--- a/src/core/executor.rs
+++ b/src/core/executor.rs
@@ -1,6 +1,6 @@
 use crate::Result;
 use crossbeam::channel;
-use scoped_pool::{Pool, ThreadConfig};
+use rayon::{ThreadPool, ThreadPoolBuilder};

 /// Search executor whether search request are single thread or multithread.
 ///
@@ -11,7 +11,7 @@ use scoped_pool::{Pool, ThreadConfig};
 /// used by the client. Second, we may stop using rayon in the future.
 pub enum Executor {
    SingleThread,
-    ThreadPool(Pool),
+    ThreadPool(ThreadPool),
 }

 impl Executor {
@@ -21,10 +21,12 @@ impl Executor {
    }

    // Creates an Executor that dispatches the tasks in a thread pool.
-    pub fn multi_thread(num_threads: usize, prefix: &'static str) -> Executor {
-        let thread_config = ThreadConfig::new().prefix(prefix);
-        let pool = Pool::with_thread_config(num_threads, thread_config);
-        Executor::ThreadPool(pool)
+    pub fn multi_thread(num_threads: usize, prefix: &'static str) -> Result<Executor> {
+        let pool = ThreadPoolBuilder::new()
+            .num_threads(num_threads)
+            .thread_name(move |num| format!("{}{}", prefix, num))
+            .build()?;
+        Ok(Executor::ThreadPool(pool))
    }

    // Perform a map in the thread pool.
@@ -48,9 +50,9 @@ impl Executor {
                let num_fruits = args_with_indices.len();
                let fruit_receiver = {
                    let (fruit_sender, fruit_receiver) = channel::unbounded();
-                    pool.scoped(|scope| {
+                    pool.scope(|scope| {
                        for arg_with_idx in args_with_indices {
-                            scope.execute(|| {
+                            scope.spawn(|_| {
                                let (idx, arg) = arg_with_idx;
                                let fruit = f(arg);
                                if let Err(err) = fruit_sender.send((idx, fruit)) {
@@ -103,6 +105,7 @@ mod tests {
    #[should_panic] //< unfortunately the panic message is not propagated
    fn test_panic_propagates_multi_thread() {
        let _result: Vec<usize> = Executor::multi_thread(1, "search-test")
+            .unwrap()
            .map(
                |_| {
                    panic!("panic should propagate");
@@ -126,6 +129,7 @@ mod tests {
    #[test]
    fn test_map_multithread() {
        let result: Vec<usize> = Executor::multi_thread(3, "search-test")
+            .unwrap()
            .map(|i| Ok(i * 2), 0..10)
            .unwrap();
        assert_eq!(result.len(), 10);
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -73,15 +73,16 @@ impl Index {

    /// Replace the default single thread search executor pool
    /// by a thread pool with a given number of threads.
-    pub fn set_multithread_executor(&mut self, num_threads: usize) {
-        self.executor = Arc::new(Executor::multi_thread(num_threads, "thrd-tantivy-search-"));
+    pub fn set_multithread_executor(&mut self, num_threads: usize) -> Result<()> {
+        self.executor = Arc::new(Executor::multi_thread(num_threads, "thrd-tantivy-search-")?);
+        Ok(())
    }

    /// Replace the default single thread search executor pool
    /// by a thread pool with a given number of threads.
-    pub fn set_default_multithread_executor(&mut self) {
+    pub fn set_default_multithread_executor(&mut self) -> Result<()> {
        let default_num_threads = num_cpus::get();
-        self.set_multithread_executor(default_num_threads);
+        self.set_multithread_executor(default_num_threads)
    }

    /// Creates a new index using the `RAMDirectory`.
--- a/src/error.rs
+++ b/src/error.rs
@@ -170,3 +170,9 @@ impl From<serde_json::Error> for TantivyError {
        TantivyError::IOError(io_err.into())
    }
 }
+
+impl From<rayon::ThreadPoolBuildError> for TantivyError {
+    fn from(error: rayon::ThreadPoolBuildError) -> TantivyError {
+        TantivyError::SystemError(error.to_string())
+    }
+}
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -227,7 +227,8 @@ fn index_documents(
        delete_cursor,
        delete_bitset_opt,
    );
-    Ok(segment_updater.add_segment(segment_entry))
+    segment_updater.add_segment(segment_entry);
+    Ok(true)
 }

 fn apply_deletes(
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -199,14 +199,12 @@ impl SegmentUpdater {
        self.0.pool.spawn_fn(move || Ok(f(me_clone)))
    }

-    pub fn add_segment(&self, segment_entry: SegmentEntry) -> bool {
+    pub fn add_segment(&self, segment_entry: SegmentEntry) {
        self.run_async(|segment_updater| {
            segment_updater.0.segment_manager.add_segment(segment_entry);
            segment_updater.consider_merge_options();
-            true
        })
        .forget();
-        true
    }

    /// Orders `SegmentManager` to remove all segments
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -170,6 +170,7 @@ impl SegmentWriter {
                                if let Some(last_token) = tok_str.tokens.last() {
                                    total_offset += last_token.offset_to;
                                }
+
                                token_streams
                                    .push(Box::new(PreTokenizedStream::from(tok_str.clone())));
                            }
--- a/src/tokenizer/tokenized_string.rs
+++ b/src/tokenizer/tokenized_string.rs
@@ -43,32 +43,29 @@ impl PreTokenizedStream {
        tok_strings: &'a [&'a PreTokenizedString],
    ) -> Box<dyn TokenStream + 'a> {
        if tok_strings.len() == 1 {
-            return Box::new(PreTokenizedStream::from((*tok_strings[0]).clone()));
-        }
-        let mut offsets = vec![];
-        let mut total_offset = 0;
-        for &tok_string in tok_strings {
-            offsets.push(total_offset);
-            if let Some(last_token) = tok_string.tokens.last() {
-                total_offset += last_token.offset_to;
+            Box::new(PreTokenizedStream::from((*tok_strings[0]).clone()))
+        } else {
+            let mut offsets = vec![];
+            let mut total_offset = 0;
+            for &tok_string in tok_strings {
+                offsets.push(total_offset);
+                if let Some(last_token) = tok_string.tokens.last() {
+                    total_offset += last_token.offset_to;
+                }
            }
+            let token_streams: Vec<_> = tok_strings
+                .iter()
+                .map(|tok_string| PreTokenizedStream::from((*tok_string).clone()))
+                .collect();
+            Box::new(TokenStreamChain::new(offsets, token_streams))
        }
-        let token_streams: Vec<_> = tok_strings
-            .iter()
-            .map(|tok_string| PreTokenizedStream::from((*tok_string).clone()))
-            .collect();
-        Box::new(TokenStreamChain::new(offsets, token_streams))
    }
 }

 impl TokenStream for PreTokenizedStream {
    fn advance(&mut self) -> bool {
-        if self.current_token >= self.tokenized_string.tokens.len() as i64 - 1 {
-            // This was our last token.
-            return false;
-        }
        self.current_token += 1;
-        true
+        self.current_token < self.tokenized_string.tokens.len() as i64
    }

    fn token(&self) -> &Token {
@@ -117,13 +114,13 @@ mod tests {
            ],
        };

-        let mut tok_stream = PreTokenizedStream::from(tok_text.clone());
+        let mut token_stream = PreTokenizedStream::from(tok_text.clone());

-        let mut i = 0;
-        while tok_stream.advance() {
-            assert!(*tok_stream.token() == tok_text.tokens[i]);
-            i += 1;
+        for expected_token in tok_text.tokens {
+            assert!(token_stream.advance());
+            assert_eq!(token_stream.token(), &expected_token);
        }
+        assert!(!token_stream.advance());
    }

    #[test]
@@ -182,6 +179,7 @@ mod tests {
                position_length: 1,
            },
        ];
+
        for expected_token in expected_tokens {
            assert!(token_stream.advance());
            assert_eq!(token_stream.token(), &expected_token);
Author	SHA1	Message	Date
Paul Masurel	4fc8712f1a	SegmentUpdater.add_segment does not need to return true	2019-11-09 20:30:40 +09:00
Jacob Brown	6e4fdfd4bf	replace scoped_pool (#685 )	2019-11-07 10:26:08 +09:00
kkoziara	0519056bd8	Added handling of pre-tokenized text fields (#642 ). (#669 ) * Added handling of pre-tokenized text fields (#642). * * Updated changelog and examples concerning #642. * Added tokenized_text method to Value implementation. * Implemented From<TokenizedString> for TokenizedStream. * * Removed tokenized flag from TextOptions and code reliance on the flag. * Changed naming to use word "pre-tokenized" instead of "tokenized". * Updated example code. * Fixed comments. * Minor code refactoring. Test improvements.	2019-11-07 10:10:56 +09:00
dependabot-preview[bot]	7305ad575e	Update smallvec requirement from 0.6 to 1.0 (#686 ) Updates the requirements on [smallvec](https://github.com/servo/rust-smallvec) to permit the latest version. - [Release notes](https://github.com/servo/rust-smallvec/releases) - [Commits](https://github.com/servo/rust-smallvec/compare/v0.6.0...v1.0.0) Signed-off-by: dependabot-preview[bot] <support@dependabot.com>	2019-11-07 09:55:33 +09:00
Paul Masurel	79f64ac2f4	Create FUNDING.yml	2019-11-05 16:26:12 +09:00