diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 000000000..b1a5aece3
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,19 @@
+---
+name: Bug report
+about: Create a report to help us improve
+
+---
+
+**Describe the bug**
+- What did you do?
+- What happened?
+- What was expected?
+
+**Which version of tantivy are you using?**
+If "master", ideally give the specific sha1 revision.
+
+**To Reproduce**
+
+If your bug is deterministic, can you provide a minimal code example that reproduces it?
+Some bugs are not deterministic. Can you describe precisely the context in which it happened?
+If this is possible, can you share your code?
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 000000000..3affc3c24
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,14 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**[Optional] describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md
new file mode 100644
index 000000000..e00e9a1b0
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,7 @@
+---
+name: Question
+about: Ask any question about tantivy's usage...
+
+---
+
+Try to be specific about your use case...
diff --git a/src/tokenizer/stop_word_filter.rs b/src/tokenizer/stop_word_filter.rs
index f94ec632f..45691d470 100644
--- a/src/tokenizer/stop_word_filter.rs
+++ b/src/tokenizer/stop_word_filter.rs
@@ -39,6 +39,22 @@ impl StopWordFilter {
 
         StopWordFilter { words: set }
     }
+
+    /// Builds a `StopWordFilter` from a built-in list of 33 common English words.
+    ///
+    /// The words are converted to owned `String`s and handed to
+    /// `StopWordFilter::remove`, which constructs the filter.
+    fn english() -> StopWordFilter {
+        let words = [
+            "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into",
+            "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then",
+            "there", "these", "they", "this", "to", "was", "will", "with",
+        ];
+
+        // `iter()` yields `&&str`; destructuring to `&str` copies the text directly
+        // instead of going through the generic `Display`-based `ToString` path.
+        StopWordFilter::remove(words.iter().map(|&word| word.to_owned()).collect())
+    }
 }
 
 pub struct StopWordFilterStream
@@ -98,3 +114,10 @@ where
         false
     }
 }
+
+impl Default for StopWordFilter {
+    /// Returns the filter produced by `StopWordFilter::english`.
+    fn default() -> StopWordFilter {
+        StopWordFilter::english()
+    }
+}
diff --git a/src/tokenizer/tokenizer_manager.rs b/src/tokenizer/tokenizer_manager.rs
index cbb46af3b..410e7f30b 100644
--- a/src/tokenizer/tokenizer_manager.rs
+++ b/src/tokenizer/tokenizer_manager.rs
@@ -8,6 +8,7 @@ use tokenizer::RawTokenizer;
 use tokenizer::RemoveLongFilter;
 use tokenizer::SimpleTokenizer;
 use tokenizer::Stemmer;
+use tokenizer::StopWordFilter;
 use tokenizer::Tokenizer;
 
 /// The tokenizer manager serves as a store for