Added Travis CI configuration.

This commit is contained in:
Paul Masurel
2016-05-01 15:06:40 +09:00
parent 056e4e6cf3
commit 3a2af1aa65

95
src/analyzer/mod.rs Normal file
View File

@@ -0,0 +1,95 @@
extern crate regex;
use std::str::Chars;
/// Streaming token reader over a borrowed piece of text.
///
/// Tokens are assembled one at a time into an internal buffer that is reused
/// between calls, so no allocation is needed per token once the buffer has
/// grown large enough.
#[derive(Debug, Clone)]
pub struct TokenIter<'a> {
    /// Characters of the input text that have not been consumed yet.
    chars: Chars<'a>,
    /// Reusable buffer holding the lowercased characters of the current token.
    term_buffer: String,
}
/// Appends the lowercase form of `c` to `term_buffer`.
///
/// Lowercasing a single character can yield more than one character; all of
/// them are appended, in order.
fn append_char_lowercase(c: char, term_buffer: &mut String) {
    term_buffer.extend(c.to_lowercase());
}
/// Iterator-like trait whose `next` borrows the iterator for the lifetime `'a`.
///
/// Because the receiver is `&'a mut self` and `'a` is a parameter of the
/// trait, the item type `T` can mention `'a` (for example `&'a str`), which
/// lets an implementor hand out references into a buffer it owns.
pub trait StreamingIterator<'a, T> {
/// Returns the next item, or `None` when there is nothing left to yield
/// (the implementation for `TokenIter` in this file returns `None` once
/// the input text is exhausted).
fn next(&'a mut self) -> Option<T>;
}
impl<'a, 'b> TokenIter<'b> {
    /// Reads the rest of the current token into `term_buffer` and returns it.
    ///
    /// Characters are consumed until a non-alphanumeric character or the end
    /// of the input is reached. Every alphanumeric character is lowercased and
    /// appended to the buffer. Always returns `Some` with the buffer contents.
    fn consume_token(&'a mut self) -> Option<&'a str> {
        // `take_while` also pulls the first non-matching character out of the
        // underlying iterator, so the delimiter that ends the token is
        // consumed as well.
        for letter in self.chars.by_ref().take_while(|ch| ch.is_alphanumeric()) {
            append_char_lowercase(letter, &mut self.term_buffer);
        }
        Some(self.term_buffer.as_str())
    }
}
impl<'a, 'b> StreamingIterator<'a, &'a str> for TokenIter<'b> {
    /// Returns the next lowercased token, or `None` when no alphanumeric
    /// character remains in the input.
    ///
    /// The returned slice points into the internal buffer, which is cleared at
    /// the start of every call.
    fn next(&'a mut self) -> Option<&'a str> {
        self.term_buffer.clear();
        // Skip every non-alphanumeric character in front of the token.
        match self.chars.find(|ch| ch.is_alphanumeric()) {
            Some(first) => {
                append_char_lowercase(first, &mut self.term_buffer);
                self.consume_token()
            }
            None => None,
        }
    }
}
/// Stateless tokenizer that splits text into lowercased alphanumeric tokens.
///
/// The type carries no data, so it is cheap to copy and can be built with
/// either `SimpleTokenizer::new()` or `SimpleTokenizer::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct SimpleTokenizer;
impl SimpleTokenizer {
    /// Creates a tokenizer.
    pub fn new() -> SimpleTokenizer {
        SimpleTokenizer
    }

    /// Starts tokenizing `text`.
    ///
    /// The returned `TokenIter` borrows `text` and begins with an empty token
    /// buffer.
    pub fn tokenize<'a>(&self, text: &'a str) -> TokenIter<'a> {
        let remaining = text.chars();
        TokenIter {
            chars: remaining,
            term_buffer: String::new(),
        }
    }
}
// Punctuation and whitespace separate tokens; the stream ends with `None`.
#[test]
fn test_tokenizer() {
    let tokenizer = SimpleTokenizer::new();
    let mut tokens = tokenizer.tokenize("hello, happy tax payer!");
    for expected in ["hello", "happy", "tax", "payer"].iter() {
        assert_eq!(tokens.next().unwrap(), *expected);
    }
    assert_eq!(tokens.next(), None);
}
// An empty input yields no token at all.
#[test]
fn test_tokenizer_empty() {
    let mut tokens = SimpleTokenizer::new().tokenize("");
    assert!(tokens.next().is_none());
}