mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-03 07:42:54 +00:00
Compare commits
6 Commits
bugfix-uni
...
issue/866b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
37e7af322d | ||
|
|
151498cbe7 | ||
|
|
3a72b1cb98 | ||
|
|
2737822620 | ||
|
|
06c12ae221 | ||
|
|
4e4400af7f |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -12,3 +12,4 @@ cpp/simdcomp/bitpackingbenchmark
|
||||
*.bk
|
||||
.idea
|
||||
trace.dat
|
||||
cargo-timing*
|
||||
|
||||
@@ -23,6 +23,7 @@ memmap = {version = "0.7", optional=true}
|
||||
lz4 = {version="1.20", optional=true}
|
||||
snap = "1"
|
||||
tempfile = {version="3.0", optional=true}
|
||||
atomicwrites = "0.2"
|
||||
log = "0.4"
|
||||
serde = {version="1.0", features=["derive"]}
|
||||
serde_json = "1.0"
|
||||
@@ -37,7 +38,7 @@ owning_ref = "0.4"
|
||||
stable_deref_trait = "1.0.0"
|
||||
rust-stemmers = "1.2"
|
||||
downcast-rs = { version="1.0" }
|
||||
tantivy-query-grammar = { version="0.13", path="./query-grammar" }
|
||||
tantivy-query-grammar = { version="0.14.0-dev", path="./query-grammar" }
|
||||
bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
|
||||
census = "0.4"
|
||||
fnv = "1.0.6"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-query-grammar"
|
||||
version = "0.13.0"
|
||||
version = "0.14.0-dev"
|
||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||
license = "MIT"
|
||||
categories = ["database-implementations", "data-structures"]
|
||||
|
||||
@@ -9,8 +9,10 @@ use combine::{
|
||||
|
||||
fn field<'a>() -> impl Parser<&'a str, Output = String> {
|
||||
(
|
||||
letter(),
|
||||
many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
|
||||
(letter().or(char('_'))),
|
||||
many(satisfy(|c: char| {
|
||||
c.is_alphanumeric() || c == '_' || c == '-'
|
||||
})),
|
||||
)
|
||||
.skip(char(':'))
|
||||
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||
@@ -279,6 +281,8 @@ pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
type TestParseResult = Result<(), StringStreamError>;
|
||||
|
||||
use super::*;
|
||||
use combine::parser::Parser;
|
||||
|
||||
@@ -296,9 +300,10 @@ mod test {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_occur_symbol() {
|
||||
assert_eq!(super::occur_symbol().parse("-"), Ok((Occur::MustNot, "")));
|
||||
assert_eq!(super::occur_symbol().parse("+"), Ok((Occur::Must, "")));
|
||||
fn test_occur_symbol() -> TestParseResult {
|
||||
assert_eq!(super::occur_symbol().parse("-")?, (Occur::MustNot, ""));
|
||||
assert_eq!(super::occur_symbol().parse("+")?, (Occur::Must, ""));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -410,6 +415,25 @@ mod test {
|
||||
assert_eq!(format!("{:?}", ast), "\"abc\"");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_field_name() -> TestParseResult {
|
||||
assert_eq!(
|
||||
super::field().parse("my-field-name:a")?,
|
||||
("my-field-name".to_string(), "a")
|
||||
);
|
||||
assert_eq!(
|
||||
super::field().parse("my_field_name:a")?,
|
||||
("my_field_name".to_string(), "a")
|
||||
);
|
||||
assert!(super::field().parse(":a").is_err());
|
||||
assert!(super::field().parse("-my_field:a").is_err());
|
||||
assert_eq!(
|
||||
super::field().parse("_my_field:a")?,
|
||||
("_my_field".to_string(), "a")
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_range_parser() {
|
||||
// testing the range() parser separately
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::core::MANAGED_FILEPATH;
|
||||
use crate::core::{MANAGED_FILEPATH, META_FILEPATH};
|
||||
use crate::directory::error::{DeleteError, IOError, LockError, OpenReadError, OpenWriteError};
|
||||
use crate::directory::footer::{Footer, FooterProxy};
|
||||
use crate::directory::DirectoryLock;
|
||||
@@ -246,13 +246,15 @@ impl ManagedDirectory {
|
||||
/// List files for which checksum does not match content
|
||||
pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
|
||||
let mut hashset = HashSet::new();
|
||||
let managed_paths = self
|
||||
let mut managed_paths = self
|
||||
.meta_informations
|
||||
.read()
|
||||
.expect("Managed directory rlock poisoned in list damaged.")
|
||||
.managed_paths
|
||||
.clone();
|
||||
|
||||
managed_paths.remove(*META_FILEPATH);
|
||||
|
||||
for path in managed_paths.into_iter() {
|
||||
if !self.validate_checksum(&path)? {
|
||||
hashset.insert(path);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::core::META_FILEPATH;
|
||||
use atomicwrites;
|
||||
use crate::directory::error::LockError;
|
||||
use crate::directory::error::{
|
||||
DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError,
|
||||
@@ -490,11 +491,9 @@ impl Directory for MmapDirectory {
|
||||
|
||||
fn atomic_write(&mut self, path: &Path, content: &[u8]) -> io::Result<()> {
|
||||
debug!("Atomic Write {:?}", path);
|
||||
let mut tempfile = tempfile::NamedTempFile::new()?;
|
||||
tempfile.write_all(content)?;
|
||||
tempfile.flush()?;
|
||||
let full_path = self.resolve_path(path);
|
||||
tempfile.into_temp_path().persist(full_path)?;
|
||||
let meta_file = atomicwrites::AtomicFile::new(full_path, atomicwrites::AllowOverwrite);
|
||||
meta_file.write(|f| f.write_all(content))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -215,7 +215,6 @@ fn test_watch(directory: &mut dyn Directory) {
|
||||
assert!(directory
|
||||
.atomic_write(Path::new("meta.json"), b"random_test_data_2")
|
||||
.is_ok());
|
||||
assert_eq!(receiver.recv_timeout(Duration::from_millis(500)), Ok(i));
|
||||
assert!(i + 1 <= counter.load(SeqCst)); // notify can trigger more than once.
|
||||
}
|
||||
mem::drop(watch_handle);
|
||||
|
||||
@@ -1012,4 +1012,12 @@ mod tests {
|
||||
DOC_COUNT as usize
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_validate_checksum() {
|
||||
let index_path = tempfile::tempdir().expect("dir");
|
||||
let schema = Schema::builder().build();
|
||||
let index = Index::create_in_dir(&index_path, schema).expect("index");
|
||||
assert!(index.validate_checksum().unwrap().is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::schema::IntOptions;
|
||||
use crate::schema::TextOptions;
|
||||
use crate::schema::{is_valid_field_name, IntOptions};
|
||||
|
||||
use crate::schema::FieldType;
|
||||
use serde::de::{self, MapAccess, Visitor};
|
||||
@@ -24,6 +24,7 @@ impl FieldEntry {
|
||||
/// Creates a new text field entry in the schema, given
|
||||
/// a name, and some options.
|
||||
pub fn new_text(field_name: String, text_options: TextOptions) -> FieldEntry {
|
||||
assert!(is_valid_field_name(&field_name));
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::Str(text_options),
|
||||
@@ -33,6 +34,7 @@ impl FieldEntry {
|
||||
/// Creates a new u64 field entry in the schema, given
|
||||
/// a name, and some options.
|
||||
pub fn new_u64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||
assert!(is_valid_field_name(&field_name));
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::U64(field_type),
|
||||
@@ -42,6 +44,7 @@ impl FieldEntry {
|
||||
/// Creates a new i64 field entry in the schema, given
|
||||
/// a name, and some options.
|
||||
pub fn new_i64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||
assert!(is_valid_field_name(&field_name));
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::I64(field_type),
|
||||
@@ -51,6 +54,7 @@ impl FieldEntry {
|
||||
/// Creates a new f64 field entry in the schema, given
|
||||
/// a name, and some options.
|
||||
pub fn new_f64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||
assert!(is_valid_field_name(&field_name));
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::F64(field_type),
|
||||
@@ -60,6 +64,7 @@ impl FieldEntry {
|
||||
/// Creates a new date field entry in the schema, given
|
||||
/// a name, and some options.
|
||||
pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||
assert!(is_valid_field_name(&field_name));
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::Date(field_type),
|
||||
@@ -68,6 +73,7 @@ impl FieldEntry {
|
||||
|
||||
/// Creates a field entry for a facet.
|
||||
pub fn new_facet(field_name: String) -> FieldEntry {
|
||||
assert!(is_valid_field_name(&field_name));
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::HierarchicalFacet,
|
||||
@@ -76,6 +82,7 @@ impl FieldEntry {
|
||||
|
||||
/// Creates a field entry for a bytes field
|
||||
pub fn new_bytes(field_name: String) -> FieldEntry {
|
||||
assert!(is_valid_field_name(&field_name));
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::Bytes,
|
||||
@@ -268,6 +275,12 @@ mod tests {
|
||||
use crate::schema::TEXT;
|
||||
use serde_json;
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_invalid_field_name_should_panic() {
|
||||
FieldEntry::new_text("-hello".to_string(), TEXT);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_serialization() {
|
||||
let field_value = FieldEntry::new_text(String::from("title"), TEXT);
|
||||
|
||||
@@ -149,14 +149,16 @@ pub use self::int_options::IntOptions;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
|
||||
/// Regular expression describing the restrictions on valid field names.
|
||||
pub const FIELD_NAME_PATTERN: &'static str = r#"^[_a-zA-Z][_\-a-zA-Z0-9]*$"#;
|
||||
|
||||
/// Validator for a potential `field_name`.
|
||||
/// Returns true iff the name can be used as a field name.
|
||||
///
|
||||
/// A field name must start with a letter `[a-zA-Z]` or an underscore `_`.
|
||||
/// The other characters can be any alphanumeric character `[a-zA-Z0-9]`, `_` or `-`.
|
||||
pub fn is_valid_field_name(field_name: &str) -> bool {
|
||||
static FIELD_NAME_PTN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("^[a-zA-Z][_a-zA-Z0-9]*$").unwrap());
|
||||
static FIELD_NAME_PTN: Lazy<Regex> = Lazy::new(|| Regex::new(FIELD_NAME_PATTERN).unwrap());
|
||||
FIELD_NAME_PTN.is_match(field_name)
|
||||
}
|
||||
|
||||
@@ -170,6 +172,11 @@ mod tests {
|
||||
assert!(is_valid_field_name("text"));
|
||||
assert!(is_valid_field_name("text0"));
|
||||
assert!(!is_valid_field_name("0text"));
|
||||
assert!(is_valid_field_name("field-name"));
|
||||
assert!(is_valid_field_name("field_name"));
|
||||
assert!(!is_valid_field_name("field!name"));
|
||||
assert!(!is_valid_field_name("-fieldname"));
|
||||
assert!(is_valid_field_name("_fieldname"));
|
||||
assert!(!is_valid_field_name(""));
|
||||
assert!(!is_valid_field_name("シャボン玉"));
|
||||
assert!(is_valid_field_name("my_text_field"));
|
||||
|
||||
Reference in New Issue
Block a user