Kompass master (#590)

* Use once_cell in place of lazy_static

* Minor changes
This commit is contained in:
Paul Masurel
2019-07-10 19:24:54 +09:00
committed by GitHub
parent 697c7e721d
commit 4867be3d3b
15 changed files with 121 additions and 130 deletions

View File

@@ -15,7 +15,7 @@ edition = "2018"
[dependencies]
base64 = "0.10.0"
byteorder = "1.0"
lazy_static = "1"
once_cell = "0.2"
regex = "1.0"
tantivy-fst = "0.1"
memmap = {version = "0.7", optional=true}

View File

@@ -38,7 +38,7 @@ fn load_metas(directory: &dyn Directory) -> Result<IndexMeta> {
serde_json::from_str(&meta_string)
.map_err(|e| {
DataCorruption::new(
META_FILEPATH.clone(),
META_FILEPATH.to_path_buf(),
format!("Meta file cannot be deserialized. {:?}.", e),
)
})

View File

@@ -21,18 +21,16 @@ pub use self::segment_id::SegmentId;
pub use self::segment_meta::SegmentMeta;
pub use self::segment_reader::SegmentReader;
use std::path::PathBuf;
use once_cell::sync::Lazy;
use std::path::Path;
lazy_static! {
/// The meta file contains all the information about the list of segments and the schema
/// of the index.
pub static META_FILEPATH: Lazy<&'static Path> = Lazy::new(|| Path::new("meta.json"));
/// The meta file contains all the information about the list of segments and the schema
/// of the index.
pub static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json");
/// The managed file contains a list of files that were created by the tantivy
/// and will therefore be garbage collected when they are deemed useless by tantivy.
///
/// Removing this file is safe, but will prevent the garbage collection of all of the file that
/// are currently in the directory
pub static ref MANAGED_FILEPATH: PathBuf = PathBuf::from(".managed.json");
}
/// The managed file contains a list of files that were created by the tantivy
/// and will therefore be garbage collected when they are deemed useless by tantivy.
///
/// Removing this file is safe, but will prevent the garbage collection of all of the file that
/// are currently in the directory
pub static MANAGED_FILEPATH: Lazy<&'static Path> = Lazy::new(|| Path::new(".managed.json"));

View File

@@ -2,6 +2,8 @@ use std::cmp::{Ord, Ordering};
use std::fmt;
use uuid::Uuid;
#[cfg(test)]
use once_cell::sync::Lazy;
#[cfg(test)]
use std::sync::atomic;
@@ -17,10 +19,10 @@ use std::sync::atomic;
pub struct SegmentId(Uuid);
#[cfg(test)]
lazy_static! {
static ref AUTO_INC_COUNTER: atomic::AtomicUsize = atomic::AtomicUsize::default();
static ref ZERO_ARRAY: [u8; 8] = [0u8; 8];
}
static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(|| atomic::AtomicUsize::default());
#[cfg(test)]
const ZERO_ARRAY: [u8; 8] = [0u8; 8];
// During tests, we generate the segment id in a autoincrement manner
// for consistency of segment id between run.
@@ -30,7 +32,7 @@ lazy_static! {
#[cfg(test)]
fn create_uuid() -> Uuid {
let new_auto_inc_id = (*AUTO_INC_COUNTER).fetch_add(1, atomic::Ordering::SeqCst);
Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &*ZERO_ARRAY).unwrap()
Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &ZERO_ARRAY).unwrap()
}
#[cfg(not(test))]

View File

@@ -2,14 +2,13 @@ use super::SegmentComponent;
use crate::core::SegmentId;
use crate::Opstamp;
use census::{Inventory, TrackedObject};
use once_cell::sync::Lazy;
use serde;
use std::collections::HashSet;
use std::fmt;
use std::path::PathBuf;
lazy_static! {
static ref INVENTORY: Inventory<InnerSegmentMeta> = { Inventory::new() };
}
static INVENTORY: Lazy<Inventory<InnerSegmentMeta>> = Lazy::new(Inventory::new);
#[derive(Clone, Debug, Serialize, Deserialize)]
struct DeleteMeta {

View File

@@ -1,3 +1,4 @@
use once_cell::sync::Lazy;
use std::path::PathBuf;
/// A directory lock.
@@ -28,29 +29,27 @@ pub struct Lock {
pub is_blocking: bool,
}
lazy_static! {
/// Only one process should be able to write tantivy's index at a time.
/// This lock file, when present, is in charge of preventing other processes to open an IndexWriter.
///
/// If the process is killed and this file remains, it is safe to remove it manually.
///
/// Failing to acquire this lock usually means a misuse of tantivy's API,
/// (creating more than one instance of the `IndexWriter`), are a spurious
/// lock file remaining after a crash. In the latter case, removing the file after
/// checking no process running tantivy is running is safe.
pub static ref INDEX_WRITER_LOCK: Lock = Lock {
filepath: PathBuf::from(".tantivy-writer.lock"),
is_blocking: false
};
/// The meta lock file is here to protect the segment files being opened by
/// `IndexReader::reload()` from being garbage collected.
/// It makes it possible for another process to safely consume
/// our index in-writing. Ideally, we may have prefered `RWLock` semantics
/// here, but it is difficult to achieve on Windows.
///
/// Opening segment readers is a very fast process.
pub static ref META_LOCK: Lock = Lock {
filepath: PathBuf::from(".tantivy-meta.lock"),
is_blocking: true
};
}
/// Only one process should be able to write tantivy's index at a time.
/// This lock file, when present, is in charge of preventing other processes to open an IndexWriter.
///
/// If the process is killed and this file remains, it is safe to remove it manually.
///
/// Failing to acquire this lock usually means a misuse of tantivy's API,
/// (creating more than one instance of the `IndexWriter`), are a spurious
/// lock file remaining after a crash. In the latter case, removing the file after
/// checking no process running tantivy is running is safe.
pub static INDEX_WRITER_LOCK: Lazy<Lock> = Lazy::new(|| Lock {
filepath: PathBuf::from(".tantivy-writer.lock"),
is_blocking: false,
});
/// The meta lock file is here to protect the segment files being opened by
/// `IndexReader::reload()` from being garbage collected.
/// It makes it possible for another process to safely consume
/// our index in-writing. Ideally, we may have prefered `RWLock` semantics
/// here, but it is difficult to achieve on Windows.
///
/// Opening segment readers is a very fast process.
pub static META_LOCK: Lazy<Lock> = Lazy::new(|| Lock {
filepath: PathBuf::from(".tantivy-meta.lock"),
is_blocking: true,
});

View File

@@ -69,7 +69,7 @@ impl ManagedDirectory {
let managed_files: HashSet<PathBuf> = serde_json::from_str(&managed_files_json)
.map_err(|e| {
DataCorruption::new(
MANAGED_FILEPATH.clone(),
MANAGED_FILEPATH.to_path_buf(),
format!("Managed file cannot be deserialized: {:?}. ", e),
)
})?;
@@ -264,13 +264,12 @@ mod tests {
mod mmap_specific {
use super::super::*;
use once_cell::sync::Lazy;
use std::path::Path;
use tempdir::TempDir;
lazy_static! {
static ref TEST_PATH1: &'static Path = Path::new("some_path_for_test");
static ref TEST_PATH2: &'static Path = Path::new("some_path_for_test2");
}
static TEST_PATH1: Lazy<&'static Path> = Lazy::new(|| Path::new("some_path_for_test"));
static TEST_PATH2: Lazy<&'static Path> = Lazy::new(|| Path::new("some_path_for_test2"));
use crate::directory::MmapDirectory;
use std::io::Write;

View File

@@ -1,4 +1,5 @@
use super::*;
use once_cell::sync::Lazy;
use std::io::Write;
use std::mem;
use std::path::{Path, PathBuf};
@@ -9,9 +10,7 @@ use std::thread;
use std::time;
use std::time::Duration;
lazy_static! {
static ref TEST_PATH: &'static Path = Path::new("some_path_for_test");
}
static TEST_PATH: Lazy<&'static Path> = Lazy::new(|| Path::new("some_path_for_test"));
#[test]
fn test_ram_directory() {

View File

@@ -133,20 +133,20 @@ mod tests {
use crate::schema::Field;
use crate::schema::Schema;
use crate::schema::FAST;
use once_cell::sync::Lazy;
use rand::prelude::SliceRandom;
use rand::rngs::StdRng;
use rand::SeedableRng;
use std::collections::HashMap;
use std::path::Path;
lazy_static! {
pub static ref SCHEMA: Schema = {
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("field", FAST);
schema_builder.build()
};
pub static ref FIELD: Field = { SCHEMA.get_field("field").unwrap() };
}
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("field", FAST);
schema_builder.build()
});
pub static FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("field").unwrap());
#[test]
pub fn test_fastfield() {

View File

@@ -81,7 +81,7 @@ impl SegmentManager {
/// but have not yet been deleted by the garbage collector.
pub fn list_files(&self) -> HashSet<PathBuf> {
let mut files = HashSet::new();
files.insert(META_FILEPATH.clone());
files.insert(META_FILEPATH.to_path_buf());
for segment_meta in SegmentMeta::all() {
files.extend(segment_meta.list_files());
}

View File

@@ -108,9 +108,6 @@
//! [literate programming](http://fulmicoton.com/tantivy-examples/simple_search.html) /
//! [source code](https://github.com/fulmicoton/tantivy/blob/master/examples/simple_search.rs))
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate serde_derive;

View File

@@ -63,6 +63,7 @@ pub mod tests {
use crate::tokenizer::{SimpleTokenizer, MAX_TOKEN_LEN};
use crate::DocId;
use crate::Score;
use once_cell::sync::Lazy;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::iter;
@@ -509,53 +510,52 @@ pub mod tests {
}
}
lazy_static! {
pub static ref TERM_A: Term = {
let field = Field(0);
Term::from_field_text(field, "a")
};
pub static ref TERM_B: Term = {
let field = Field(0);
Term::from_field_text(field, "b")
};
pub static ref TERM_C: Term = {
let field = Field(0);
Term::from_field_text(field, "c")
};
pub static ref TERM_D: Term = {
let field = Field(0);
Term::from_field_text(field, "d")
};
pub static ref INDEX: Index = {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", STRING);
let schema = schema_builder.build();
pub static TERM_A: Lazy<Term> = Lazy::new(|| {
let field = Field(0);
Term::from_field_text(field, "a")
});
pub static TERM_B: Lazy<Term> = Lazy::new(|| {
let field = Field(0);
Term::from_field_text(field, "b")
});
pub static TERM_C: Lazy<Term> = Lazy::new(|| {
let field = Field(0);
Term::from_field_text(field, "c")
});
pub static TERM_D: Lazy<Term> = Lazy::new(|| {
let field = Field(0);
Term::from_field_text(field, "d")
});
let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
pub static INDEX: Lazy<Index> = Lazy::new(|| {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", STRING);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let posting_list_size = 1_000_000;
{
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
for _ in 0..posting_list_size {
let mut doc = Document::default();
if rng.gen_bool(1f64 / 15f64) {
doc.add_text(text_field, "a");
}
if rng.gen_bool(1f64 / 10f64) {
doc.add_text(text_field, "b");
}
if rng.gen_bool(1f64 / 5f64) {
doc.add_text(text_field, "c");
}
doc.add_text(text_field, "d");
index_writer.add_document(doc);
let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
let index = Index::create_in_ram(schema);
let posting_list_size = 1_000_000;
{
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
for _ in 0..posting_list_size {
let mut doc = Document::default();
if rng.gen_bool(1f64 / 15f64) {
doc.add_text(text_field, "a");
}
assert!(index_writer.commit().is_ok());
if rng.gen_bool(1f64 / 10f64) {
doc.add_text(text_field, "b");
}
if rng.gen_bool(1f64 / 5f64) {
doc.add_text(text_field, "c");
}
doc.add_text(text_field, "d");
index_writer.add_document(doc);
}
index
};
}
assert!(index_writer.commit().is_ok());
}
index
});
/// Wraps a given docset, and forward alls call but the
/// `.skip_next(...)`. This is useful to test that a specialized

View File

@@ -3,21 +3,20 @@ use crate::schema::Term;
use crate::Result;
use crate::Searcher;
use levenshtein_automata::{LevenshteinAutomatonBuilder, DFA};
use once_cell::sync::Lazy;
use std::collections::HashMap;
lazy_static! {
static ref LEV_BUILDER: HashMap<(u8, bool), LevenshteinAutomatonBuilder> = {
let mut lev_builder_cache = HashMap::new();
// TODO make population lazy on a `(distance, val)` basis
for distance in 0..3 {
for &transposition in &[false, true] {
let lev_automaton_builder = LevenshteinAutomatonBuilder::new(distance, transposition);
lev_builder_cache.insert((distance, transposition), lev_automaton_builder);
}
static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Lazy::new(|| {
let mut lev_builder_cache = HashMap::new();
// TODO make population lazy on a `(distance, val)` basis
for distance in 0..3 {
for &transposition in &[false, true] {
let lev_automaton_builder = LevenshteinAutomatonBuilder::new(distance, transposition);
lev_builder_cache.insert((distance, transposition), lev_automaton_builder);
}
lev_builder_cache
};
}
}
lev_builder_cache
});
/// A Fuzzy Query matches all of the documents
/// containing a specific term that is within

View File

@@ -1,4 +1,5 @@
use crate::common::BinarySerializable;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::borrow::Borrow;
@@ -183,9 +184,7 @@ impl Display for Facet {
}
fn escape_slashes(s: &str) -> Cow<'_, str> {
lazy_static! {
static ref SLASH_PTN: Regex = Regex::new(r"[\\/]").unwrap();
}
static SLASH_PTN: Lazy<Regex> = Lazy::new(|| Regex::new(r"[\\/]").unwrap());
SLASH_PTN.replace_all(s, "\\/")
}

View File

@@ -146,6 +146,7 @@ pub use self::flags::{FAST, INDEXED, STORED};
pub use self::int_options::Cardinality;
pub use self::int_options::IntOptions;
use once_cell::sync::Lazy;
use regex::Regex;
/// Validator for a potential `field_name`.
@@ -154,9 +155,8 @@ use regex::Regex;
/// A field name must start by a letter `[a-zA-Z]`.
/// The other characters can be any alphanumic character `[a-ZA-Z0-9]` or `_`.
pub fn is_valid_field_name(field_name: &str) -> bool {
lazy_static! {
static ref FIELD_NAME_PTN: Regex = Regex::new("^[a-zA-Z][_a-zA-Z0-9]*$").unwrap();
}
static FIELD_NAME_PTN: Lazy<Regex> =
Lazy::new(|| Regex::new("^[a-zA-Z][_a-zA-Z0-9]*$").unwrap());
FIELD_NAME_PTN.is_match(field_name)
}