mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-04 09:30:42 +00:00
Kompass master (#590)
* Use once_cell in place of lazy_static * Minor changes
This commit is contained in:
@@ -15,7 +15,7 @@ edition = "2018"
|
||||
[dependencies]
|
||||
base64 = "0.10.0"
|
||||
byteorder = "1.0"
|
||||
lazy_static = "1"
|
||||
once_cell = "0.2"
|
||||
regex = "1.0"
|
||||
tantivy-fst = "0.1"
|
||||
memmap = {version = "0.7", optional=true}
|
||||
|
||||
@@ -38,7 +38,7 @@ fn load_metas(directory: &dyn Directory) -> Result<IndexMeta> {
|
||||
serde_json::from_str(&meta_string)
|
||||
.map_err(|e| {
|
||||
DataCorruption::new(
|
||||
META_FILEPATH.clone(),
|
||||
META_FILEPATH.to_path_buf(),
|
||||
format!("Meta file cannot be deserialized. {:?}.", e),
|
||||
)
|
||||
})
|
||||
|
||||
@@ -21,18 +21,16 @@ pub use self::segment_id::SegmentId;
|
||||
pub use self::segment_meta::SegmentMeta;
|
||||
pub use self::segment_reader::SegmentReader;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::path::Path;
|
||||
|
||||
lazy_static! {
|
||||
/// The meta file contains all the information about the list of segments and the schema
|
||||
/// of the index.
|
||||
pub static META_FILEPATH: Lazy<&'static Path> = Lazy::new(|| Path::new("meta.json"));
|
||||
|
||||
/// The meta file contains all the information about the list of segments and the schema
|
||||
/// of the index.
|
||||
pub static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json");
|
||||
|
||||
/// The managed file contains a list of files that were created by tantivy
|
||||
/// and will therefore be garbage collected when they are deemed useless by tantivy.
|
||||
///
|
||||
/// Removing this file is safe, but will prevent the garbage collection of all of the files that
|
||||
/// are currently in the directory
|
||||
pub static ref MANAGED_FILEPATH: PathBuf = PathBuf::from(".managed.json");
|
||||
}
|
||||
/// The managed file contains a list of files that were created by tantivy
|
||||
/// and will therefore be garbage collected when they are deemed useless by tantivy.
|
||||
///
|
||||
/// Removing this file is safe, but will prevent the garbage collection of all of the files that
|
||||
/// are currently in the directory
|
||||
pub static MANAGED_FILEPATH: Lazy<&'static Path> = Lazy::new(|| Path::new(".managed.json"));
|
||||
|
||||
@@ -2,6 +2,8 @@ use std::cmp::{Ord, Ordering};
|
||||
use std::fmt;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[cfg(test)]
|
||||
use once_cell::sync::Lazy;
|
||||
#[cfg(test)]
|
||||
use std::sync::atomic;
|
||||
|
||||
@@ -17,10 +19,10 @@ use std::sync::atomic;
|
||||
pub struct SegmentId(Uuid);
|
||||
|
||||
#[cfg(test)]
|
||||
lazy_static! {
|
||||
static ref AUTO_INC_COUNTER: atomic::AtomicUsize = atomic::AtomicUsize::default();
|
||||
static ref ZERO_ARRAY: [u8; 8] = [0u8; 8];
|
||||
}
|
||||
static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(|| atomic::AtomicUsize::default());
|
||||
|
||||
#[cfg(test)]
|
||||
const ZERO_ARRAY: [u8; 8] = [0u8; 8];
|
||||
|
||||
// During tests, we generate the segment id in an auto-incrementing manner
|
||||
// for consistency of segment ids between runs.
|
||||
@@ -30,7 +32,7 @@ lazy_static! {
|
||||
#[cfg(test)]
|
||||
fn create_uuid() -> Uuid {
|
||||
let new_auto_inc_id = (*AUTO_INC_COUNTER).fetch_add(1, atomic::Ordering::SeqCst);
|
||||
Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &*ZERO_ARRAY).unwrap()
|
||||
Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &ZERO_ARRAY).unwrap()
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
|
||||
@@ -2,14 +2,13 @@ use super::SegmentComponent;
|
||||
use crate::core::SegmentId;
|
||||
use crate::Opstamp;
|
||||
use census::{Inventory, TrackedObject};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
use std::path::PathBuf;
|
||||
|
||||
lazy_static! {
|
||||
static ref INVENTORY: Inventory<InnerSegmentMeta> = { Inventory::new() };
|
||||
}
|
||||
static INVENTORY: Lazy<Inventory<InnerSegmentMeta>> = Lazy::new(Inventory::new);
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
struct DeleteMeta {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use once_cell::sync::Lazy;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// A directory lock.
|
||||
@@ -28,29 +29,27 @@ pub struct Lock {
|
||||
pub is_blocking: bool,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
/// Only one process should be able to write tantivy's index at a time.
|
||||
/// This lock file, when present, is in charge of preventing other processes from opening an IndexWriter.
|
||||
///
|
||||
/// If the process is killed and this file remains, it is safe to remove it manually.
|
||||
///
|
||||
/// Failing to acquire this lock usually means a misuse of tantivy's API,
|
||||
/// (creating more than one instance of the `IndexWriter`), or a spurious
|
||||
/// lock file remaining after a crash. In the latter case, removing the file after
|
||||
/// checking that no process using tantivy is still running is safe.
|
||||
pub static ref INDEX_WRITER_LOCK: Lock = Lock {
|
||||
filepath: PathBuf::from(".tantivy-writer.lock"),
|
||||
is_blocking: false
|
||||
};
|
||||
/// The meta lock file is here to protect the segment files being opened by
|
||||
/// `IndexReader::reload()` from being garbage collected.
|
||||
/// It makes it possible for another process to safely consume
|
||||
/// our index in-writing. Ideally, we would have preferred `RWLock` semantics
|
||||
/// here, but it is difficult to achieve on Windows.
|
||||
///
|
||||
/// Opening segment readers is a very fast process.
|
||||
pub static ref META_LOCK: Lock = Lock {
|
||||
filepath: PathBuf::from(".tantivy-meta.lock"),
|
||||
is_blocking: true
|
||||
};
|
||||
}
|
||||
/// Only one process should be able to write tantivy's index at a time.
|
||||
/// This lock file, when present, is in charge of preventing other processes from opening an IndexWriter.
|
||||
///
|
||||
/// If the process is killed and this file remains, it is safe to remove it manually.
|
||||
///
|
||||
/// Failing to acquire this lock usually means a misuse of tantivy's API,
|
||||
/// (creating more than one instance of the `IndexWriter`), or a spurious
|
||||
/// lock file remaining after a crash. In the latter case, removing the file after
|
||||
/// checking that no process using tantivy is still running is safe.
|
||||
pub static INDEX_WRITER_LOCK: Lazy<Lock> = Lazy::new(|| Lock {
|
||||
filepath: PathBuf::from(".tantivy-writer.lock"),
|
||||
is_blocking: false,
|
||||
});
|
||||
/// The meta lock file is here to protect the segment files being opened by
|
||||
/// `IndexReader::reload()` from being garbage collected.
|
||||
/// It makes it possible for another process to safely consume
|
||||
/// our index in-writing. Ideally, we would have preferred `RWLock` semantics
|
||||
/// here, but it is difficult to achieve on Windows.
|
||||
///
|
||||
/// Opening segment readers is a very fast process.
|
||||
pub static META_LOCK: Lazy<Lock> = Lazy::new(|| Lock {
|
||||
filepath: PathBuf::from(".tantivy-meta.lock"),
|
||||
is_blocking: true,
|
||||
});
|
||||
|
||||
@@ -69,7 +69,7 @@ impl ManagedDirectory {
|
||||
let managed_files: HashSet<PathBuf> = serde_json::from_str(&managed_files_json)
|
||||
.map_err(|e| {
|
||||
DataCorruption::new(
|
||||
MANAGED_FILEPATH.clone(),
|
||||
MANAGED_FILEPATH.to_path_buf(),
|
||||
format!("Managed file cannot be deserialized: {:?}. ", e),
|
||||
)
|
||||
})?;
|
||||
@@ -264,13 +264,12 @@ mod tests {
|
||||
mod mmap_specific {
|
||||
|
||||
use super::super::*;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::path::Path;
|
||||
use tempdir::TempDir;
|
||||
|
||||
lazy_static! {
|
||||
static ref TEST_PATH1: &'static Path = Path::new("some_path_for_test");
|
||||
static ref TEST_PATH2: &'static Path = Path::new("some_path_for_test2");
|
||||
}
|
||||
static TEST_PATH1: Lazy<&'static Path> = Lazy::new(|| Path::new("some_path_for_test"));
|
||||
static TEST_PATH2: Lazy<&'static Path> = Lazy::new(|| Path::new("some_path_for_test2"));
|
||||
|
||||
use crate::directory::MmapDirectory;
|
||||
use std::io::Write;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use super::*;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::io::Write;
|
||||
use std::mem;
|
||||
use std::path::{Path, PathBuf};
|
||||
@@ -9,9 +10,7 @@ use std::thread;
|
||||
use std::time;
|
||||
use std::time::Duration;
|
||||
|
||||
lazy_static! {
|
||||
static ref TEST_PATH: &'static Path = Path::new("some_path_for_test");
|
||||
}
|
||||
static TEST_PATH: Lazy<&'static Path> = Lazy::new(|| Path::new("some_path_for_test"));
|
||||
|
||||
#[test]
|
||||
fn test_ram_directory() {
|
||||
|
||||
@@ -133,20 +133,20 @@ mod tests {
|
||||
use crate::schema::Field;
|
||||
use crate::schema::Schema;
|
||||
use crate::schema::FAST;
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::SeedableRng;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref SCHEMA: Schema = {
|
||||
let mut schema_builder = Schema::builder();
|
||||
schema_builder.add_u64_field("field", FAST);
|
||||
schema_builder.build()
|
||||
};
|
||||
pub static ref FIELD: Field = { SCHEMA.get_field("field").unwrap() };
|
||||
}
|
||||
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
|
||||
let mut schema_builder = Schema::builder();
|
||||
schema_builder.add_u64_field("field", FAST);
|
||||
schema_builder.build()
|
||||
});
|
||||
|
||||
pub static FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("field").unwrap());
|
||||
|
||||
#[test]
|
||||
pub fn test_fastfield() {
|
||||
|
||||
@@ -81,7 +81,7 @@ impl SegmentManager {
|
||||
/// but have not yet been deleted by the garbage collector.
|
||||
pub fn list_files(&self) -> HashSet<PathBuf> {
|
||||
let mut files = HashSet::new();
|
||||
files.insert(META_FILEPATH.clone());
|
||||
files.insert(META_FILEPATH.to_path_buf());
|
||||
for segment_meta in SegmentMeta::all() {
|
||||
files.extend(segment_meta.list_files());
|
||||
}
|
||||
|
||||
@@ -108,9 +108,6 @@
|
||||
//! [literate programming](http://fulmicoton.com/tantivy-examples/simple_search.html) /
|
||||
//! [source code](https://github.com/fulmicoton/tantivy/blob/master/examples/simple_search.rs))
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
|
||||
|
||||
@@ -63,6 +63,7 @@ pub mod tests {
|
||||
use crate::tokenizer::{SimpleTokenizer, MAX_TOKEN_LEN};
|
||||
use crate::DocId;
|
||||
use crate::Score;
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use std::iter;
|
||||
@@ -509,53 +510,52 @@ pub mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
pub static ref TERM_A: Term = {
|
||||
let field = Field(0);
|
||||
Term::from_field_text(field, "a")
|
||||
};
|
||||
pub static ref TERM_B: Term = {
|
||||
let field = Field(0);
|
||||
Term::from_field_text(field, "b")
|
||||
};
|
||||
pub static ref TERM_C: Term = {
|
||||
let field = Field(0);
|
||||
Term::from_field_text(field, "c")
|
||||
};
|
||||
pub static ref TERM_D: Term = {
|
||||
let field = Field(0);
|
||||
Term::from_field_text(field, "d")
|
||||
};
|
||||
pub static ref INDEX: Index = {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", STRING);
|
||||
let schema = schema_builder.build();
|
||||
pub static TERM_A: Lazy<Term> = Lazy::new(|| {
|
||||
let field = Field(0);
|
||||
Term::from_field_text(field, "a")
|
||||
});
|
||||
pub static TERM_B: Lazy<Term> = Lazy::new(|| {
|
||||
let field = Field(0);
|
||||
Term::from_field_text(field, "b")
|
||||
});
|
||||
pub static TERM_C: Lazy<Term> = Lazy::new(|| {
|
||||
let field = Field(0);
|
||||
Term::from_field_text(field, "c")
|
||||
});
|
||||
pub static TERM_D: Lazy<Term> = Lazy::new(|| {
|
||||
let field = Field(0);
|
||||
Term::from_field_text(field, "d")
|
||||
});
|
||||
|
||||
let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
|
||||
pub static INDEX: Lazy<Index> = Lazy::new(|| {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", STRING);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
let posting_list_size = 1_000_000;
|
||||
{
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
for _ in 0..posting_list_size {
|
||||
let mut doc = Document::default();
|
||||
if rng.gen_bool(1f64 / 15f64) {
|
||||
doc.add_text(text_field, "a");
|
||||
}
|
||||
if rng.gen_bool(1f64 / 10f64) {
|
||||
doc.add_text(text_field, "b");
|
||||
}
|
||||
if rng.gen_bool(1f64 / 5f64) {
|
||||
doc.add_text(text_field, "c");
|
||||
}
|
||||
doc.add_text(text_field, "d");
|
||||
index_writer.add_document(doc);
|
||||
let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
let posting_list_size = 1_000_000;
|
||||
{
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
for _ in 0..posting_list_size {
|
||||
let mut doc = Document::default();
|
||||
if rng.gen_bool(1f64 / 15f64) {
|
||||
doc.add_text(text_field, "a");
|
||||
}
|
||||
assert!(index_writer.commit().is_ok());
|
||||
if rng.gen_bool(1f64 / 10f64) {
|
||||
doc.add_text(text_field, "b");
|
||||
}
|
||||
if rng.gen_bool(1f64 / 5f64) {
|
||||
doc.add_text(text_field, "c");
|
||||
}
|
||||
doc.add_text(text_field, "d");
|
||||
index_writer.add_document(doc);
|
||||
}
|
||||
index
|
||||
};
|
||||
}
|
||||
assert!(index_writer.commit().is_ok());
|
||||
}
|
||||
index
|
||||
});
|
||||
|
||||
/// Wraps a given docset, and forwards all calls but the
|
||||
/// `.skip_next(...)`. This is useful to test that a specialized
|
||||
|
||||
@@ -3,21 +3,20 @@ use crate::schema::Term;
|
||||
use crate::Result;
|
||||
use crate::Searcher;
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder, DFA};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::collections::HashMap;
|
||||
|
||||
lazy_static! {
|
||||
static ref LEV_BUILDER: HashMap<(u8, bool), LevenshteinAutomatonBuilder> = {
|
||||
let mut lev_builder_cache = HashMap::new();
|
||||
// TODO make population lazy on a `(distance, val)` basis
|
||||
for distance in 0..3 {
|
||||
for &transposition in &[false, true] {
|
||||
let lev_automaton_builder = LevenshteinAutomatonBuilder::new(distance, transposition);
|
||||
lev_builder_cache.insert((distance, transposition), lev_automaton_builder);
|
||||
}
|
||||
static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Lazy::new(|| {
|
||||
let mut lev_builder_cache = HashMap::new();
|
||||
// TODO make population lazy on a `(distance, val)` basis
|
||||
for distance in 0..3 {
|
||||
for &transposition in &[false, true] {
|
||||
let lev_automaton_builder = LevenshteinAutomatonBuilder::new(distance, transposition);
|
||||
lev_builder_cache.insert((distance, transposition), lev_automaton_builder);
|
||||
}
|
||||
lev_builder_cache
|
||||
};
|
||||
}
|
||||
}
|
||||
lev_builder_cache
|
||||
});
|
||||
|
||||
/// A Fuzzy Query matches all of the documents
|
||||
/// containing a specific term that is within
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::common::BinarySerializable;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use std::borrow::Borrow;
|
||||
@@ -183,9 +184,7 @@ impl Display for Facet {
|
||||
}
|
||||
|
||||
fn escape_slashes(s: &str) -> Cow<'_, str> {
|
||||
lazy_static! {
|
||||
static ref SLASH_PTN: Regex = Regex::new(r"[\\/]").unwrap();
|
||||
}
|
||||
static SLASH_PTN: Lazy<Regex> = Lazy::new(|| Regex::new(r"[\\/]").unwrap());
|
||||
SLASH_PTN.replace_all(s, "\\/")
|
||||
}
|
||||
|
||||
|
||||
@@ -146,6 +146,7 @@ pub use self::flags::{FAST, INDEXED, STORED};
|
||||
pub use self::int_options::Cardinality;
|
||||
pub use self::int_options::IntOptions;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
|
||||
/// Validator for a potential `field_name`.
|
||||
@@ -154,9 +155,8 @@ use regex::Regex;
|
||||
/// A field name must start with a letter `[a-zA-Z]`.
|
||||
/// The other characters can be any alphanumeric character `[a-zA-Z0-9]` or `_`.
|
||||
pub fn is_valid_field_name(field_name: &str) -> bool {
|
||||
lazy_static! {
|
||||
static ref FIELD_NAME_PTN: Regex = Regex::new("^[a-zA-Z][_a-zA-Z0-9]*$").unwrap();
|
||||
}
|
||||
static FIELD_NAME_PTN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("^[a-zA-Z][_a-zA-Z0-9]*$").unwrap());
|
||||
FIELD_NAME_PTN.is_match(field_name)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user