mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-08 01:52:54 +00:00
1307 lines
51 KiB
Rust
1307 lines
51 KiB
Rust
//! Column oriented field storage for tantivy.
|
|
//!
|
|
//! It is the equivalent of `Lucene`'s `DocValues`.
|
|
//!
|
|
//! A fast field is a column-oriented fashion storage for `tantivy`.
|
|
//!
|
|
//! It is designed for the fast random access of some document
|
|
//! fields given a document id.
|
|
//!
|
|
//! Fast fields are useful when a field is required for all or most of
|
|
//! the `DocSet`: for instance for scoring, grouping, aggregation, filtering, or faceting.
|
|
//!
|
|
//!
|
|
//! Fields have to be declared as `FAST` in the schema.
|
|
//! Currently supported fields are: u64, i64, f64, bytes, ip and text.
|
|
//!
|
|
//! Fast fields are stored in with [different codecs](columnar). The best codec is detected
|
|
//! automatically, when serializing.
|
|
//!
|
|
//! Read access performance is comparable to that of an array lookup.
|
|
|
|
pub use columnar::Column;
|
|
use columnar::MonotonicallyMappableToU64;
|
|
|
|
pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet};
|
|
pub use self::error::{FastFieldNotAvailableError, Result};
|
|
pub use self::facet_reader::FacetReader;
|
|
pub use self::readers::FastFieldReaders;
|
|
pub use self::writer::FastFieldsWriter;
|
|
use crate::schema::Type;
|
|
use crate::DateTime;
|
|
|
|
mod alive_bitset;
|
|
mod error;
|
|
mod facet_reader;
|
|
mod readers;
|
|
mod writer;
|
|
|
|
/// Trait for types that are allowed for fast fields:
|
|
/// (u64, i64 and f64, bool, DateTime).
|
|
pub trait FastValue: MonotonicallyMappableToU64 {
|
|
/// Returns the `schema::Type` for this FastValue.
|
|
fn to_type() -> Type;
|
|
}
|
|
|
|
impl FastValue for u64 {
|
|
fn to_type() -> Type {
|
|
Type::U64
|
|
}
|
|
}
|
|
|
|
impl FastValue for i64 {
|
|
fn to_type() -> Type {
|
|
Type::I64
|
|
}
|
|
}
|
|
|
|
impl FastValue for f64 {
|
|
fn to_type() -> Type {
|
|
Type::F64
|
|
}
|
|
}
|
|
|
|
impl FastValue for bool {
|
|
fn to_type() -> Type {
|
|
Type::Bool
|
|
}
|
|
}
|
|
impl FastValue for DateTime {
|
|
fn to_type() -> Type {
|
|
Type::Date
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
|
|
use std::net::Ipv6Addr;
|
|
use std::ops::{Range, RangeInclusive};
|
|
use std::path::Path;
|
|
|
|
use columnar::StrColumn;
|
|
use common::{ByteCount, DateTimePrecision, HasLen, TerminatingWrite};
|
|
use once_cell::sync::Lazy;
|
|
use rand::prelude::SliceRandom;
|
|
use rand::rngs::StdRng;
|
|
use rand::{Rng, SeedableRng};
|
|
|
|
use super::*;
|
|
use crate::directory::{Directory, RamDirectory, WritePtr};
|
|
use crate::index::SegmentId;
|
|
use crate::merge_policy::NoMergePolicy;
|
|
use crate::schema::{
|
|
DateOptions, Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder,
|
|
TantivyDocument, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
|
|
};
|
|
use crate::time::OffsetDateTime;
|
|
use crate::tokenizer::{LowerCaser, RawTokenizer, TextAnalyzer, TokenizerManager};
|
|
use crate::{Index, IndexWriter, SegmentReader};
|
|
|
|
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_u64_field("field", FAST);
|
|
schema_builder.build()
|
|
});
|
|
pub static FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("field").unwrap());
|
|
|
|
#[test]
|
|
pub fn test_convert_i64_u64() {
|
|
let datetime = DateTime::from_utc(OffsetDateTime::UNIX_EPOCH);
|
|
assert_eq!(i64::from_u64(datetime.to_u64()), 0i64);
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_small() -> crate::Result<()> {
|
|
let path = Path::new("test");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>13u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>14u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>2u64))
|
|
.unwrap();
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
|
|
assert_eq!(file.len(), 80);
|
|
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
|
let column = fast_field_readers
|
|
.u64("field")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
assert_eq!(column.get_val(0), 13u64);
|
|
assert_eq!(column.get_val(1), 14u64);
|
|
assert_eq!(column.get_val(2), 2u64);
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_large() {
|
|
let path = Path::new("test");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>4u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>14_082_001u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>3_052u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>9_002u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>15_001u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>777u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>1_002u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>1_501u64))
|
|
.unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>215u64))
|
|
.unwrap();
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
assert_eq!(file.len(), 108);
|
|
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
|
let col = fast_field_readers
|
|
.u64("field")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
assert_eq!(col.get_val(0), 4u64);
|
|
assert_eq!(col.get_val(1), 14_082_001u64);
|
|
assert_eq!(col.get_val(2), 3_052u64);
|
|
assert_eq!(col.get_val(3), 9002u64);
|
|
assert_eq!(col.get_val(4), 15_001u64);
|
|
assert_eq!(col.get_val(5), 777u64);
|
|
assert_eq!(col.get_val(6), 1_002u64);
|
|
assert_eq!(col.get_val(7), 1_501u64);
|
|
assert_eq!(col.get_val(8), 215u64);
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_null_amplitude() {
|
|
let path = Path::new("test");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
|
for _ in 0..10_000 {
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>100_000u64))
|
|
.unwrap();
|
|
}
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
assert_eq!(file.len(), 81);
|
|
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
|
let fast_field_reader = fast_field_readers
|
|
.u64("field")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
for doc in 0..10_000 {
|
|
assert_eq!(fast_field_reader.get_val(doc), 100_000u64);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_large_numbers() {
|
|
let path = Path::new("test");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
|
// forcing the amplitude to be high
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>0u64))
|
|
.unwrap();
|
|
for doc_id in 1u64..10_000u64 {
|
|
fast_field_writers
|
|
.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id))
|
|
.unwrap();
|
|
}
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
assert_eq!(file.len(), 4476);
|
|
{
|
|
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
|
let col = fast_field_readers
|
|
.u64("field")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
for doc in 1..10_000 {
|
|
assert_eq!(col.get_val(doc), 5_000_000_000_000_000_000u64 + doc as u64);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_signed_intfastfield_normal() -> crate::Result<()> {
|
|
let path = Path::new("test");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
let mut schema_builder = Schema::builder();
|
|
|
|
let i64_field = schema_builder.add_i64_field("field", FAST);
|
|
let schema = schema_builder.build();
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
|
for i in -100i64..10_000i64 {
|
|
let mut doc = TantivyDocument::default();
|
|
doc.add_i64(i64_field, i);
|
|
fast_field_writers.add_document(&doc).unwrap();
|
|
}
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
assert_eq!(file.len(), 252);
|
|
|
|
{
|
|
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
|
let col = fast_field_readers
|
|
.i64("field")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
assert_eq!(col.min_value(), -100i64);
|
|
assert_eq!(col.max_value(), 9_999i64);
|
|
for (doc, i) in (-100i64..10_000i64).enumerate() {
|
|
assert_eq!(col.get_val(doc as u32), i);
|
|
}
|
|
let mut buffer = vec![0i64; 100];
|
|
col.get_range(53, &mut buffer[..]);
|
|
for i in 0..100 {
|
|
assert_eq!(buffer[i], -100i64 + 53i64 + i as i64);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
fn test_signed_intfastfield_default_val() {
|
|
let path = Path::new("test");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_i64_field("field", FAST);
|
|
let schema = schema_builder.build();
|
|
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
|
let doc = TantivyDocument::default();
|
|
fast_field_writers.add_document(&doc).unwrap();
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
|
|
let file = directory.open_read(path).unwrap();
|
|
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
|
let col = fast_field_readers.i64("field").unwrap();
|
|
assert_eq!(col.first(0), None);
|
|
|
|
let col = fast_field_readers
|
|
.i64("field")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
assert_eq!(col.get_val(0), 0);
|
|
let col = fast_field_readers
|
|
.i64("field")
|
|
.unwrap()
|
|
.first_or_default_col(-100);
|
|
assert_eq!(col.get_val(0), -100);
|
|
}
|
|
|
|
#[test]
|
|
fn test_date_fastfield_default() {
|
|
let path = Path::new("test");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_date_field("date", FAST);
|
|
let schema = schema_builder.build();
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
|
let doc = TantivyDocument::default();
|
|
fast_field_writers.add_document(&doc).unwrap();
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
|
|
let file = directory.open_read(path).unwrap();
|
|
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
|
let col = fast_field_readers
|
|
.date("date")
|
|
.unwrap()
|
|
.first_or_default_col(DateTime::default());
|
|
assert_eq!(col.get_val(0), DateTime::default());
|
|
}
|
|
|
|
// Warning: this generates the same permutation at each call
|
|
pub fn generate_permutation() -> Vec<u64> {
|
|
let mut permutation: Vec<u64> = (0u64..100_000u64).collect();
|
|
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
|
|
permutation
|
|
}
|
|
|
|
// Warning: this generates the same permutation at each call
|
|
pub fn generate_permutation_gcd() -> Vec<u64> {
|
|
let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect();
|
|
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
|
|
permutation
|
|
}
|
|
|
|
fn test_intfastfield_permutation_with_data(permutation: Vec<u64>) {
|
|
let path = Path::new("test");
|
|
let n = permutation.len();
|
|
let directory = RamDirectory::create();
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
|
for &x in &permutation {
|
|
fast_field_writers.add_document(&doc!(*FIELD=>x)).unwrap();
|
|
}
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
|
let col = fast_field_readers
|
|
.u64("field")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
for a in 0..n {
|
|
assert_eq!(col.get_val(a as u32), permutation[a]);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_permutation_gcd() {
|
|
let permutation = generate_permutation_gcd();
|
|
test_intfastfield_permutation_with_data(permutation);
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_permutation() {
|
|
let permutation = generate_permutation();
|
|
test_intfastfield_permutation_with_data(permutation);
|
|
}
|
|
|
|
#[test]
|
|
fn test_merge_missing_date_fast_field() {
|
|
let mut schema_builder = Schema::builder();
|
|
let date_field = schema_builder.add_date_field("date", FAST);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
|
index_writer
|
|
.add_document(doc!(date_field => DateTime::from_utc(OffsetDateTime::now_utc())))
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
index_writer.add_document(doc!()).unwrap();
|
|
index_writer.commit().unwrap();
|
|
let reader = index.reader().unwrap();
|
|
let segment_ids: Vec<SegmentId> = reader
|
|
.searcher()
|
|
.segment_readers()
|
|
.iter()
|
|
.map(SegmentReader::segment_id)
|
|
.collect();
|
|
assert_eq!(segment_ids.len(), 2);
|
|
index_writer.merge(&segment_ids[..]).wait().unwrap();
|
|
reader.reload().unwrap();
|
|
assert_eq!(reader.searcher().segment_readers().len(), 1);
|
|
}
|
|
|
|
fn get_vals_for_docs(column: &Column<u64>, docs: Range<u32>) -> Vec<u64> {
|
|
docs.into_iter()
|
|
.flat_map(|doc| column.values_for_doc(doc))
|
|
.collect()
|
|
}
|
|
|
|
#[test]
|
|
fn test_text_fastfield() {
|
|
let mut schema_builder = Schema::builder();
|
|
let text_field = schema_builder.add_text_field("text", TEXT | FAST);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
|
|
{
|
|
// first segment
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
|
index_writer
|
|
.add_document(doc!(
|
|
text_field => "BBBBB", // term ord 1
|
|
text_field => "AAAAA", // term ord 0
|
|
))
|
|
.unwrap();
|
|
index_writer.add_document(doc!()).unwrap();
|
|
index_writer
|
|
.add_document(doc!(
|
|
text_field => "AAAAA", // term_ord 0
|
|
))
|
|
.unwrap();
|
|
index_writer
|
|
.add_document(doc!(
|
|
text_field => "AAAAA",
|
|
text_field => "BBBBB",
|
|
))
|
|
.unwrap();
|
|
index_writer
|
|
.add_document(doc!(
|
|
text_field => "zumberthree", // term_ord 2, after merge term_ord 3
|
|
))
|
|
.unwrap();
|
|
|
|
index_writer.add_document(doc!()).unwrap();
|
|
index_writer.commit().unwrap();
|
|
|
|
let reader = index.reader().unwrap();
|
|
let searcher = reader.searcher();
|
|
assert_eq!(searcher.segment_readers().len(), 1);
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let fast_fields = segment_reader.fast_fields();
|
|
let str_column = fast_fields.str("text").unwrap().unwrap();
|
|
assert!(str_column.ords().values_for_doc(0u32).eq([1, 0]),);
|
|
assert!(str_column.ords().values_for_doc(1u32).next().is_none());
|
|
assert!(str_column.ords().values_for_doc(2u32).eq([0]),);
|
|
assert!(str_column.ords().values_for_doc(3u32).eq([0, 1]),);
|
|
assert!(str_column.ords().values_for_doc(4u32).eq([2]),);
|
|
|
|
let mut str_term = String::default();
|
|
assert!(str_column.ord_to_str(0, &mut str_term).unwrap());
|
|
assert_eq!("AAAAA", &str_term);
|
|
|
|
let inverted_index = segment_reader.inverted_index(text_field).unwrap();
|
|
assert_eq!(inverted_index.terms().num_terms(), 3);
|
|
let mut bytes = vec![];
|
|
assert!(inverted_index.terms().ord_to_term(0, &mut bytes).unwrap());
|
|
assert_eq!(bytes, "aaaaa".as_bytes());
|
|
}
|
|
|
|
{
|
|
// second segment
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
|
|
index_writer
|
|
.add_document(doc!(
|
|
text_field => "AAAAA", // term_ord 0
|
|
))
|
|
.unwrap();
|
|
|
|
index_writer
|
|
.add_document(doc!(
|
|
text_field => "CCCCC AAAAA", // term_ord 1, after merge 2
|
|
))
|
|
.unwrap();
|
|
|
|
index_writer.add_document(doc!()).unwrap();
|
|
index_writer.commit().unwrap();
|
|
|
|
let reader = index.reader().unwrap();
|
|
let searcher = reader.searcher();
|
|
assert_eq!(searcher.segment_readers().len(), 2);
|
|
let segment_reader = searcher.segment_reader(1);
|
|
let fast_fields = segment_reader.fast_fields();
|
|
let text_fast_field = fast_fields.str("text").unwrap().unwrap();
|
|
|
|
assert_eq!(&get_vals_for_docs(text_fast_field.ords(), 0..2), &[0, 1]);
|
|
}
|
|
|
|
// TODO uncomment once merging is available
|
|
// Merging the segments
|
|
{
|
|
let segment_ids = index.searchable_segment_ids().unwrap();
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer.merge(&segment_ids).wait().unwrap();
|
|
index_writer.wait_merging_threads().unwrap();
|
|
}
|
|
|
|
let reader = index.reader().unwrap();
|
|
let searcher = reader.searcher();
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let fast_fields = segment_reader.fast_fields();
|
|
let text_column = fast_fields.str("text").unwrap().unwrap();
|
|
|
|
assert_eq!(
|
|
get_vals_for_docs(text_column.ords(), 0..8),
|
|
vec![1, 0, 0, 0, 1, 3 /* next segment */, 0, 2]
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_string_fastfield_simple() {
|
|
let mut schema_builder = Schema::builder();
|
|
let text_field = schema_builder.add_text_field("text", TEXT | FAST);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut writer = index.writer_for_tests().unwrap();
|
|
writer.add_document(doc!(text_field=>"hello happy tax payer", text_field=>"aaa this string comes lexicographically before the other one.")).unwrap();
|
|
writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let str_column = segment_reader.fast_fields().str("text").unwrap().unwrap();
|
|
// The string values are not sorted here.
|
|
let term_ords: Vec<u64> = str_column.term_ords(0u32).collect();
|
|
assert_eq!(&term_ords, &[1, 0]);
|
|
}
|
|
|
|
#[test]
|
|
fn test_facet_fastfield_simple() {
|
|
let mut schema_builder = Schema::builder();
|
|
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut writer = index.writer_for_tests().unwrap();
|
|
writer
|
|
.add_document(doc!(facet_field=>Facet::from("/a/2"), facet_field=>Facet::from("/a/1")))
|
|
.unwrap();
|
|
writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let facet_reader = segment_reader.facet_reader("facet").unwrap();
|
|
// facets, contrary to strings are sorted.
|
|
let mut facet_ords = Vec::new();
|
|
facet_ords.extend(facet_reader.facet_ords(0u32));
|
|
assert_eq!(&facet_ords, &[0, 1]);
|
|
}
|
|
|
|
#[test]
|
|
fn test_string_fastfield() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let text_field = schema_builder.add_text_field("text", STRING | FAST);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
|
|
{
|
|
// first segment
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
|
index_writer.add_document(doc!(
|
|
text_field => "BBBBB", // term_ord 1
|
|
))?;
|
|
index_writer.add_document(doc!())?;
|
|
index_writer.add_document(doc!(
|
|
text_field => "AAAAA", // term_ord 0
|
|
))?;
|
|
index_writer.add_document(doc!(
|
|
text_field => "AAAAA", // term_ord 0
|
|
))?;
|
|
index_writer.add_document(doc!(
|
|
text_field => "zumberthree", // term_ord 2, after merge term_ord 3
|
|
))?;
|
|
|
|
index_writer.add_document(doc!())?;
|
|
index_writer.commit()?;
|
|
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
assert_eq!(searcher.segment_readers().len(), 1);
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let fast_fields = segment_reader.fast_fields();
|
|
let text_col = fast_fields.str("text").unwrap().unwrap();
|
|
|
|
assert_eq!(get_vals_for_docs(text_col.ords(), 0..6), vec![1, 0, 0, 2]);
|
|
|
|
let inverted_index = segment_reader.inverted_index(text_field)?;
|
|
assert_eq!(inverted_index.terms().num_terms(), 3);
|
|
let mut bytes = vec![];
|
|
assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?);
|
|
assert_eq!(bytes, "AAAAA".as_bytes());
|
|
}
|
|
|
|
{
|
|
// second segment
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
|
|
index_writer.add_document(doc!(
|
|
text_field => "AAAAA", // term_ord 0
|
|
))?;
|
|
|
|
index_writer.add_document(doc!(
|
|
text_field => "CCCCC", // term_ord 1, after merge 2
|
|
))?;
|
|
|
|
index_writer.add_document(doc!())?;
|
|
index_writer.commit()?;
|
|
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
assert_eq!(searcher.segment_readers().len(), 2);
|
|
let segment_reader = searcher.segment_reader(1);
|
|
let fast_fields = segment_reader.fast_fields();
|
|
let text_fast_field = fast_fields.str("text").unwrap().unwrap();
|
|
|
|
assert_eq!(&get_vals_for_docs(text_fast_field.ords(), 0..2), &[0, 1]);
|
|
}
|
|
// Merging the segments
|
|
{
|
|
let segment_ids = index.searchable_segment_ids()?;
|
|
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
|
index_writer.merge(&segment_ids).wait()?;
|
|
index_writer.wait_merging_threads()?;
|
|
}
|
|
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let fast_fields = segment_reader.fast_fields();
|
|
let text_fast_field = fast_fields.str("text").unwrap().unwrap();
|
|
|
|
assert_eq!(
|
|
get_vals_for_docs(text_fast_field.ords(), 0..9),
|
|
vec![1, 0, 0, 3 /* next segment */, 0, 2]
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
fn test_datefastfield() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let date_field = schema_builder.add_date_field(
|
|
"date",
|
|
DateOptions::from(FAST).set_precision(DateTimePrecision::Nanoseconds),
|
|
);
|
|
let multi_date_field = schema_builder.add_date_field(
|
|
"multi_date",
|
|
DateOptions::default()
|
|
.set_precision(DateTimePrecision::Nanoseconds)
|
|
.set_fast(),
|
|
);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
|
index_writer.add_document(doc!(
|
|
date_field => DateTime::from_u64(1i64.to_u64()),
|
|
multi_date_field => DateTime::from_u64(2i64.to_u64()),
|
|
multi_date_field => DateTime::from_u64(3i64.to_u64())
|
|
))?;
|
|
index_writer.add_document(doc!(
|
|
date_field => DateTime::from_u64(4i64.to_u64())
|
|
))?;
|
|
index_writer.add_document(doc!(
|
|
multi_date_field => DateTime::from_u64(5i64.to_u64()),
|
|
multi_date_field => DateTime::from_u64(6i64.to_u64())
|
|
))?;
|
|
index_writer.commit()?;
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
assert_eq!(searcher.segment_readers().len(), 1);
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let fast_fields = segment_reader.fast_fields();
|
|
let date_fast_field = fast_fields
|
|
.column_opt::<DateTime>("date")
|
|
.unwrap()
|
|
.unwrap()
|
|
.first_or_default_col(Default::default());
|
|
let dates_fast_field = fast_fields
|
|
.column_opt::<DateTime>("multi_date")
|
|
.unwrap()
|
|
.unwrap();
|
|
let mut dates = Vec::new();
|
|
{
|
|
assert_eq!(date_fast_field.get_val(0).into_timestamp_nanos(), 1i64);
|
|
dates_fast_field.fill_vals(0u32, &mut dates);
|
|
assert_eq!(dates.len(), 2);
|
|
assert_eq!(dates[0].into_timestamp_nanos(), 2i64);
|
|
assert_eq!(dates[1].into_timestamp_nanos(), 3i64);
|
|
}
|
|
{
|
|
assert_eq!(date_fast_field.get_val(1).into_timestamp_nanos(), 4i64);
|
|
dates_fast_field.fill_vals(1u32, &mut dates);
|
|
assert!(dates.is_empty());
|
|
}
|
|
{
|
|
assert_eq!(date_fast_field.get_val(2).into_timestamp_nanos(), 0i64);
|
|
dates_fast_field.fill_vals(2u32, &mut dates);
|
|
assert_eq!(dates.len(), 2);
|
|
assert_eq!(dates[0].into_timestamp_nanos(), 5i64);
|
|
assert_eq!(dates[1].into_timestamp_nanos(), 6i64);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_fastfield_bool_small() {
|
|
let path = Path::new("test_bool");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_bool_field("field_bool", FAST);
|
|
let schema = schema_builder.build();
|
|
let field = schema.get_field("field_bool").unwrap();
|
|
|
|
{
|
|
let mut write: WritePtr = directory.open_write(path).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
|
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(field=>false))
|
|
.unwrap();
|
|
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(field=>false))
|
|
.unwrap();
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
assert_eq!(file.len(), 84);
|
|
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
|
let bool_col = fast_field_readers.bool("field_bool").unwrap();
|
|
assert_eq!(bool_col.first(0), Some(true));
|
|
assert_eq!(bool_col.first(1), Some(false));
|
|
assert_eq!(bool_col.first(2), Some(true));
|
|
assert_eq!(bool_col.first(3), Some(false));
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_fastfield_bool_large() {
|
|
let path = Path::new("test_bool");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_bool_field("field_bool", FAST);
|
|
let schema = schema_builder.build();
|
|
let field = schema.get_field("field_bool").unwrap();
|
|
|
|
{
|
|
let mut write: WritePtr = directory.open_write(path).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
|
for _ in 0..50 {
|
|
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
|
fast_field_writers
|
|
.add_document(&doc!(field=>false))
|
|
.unwrap();
|
|
}
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
assert_eq!(file.len(), 96);
|
|
let readers = FastFieldReaders::open(file, schema).unwrap();
|
|
let bool_col = readers.bool("field_bool").unwrap();
|
|
for i in 0..25 {
|
|
assert_eq!(bool_col.first(i * 2), Some(true));
|
|
assert_eq!(bool_col.first(i * 2 + 1), Some(false));
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_fastfield_bool_default_value() {
|
|
let path = Path::new("test_bool");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_bool_field("field_bool", FAST);
|
|
let schema = schema_builder.build();
|
|
{
|
|
let mut write: WritePtr = directory.open_write(path).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
|
let doc = TantivyDocument::default();
|
|
fast_field_writers.add_document(&doc).unwrap();
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
let file = directory.open_read(path).unwrap();
|
|
assert_eq!(file.len(), 86);
|
|
let fastfield_readers = FastFieldReaders::open(file, schema).unwrap();
|
|
let col = fastfield_readers.bool("field_bool").unwrap();
|
|
assert_eq!(col.first(0), None);
|
|
let col = fastfield_readers
|
|
.bool("field_bool")
|
|
.unwrap()
|
|
.first_or_default_col(false);
|
|
assert_eq!(col.get_val(0), false);
|
|
let col = fastfield_readers
|
|
.bool("field_bool")
|
|
.unwrap()
|
|
.first_or_default_col(true);
|
|
assert_eq!(col.get_val(0), true);
|
|
}
|
|
|
|
fn get_index(docs: &[crate::TantivyDocument], schema: &Schema) -> crate::Result<RamDirectory> {
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
{
|
|
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut fast_field_writers = FastFieldsWriter::from_schema(schema).unwrap();
|
|
for doc in docs {
|
|
fast_field_writers.add_document(doc).unwrap();
|
|
}
|
|
fast_field_writers.serialize(&mut write).unwrap();
|
|
write.terminate().unwrap();
|
|
}
|
|
Ok(directory)
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_gcd_date() {
|
|
let size_prec_sec = test_gcd_date_with_codec(DateTimePrecision::Seconds);
|
|
assert!((1000 * 13 / 8..100 + 1000 * 13 / 8).contains(&size_prec_sec.get_bytes())); // 13 bits per val = ceil(log_2(number of seconds in 2hours);
|
|
let size_prec_micros = test_gcd_date_with_codec(DateTimePrecision::Microseconds);
|
|
assert!((1000 * 33 / 8..100 + 1000 * 33 / 8).contains(&size_prec_micros.get_bytes()));
|
|
// 33 bits per
|
|
// val = ceil(log_2(number
|
|
// of microsecsseconds
|
|
// in 2hours);
|
|
}
|
|
|
|
fn test_gcd_date_with_codec(precision: DateTimePrecision) -> ByteCount {
|
|
let mut rng = StdRng::seed_from_u64(2u64);
|
|
const T0: i64 = 1_662_345_825_012_529i64;
|
|
const ONE_HOUR_IN_MICROSECS: i64 = 3_600 * 1_000_000;
|
|
let times: Vec<DateTime> = std::iter::repeat_with(|| {
|
|
// +- One hour.
|
|
let t = T0 + rng.gen_range(-ONE_HOUR_IN_MICROSECS..ONE_HOUR_IN_MICROSECS);
|
|
DateTime::from_timestamp_micros(t)
|
|
})
|
|
.take(1_000)
|
|
.collect();
|
|
let date_options = DateOptions::default().set_fast().set_precision(precision);
|
|
let mut schema_builder = SchemaBuilder::default();
|
|
let field = schema_builder.add_date_field("field", date_options);
|
|
let schema = schema_builder.build();
|
|
|
|
let docs: Vec<TantivyDocument> = times.iter().map(|time| doc!(field=>*time)).collect();
|
|
|
|
let directory = get_index(&docs[..], &schema).unwrap();
|
|
let path = Path::new("test");
|
|
let file = directory.open_read(path).unwrap();
|
|
let readers = FastFieldReaders::open(file, schema).unwrap();
|
|
let col = readers.date("field").unwrap();
|
|
|
|
for (i, time) in times.iter().enumerate() {
|
|
let dt: DateTime = col.first(i as u32).unwrap();
|
|
assert_eq!(dt, time.truncate(precision));
|
|
}
|
|
readers.column_num_bytes("field").unwrap()
|
|
}
|
|
|
|
#[test]
|
|
fn test_gcd_bug_regression_1757() {
|
|
let mut schema_builder = Schema::builder();
|
|
let num_field = schema_builder.add_u64_field("url_norm_hash", FAST | INDEXED);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
{
|
|
let mut writer = index.writer_for_tests().unwrap();
|
|
writer
|
|
.add_document(doc! {
|
|
num_field => 100u64,
|
|
})
|
|
.unwrap();
|
|
writer
|
|
.add_document(doc! {
|
|
num_field => 200u64,
|
|
})
|
|
.unwrap();
|
|
writer
|
|
.add_document(doc! {
|
|
num_field => 300u64,
|
|
})
|
|
.unwrap();
|
|
|
|
writer.commit().unwrap();
|
|
}
|
|
|
|
let reader = index.reader().unwrap();
|
|
let searcher = reader.searcher();
|
|
let segment = &searcher.segment_readers()[0];
|
|
let field = segment
|
|
.fast_fields()
|
|
.u64("url_norm_hash")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
|
|
let numbers = [100, 200, 300];
|
|
let test_range = |range: RangeInclusive<u64>| {
|
|
let expected_count = numbers.iter().filter(|num| range.contains(*num)).count();
|
|
let mut vec = vec![];
|
|
field.get_row_ids_for_value_range(range, 0..u32::MAX, &mut vec);
|
|
assert_eq!(vec.len(), expected_count);
|
|
};
|
|
test_range(50..=50);
|
|
test_range(150..=150);
|
|
test_range(350..=350);
|
|
test_range(100..=250);
|
|
test_range(101..=200);
|
|
test_range(101..=199);
|
|
test_range(100..=300);
|
|
test_range(100..=299);
|
|
}
|
|
|
|
#[test]
|
|
fn test_ip_addr_columnar_simple() {
|
|
let mut schema_builder = Schema::builder();
|
|
let ip_field = schema_builder.add_u64_field("ip", FAST);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
let ip_addr = Ipv6Addr::new(1, 2, 3, 4, 5, 1, 2, 3);
|
|
index_writer
|
|
.add_document(TantivyDocument::default())
|
|
.unwrap();
|
|
index_writer.add_document(doc!(ip_field=>ip_addr)).unwrap();
|
|
index_writer
|
|
.add_document(TantivyDocument::default())
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fastfields = searcher.segment_reader(0u32).fast_fields();
|
|
let column: Column<Ipv6Addr> = fastfields.column_opt("ip").unwrap().unwrap();
|
|
assert_eq!(column.num_docs(), 3);
|
|
assert_eq!(column.first(0), None);
|
|
assert_eq!(column.first(1), Some(ip_addr));
|
|
assert_eq!(column.first(2), None);
|
|
}
|
|
|
|
#[test]
|
|
fn test_mapping_bug_docids_for_value_range() {
|
|
let mut schema_builder = Schema::builder();
|
|
let num_field = schema_builder.add_u64_field("url_norm_hash", FAST | INDEXED);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
{
|
|
// Values without gcd, but with min_value
|
|
let mut writer = index.writer_for_tests().unwrap();
|
|
writer
|
|
.add_document(doc! {
|
|
num_field => 1000u64,
|
|
})
|
|
.unwrap();
|
|
writer
|
|
.add_document(doc! {
|
|
num_field => 1001u64,
|
|
})
|
|
.unwrap();
|
|
writer
|
|
.add_document(doc! {
|
|
num_field => 1003u64,
|
|
})
|
|
.unwrap();
|
|
writer.commit().unwrap();
|
|
}
|
|
|
|
let reader = index.reader().unwrap();
|
|
let searcher = reader.searcher();
|
|
let segment = &searcher.segment_readers()[0];
|
|
let field = segment
|
|
.fast_fields()
|
|
.u64("url_norm_hash")
|
|
.unwrap()
|
|
.first_or_default_col(0);
|
|
|
|
let numbers = [1000, 1001, 1003];
|
|
let test_range = |range: RangeInclusive<u64>| {
|
|
let expected_count = numbers.iter().filter(|num| range.contains(*num)).count();
|
|
let mut vec = vec![];
|
|
field.get_row_ids_for_value_range(range, 0..u32::MAX, &mut vec);
|
|
assert_eq!(vec.len(), expected_count);
|
|
};
|
|
let test_range_variant = |start, stop| {
|
|
let start_range = start..=stop;
|
|
test_range(start_range);
|
|
let start_range = start..=(stop - 1);
|
|
test_range(start_range);
|
|
let start_range = start..=(stop + 1);
|
|
test_range(start_range);
|
|
let start_range = (start - 1)..=stop;
|
|
test_range(start_range);
|
|
let start_range = (start - 1)..=(stop - 1);
|
|
test_range(start_range);
|
|
let start_range = (start - 1)..=(stop + 1);
|
|
test_range(start_range);
|
|
let start_range = (start + 1)..=stop;
|
|
test_range(start_range);
|
|
let start_range = (start + 1)..=(stop - 1);
|
|
test_range(start_range);
|
|
let start_range = (start + 1)..=(stop + 1);
|
|
test_range(start_range);
|
|
};
|
|
test_range_variant(50, 50);
|
|
test_range_variant(1000, 1000);
|
|
test_range_variant(1000, 1002);
|
|
}
|
|
|
|
#[test]
|
|
fn test_json_object_fast_field() {
|
|
let mut schema_builder = Schema::builder();
|
|
let without_fast_field = schema_builder.add_json_field("without", STORED);
|
|
let with_fast_field = schema_builder.add_json_field("with", STORED | FAST);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut writer = index.writer_for_tests().unwrap();
|
|
writer
|
|
.add_document(doc!(without_fast_field=>json!({"hello": "without"})))
|
|
.unwrap();
|
|
writer
|
|
.add_document(doc!(with_fast_field=>json!({"hello": "with"})))
|
|
.unwrap();
|
|
writer
|
|
.add_document(doc!(with_fast_field=>json!({"hello": "with2"})))
|
|
.unwrap();
|
|
writer
|
|
.add_document(doc!(with_fast_field=>json!({"hello": "with1"})))
|
|
.unwrap();
|
|
writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let segment_reader = searcher.segment_reader(0u32);
|
|
let fast_fields = segment_reader.fast_fields();
|
|
let column_without_opt = fast_fields.str("without.hello");
|
|
assert!(column_without_opt.is_err());
|
|
let column_with_opt: Option<StrColumn> = fast_fields.str("with.hello").unwrap();
|
|
let column_with: StrColumn = column_with_opt.unwrap();
|
|
assert!(column_with.term_ords(0).next().is_none());
|
|
assert!(column_with.term_ords(1).eq([0]));
|
|
assert!(column_with.term_ords(2).eq([2]));
|
|
assert!(column_with.term_ords(3).eq([1]));
|
|
}
|
|
|
|
#[test]
|
|
fn test_fast_field_in_json_field_expand_dots_disabled() {
|
|
let mut schema_builder = Schema::builder();
|
|
let json_option = JsonObjectOptions::default().set_fast(None);
|
|
let json = schema_builder.add_json_field("json", json_option);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer
|
|
.add_document(doc!(json => json!({"attr.age": 32})))
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
|
|
assert!(fast_field_reader
|
|
.column_opt::<i64>("json.attr.age")
|
|
.unwrap()
|
|
.is_none());
|
|
let column = fast_field_reader
|
|
.column_opt::<i64>(r"json.attr\.age")
|
|
.unwrap()
|
|
.unwrap();
|
|
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
|
|
assert_eq!(&vals, &[32])
|
|
}
|
|
|
|
#[test]
|
|
fn test_fast_field_in_json_field_with_tokenizer() {
|
|
let mut schema_builder = Schema::builder();
|
|
let json_option = JsonObjectOptions::default().set_fast(Some("default"));
|
|
let json = schema_builder.add_json_field("json", json_option);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer
|
|
.add_document(doc!(json => json!({"age": 32})))
|
|
.unwrap();
|
|
index_writer
|
|
.add_document(doc!(json => json!({"age": "NEW"})))
|
|
.unwrap();
|
|
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fast_fields = searcher.segment_reader(0u32).fast_fields();
|
|
|
|
let ff_str = fast_fields.str("json.age").unwrap().unwrap();
|
|
let mut output = String::new();
|
|
ff_str.ord_to_str(0, &mut output).unwrap();
|
|
assert_eq!(output, "new");
|
|
}
|
|
|
|
#[test]
|
|
fn test_fast_field_in_json_field_expand_dots_enabled() {
|
|
let mut schema_builder = Schema::builder();
|
|
let json_option = JsonObjectOptions::default()
|
|
.set_fast(None)
|
|
.set_expand_dots_enabled();
|
|
let json = schema_builder.add_json_field("json", json_option);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer
|
|
.add_document(doc!(json => json!({"attr.age": 32})))
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
|
|
for test_column_name in &["json.attr.age", "json.attr\\.age"] {
|
|
let column = fast_field_reader
|
|
.column_opt::<i64>(test_column_name)
|
|
.unwrap()
|
|
.unwrap();
|
|
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
|
|
assert_eq!(&vals, &[32]);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_fast_field_dot_in_schema_field_name() {
|
|
let mut schema_builder = Schema::builder();
|
|
let field_with_dot = schema_builder.add_i64_field("field.with.dot", FAST);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer
|
|
.add_document(doc!(field_with_dot => 32i64))
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
|
|
let column = fast_field_reader
|
|
.column_opt::<i64>("field.with.dot")
|
|
.unwrap()
|
|
.unwrap();
|
|
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
|
|
assert_eq!(&vals, &[32]);
|
|
}
|
|
|
|
#[test]
|
|
fn test_shadowing_fast_field() {
|
|
let mut schema_builder = Schema::builder();
|
|
let json_field = schema_builder.add_json_field("jsonfield", FAST);
|
|
let shadowing_json_field = schema_builder.add_json_field("jsonfield.attr", FAST);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer
|
|
.add_document(doc!(json_field=> json!({"attr": {"age": 32}}), shadowing_json_field=>json!({"age": 33})))
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
|
|
let column = fast_field_reader
|
|
.column_opt::<i64>("jsonfield.attr.age")
|
|
.unwrap()
|
|
.unwrap();
|
|
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
|
|
assert_eq!(&vals, &[33]);
|
|
}
|
|
|
|
#[test]
|
|
fn test_fast_field_tokenizer() {
|
|
let mut schema_builder = Schema::builder();
|
|
let opt = TextOptions::default().set_fast(Some("custom_lowercase"));
|
|
let text_field = schema_builder.add_text_field("text", opt);
|
|
let schema = schema_builder.build();
|
|
let ff_tokenizer_manager = TokenizerManager::default();
|
|
ff_tokenizer_manager.register(
|
|
"custom_lowercase",
|
|
TextAnalyzer::builder(RawTokenizer::default())
|
|
.filter(LowerCaser)
|
|
.build(),
|
|
);
|
|
|
|
let mut index = Index::create_in_ram(schema);
|
|
index.set_fast_field_tokenizers(ff_tokenizer_manager);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer
|
|
.add_document(doc!(text_field => "Test1 test2"))
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
|
|
let column = fast_field_reader.str("text").unwrap().unwrap();
|
|
let mut out = String::new();
|
|
column.ord_to_str(0u64, &mut out).unwrap();
|
|
assert_eq!(&out, "test1 test2");
|
|
}
|
|
|
|
#[test]
|
|
fn test_text_fast_field_tokenizer() {
|
|
let mut schema_builder = Schema::builder();
|
|
|
|
let text_fieldtype = crate::schema::TextOptions::default()
|
|
.set_indexing_options(
|
|
crate::schema::TextFieldIndexing::default()
|
|
.set_index_option(crate::schema::IndexRecordOption::WithFreqs)
|
|
.set_tokenizer("raw"),
|
|
)
|
|
.set_fast(Some("default"))
|
|
.set_stored();
|
|
|
|
let log_field = schema_builder.add_text_field("log_level", text_fieldtype);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer
|
|
.add_document(doc!(log_field => "info"))
|
|
.unwrap();
|
|
index_writer
|
|
.add_document(doc!(log_field => "INFO"))
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
|
|
|
|
let text_fast_field = fast_field_reader.str("log_level").unwrap().unwrap();
|
|
let mut buffer = String::new();
|
|
assert!(text_fast_field.ord_to_str(0, &mut buffer).unwrap());
|
|
assert_eq!(buffer, "info");
|
|
assert!(!text_fast_field.ord_to_str(1, &mut buffer).unwrap());
|
|
|
|
assert!(text_fast_field.term_ords(0).eq([0].into_iter()));
|
|
assert!(text_fast_field.term_ords(1).eq([0].into_iter()));
|
|
assert!(text_fast_field.ords().values_for_doc(0u32).eq([0]));
|
|
assert!(text_fast_field.ords().values_for_doc(1u32).eq([0]));
|
|
}
|
|
|
|
#[test]
|
|
fn test_shadowing_fast_field_with_expand_dots() {
|
|
let mut schema_builder = Schema::builder();
|
|
let json_option = JsonObjectOptions::default()
|
|
.set_fast(None)
|
|
.set_expand_dots_enabled();
|
|
let json_field = schema_builder.add_json_field("jsonfield", json_option.clone());
|
|
let shadowing_json_field = schema_builder.add_json_field("jsonfield.attr", json_option);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
|
index_writer
|
|
.add_document(doc!(json_field=> json!({"attr.age": 32}), shadowing_json_field=>json!({"age": 33})))
|
|
.unwrap();
|
|
index_writer.commit().unwrap();
|
|
let searcher = index.reader().unwrap().searcher();
|
|
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
|
|
// Supported for now, maybe dropped in the future.
|
|
let column = fast_field_reader
|
|
.column_opt::<i64>("jsonfield.attr.age")
|
|
.unwrap()
|
|
.unwrap();
|
|
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
|
|
assert_eq!(&vals, &[33]);
|
|
let column = fast_field_reader
|
|
.column_opt::<i64>("jsonfield\\.attr.age")
|
|
.unwrap()
|
|
.unwrap();
|
|
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
|
|
assert_eq!(&vals, &[33]);
|
|
}
|
|
}
|