This commit is contained in:
Paul Masurel
2023-01-18 15:12:19 +09:00
parent 7587656f1e
commit 58eec2c214
31 changed files with 642 additions and 718 deletions

View File

@@ -26,6 +26,7 @@ Add alignment?
Consider another codec to bridge the gap between few and 5k elements
# Cleanup and rationalization
remove the 6 bit limitation of columntype. use 4 + 4 bits instead.
in benchmark, unify percent vs ratio, f32 vs f64.
investigate whether we should have better errors. io::Error is overused at the moment.
rename rank/select in unit tests

View File

@@ -20,7 +20,7 @@ pub struct Column<T> {
use crate::column_index::Set;
impl<T: PartialOrd> Column<T> {
impl<T: PartialOrd + Copy + Send + Sync + 'static> Column<T> {
pub fn first(&self, row_id: RowId) -> Option<T> {
match &self.idx {
ColumnIndex::Full => Some(self.values.get_val(row_id)),
@@ -33,6 +33,13 @@ impl<T: PartialOrd> Column<T> {
}
}
}
/// Consumes this column and wraps it in a `FirstValueWithDefault` adapter,
/// returned as a shared `ColumnValues<T>` trait object.
///
/// `default_value` is stored in the adapter next to the column.
pub fn first_or_default_col(self, default_value: T) -> Arc<dyn ColumnValues<T>> {
    let with_default = FirstValueWithDefault {
        column: self,
        default_value,
    };
    Arc::new(with_default)
}
}
impl<T> Deref for Column<T> {
@@ -54,3 +61,27 @@ impl BinarySerializable for Cardinality {
Ok(cardinality)
}
}
// TODO simplify or optimize
/// Pairs a `Column<T>` with a fallback value of the same type.
///
/// Built by `Column::first_or_default_col`.
struct FirstValueWithDefault<T>
where
    T: Copy,
{
    /// The wrapped column.
    column: Column<T>,
    /// Value handed out when the wrapped column has nothing to offer for a row.
    default_value: T,
}
/// Exposes a `Column<T>` as dense `ColumnValues<T>`: every row yields its first
/// value, or `default_value` when `Column::first` returns `None`.
impl<T: PartialOrd + Send + Sync + Copy + 'static> ColumnValues<T> for FirstValueWithDefault<T> {
    /// Returns the first value of row `idx`, falling back to `default_value`.
    fn get_val(&self, idx: u32) -> T {
        self.column.first(idx).unwrap_or(self.default_value)
    }

    /// Returns a lower bound on everything `get_val` can return.
    ///
    /// `get_val` may hand out `default_value`, so the bound has to cover it as
    /// well as the wrapped values. The bound may be looser than necessary when
    /// no row actually uses the default.
    fn min_value(&self) -> T {
        let column_min = self.column.values.min_value();
        // Manual comparison: `T` is only `PartialOrd`, so `Ord::min` is unavailable.
        // If the values are incomparable, the column bound is kept.
        if self.default_value < column_min {
            self.default_value
        } else {
            column_min
        }
    }

    /// Returns an upper bound on everything `get_val` can return.
    ///
    /// Covers `default_value` for the same reason as `min_value`.
    fn max_value(&self) -> T {
        let column_max = self.column.values.max_value();
        if self.default_value > column_max {
            self.default_value
        } else {
            column_max
        }
    }

    /// One value per row: the count comes from the column index, not from the
    /// underlying values.
    fn num_vals(&self) -> u32 {
        self.column.idx.num_rows()
    }
}

View File

@@ -194,6 +194,20 @@ impl MonotonicallyMappableToU64 for i64 {
}
}
/// Maps `DateTime` to and from `u64` through its `timestamp_micros` field,
/// using the `common::i64_to_u64` / `common::u64_to_i64` helpers.
impl MonotonicallyMappableToU64 for crate::DateTime {
    #[inline(always)]
    fn to_u64(self) -> u64 {
        let micros = self.timestamp_micros;
        common::i64_to_u64(micros)
    }

    #[inline(always)]
    fn from_u64(val: u64) -> Self {
        let timestamp_micros = common::u64_to_i64(val);
        Self { timestamp_micros }
    }
}
impl MonotonicallyMappableToU64 for bool {
#[inline(always)]
fn to_u64(self) -> u64 {

View File

@@ -11,6 +11,7 @@ pub enum ColumnType {
Bytes,
Numerical(NumericalType),
Bool,
DateTime,
}
impl ColumnType {
@@ -31,6 +32,10 @@ impl ColumnType {
column_type_category = ColumnTypeCategory::Bool;
numerical_type_code = 0u8;
}
ColumnType::DateTime => {
column_type_category = ColumnTypeCategory::DateTime;
numerical_type_code = 0u8;
}
}
place_bits::<0, 3>(column_type_category.to_code()) | place_bits::<3, 6>(numerical_type_code)
}
@@ -59,10 +64,50 @@ impl ColumnType {
let numerical_type = NumericalType::try_from_code(numerical_type_code)?;
Ok(ColumnType::Numerical(numerical_type))
}
ColumnTypeCategory::DateTime => {
if numerical_type_code != 0u8 {
return Err(InvalidData);
}
Ok(ColumnType::DateTime)
}
}
}
}
/// Implemented by Rust value types that correspond to exactly one `ColumnType`.
pub trait HasAssociatedColumnType: Copy + PartialOrd + Send + Sync + 'static {
    /// Returns the `ColumnType` associated with the implementing type.
    fn column_type() -> ColumnType;
}
impl HasAssociatedColumnType for u64 {
fn column_type() -> ColumnType {
ColumnType::Numerical(NumericalType::U64)
}
}
impl HasAssociatedColumnType for i64 {
fn column_type() -> ColumnType {
ColumnType::Numerical(NumericalType::I64)
}
}
impl HasAssociatedColumnType for f64 {
fn column_type() -> ColumnType {
ColumnType::Numerical(NumericalType::F64)
}
}
impl HasAssociatedColumnType for bool {
fn column_type() -> ColumnType {
ColumnType::Bool
}
}
impl HasAssociatedColumnType for crate::DateTime {
fn column_type() -> ColumnType {
ColumnType::DateTime
}
}
/// Column types are grouped into different categories that
/// corresponds to the different types of `JsonValue` types.
///
@@ -76,6 +121,7 @@ pub(crate) enum ColumnTypeCategory {
Bool = 0u8,
Str = 1u8,
Numerical = 2u8,
DateTime = 3u8,
}
impl ColumnTypeCategory {
@@ -88,6 +134,7 @@ impl ColumnTypeCategory {
0u8 => Ok(Self::Bool),
1u8 => Ok(Self::Str),
2u8 => Ok(Self::Numerical),
// Code 3 is the discriminant declared for `DateTime`. Mapping it to
// `Numerical` made the `DateTime` category impossible to decode.
3u8 => Ok(Self::DateTime),
_ => Err(InvalidData),
}
}

View File

@@ -23,6 +23,6 @@ mod format_version;
mod reader;
mod writer;
pub use column_type::ColumnType;
pub use column_type::{ColumnType, HasAssociatedColumnType};
pub use reader::ColumnarReader;
pub use writer::ColumnarWriter;

View File

@@ -85,13 +85,12 @@ fn mutate_or_create_column<V, TMutator>(
}
impl ColumnarWriter {
pub fn mem_usage(&self) -> usize {
// TODO add dictionary builders.
self.arena.mem_usage() +
self.numerical_field_hash_map.mem_usage() +
self.bool_field_hash_map.mem_usage() +
self.bytes_field_hash_map.mem_usage()
self.arena.mem_usage()
+ self.numerical_field_hash_map.mem_usage()
+ self.bool_field_hash_map.mem_usage()
+ self.bytes_field_hash_map.mem_usage()
}
pub fn force_numerical_type(&mut self, column_name: &str, numerical_type: NumericalType) {
@@ -223,6 +222,22 @@ impl ColumnarWriter {
&mut column_serializer,
)?;
}
ColumnTypeCategory::DateTime => {
let numerical_column_writer: NumericalColumnWriter =
self.numerical_field_hash_map.read(addr);
let (_numerical_type, cardinality) =
numerical_column_writer.column_type_and_cardinality(num_docs);
let mut column_serializer =
serializer.serialize_column(column_name, ColumnType::DateTime);
serialize_numerical_column(
cardinality,
num_docs,
NumericalType::I64,
numerical_column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
buffers,
&mut column_serializer,
)?;
}
};
}
serializer.finalize()?;

View File

@@ -6,7 +6,6 @@ use common::{HasLen, OwnedBytes};
use crate::column::{BytesColumn, Column};
use crate::columnar::ColumnType;
use crate::DateTime;
#[derive(Clone)]
pub enum DynamicColumn {
@@ -15,33 +14,35 @@ pub enum DynamicColumn {
U64(Column<u64>),
F64(Column<f64>),
IpAddr(Column<IpAddr>),
DateTime(Column<DateTime>),
Str(BytesColumn),
DateTime(Column<crate::DateTime>),
}
impl From<Column<i64>> for DynamicColumn {
fn from(column_i64: Column<i64>) -> Self {
DynamicColumn::I64(column_i64)
}
/// Generates the two conversions between a typed `Column<$typ>` and the
/// `DynamicColumn::$enum_name` variant that carries it.
///
/// * `DynamicColumn -> Option<Column<$typ>>`: `Some` when the dynamic column
///   holds the `$enum_name` variant, `None` for any other variant.
/// * `Column<$typ> -> DynamicColumn`: wraps the column in `$enum_name`.
///
/// Both are written as `From` impls, so the matching `Into` impls (and any
/// `DynamicColumn: Into<Option<Column<T>>>` bound) come from the standard
/// blanket impl.
macro_rules! static_dynamic_conversions {
    ($typ:ty, $enum_name:ident) => {
        impl From<DynamicColumn> for Option<Column<$typ>> {
            fn from(dynamic_column: DynamicColumn) -> Option<Column<$typ>> {
                if let DynamicColumn::$enum_name(col) = dynamic_column {
                    Some(col)
                } else {
                    None
                }
            }
        }

        impl From<Column<$typ>> for DynamicColumn {
            fn from(typed_column: Column<$typ>) -> Self {
                DynamicColumn::$enum_name(typed_column)
            }
        }
    };
}
impl From<Column<u64>> for DynamicColumn {
fn from(column_u64: Column<u64>) -> Self {
DynamicColumn::U64(column_u64)
}
}
impl From<Column<f64>> for DynamicColumn {
fn from(column_f64: Column<f64>) -> Self {
DynamicColumn::F64(column_f64)
}
}
impl From<Column<bool>> for DynamicColumn {
fn from(bool_column: Column<bool>) -> Self {
DynamicColumn::Bool(bool_column)
}
}
static_dynamic_conversions!(bool, Bool);
static_dynamic_conversions!(u64, U64);
static_dynamic_conversions!(i64, I64);
static_dynamic_conversions!(f64, F64);
static_dynamic_conversions!(crate::DateTime, DateTime);
impl From<BytesColumn> for DynamicColumn {
fn from(dictionary_encoded_col: BytesColumn) -> Self {
@@ -56,11 +57,13 @@ pub struct DynamicColumnHandle {
}
impl DynamicColumnHandle {
// TODO rename load
/// Reads the bytes of the underlying file slice and decodes them through
/// `open_internal`. A read failure is returned to the caller.
pub fn open(&self) -> io::Result<DynamicColumn> {
    self.open_internal(self.file_slice.read_bytes()?)
}
// TODO rename load_async
pub async fn open_async(&self) -> io::Result<DynamicColumn> {
let column_bytes: OwnedBytes = self.file_slice.read_bytes_async().await?;
self.open_internal(column_bytes)
@@ -81,6 +84,9 @@ impl DynamicColumnHandle {
}
},
ColumnType::Bool => crate::column::open_column_u64::<bool>(column_bytes)?.into(),
ColumnType::DateTime => {
crate::column::open_column_u64::<crate::DateTime>(column_bytes)?.into()
}
};
Ok(dynamic_column)
}

View File

@@ -18,16 +18,18 @@ mod dynamic_column;
pub(crate) mod utils;
mod value;
pub use columnar::{ColumnarReader, ColumnarWriter};
pub use column::Column;
pub use column_values::ColumnValues;
pub use columnar::{ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType};
pub use value::{NumericalType, NumericalValue};
// pub use self::dynamic_column::DynamicColumnHandle;
pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle};
pub type RowId = u32;
#[derive(Clone, Copy)]
#[derive(Clone, Copy, PartialOrd, PartialEq, Default)]
pub struct DateTime {
timestamp_micros: i64,
pub timestamp_micros: i64,
}
#[derive(Copy, Clone, Debug)]

View File

@@ -1,4 +1,4 @@
use crate::InvalidData;
use crate::{Column, ColumnType, InvalidData};
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum NumericalValue {

View File

@@ -14,6 +14,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
[dependencies]
common = { version = "0.5", path = "../common/", package = "tantivy-common" }
tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
columnar = { version= "0.1", path="../columnar", package="tantivy-columnar" }
prettytable-rs = {version="0.10.0", optional= true}
rand = {version="0.8.3", optional= true}
fastdivide = "0.4"

View File

@@ -2,81 +2,11 @@ use std::fmt::{self, Debug};
use std::marker::PhantomData;
use std::ops::{Range, RangeInclusive};
pub use columnar::ColumnValues as Column;
use tantivy_bitpacker::minmax;
use crate::monotonic_mapping::StrictlyMonotonicFn;
/// `Column` provides columnar access on a field.
///
/// The value type `T` defaults to `u64`.
pub trait Column<T: PartialOrd + Debug = u64>: Send + Sync {
/// Returns the value associated with the given `idx`.
///
/// This accessor should return as fast as possible.
///
/// # Panics
///
/// May panic if `idx` is greater than or equal to the column length.
fn get_val(&self, idx: u32) -> T;
/// Fills an output buffer with the fast field values
/// associated with the `DocId` going from
/// `start` to `start + output.len()`.
///
/// The default implementation calls `get_val` once per output slot,
/// casting each index to `u32`.
///
/// # Panics
///
/// Must panic if `start + output.len()` is greater than
/// the segment's `maxdoc`.
#[inline]
fn get_range(&self, start: u64, output: &mut [T]) {
for (out, idx) in output.iter_mut().zip(start..) {
*out = self.get_val(idx as u32);
}
}
/// Appends to `positions` the positions in `doc_id_range` whose value lies
/// in `value_range`.
///
/// The end of `doc_id_range` is clamped to `num_vals()` before scanning.
///
/// Note that position == docid for single value fast fields.
#[inline]
fn get_docids_for_value_range(
&self,
value_range: RangeInclusive<T>,
doc_id_range: Range<u32>,
positions: &mut Vec<u32>,
) {
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
for idx in doc_id_range.start..doc_id_range.end {
let val = self.get_val(idx);
if value_range.contains(&val) {
positions.push(idx);
}
}
}
/// Returns the minimum value for this fast field.
///
/// This min_value may not be exact.
/// For instance, the min value does not take into account possibly
/// deleted documents. However, no value is lower than `.min_value()`.
fn min_value(&self) -> T;
/// Returns the maximum value for this fast field.
///
/// This max_value may not be exact.
/// For instance, the max value does not take into account possibly
/// deleted documents. However, no value is higher than `.max_value()`.
fn max_value(&self) -> T;
/// The number of values in the column.
fn num_vals(&self) -> u32;
/// Returns an iterator over the data, from index `0` to `num_vals()` (exclusive).
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = T> + 'a> {
Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
}
}
/// VecColumn provides `Column` over a slice.
pub struct VecColumn<'a, T = u64> {
values: &'a [T],
@@ -84,32 +14,6 @@ pub struct VecColumn<'a, T = u64> {
max_value: T,
}
/// A shared reference to a column is itself a column: every method forwards
/// to the referenced `C`.
impl<'a, C: Column<T>, T: Copy + PartialOrd + fmt::Debug> Column<T> for &'a C {
    fn get_val(&self, idx: u32) -> T {
        <C as Column<T>>::get_val(*self, idx)
    }

    fn min_value(&self) -> T {
        <C as Column<T>>::min_value(*self)
    }

    fn max_value(&self) -> T {
        <C as Column<T>>::max_value(*self)
    }

    fn num_vals(&self) -> u32 {
        <C as Column<T>>::num_vals(*self)
    }

    // `iter` and `get_range` have default bodies in the trait; they are
    // forwarded explicitly so that an override on `C` is the one that runs.
    fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = T> + 'b> {
        <C as Column<T>>::iter(*self)
    }

    fn get_range(&self, start: u64, output: &mut [T]) {
        <C as Column<T>>::get_range(*self, start, output)
    }
}
impl<'a, T: Copy + PartialOrd + Send + Sync + Debug> Column<T> for VecColumn<'a, T> {
fn get_val(&self, position: u32) -> T {
self.values[position as usize]

View File

@@ -12,10 +12,10 @@
use std::marker::PhantomData;
use std::sync::Arc;
use columnar::{DynamicColumn, HasAssociatedColumnType};
use fastfield_codecs::Column;
use crate::collector::{Collector, SegmentCollector};
use crate::fastfield::FastValue;
use crate::schema::Field;
use crate::{Score, SegmentReader, TantivyError};
@@ -61,7 +61,7 @@ use crate::{Score, SegmentReader, TantivyError};
/// # Ok(())
/// # }
/// ```
pub struct FilterCollector<TCollector, TPredicate, TPredicateValue: FastValue>
pub struct FilterCollector<TCollector, TPredicate, TPredicateValue: Default>
where TPredicate: 'static + Clone
{
field: Field,
@@ -70,7 +70,7 @@ where TPredicate: 'static + Clone
t_predicate_value: PhantomData<TPredicateValue>,
}
impl<TCollector, TPredicate, TPredicateValue: FastValue>
impl<TCollector, TPredicate, TPredicateValue: Default>
FilterCollector<TCollector, TPredicate, TPredicateValue>
where
TCollector: Collector + Send + Sync,
@@ -91,12 +91,13 @@ where
}
}
impl<TCollector, TPredicate, TPredicateValue: FastValue> Collector
impl<TCollector, TPredicate, TPredicateValue: Default> Collector
for FilterCollector<TCollector, TPredicate, TPredicateValue>
where
TCollector: Collector + Send + Sync,
TPredicate: 'static + Fn(TPredicateValue) -> bool + Send + Sync + Clone,
TPredicateValue: FastValue,
TPredicateValue: HasAssociatedColumnType,
DynamicColumn: Into<Option<columnar::Column<TPredicateValue>>>,
{
// That's the type of our result.
// Our standard deviation will be a float.
@@ -117,20 +118,10 @@ where
field_entry.name()
)));
}
let requested_type = TPredicateValue::to_type();
let field_schema_type = field_entry.field_type().value_type();
if requested_type != field_schema_type {
return Err(TantivyError::SchemaError(format!(
"Field {:?} is of type {:?}!={:?}",
field_entry.name(),
requested_type,
field_schema_type
)));
}
let fast_field_reader = segment_reader
.fast_fields()
.typed_fast_field_reader(schema.get_field_name(self.field))?;
.typed_column_first_or_default(schema.get_field_name(self.field))?;
let segment_collector = self
.collector
@@ -159,7 +150,7 @@ where
pub struct FilterSegmentCollector<TSegmentCollector, TPredicate, TPredicateValue>
where
TPredicate: 'static,
TPredicateValue: FastValue,
DynamicColumn: Into<Option<columnar::Column<TPredicateValue>>>,
{
fast_field_reader: Arc<dyn Column<TPredicateValue>>,
segment_collector: TSegmentCollector,
@@ -171,8 +162,9 @@ impl<TSegmentCollector, TPredicate, TPredicateValue> SegmentCollector
for FilterSegmentCollector<TSegmentCollector, TPredicate, TPredicateValue>
where
TSegmentCollector: SegmentCollector,
TPredicateValue: HasAssociatedColumnType,
TPredicate: 'static + Fn(TPredicateValue) -> bool + Send + Sync,
TPredicateValue: FastValue,
DynamicColumn: Into<Option<columnar::Column<TPredicateValue>>>,
{
type Fruit = TSegmentCollector::Fruit;

View File

@@ -104,7 +104,6 @@ pub use self::custom_score_top_collector::{CustomScorer, CustomSegmentScorer};
mod tweak_score_top_collector;
pub use self::tweak_score_top_collector::{ScoreSegmentTweaker, ScoreTweaker};
// mod facet_collector;
// pub use self::facet_collector::{FacetCollector, FacetCounts};
use crate::query::Weight;

View File

@@ -57,9 +57,10 @@ pub fn test_filter_collector() -> crate::Result<()> {
assert_eq!(filtered_top_docs.len(), 0);
fn date_filter(value: DateTime) -> bool {
(value.into_utc() - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
.whole_weeks()
fn date_filter(value: columnar::DateTime) -> bool {
(crate::DateTime::from(value).into_utc()
- OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
.whole_weeks()
> 0
}
@@ -164,7 +165,9 @@ pub struct FastFieldSegmentCollector {
impl FastFieldTestCollector {
pub fn for_field(field: impl ToString) -> FastFieldTestCollector {
FastFieldTestCollector { field: field.to_string() }
FastFieldTestCollector {
field: field.to_string(),
}
}
}

View File

@@ -154,10 +154,11 @@ impl CustomScorer<u64> for ScorerByField {
// mapping is monotonic, so it is sufficient to compute our top-K docs.
//
// The conversion will then happen only on the top-K docs.
let ff_reader = segment_reader
.fast_fields()
.typed_fast_field_reader(segment_reader.schema().get_field_name(self.field))?;
Ok(ScorerByFastFieldReader { ff_reader })
todo!();
// let ff_reader = segment_reader
// .fast_fields()
// .typed_column(&self.field)?;
// Ok(ScorerByFastFieldReader { ff_reader })
}
}

View File

@@ -153,8 +153,7 @@ impl SegmentReader {
let schema = segment.schema();
let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?;
let fast_fields_readers =
Arc::new(FastFieldReaders::open(fast_fields_data)?);
let fast_fields_readers = Arc::new(FastFieldReaders::open(fast_fields_data)?);
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;

View File

@@ -27,7 +27,6 @@ pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveB
// pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
pub use self::error::{FastFieldNotAvailableError, Result};
// pub use self::facet_reader::FacetReader;
pub use self::readers::FastFieldReaders;
pub use self::serializer::{Column, CompositeFastFieldSerializer};
use self::writer::unexpected_value;
@@ -171,9 +170,7 @@ mod tests {
use super::*;
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::merge_policy::NoMergePolicy;
use crate::schema::{
Document, Field, Schema, SchemaBuilder, FAST, INDEXED, STRING, TEXT,
};
use crate::schema::{Document, Field, Schema, SchemaBuilder, FAST, INDEXED, STRING, TEXT};
use crate::time::OffsetDateTime;
use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader};
@@ -184,7 +181,6 @@ mod tests {
});
pub static FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("field").unwrap());
#[test]
pub fn test_convert_i64_u64() {
let datetime = DateTime::from_utc(OffsetDateTime::UNIX_EPOCH);
@@ -207,27 +203,25 @@ mod tests {
fast_field_writers
.add_document(&doc!(*FIELD=>2u64))
.unwrap();
fast_field_writers
.serialize(&mut write, None)
.unwrap();
fast_field_writers.serialize(&mut write, None).unwrap();
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 164);
let fast_field_readers = FastFieldReaders::open(file).unwrap();
// let column = fast_field_readers.u64("field").unwrap();
// assert_eq!(column.get_val(0), 13u64);
// assert_eq!(column.get_val(1), 14u64);
// assert_eq!(column.get_val(2), 2u64);
let column = fast_field_readers.u64("field").unwrap();
assert_eq!(column.get_val(0), 13u64);
assert_eq!(column.get_val(1), 14u64);
assert_eq!(column.get_val(2), 2u64);
Ok(())
}
#[test]
fn test_intfastfield_large() -> crate::Result<()> {
fn test_intfastfield_large() {
let path = Path::new("test");
let directory: RamDirectory = RamDirectory::create();
{
let mut write: WritePtr = directory.open_write(Path::new("test"))?;
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
fast_field_writers
.add_document(&doc!(*FIELD=>4u64))
@@ -256,36 +250,28 @@ mod tests {
fast_field_writers
.add_document(&doc!(*FIELD=>215u64))
.unwrap();
fast_field_writers.serialize(&mut write, None)?;
write.terminate()?;
fast_field_writers.serialize(&mut write, None).unwrap();
write.terminate().unwrap();
}
let file = directory.open_read(path)?;
assert_eq!(file.len(), 62);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite
.open_read(*FIELD)
.unwrap()
.read_bytes()?;
let fast_field_reader = open::<u64>(data)?;
assert_eq!(fast_field_reader.get_val(0), 4u64);
assert_eq!(fast_field_reader.get_val(1), 14_082_001u64);
assert_eq!(fast_field_reader.get_val(2), 3_052u64);
assert_eq!(fast_field_reader.get_val(3), 9002u64);
assert_eq!(fast_field_reader.get_val(4), 15_001u64);
assert_eq!(fast_field_reader.get_val(5), 777u64);
assert_eq!(fast_field_reader.get_val(6), 1_002u64);
assert_eq!(fast_field_reader.get_val(7), 1_501u64);
assert_eq!(fast_field_reader.get_val(8), 215u64);
}
Ok(())
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 192);
let fast_field_readers = FastFieldReaders::open(file).unwrap();
let col = fast_field_readers.u64("field").unwrap();
assert_eq!(col.get_val(0), 4u64);
assert_eq!(col.get_val(1), 14_082_001u64);
assert_eq!(col.get_val(2), 3_052u64);
assert_eq!(col.get_val(3), 9002u64);
assert_eq!(col.get_val(4), 15_001u64);
assert_eq!(col.get_val(5), 777u64);
assert_eq!(col.get_val(6), 1_002u64);
assert_eq!(col.get_val(7), 1_501u64);
assert_eq!(col.get_val(8), 215u64);
}
#[test]
fn test_intfastfield_null_amplitude() -> crate::Result<()> {
fn test_intfastfield_null_amplitude() {
let path = Path::new("test");
let directory: RamDirectory = RamDirectory::create();
{
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
@@ -294,29 +280,20 @@ mod tests {
.add_document(&doc!(*FIELD=>100_000u64))
.unwrap();
}
fast_field_writers
.serialize(&mut write, None)
.unwrap();
fast_field_writers.serialize(&mut write, None).unwrap();
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 35);
{
let fast_fields_composite = CompositeFile::open(&file).unwrap();
let data = fast_fields_composite
.open_read(*FIELD)
.unwrap()
.read_bytes()?;
let fast_field_reader = open::<u64>(data)?;
for doc in 0..10_000 {
assert_eq!(fast_field_reader.get_val(doc), 100_000u64);
}
assert_eq!(file.len(), 165);
let fast_field_readers = FastFieldReaders::open(file).unwrap();
let fast_field_reader = fast_field_readers.u64("field").unwrap();
for doc in 0..10_000 {
assert_eq!(fast_field_reader.get_val(doc), 100_000u64);
}
Ok(())
}
#[test]
fn test_intfastfield_large_numbers() -> crate::Result<()> {
fn test_intfastfield_large_numbers() {
let path = Path::new("test");
let directory: RamDirectory = RamDirectory::create();
@@ -327,34 +304,23 @@ mod tests {
fast_field_writers
.add_document(&doc!(*FIELD=>0u64))
.unwrap();
for doc_id in 1u64..10_001u64 {
for doc_id in 1u64..10_000u64 {
fast_field_writers
.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id as u64))
.unwrap();
}
fast_field_writers
.serialize(&mut write, None)
.unwrap();
fast_field_writers.serialize(&mut write, None).unwrap();
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 80049);
assert_eq!(file.len(), 80173);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite
.open_read(*FIELD)
.unwrap()
.read_bytes()?;
let fast_field_reader = open::<u64>(data)?;
assert_eq!(fast_field_reader.get_val(0), 0u64);
for doc in 1..10_001 {
assert_eq!(
fast_field_reader.get_val(doc),
5_000_000_000_000_000_000u64 + doc as u64 - 1u64
);
let fast_field_readers = FastFieldReaders::open(file).unwrap();
let col = fast_field_readers.u64("field").unwrap();
for doc in 1..10_000 {
assert_eq!(col.get_val(doc), 5_000_000_000_000_000_000u64 + doc as u64);
}
}
Ok(())
}
#[test]
@@ -373,29 +339,22 @@ mod tests {
doc.add_i64(i64_field, i);
fast_field_writers.add_document(&doc).unwrap();
}
fast_field_writers
.serialize(&mut write, None)
.unwrap();
fast_field_writers.serialize(&mut write, None).unwrap();
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 49_usize);
assert_eq!(file.len(), 179_usize);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite
.open_read(i64_field)
.unwrap()
.read_bytes()?;
let fast_field_reader = open::<i64>(data)?;
assert_eq!(fast_field_reader.min_value(), -100i64);
assert_eq!(fast_field_reader.max_value(), 9_999i64);
let fast_field_readers = FastFieldReaders::open(file).unwrap();
let col = fast_field_readers.i64("field").unwrap();
assert_eq!(col.min_value(), -100i64);
assert_eq!(col.max_value(), 9_999i64);
for (doc, i) in (-100i64..10_000i64).enumerate() {
assert_eq!(fast_field_reader.get_val(doc as u32), i);
assert_eq!(col.get_val(doc as u32), i);
}
let mut buffer = vec![0i64; 100];
fast_field_reader.get_range(53, &mut buffer[..]);
col.get_range(53, &mut buffer[..]);
for i in 0..100 {
assert_eq!(buffer[i], -100i64 + 53i64 + i as i64);
}
@@ -533,105 +492,103 @@ mod tests {
// all
// }
/*
#[test]
fn test_text_fastfield() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT | FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
// first segment
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
text_field => "BBBBB AAAAA", // term_ord 1,2
))?;
index_writer.add_document(doc!())?;
index_writer.add_document(doc!(
text_field => "AAAAA", // term_ord 0
))?;
index_writer.add_document(doc!(
text_field => "AAAAA BBBBB", // term_ord 0
))?;
index_writer.add_document(doc!(
text_field => "zumberthree", // term_ord 2, after merge term_ord 3
))?;
index_writer.add_document(doc!())?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
let segment_reader = searcher.segment_reader(0);
let fast_fields = segment_reader.fast_fields();
let text_fast_field = fast_fields.u64s("text").unwrap();
assert_eq!(
get_vals_for_docs(&text_fast_field, 0..5),
vec![1, 0, 0, 0, 1, 2]
);
let mut out = vec![];
text_fast_field.get_vals(3, &mut out);
assert_eq!(out, vec![0, 1]);
let inverted_index = segment_reader.inverted_index(text_field)?;
assert_eq!(inverted_index.terms().num_terms(), 3);
let mut bytes = vec![];
assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?);
// default tokenizer applies lower case
assert_eq!(bytes, "aaaaa".as_bytes());
}
{
// second segment
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
text_field => "AAAAA", // term_ord 0
))?;
index_writer.add_document(doc!(
text_field => "CCCCC AAAAA", // term_ord 1, after merge 2
))?;
index_writer.add_document(doc!())?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 2);
let segment_reader = searcher.segment_reader(1);
let fast_fields = segment_reader.fast_fields();
let text_fast_field = fast_fields.u64s("text").unwrap();
assert_eq!(get_vals_for_docs(&text_fast_field, 0..3), vec![0, 1, 0]);
}
// Merging the segments
{
let segment_ids = index.searchable_segment_ids()?;
let mut index_writer = index.writer_for_tests()?;
index_writer.merge(&segment_ids).wait()?;
index_writer.wait_merging_threads()?;
}
let reader = index.reader()?;
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0);
let fast_fields = segment_reader.fast_fields();
let text_fast_field = fast_fields.u64s("text").unwrap();
assert_eq!(
get_vals_for_docs(&text_fast_field, 0..8),
vec![1, 0, 0, 0, 1, 3 /* next segment */, 0, 2, 0]
);
Ok(())
}
*/
// #[test]
// fn test_text_fastfield() -> crate::Result<()> {
// let mut schema_builder = Schema::builder();
// let text_field = schema_builder.add_text_field("text", TEXT | FAST);
// let schema = schema_builder.build();
// let index = Index::create_in_ram(schema);
//
// {
// first segment
// let mut index_writer = index.writer_for_tests()?;
// index_writer.set_merge_policy(Box::new(NoMergePolicy));
// index_writer.add_document(doc!(
// text_field => "BBBBB AAAAA", // term_ord 1,2
// ))?;
// index_writer.add_document(doc!())?;
// index_writer.add_document(doc!(
// text_field => "AAAAA", // term_ord 0
// ))?;
// index_writer.add_document(doc!(
// text_field => "AAAAA BBBBB", // term_ord 0
// ))?;
// index_writer.add_document(doc!(
// text_field => "zumberthree", // term_ord 2, after merge term_ord 3
// ))?;
//
// index_writer.add_document(doc!())?;
// index_writer.commit()?;
//
// let reader = index.reader()?;
// let searcher = reader.searcher();
// assert_eq!(searcher.segment_readers().len(), 1);
// let segment_reader = searcher.segment_reader(0);
// let fast_fields = segment_reader.fast_fields();
// let text_fast_field = fast_fields.u64s("text").unwrap();
//
// assert_eq!(
// get_vals_for_docs(&text_fast_field, 0..5),
// vec![1, 0, 0, 0, 1, 2]
// );
//
// let mut out = vec![];
// text_fast_field.get_vals(3, &mut out);
// assert_eq!(out, vec![0, 1]);
//
// let inverted_index = segment_reader.inverted_index(text_field)?;
// assert_eq!(inverted_index.terms().num_terms(), 3);
// let mut bytes = vec![];
// assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?);
// default tokenizer applies lower case
// assert_eq!(bytes, "aaaaa".as_bytes());
// }
//
// {
// second segment
// let mut index_writer = index.writer_for_tests()?;
//
// index_writer.add_document(doc!(
// text_field => "AAAAA", // term_ord 0
// ))?;
//
// index_writer.add_document(doc!(
// text_field => "CCCCC AAAAA", // term_ord 1, after merge 2
// ))?;
//
// index_writer.add_document(doc!())?;
// index_writer.commit()?;
//
// let reader = index.reader()?;
// let searcher = reader.searcher();
// assert_eq!(searcher.segment_readers().len(), 2);
// let segment_reader = searcher.segment_reader(1);
// let fast_fields = segment_reader.fast_fields();
// let text_fast_field = fast_fields.u64s("text").unwrap();
//
// assert_eq!(get_vals_for_docs(&text_fast_field, 0..3), vec![0, 1, 0]);
// }
// Merging the segments
// {
// let segment_ids = index.searchable_segment_ids()?;
// let mut index_writer = index.writer_for_tests()?;
// index_writer.merge(&segment_ids).wait()?;
// index_writer.wait_merging_threads()?;
// }
//
// let reader = index.reader()?;
// let searcher = reader.searcher();
// let segment_reader = searcher.segment_reader(0);
// let fast_fields = segment_reader.fast_fields();
// let text_fast_field = fast_fields.u64s("text").unwrap();
//
// assert_eq!(
// get_vals_for_docs(&text_fast_field, 0..8),
// vec![1, 0, 0, 0, 1, 3 /* next segment */, 0, 2, 0]
// );
//
// Ok(())
// }
// #[test]
// fn test_string_fastfield() -> crate::Result<()> {
@@ -661,7 +618,6 @@ mod tests {
// index_writer.add_document(doc!())?;
// index_writer.commit()?;
// let reader = index.reader()?;
// let searcher = reader.searcher();
// assert_eq!(searcher.segment_readers().len(), 1);
@@ -693,7 +649,6 @@ mod tests {
// index_writer.add_document(doc!())?;
// index_writer.commit()?;
// let reader = index.reader()?;
// let searcher = reader.searcher();
// assert_eq!(searcher.segment_readers().len(), 2);
@@ -816,9 +771,7 @@ mod tests {
fast_field_writers
.add_document(&doc!(field=>false))
.unwrap();
fast_field_writers
.serialize(&mut write, None)
.unwrap();
fast_field_writers.serialize(&mut write, None).unwrap();
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
@@ -853,9 +806,7 @@ mod tests {
.add_document(&doc!(field=>false))
.unwrap();
}
fast_field_writers
.serialize(&mut write, None)
.unwrap();
fast_field_writers.serialize(&mut write, None).unwrap();
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
@@ -898,10 +849,7 @@ mod tests {
Ok(())
}
fn get_index(
docs: &[crate::Document],
schema: &Schema,
) -> crate::Result<RamDirectory> {
fn get_index(docs: &[crate::Document], schema: &Schema) -> crate::Result<RamDirectory> {
let directory: RamDirectory = RamDirectory::create();
{
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
@@ -909,9 +857,7 @@ mod tests {
for (doc_id, doc) in docs.into_iter().enumerate() {
fast_field_writers.add_document(doc).unwrap();
}
fast_field_writers
.serialize(&mut write, None)
.unwrap();
fast_field_writers.serialize(&mut write, None).unwrap();
write.terminate().unwrap();
}
Ok(directory)
@@ -942,9 +888,7 @@ mod tests {
})
.take(1_000)
.collect();
let date_options = DateOptions::default()
.set_fast()
.set_precision(precision);
let date_options = DateOptions::default().set_fast().set_precision(precision);
let mut schema_builder = SchemaBuilder::default();
let field = schema_builder.add_date_field("field", date_options);
let schema = schema_builder.build();

View File

@@ -2,7 +2,9 @@ use std::io;
use std::net::Ipv6Addr;
use std::sync::Arc;
use columnar::ColumnarReader;
use columnar::{
ColumnType, ColumnValues, ColumnarReader, DynamicColumn, HasAssociatedColumnType, NumericalType,
};
use fastfield_codecs::{open, open_u128, Column};
use crate::directory::{CompositeFile, FileSlice};
@@ -19,73 +21,52 @@ use crate::{DateTime, TantivyError};
pub struct FastFieldReaders {
columnar: Arc<ColumnarReader>,
}
#[derive(Eq, PartialEq, Debug)]
pub(crate) enum FastType {
I64,
U64,
U128,
F64,
Bool,
Date,
}
pub(crate) fn type_and_cardinality(field_type: &FieldType) -> Option<FastType> {
todo!();
// match field_type {
// FieldType::U64(options) => options
// .get_fastfield_cardinality()
// .map(|cardinality| (FastType::U64, cardinality)),
// FieldType::I64(options) => options
// .get_fastfield_cardinality()
// .map(|cardinality| (FastType::I64, cardinality)),
// FieldType::F64(options) => options
// .get_fastfield_cardinality()
// .map(|cardinality| (FastType::F64, cardinality)),
// FieldType::Bool(options) => options
// .get_fastfield_cardinality()
// .map(|cardinality| (FastType::Bool, cardinality)),
// FieldType::Date(options) => options
// .get_fastfield_cardinality()
// .map(|cardinality| (FastType::Date, cardinality)),
// FieldType::Facet(_) => Some((FastType::U64, Cardinality::MultiValues)),
// FieldType::Str(options) if options.is_fast() => {
// Some((FastType::U64, Cardinality::MultiValues))
// }
// FieldType::IpAddr(options) => options
// .get_fastfield_cardinality()
// .map(|cardinality| (FastType::U128, cardinality)),
// _ => None,
// }
}
impl FastFieldReaders {
pub(crate) fn open(fast_field_file: FileSlice) -> io::Result<FastFieldReaders> {
let columnar = Arc::new(ColumnarReader::open(fast_field_file)?);
Ok(FastFieldReaders {
columnar,
})
Ok(FastFieldReaders { columnar })
}
/// Reports the space used by the fast fields as a `PerFieldSpaceUsage`.
///
/// Not implemented yet: the body is a `todo!()`, so calling this currently panics.
pub(crate) fn space_usage(&self) -> PerFieldSpaceUsage {
todo!()
}
pub fn column(&self, column_name: &str) {
todo!()
// TODO make opt
/// Opens the column named `field` whose column type is the one associated with `T`.
///
/// All columns registered under `field` are read, and the first one whose
/// `column_type()` equals `T::column_type()` is opened.
///
/// # Errors
///
/// Returns a `SchemaError` if no column with that name and type exists, or if the
/// opened dynamic column cannot be converted into a `Column<T>`. Errors raised while
/// reading or opening the column are propagated.
pub fn typed_column<T>(&self, field: &str) -> crate::Result<columnar::Column<T>>
where
    T: PartialOrd + Copy + HasAssociatedColumnType + Send + Sync + Default + 'static,
    DynamicColumn: Into<Option<columnar::Column<T>>>,
{
    let column_type = T::column_type();
    let Some(dynamic_column_handle) = self
        .columnar
        .read_columns(field)?
        .into_iter()
        .find(|column| column.column_type() == column_type)
    else {
        // TODO Option would make more sense.
        return Err(crate::TantivyError::SchemaError(format!(
            "No fast field named {field:?} with the requested type."
        )));
    };
    let dynamic_column = dynamic_column_handle.open()?;
    // The conversion yields `None` when the dynamic column does not hold a `Column<T>`.
    let col: Option<columnar::Column<T>> = dynamic_column.into();
    col.ok_or_else(|| {
        crate::TantivyError::SchemaError(format!(
            "Fast field {field:?} could not be opened with the requested type."
        ))
    })
}
pub(crate) fn typed_fast_field_reader<TFastValue: FastValue>(
&self,
field_name: &str,
) -> crate::Result<Arc<dyn Column<TFastValue>>> {
todo!();
/// Opens the typed column `field` and exposes it as a single-valued column.
///
/// Each row yields the value returned by `first` for that row, falling back to
/// `T::default()` when there is none. Any error from `typed_column` is propagated.
pub fn typed_column_first_or_default<T>(&self, field: &str) -> crate::Result<Arc<dyn Column<T>>>
where
    T: PartialOrd + Copy + HasAssociatedColumnType + Send + Sync + Default + 'static,
    DynamicColumn: Into<Option<columnar::Column<T>>>,
{
    let typed_col: columnar::Column<T> = self.typed_column(field)?;
    let fallback = T::default();
    Ok(typed_col.first_or_default_col(fallback))
}
/// Returns the `u64` fast field reader associated with `field`.
///
/// If `field` is not a u64 fast field, this method returns an Error.
pub fn u64(&self, field: &str) -> crate::Result<Arc<dyn Column<u64>>> {
todo!();
pub fn u64(&self, field: &str) -> crate::Result<Arc<dyn ColumnValues<u64>>> {
self.typed_column_first_or_default(field)
}
/// Returns the `ip` fast field reader associated to `field`.
@@ -111,14 +92,15 @@ impl FastFieldReaders {
/// If not, the fastfield reader will return the u64-value associated with the original
/// FastValue.
pub fn u64_lenient(&self, field_name: &str) -> crate::Result<Arc<dyn Column<u64>>> {
self.typed_fast_field_reader(field_name)
todo!();
// self.typed_fast_field_reader(field_name)
}
/// Returns the `i64` fast field reader associated with `field`.
///
/// If `field` is not a i64 fast field, this method returns an Error.
pub fn i64(&self, field_name: &str) -> crate::Result<Arc<dyn Column<i64>>> {
todo!()
self.typed_column_first_or_default(field_name)
}
/// Returns the `date` fast field reader associated with `field`.
@@ -126,41 +108,42 @@ impl FastFieldReaders {
/// If `field` is not a date fast field, this method returns an Error.
pub fn date(&self, field_name: &str) -> crate::Result<Arc<dyn Column<DateTime>>> {
todo!()
// self.numerical_column(field_name)
}
/// Returns the `f64` fast field reader associated with `field`.
///
/// If `field` is not a f64 fast field, this method returns an Error.
pub fn f64(&self, field_name: &str) -> crate::Result<Arc<dyn Column<f64>>> {
todo!();
self.typed_column_first_or_default(field_name)
}
/// Returns the `bool` fast field reader associated with `field`.
///
/// If `field` is not a bool fast field, this method returns an Error.
pub fn bool(&self, field_name: &str) -> crate::Result<Arc<dyn Column<bool>>> {
todo!()
self.typed_column_first_or_default(field_name)
}
// Returns the `bytes` fast field reader associated with `field`.
//
// If `field` is not a bytes fast field, returns an Error.
// pub fn bytes(&self, field: Field) -> crate::Result<BytesFastFieldReader> {
// let field_entry = self.schema.get_field_entry(field);
// if let FieldType::Bytes(bytes_option) = field_entry.field_type() {
// if !bytes_option.is_fast() {
// return Err(crate::TantivyError::SchemaError(format!(
// "Field {:?} is not a fast field.",
// field_entry.name()
// )));
// }
// let fast_field_idx_file = self.fast_field_data(field, 0)?;
// let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?;
// let idx_reader = open(fast_field_idx_bytes)?;
// let data = self.fast_field_data(field, 1)?;
// BytesFastFieldReader::open(idx_reader, data)
// } else {
// Err(FastFieldNotAvailableError::new(field_entry).into())
// }
// let field_entry = self.schema.get_field_entry(field);
// if let FieldType::Bytes(bytes_option) = field_entry.field_type() {
// if !bytes_option.is_fast() {
// return Err(crate::TantivyError::SchemaError(format!(
// "Field {:?} is not a fast field.",
// field_entry.name()
// )));
// }
// let fast_field_idx_file = self.fast_field_data(field, 0)?;
// let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?;
// let idx_reader = open(fast_field_idx_bytes)?;
// let data = self.fast_field_data(field, 1)?;
// BytesFastFieldReader::open(idx_reader, data)
// } else {
// Err(FastFieldNotAvailableError::new(field_entry).into())
// }
// }
}

View File

@@ -1,14 +1,14 @@
use std::collections::HashMap;
use std::io;
use super::FastFieldType;
use crate::fastfield::{CompositeFastFieldSerializer};
use columnar::{ColumnarWriter, NumericalType, NumericalValue};
use common;
use fastfield_codecs::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
use rustc_hash::FxHashMap;
use tantivy_bitpacker::BlockedBitpacker;
use super::FastFieldType;
use crate::fastfield::CompositeFastFieldSerializer;
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
use crate::schema::{Document, Field, FieldEntry, FieldType, Schema, Value};
@@ -57,48 +57,46 @@ fn fast_numerical_type(field_type: &FieldType) -> Option<FastFieldTyp> {
} else {
None
}
},
}
FieldType::I64(numerical_option) => {
if numerical_option.is_fast() {
Some(FastFieldTyp::Numerical(NumericalType::I64))
} else {
None
}
},
}
FieldType::F64(numerical_option) => {
if numerical_option.is_fast() {
Some(FastFieldTyp::Numerical(NumericalType::F64))
} else {
None
}
},
}
FieldType::Str(str_option) => {
if str_option.is_fast() {
Some(FastFieldTyp::Other)
} else {
None
}
},
}
FieldType::Bool(int_options) => {
if int_options.is_fast() {
Some(FastFieldTyp::Other)
} else {
None
}
},
}
FieldType::Date(date_options) => {
if date_options.is_fast() {
Some(FastFieldTyp::Other)
} else {
None
}
},
}
FieldType::Facet(_) => todo!(),
FieldType::Bytes(_) => todo!(),
FieldType::JsonObject(_) => todo!(),
FieldType::IpAddr(_) => todo!(),
}
}
@@ -109,12 +107,12 @@ impl FastFieldsWriter {
let mut fast_fields = vec![None; schema.num_fields()];
// TODO see other types
for (field, field_entry) in schema.fields() {
if let Some(fast_field_typ) =fast_numerical_type(field_entry.field_type()) {
if let Some(fast_field_typ) = fast_numerical_type(field_entry.field_type()) {
match fast_field_typ {
FastFieldTyp::Numerical(numerical_type) => {
columnar_writer.force_numerical_type(field_entry.name(), numerical_type);
},
FastFieldTyp::Other => {},
}
FastFieldTyp::Other => {}
}
fast_fields[field.field_id() as usize] = Some(field_entry.name().to_string());
}
@@ -132,20 +130,34 @@ impl FastFieldsWriter {
}
/// Indexes all of the fastfields of a new document.
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
let doc_id = self.num_docs;
for field_value in doc.field_values() {
if let Some(field_name) = self.fast_fields[field_value.field().field_id() as usize].as_ref() {
if let Some(field_name) =
self.fast_fields[field_value.field().field_id() as usize].as_ref()
{
match &field_value.value {
Value::U64(u64_val) => {
self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*u64_val));
},
self.columnar_writer.record_numerical(
doc_id,
field_name.as_str(),
NumericalValue::from(*u64_val),
);
}
Value::I64(i64_val) => {
self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*i64_val));
},
self.columnar_writer.record_numerical(
doc_id,
field_name.as_str(),
NumericalValue::from(*i64_val),
);
}
Value::F64(f64_val) => {
self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*f64_val));
},
self.columnar_writer.record_numerical(
doc_id,
field_name.as_str(),
NumericalValue::from(*f64_val),
);
}
Value::Str(_) => todo!(),
Value::PreTokStr(_) => todo!(),
Value::Bool(_) => todo!(),

View File

@@ -115,8 +115,8 @@ pub(crate) fn get_doc_id_mapping_from_field(
) -> crate::Result<DocIdMapping> {
todo!()
// let schema = segment_writer.segment_serializer.segment().schema();
// let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect fastfield, but not strictly required
// let fast_field = segment_writer
// let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect
// fastfield, but not strictly required let fast_field = segment_writer
// .fast_field_writers
// .get_field_writer(field_id)
// .ok_or_else(|| {
@@ -160,15 +160,11 @@ mod tests_indexsorting {
let my_text_field = schema_builder.add_text_field("text_field", text_field_options);
let my_string_field = schema_builder.add_text_field("string_field", STRING | STORED);
let my_number = schema_builder.add_u64_field(
"my_number",
NumericOptions::default().set_fast(),
);
let my_number =
schema_builder.add_u64_field("my_number", NumericOptions::default().set_fast());
let multi_numbers = schema_builder.add_u64_field(
"multi_numbers",
NumericOptions::default().set_fast(),
);
let multi_numbers =
schema_builder.add_u64_field("multi_numbers", NumericOptions::default().set_fast());
let schema = schema_builder.build();
let mut index_builder = Index::builder().schema(schema);
@@ -459,7 +455,6 @@ mod tests_indexsorting {
// "my_number".to_string()
// );
// let searcher = index.reader()?.searcher();
// assert_eq!(searcher.segment_readers().len(), 1);
// let segment_reader = searcher.segment_reader(0);

View File

@@ -813,8 +813,8 @@ mod tests {
use crate::indexer::NoMergePolicy;
use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery};
use crate::schema::{
self, IndexRecordOption, IpAddrOptions, NumericOptions,
TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
self, IndexRecordOption, IpAddrOptions, NumericOptions, TextFieldIndexing, TextOptions,
FAST, INDEXED, STORED, STRING, TEXT,
};
use crate::store::DOCSTORE_CACHE_CAPACITY;
use crate::{
@@ -1636,7 +1636,8 @@ mod tests {
// );
// let large_text_field = schema_builder.add_text_field("large_text_field", TEXT | STORED);
// let multi_text_fields = schema_builder.add_text_field("multi_text_fields", TEXT | STORED);
// let multi_text_fields = schema_builder.add_text_field("multi_text_fields", TEXT |
// STORED);
// let multi_numbers = schema_builder.add_u64_field(
// "multi_numbers",
@@ -2038,8 +2039,8 @@ mod tests {
// // Test date
// let term =
// Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as i64));
// assert_eq!(do_search2(term).len() as u64, 0);
// Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as
// i64)); assert_eq!(do_search2(term).len() as u64, 0);
// }
// // search ip address
// //
@@ -2194,39 +2195,38 @@ mod tests {
// proptest! {
// #![proptest_config(ProptestConfig::with_cases(20))]
// #[test]
// fn test_delete_with_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
// assert!(test_operation_strategy(&ops[..], true, false).is_ok());
// }
// fn test_delete_with_sort_proptest_adding(ops in
// proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert!
// (test_operation_strategy(&ops[..], true, false).is_ok()); }
// #[test]
// fn test_delete_without_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
// assert!(test_operation_strategy(&ops[..], false, false).is_ok());
// }
// fn test_delete_without_sort_proptest_adding(ops in
// proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert!
// (test_operation_strategy(&ops[..], false, false).is_ok()); }
// #[test]
// fn test_delete_with_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
// assert!(test_operation_strategy(&ops[..], true, true).is_ok());
// }
// fn test_delete_with_sort_proptest_with_merge_adding(ops in
// proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert!
// (test_operation_strategy(&ops[..], true, true).is_ok()); }
// #[test]
// fn test_delete_without_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
// assert!(test_operation_strategy(&ops[..], false, true).is_ok());
// }
// fn test_delete_without_sort_proptest_with_merge_adding(ops in
// proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert!
// (test_operation_strategy(&ops[..], false, true).is_ok()); }
// #[test]
// fn test_delete_with_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
// assert!(test_operation_strategy(&ops[..], true, false).is_ok());
// }
// fn test_delete_with_sort_proptest(ops in
// proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert!
// (test_operation_strategy(&ops[..], true, false).is_ok()); }
// #[test]
// fn test_delete_without_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
// assert!(test_operation_strategy(&ops[..], false, false).is_ok());
// }
// fn test_delete_without_sort_proptest(ops in
// proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert!
// (test_operation_strategy(&ops[..], false, false).is_ok()); }
// #[test]
// fn test_delete_with_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
// assert!(test_operation_strategy(&ops[..], true, true).is_ok());
// }
// fn test_delete_with_sort_proptest_with_merge(ops in
// proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert!
// (test_operation_strategy(&ops[..], true, true).is_ok()); }
// #[test]
// fn test_delete_without_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..100)) {
// assert!(test_operation_strategy(&ops[..], false, true).is_ok());
// }
// fn test_delete_without_sort_proptest_with_merge(ops in
// proptest::collection::vec(balanced_operation_strategy(), 1..100)) { assert!
// (test_operation_strategy(&ops[..], false, true).is_ok()); }
// }

View File

@@ -12,12 +12,9 @@ use crate::core::{Segment, SegmentReader};
use crate::directory::WritePtr;
use crate::docset::{DocSet, TERMINATED};
use crate::error::DataCorruption;
use crate::fastfield::{
AliveBitSet, Column, CompositeFastFieldSerializer,
};
use crate::fastfield::{AliveBitSet, Column, CompositeFastFieldSerializer};
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
use crate::indexer::doc_id_mapping::SegmentDocIdMapping;
use crate::indexer::sorted_doc_id_column::RemappedDocIdColumn;
// use crate::indexer::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueColumn;
use crate::indexer::SegmentSerializer;
use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
@@ -255,60 +252,57 @@ impl IndexMerger {
) -> crate::Result<()> {
debug_time!("wrie-fast-fields");
todo!();
/*
for (field, field_entry) in self.schema.fields() {
let field_type = field_entry.field_type();
match field_type {
FieldType::Facet(_) | FieldType::Str(_) if field_type.is_fast() => {
let term_ordinal_mapping = term_ord_mappings.remove(&field).expect(
"Logic Error in Tantivy (Please report). Facet field should have required \
a`term_ordinal_mapping`.",
);
self.write_term_id_fast_field(
field,
&term_ordinal_mapping,
fast_field_serializer,
doc_id_mapping,
)?;
}
FieldType::U64(ref options)
| FieldType::I64(ref options)
| FieldType::F64(ref options)
| FieldType::Bool(ref options) => {
todo!()
}
FieldType::Date(ref options) => {
if options.is_fast() {
todo!();
}
// Some(Cardinality::SingleValue) => {
// self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?;
// }
// Some(Cardinality::MultiValues) => {
// self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?;
// }
// None => {}
},
FieldType::Bytes(byte_options) => {
if byte_options.is_fast() {
self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?;
}
}
FieldType::IpAddr(options) => {
if options.is_fast() {
todo!();
}
},
FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => {
// We don't handle json fast field for the moment
// They can be implemented using what is done
// for facets in the future
}
}
}
*/
// for (field, field_entry) in self.schema.fields() {
// let field_type = field_entry.field_type();
// match field_type {
// FieldType::Facet(_) | FieldType::Str(_) if field_type.is_fast() => {
// let term_ordinal_mapping = term_ord_mappings.remove(&field).expect(
// "Logic Error in Tantivy (Please report). Facet field should have required \
// a`term_ordinal_mapping`.",
// );
// self.write_term_id_fast_field(
// field,
// &term_ordinal_mapping,
// fast_field_serializer,
// doc_id_mapping,
// )?;
// }
// FieldType::U64(ref options)
// | FieldType::I64(ref options)
// | FieldType::F64(ref options)
// | FieldType::Bool(ref options) => {
// todo!()
// }
// FieldType::Date(ref options) => {
// if options.is_fast() {
// todo!();
// }
// Some(Cardinality::SingleValue) => {
// self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?;
// }
// Some(Cardinality::MultiValues) => {
// self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?;
// }
// None => {}
// },
// FieldType::Bytes(byte_options) => {
// if byte_options.is_fast() {
// self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?;
// }
// }
// FieldType::IpAddr(options) => {
// if options.is_fast() {
// todo!();
// }
// },
//
// FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => {
// We don't handle json fast field for the moment
// They can be implemented using what is done
// for facets in the future
// }
// }
// }
Ok(())
}
@@ -356,12 +350,13 @@ impl IndexMerger {
fast_field_serializer: &mut CompositeFastFieldSerializer,
doc_id_mapping: &SegmentDocIdMapping,
) -> crate::Result<()> {
let fast_field_accessor = RemappedDocIdColumn::new(
&self.readers,
doc_id_mapping,
self.schema.get_field_name(field),
);
fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?;
todo!();
// let fast_field_accessor = RemappedDocIdColumn::new(
// &self.readers,
// doc_id_mapping,
// self.schema.get_field_name(field),
// );
// fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?;
Ok(())
}
@@ -817,15 +812,13 @@ mod tests {
use byteorder::{BigEndian, ReadBytesExt};
use schema::FAST;
use crate::collector::tests::{
FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
};
use crate::collector::tests::{FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE};
use crate::collector::Count;
use crate::core::Index;
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
use crate::schema::{
Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
TextFieldIndexing, INDEXED, TEXT,
Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term, TextFieldIndexing,
INDEXED, TEXT,
};
use crate::time::OffsetDateTime;
use crate::{
@@ -1015,8 +1008,7 @@ mod tests {
// }
// scores
// })
searcher
.search(&term_query, &collector)
searcher.search(&term_query, &collector)
};
let empty_vec = Vec::<u64>::new();
@@ -1296,7 +1288,6 @@ mod tests {
Ok(())
}
// TODO re-enable
// #[test]
// fn test_merge_facets_sort_none() {
@@ -1316,8 +1307,8 @@ mod tests {
// }),
// true,
// );
// // In the merge case this will not go through the doc_id mapping code, because the data is
// // sorted and disjunct
// // In the merge case this will not go through the doc_id mapping code, because the data
// is // sorted and disjunct
// test_merge_facets(
// Some(IndexSettings {
// sort_by_field: Some(IndexSortByField {
@@ -1343,8 +1334,8 @@ mod tests {
// }),
// true,
// );
// // In the merge case this will not go through the doc_id mapping code, because the data is
// // sorted and disjunct
// // In the merge case this will not go through the doc_id mapping code, because the data
// is // sorted and disjunct
// test_merge_facets(
// Some(IndexSettings {
// sort_by_field: Some(IndexSortByField {
@@ -1359,8 +1350,8 @@ mod tests {
// force_segment_value_overlap forces the int value for sorting to have overlapping min and max
// ranges between segments so that merge algorithm can't apply certain optimizations
// fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap: bool) {
// let mut schema_builder = schema::Schema::builder();
// fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap:
// bool) { let mut schema_builder = schema::Schema::builder();
// let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
// let int_options = NumericOptions::default()
// .set_fast()
@@ -1529,9 +1520,7 @@ mod tests {
#[test]
fn test_merge_multivalued_int_fields_all_deleted() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let int_options = NumericOptions::default()
.set_fast()
.set_indexed();
let int_options = NumericOptions::default().set_fast().set_indexed();
let int_field = schema_builder.add_u64_field("intvals", int_options);
let index = Index::create_in_ram(schema_builder.build());
let reader = index.reader()?;
@@ -1566,9 +1555,7 @@ mod tests {
#[test]
fn test_merge_multivalued_int_fields_simple() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let int_options = NumericOptions::default()
.set_fast()
.set_indexed();
let int_options = NumericOptions::default().set_fast().set_indexed();
let int_field = schema_builder.add_u64_field("intvals", int_options);
let index = Index::create_in_ram(schema_builder.build());

View File

@@ -12,9 +12,7 @@ mod tests {
fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
let mut schema_builder = schema::Schema::builder();
let int_options = NumericOptions::default()
.set_fast()
.set_indexed();
let int_options = NumericOptions::default().set_fast().set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
@@ -71,10 +69,8 @@ mod tests {
let bytes_field = schema_builder.add_bytes_field("bytes", bytes_options);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let multi_numbers = schema_builder.add_u64_field(
"multi_numbers",
NumericOptions::default().set_fast(),
);
let multi_numbers =
schema_builder.add_u64_field("multi_numbers", NumericOptions::default().set_fast());
let text_field_options = TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default()
@@ -363,7 +359,6 @@ mod tests {
// )
// .unwrap();
// let int_field = index.schema().get_field("intval").unwrap();
// let multi_numbers = index.schema().get_field("multi_numbers").unwrap();
// let bytes_field = index.schema().get_field("bytes").unwrap();
@@ -490,9 +485,7 @@ mod bench_sorted_index_merge {
use crate::{IndexSettings, IndexSortByField, IndexWriter, Order};
fn create_index(sort_by_field: Option<IndexSortByField>) -> Index {
let mut schema_builder = Schema::builder();
let int_options = NumericOptions::default()
.set_fast()
.set_indexed();
let int_options = NumericOptions::default().set_fast().set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);
let schema = schema_builder.build();

View File

@@ -19,7 +19,7 @@ mod segment_register;
pub mod segment_serializer;
pub mod segment_updater;
mod segment_writer;
mod sorted_doc_id_column;
// mod sorted_doc_id_column;
// mod sorted_doc_id_multivalue_column;
mod stamper;

View File

@@ -408,10 +408,7 @@ fn remap_and_write(
serializer.get_postings_serializer(),
)?;
debug!("fastfield-serialize");
fast_field_writers.serialize(
serializer.get_fast_field_write(),
doc_id_map,
)?;
fast_field_writers.serialize(serializer.get_fast_field_write(), doc_id_map)?;
// finalize temp docstore and create version, which reflects the doc_id_map
if let Some(doc_id_map) = doc_id_map {

View File

@@ -147,6 +147,14 @@ pub struct DateTime {
pub(crate) timestamp_micros: i64,
}
// Converts a `columnar::DateTime` by carrying its `timestamp_micros` value over unchanged.
impl From<columnar::DateTime> for DateTime {
    fn from(other: columnar::DateTime) -> Self {
        let timestamp_micros = other.timestamp_micros;
        Self { timestamp_micros }
    }
}
impl DateTime {
/// Create new from UNIX timestamp in seconds
pub const fn from_timestamp_secs(seconds: i64) -> Self {
@@ -1166,5 +1174,4 @@ pub mod tests {
);
assert_eq!(dt_from_ts_nanos.to_hms_micro(), offset_dt.to_hms_micro());
}
}

View File

@@ -15,9 +15,17 @@ use crate::indexer::{
};
// use crate::query::range_query::is_type_valid_for_fastfield_range_query;
use crate::query::{
AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query,
AllQuery,
BooleanQuery,
BoostQuery,
EmptyQuery,
FuzzyTermQuery,
Occur,
PhraseQuery,
Query,
// RangeQuery,
TermQuery, TermSetQuery,
TermQuery,
TermSetQuery,
};
use crate::schema::{
Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, JsonObjectOptions,
@@ -336,91 +344,89 @@ impl QueryParser {
phrase: &str,
) -> Result<Term, QueryParserError> {
todo!();
/*
let field_entry = self.schema.get_field_entry(field);
let field_type = field_entry.field_type();
let field_supports_ff_range_queries = field_type.is_fast()
&& is_type_valid_for_fastfield_range_query(field_type.value_type());
if !field_type.is_indexed() && !field_supports_ff_range_queries {
return Err(QueryParserError::FieldNotIndexed(
field_entry.name().to_string(),
));
}
if !json_path.is_empty() && field_type.value_type() != Type::Json {
return Err(QueryParserError::UnsupportedQuery(format!(
"Json path is not supported for field {:?}",
field_entry.name()
)));
}
match *field_type {
FieldType::U64(_) => {
let val: u64 = u64::from_str(phrase)?;
Ok(Term::from_field_u64(field, val))
}
FieldType::I64(_) => {
let val: i64 = i64::from_str(phrase)?;
Ok(Term::from_field_i64(field, val))
}
FieldType::F64(_) => {
let val: f64 = f64::from_str(phrase)?;
Ok(Term::from_field_f64(field, val))
}
FieldType::Bool(_) => {
let val: bool = bool::from_str(phrase)?;
Ok(Term::from_field_bool(field, val))
}
FieldType::Date(_) => {
let dt = OffsetDateTime::parse(phrase, &Rfc3339)?;
Ok(Term::from_field_date(field, DateTime::from_utc(dt)))
}
FieldType::Str(ref str_options) => {
let option = str_options.get_indexing_options().ok_or_else(|| {
// This should have been seen earlier really.
QueryParserError::FieldNotIndexed(field_entry.name().to_string())
})?;
let text_analyzer =
self.tokenizer_manager
.get(option.tokenizer())
.ok_or_else(|| QueryParserError::UnknownTokenizer {
field: field_entry.name().to_string(),
tokenizer: option.tokenizer().to_string(),
})?;
let mut terms: Vec<Term> = Vec::new();
let mut token_stream = text_analyzer.token_stream(phrase);
token_stream.process(&mut |token| {
let term = Term::from_field_text(field, &token.text);
terms.push(term);
});
if terms.len() != 1 {
return Err(QueryParserError::UnsupportedQuery(format!(
"Range query boundary cannot have multiple tokens: {phrase:?}."
)));
}
Ok(terms.into_iter().next().unwrap())
}
FieldType::JsonObject(_) => {
// Json range are not supported.
Err(QueryParserError::UnsupportedQuery(
"Range query are not supported on json field.".to_string(),
))
}
FieldType::Facet(_) => match Facet::from_text(phrase) {
Ok(facet) => Ok(Term::from_facet(field, &facet)),
Err(e) => Err(QueryParserError::from(e)),
},
FieldType::Bytes(_) => {
let bytes = BASE64
.decode(phrase)
.map_err(QueryParserError::ExpectedBase64)?;
Ok(Term::from_field_bytes(field, &bytes))
}
FieldType::IpAddr(_) => {
let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr();
Ok(Term::from_field_ip_addr(field, ip_v6))
}
}
*/
// let field_entry = self.schema.get_field_entry(field);
// let field_type = field_entry.field_type();
// let field_supports_ff_range_queries = field_type.is_fast()
// && is_type_valid_for_fastfield_range_query(field_type.value_type());
//
// if !field_type.is_indexed() && !field_supports_ff_range_queries {
// return Err(QueryParserError::FieldNotIndexed(
// field_entry.name().to_string(),
// ));
// }
// if !json_path.is_empty() && field_type.value_type() != Type::Json {
// return Err(QueryParserError::UnsupportedQuery(format!(
// "Json path is not supported for field {:?}",
// field_entry.name()
// )));
// }
// match *field_type {
// FieldType::U64(_) => {
// let val: u64 = u64::from_str(phrase)?;
// Ok(Term::from_field_u64(field, val))
// }
// FieldType::I64(_) => {
// let val: i64 = i64::from_str(phrase)?;
// Ok(Term::from_field_i64(field, val))
// }
// FieldType::F64(_) => {
// let val: f64 = f64::from_str(phrase)?;
// Ok(Term::from_field_f64(field, val))
// }
// FieldType::Bool(_) => {
// let val: bool = bool::from_str(phrase)?;
// Ok(Term::from_field_bool(field, val))
// }
// FieldType::Date(_) => {
// let dt = OffsetDateTime::parse(phrase, &Rfc3339)?;
// Ok(Term::from_field_date(field, DateTime::from_utc(dt)))
// }
// FieldType::Str(ref str_options) => {
// let option = str_options.get_indexing_options().ok_or_else(|| {
// This should have been seen earlier really.
// QueryParserError::FieldNotIndexed(field_entry.name().to_string())
// })?;
// let text_analyzer =
// self.tokenizer_manager
// .get(option.tokenizer())
// .ok_or_else(|| QueryParserError::UnknownTokenizer {
// field: field_entry.name().to_string(),
// tokenizer: option.tokenizer().to_string(),
// })?;
// let mut terms: Vec<Term> = Vec::new();
// let mut token_stream = text_analyzer.token_stream(phrase);
// token_stream.process(&mut |token| {
// let term = Term::from_field_text(field, &token.text);
// terms.push(term);
// });
// if terms.len() != 1 {
// return Err(QueryParserError::UnsupportedQuery(format!(
// "Range query boundary cannot have multiple tokens: {phrase:?}."
// )));
// }
// Ok(terms.into_iter().next().unwrap())
// }
// FieldType::JsonObject(_) => {
// Json range are not supported.
// Err(QueryParserError::UnsupportedQuery(
// "Range query are not supported on json field.".to_string(),
// ))
// }
// FieldType::Facet(_) => match Facet::from_text(phrase) {
// Ok(facet) => Ok(Term::from_facet(field, &facet)),
// Err(e) => Err(QueryParserError::from(e)),
// },
// FieldType::Bytes(_) => {
// let bytes = BASE64
// .decode(phrase)
// .map_err(QueryParserError::ExpectedBase64)?;
// Ok(Term::from_field_bytes(field, &bytes))
// }
// FieldType::IpAddr(_) => {
// let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr();
// Ok(Term::from_field_ip_addr(field, ip_v6))
// }
// }
}
fn compute_logical_ast_for_leaf(
@@ -744,11 +750,12 @@ fn convert_literal_to_query(
value_type,
lower,
upper,
} => { todo!();
// Box::new(RangeQuery::new_term_bounds(
// field, value_type, &lower, &upper,
// ))
} ,
} => {
todo!();
// Box::new(RangeQuery::new_term_bounds(
// field, value_type, &lower, &upper,
// ))
}
LogicalLiteral::Set { elements, .. } => Box::new(TermSetQuery::new(elements)),
LogicalLiteral::All => Box::new(AllQuery),
}

View File

@@ -8,7 +8,7 @@ use serde_json::Value as JsonValue;
use thiserror::Error;
use super::ip_options::IpAddrOptions;
use super:: IntoIpv6Addr;
use super::IntoIpv6Addr;
use crate::schema::bytes_options::BytesOptions;
use crate::schema::facet_options::FacetOptions;
use crate::schema::{

View File

@@ -87,7 +87,7 @@ impl IpAddrOptions {
/// If more than one value is associated with a fast field, only the last one is
/// kept.
#[must_use]
pub fn set_fast(mut self,) -> Self {
pub fn set_fast(mut self) -> Self {
self.fast = true;
self
}

View File

@@ -141,9 +141,9 @@ pub use self::index_record_option::IndexRecordOption;
pub use self::ip_options::{IntoIpv6Addr, IpAddrOptions};
pub use self::json_object_options::JsonObjectOptions;
pub use self::named_field_document::NamedFieldDocument;
pub use self::numeric_options::NumericOptions;
#[allow(deprecated)]
pub use self::numeric_options::IntOptions;
pub use self::numeric_options::NumericOptions;
pub use self::schema::{DocParsingError, Schema, SchemaBuilder};
pub use self::term::Term;
pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT};

View File

@@ -505,19 +505,13 @@ mod tests {
#[test]
pub fn test_schema_serialization() {
let mut schema_builder = Schema::builder();
let count_options = NumericOptions::default()
.set_stored()
.set_fast();
let popularity_options = NumericOptions::default()
.set_stored()
.set_fast();
let count_options = NumericOptions::default().set_stored().set_fast();
let popularity_options = NumericOptions::default().set_stored().set_fast();
let score_options = NumericOptions::default()
.set_indexed()
.set_fieldnorm()
.set_fast();
let is_read_options = NumericOptions::default()
.set_stored()
.set_fast();
let is_read_options = NumericOptions::default().set_stored().set_fast();
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field(
"author",
@@ -642,12 +636,8 @@ mod tests {
#[test]
pub fn test_document_to_json() {
let mut schema_builder = Schema::builder();
let count_options = NumericOptions::default()
.set_stored()
.set_fast();
let is_read_options = NumericOptions::default()
.set_stored()
.set_fast();
let count_options = NumericOptions::default().set_stored().set_fast();
let is_read_options = NumericOptions::default().set_stored().set_fast();
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field("author", STRING);
schema_builder.add_u64_field("count", count_options);
@@ -747,15 +737,9 @@ mod tests {
#[test]
pub fn test_parse_document() {
let mut schema_builder = Schema::builder();
let count_options = NumericOptions::default()
.set_stored()
.set_fast();
let popularity_options = NumericOptions::default()
.set_stored()
.set_fast();
let score_options = NumericOptions::default()
.set_indexed()
.set_fast();
let count_options = NumericOptions::default().set_stored().set_fast();
let popularity_options = NumericOptions::default().set_stored().set_fast();
let score_options = NumericOptions::default().set_indexed().set_fast();
let title_field = schema_builder.add_text_field("title", TEXT);
let author_field = schema_builder.add_text_field("author", STRING);
let count_field = schema_builder.add_u64_field("count", count_options);