mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-25 12:40:41 +00:00
blop
This commit is contained in:
@@ -26,6 +26,7 @@ Add alignment?
|
||||
Consider another codec to bridge the gap between few and 5k elements
|
||||
|
||||
# Cleanup and rationalization
|
||||
remove the 6 bit limitation of columntype. use 4 + 4 bits instead.
|
||||
in benchmark, unify percent vs ratio, f32 vs f64.
|
||||
investigate whether we should have better errors: io::Error is overused at the moment.
|
||||
rename rank/select in unit tests
|
||||
|
||||
@@ -20,7 +20,7 @@ pub struct Column<T> {
|
||||
|
||||
use crate::column_index::Set;
|
||||
|
||||
impl<T: PartialOrd> Column<T> {
|
||||
impl<T: PartialOrd + Copy + Send + Sync + 'static> Column<T> {
|
||||
pub fn first(&self, row_id: RowId) -> Option<T> {
|
||||
match &self.idx {
|
||||
ColumnIndex::Full => Some(self.values.get_val(row_id)),
|
||||
@@ -33,6 +33,13 @@ impl<T: PartialOrd> Column<T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn first_or_default_col(self, default_value: T) -> Arc<dyn ColumnValues<T>> {
|
||||
Arc::new(FirstValueWithDefault {
|
||||
column: self,
|
||||
default_value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for Column<T> {
|
||||
@@ -54,3 +61,27 @@ impl BinarySerializable for Cardinality {
|
||||
Ok(cardinality)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO simplify or optimize
|
||||
struct FirstValueWithDefault<T: Copy> {
|
||||
column: Column<T>,
|
||||
default_value: T,
|
||||
}
|
||||
|
||||
impl<T: PartialOrd + Send + Sync + Copy + 'static> ColumnValues<T> for FirstValueWithDefault<T> {
|
||||
fn get_val(&self, idx: u32) -> T {
|
||||
self.column.first(idx).unwrap_or(self.default_value)
|
||||
}
|
||||
|
||||
fn min_value(&self) -> T {
|
||||
self.column.values.min_value()
|
||||
}
|
||||
|
||||
fn max_value(&self) -> T {
|
||||
self.column.values.max_value()
|
||||
}
|
||||
|
||||
fn num_vals(&self) -> u32 {
|
||||
self.column.idx.num_rows()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -194,6 +194,20 @@ impl MonotonicallyMappableToU64 for i64 {
|
||||
}
|
||||
}
|
||||
|
||||
impl MonotonicallyMappableToU64 for crate::DateTime {
|
||||
#[inline(always)]
|
||||
fn to_u64(self) -> u64 {
|
||||
common::i64_to_u64(self.timestamp_micros)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn from_u64(val: u64) -> Self {
|
||||
crate::DateTime {
|
||||
timestamp_micros: common::u64_to_i64(val),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MonotonicallyMappableToU64 for bool {
|
||||
#[inline(always)]
|
||||
fn to_u64(self) -> u64 {
|
||||
|
||||
@@ -11,6 +11,7 @@ pub enum ColumnType {
|
||||
Bytes,
|
||||
Numerical(NumericalType),
|
||||
Bool,
|
||||
DateTime,
|
||||
}
|
||||
|
||||
impl ColumnType {
|
||||
@@ -31,6 +32,10 @@ impl ColumnType {
|
||||
column_type_category = ColumnTypeCategory::Bool;
|
||||
numerical_type_code = 0u8;
|
||||
}
|
||||
ColumnType::DateTime => {
|
||||
column_type_category = ColumnTypeCategory::DateTime;
|
||||
numerical_type_code = 0u8;
|
||||
}
|
||||
}
|
||||
place_bits::<0, 3>(column_type_category.to_code()) | place_bits::<3, 6>(numerical_type_code)
|
||||
}
|
||||
@@ -59,10 +64,50 @@ impl ColumnType {
|
||||
let numerical_type = NumericalType::try_from_code(numerical_type_code)?;
|
||||
Ok(ColumnType::Numerical(numerical_type))
|
||||
}
|
||||
ColumnTypeCategory::DateTime => {
|
||||
if numerical_type_code != 0u8 {
|
||||
return Err(InvalidData);
|
||||
}
|
||||
Ok(ColumnType::DateTime)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait HasAssociatedColumnType: 'static + Send + Sync + Copy + PartialOrd {
|
||||
fn column_type() -> ColumnType;
|
||||
}
|
||||
|
||||
impl HasAssociatedColumnType for u64 {
|
||||
fn column_type() -> ColumnType {
|
||||
ColumnType::Numerical(NumericalType::U64)
|
||||
}
|
||||
}
|
||||
|
||||
impl HasAssociatedColumnType for i64 {
|
||||
fn column_type() -> ColumnType {
|
||||
ColumnType::Numerical(NumericalType::I64)
|
||||
}
|
||||
}
|
||||
|
||||
impl HasAssociatedColumnType for f64 {
|
||||
fn column_type() -> ColumnType {
|
||||
ColumnType::Numerical(NumericalType::F64)
|
||||
}
|
||||
}
|
||||
|
||||
impl HasAssociatedColumnType for bool {
|
||||
fn column_type() -> ColumnType {
|
||||
ColumnType::Bool
|
||||
}
|
||||
}
|
||||
|
||||
impl HasAssociatedColumnType for crate::DateTime {
|
||||
fn column_type() -> ColumnType {
|
||||
ColumnType::DateTime
|
||||
}
|
||||
}
|
||||
|
||||
/// Column types are grouped into different categories that
|
||||
/// corresponds to the different types of `JsonValue` types.
|
||||
///
|
||||
@@ -76,6 +121,7 @@ pub(crate) enum ColumnTypeCategory {
|
||||
Bool = 0u8,
|
||||
Str = 1u8,
|
||||
Numerical = 2u8,
|
||||
DateTime = 3u8,
|
||||
}
|
||||
|
||||
impl ColumnTypeCategory {
|
||||
@@ -88,6 +134,7 @@ impl ColumnTypeCategory {
|
||||
0u8 => Ok(Self::Bool),
|
||||
1u8 => Ok(Self::Str),
|
||||
2u8 => Ok(Self::Numerical),
|
||||
// Code 3 is the discriminant of `DateTime`, not `Numerical`.
3u8 => Ok(Self::DateTime),
|
||||
_ => Err(InvalidData),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,6 @@ mod format_version;
|
||||
mod reader;
|
||||
mod writer;
|
||||
|
||||
pub use column_type::ColumnType;
|
||||
pub use column_type::{ColumnType, HasAssociatedColumnType};
|
||||
pub use reader::ColumnarReader;
|
||||
pub use writer::ColumnarWriter;
|
||||
|
||||
@@ -85,13 +85,12 @@ fn mutate_or_create_column<V, TMutator>(
|
||||
}
|
||||
|
||||
impl ColumnarWriter {
|
||||
|
||||
pub fn mem_usage(&self) -> usize {
|
||||
// TODO add dictionary builders.
|
||||
self.arena.mem_usage() +
|
||||
self.numerical_field_hash_map.mem_usage() +
|
||||
self.bool_field_hash_map.mem_usage() +
|
||||
self.bytes_field_hash_map.mem_usage()
|
||||
self.arena.mem_usage()
|
||||
+ self.numerical_field_hash_map.mem_usage()
|
||||
+ self.bool_field_hash_map.mem_usage()
|
||||
+ self.bytes_field_hash_map.mem_usage()
|
||||
}
|
||||
|
||||
pub fn force_numerical_type(&mut self, column_name: &str, numerical_type: NumericalType) {
|
||||
@@ -223,6 +222,22 @@ impl ColumnarWriter {
|
||||
&mut column_serializer,
|
||||
)?;
|
||||
}
|
||||
ColumnTypeCategory::DateTime => {
|
||||
let numerical_column_writer: NumericalColumnWriter =
|
||||
self.numerical_field_hash_map.read(addr);
|
||||
let (_numerical_type, cardinality) =
|
||||
numerical_column_writer.column_type_and_cardinality(num_docs);
|
||||
let mut column_serializer =
|
||||
serializer.serialize_column(column_name, ColumnType::DateTime);
|
||||
serialize_numerical_column(
|
||||
cardinality,
|
||||
num_docs,
|
||||
NumericalType::I64,
|
||||
numerical_column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
|
||||
buffers,
|
||||
&mut column_serializer,
|
||||
)?;
|
||||
}
|
||||
};
|
||||
}
|
||||
serializer.finalize()?;
|
||||
|
||||
@@ -6,7 +6,6 @@ use common::{HasLen, OwnedBytes};
|
||||
|
||||
use crate::column::{BytesColumn, Column};
|
||||
use crate::columnar::ColumnType;
|
||||
use crate::DateTime;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum DynamicColumn {
|
||||
@@ -15,33 +14,35 @@ pub enum DynamicColumn {
|
||||
U64(Column<u64>),
|
||||
F64(Column<f64>),
|
||||
IpAddr(Column<IpAddr>),
|
||||
DateTime(Column<DateTime>),
|
||||
Str(BytesColumn),
|
||||
DateTime(Column<crate::DateTime>),
|
||||
}
|
||||
|
||||
impl From<Column<i64>> for DynamicColumn {
|
||||
fn from(column_i64: Column<i64>) -> Self {
|
||||
DynamicColumn::I64(column_i64)
|
||||
}
|
||||
// Generates the two conversions between the typed column `Column<$typ>` and the
// `DynamicColumn::$enum_name` variant:
//
// - `DynamicColumn -> Option<Column<$typ>>`: `Some` if the dynamic column holds
//   that variant, `None` otherwise.
// - `Column<$typ> -> DynamicColumn`: wraps the typed column in the variant.
//
// Both are written as `From` impls; the matching `Into` impls (including
// `DynamicColumn: Into<Option<Column<$typ>>>`) come from the standard blanket impl.
macro_rules! static_dynamic_conversions {
    ($typ:ty, $enum_name:ident) => {
        impl From<DynamicColumn> for Option<Column<$typ>> {
            fn from(dynamic_column: DynamicColumn) -> Option<Column<$typ>> {
                if let DynamicColumn::$enum_name(col) = dynamic_column {
                    Some(col)
                } else {
                    None
                }
            }
        }

        impl From<Column<$typ>> for DynamicColumn {
            fn from(typed_column: Column<$typ>) -> Self {
                DynamicColumn::$enum_name(typed_column)
            }
        }
    };
}
|
||||
|
||||
impl From<Column<u64>> for DynamicColumn {
|
||||
fn from(column_u64: Column<u64>) -> Self {
|
||||
DynamicColumn::U64(column_u64)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Column<f64>> for DynamicColumn {
|
||||
fn from(column_f64: Column<f64>) -> Self {
|
||||
DynamicColumn::F64(column_f64)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Column<bool>> for DynamicColumn {
|
||||
fn from(bool_column: Column<bool>) -> Self {
|
||||
DynamicColumn::Bool(bool_column)
|
||||
}
|
||||
}
|
||||
static_dynamic_conversions!(bool, Bool);
|
||||
static_dynamic_conversions!(u64, U64);
|
||||
static_dynamic_conversions!(i64, I64);
|
||||
static_dynamic_conversions!(f64, F64);
|
||||
static_dynamic_conversions!(crate::DateTime, DateTime);
|
||||
|
||||
impl From<BytesColumn> for DynamicColumn {
|
||||
fn from(dictionary_encoded_col: BytesColumn) -> Self {
|
||||
@@ -56,11 +57,13 @@ pub struct DynamicColumnHandle {
|
||||
}
|
||||
|
||||
impl DynamicColumnHandle {
|
||||
// TODO rename load
|
||||
pub fn open(&self) -> io::Result<DynamicColumn> {
|
||||
let column_bytes: OwnedBytes = self.file_slice.read_bytes()?;
|
||||
self.open_internal(column_bytes)
|
||||
}
|
||||
|
||||
// TODO rename load_async
|
||||
pub async fn open_async(&self) -> io::Result<DynamicColumn> {
|
||||
let column_bytes: OwnedBytes = self.file_slice.read_bytes_async().await?;
|
||||
self.open_internal(column_bytes)
|
||||
@@ -81,6 +84,9 @@ impl DynamicColumnHandle {
|
||||
}
|
||||
},
|
||||
ColumnType::Bool => crate::column::open_column_u64::<bool>(column_bytes)?.into(),
|
||||
ColumnType::DateTime => {
|
||||
crate::column::open_column_u64::<crate::DateTime>(column_bytes)?.into()
|
||||
}
|
||||
};
|
||||
Ok(dynamic_column)
|
||||
}
|
||||
|
||||
@@ -18,16 +18,18 @@ mod dynamic_column;
|
||||
pub(crate) mod utils;
|
||||
mod value;
|
||||
|
||||
pub use columnar::{ColumnarReader, ColumnarWriter};
|
||||
pub use column::Column;
|
||||
pub use column_values::ColumnValues;
|
||||
pub use columnar::{ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType};
|
||||
pub use value::{NumericalType, NumericalValue};
|
||||
|
||||
// pub use self::dynamic_column::DynamicColumnHandle;
|
||||
pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle};
|
||||
|
||||
pub type RowId = u32;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Clone, Copy, PartialOrd, PartialEq, Default)]
|
||||
pub struct DateTime {
|
||||
timestamp_micros: i64,
|
||||
pub timestamp_micros: i64,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::InvalidData;
|
||||
use crate::{Column, ColumnType, InvalidData};
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq)]
|
||||
pub enum NumericalValue {
|
||||
|
||||
@@ -14,6 +14,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
[dependencies]
|
||||
common = { version = "0.5", path = "../common/", package = "tantivy-common" }
|
||||
tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
|
||||
columnar = { version= "0.1", path="../columnar", package="tantivy-columnar" }
|
||||
prettytable-rs = {version="0.10.0", optional= true}
|
||||
rand = {version="0.8.3", optional= true}
|
||||
fastdivide = "0.4"
|
||||
|
||||
@@ -2,81 +2,11 @@ use std::fmt::{self, Debug};
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::{Range, RangeInclusive};
|
||||
|
||||
pub use columnar::ColumnValues as Column;
|
||||
use tantivy_bitpacker::minmax;
|
||||
|
||||
use crate::monotonic_mapping::StrictlyMonotonicFn;
|
||||
|
||||
/// `Column` provides columnar access on a field.
|
||||
pub trait Column<T: PartialOrd + Debug = u64>: Send + Sync {
|
||||
/// Return the value associated with the given idx.
|
||||
///
|
||||
/// This accessor should return as fast as possible.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `idx` is greater than the column length.
|
||||
fn get_val(&self, idx: u32) -> T;
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Must panic if `start + output.len()` is greater than
|
||||
/// the segment's `maxdoc`.
|
||||
#[inline]
|
||||
fn get_range(&self, start: u64, output: &mut [T]) {
|
||||
for (out, idx) in output.iter_mut().zip(start..) {
|
||||
*out = self.get_val(idx as u32);
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the positions of values which are in the provided value range.
|
||||
///
|
||||
/// Note that position == docid for single value fast fields
|
||||
#[inline]
|
||||
fn get_docids_for_value_range(
|
||||
&self,
|
||||
value_range: RangeInclusive<T>,
|
||||
doc_id_range: Range<u32>,
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
|
||||
|
||||
for idx in doc_id_range.start..doc_id_range.end {
|
||||
let val = self.get_val(idx);
|
||||
if value_range.contains(&val) {
|
||||
positions.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the minimum value for this fast field.
|
||||
///
|
||||
/// This min_value may not be exact.
|
||||
/// For instance, the min value does not take in account of possible
|
||||
/// deleted document. All values are however guaranteed to be higher than
|
||||
/// `.min_value()`.
|
||||
fn min_value(&self) -> T;
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
///
|
||||
/// This max_value may not be exact.
|
||||
/// For instance, the max value does not take in account of possible
|
||||
/// deleted document. All values are however guaranteed to be lower than
|
||||
/// `.max_value()`.
|
||||
fn max_value(&self) -> T;
|
||||
|
||||
/// The number of values in the column.
|
||||
fn num_vals(&self) -> u32;
|
||||
|
||||
/// Returns a iterator over the data
|
||||
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = T> + 'a> {
|
||||
Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
|
||||
}
|
||||
}
|
||||
|
||||
/// VecColumn provides `Column` over a slice.
|
||||
pub struct VecColumn<'a, T = u64> {
|
||||
values: &'a [T],
|
||||
@@ -84,32 +14,6 @@ pub struct VecColumn<'a, T = u64> {
|
||||
max_value: T,
|
||||
}
|
||||
|
||||
impl<'a, C: Column<T>, T: Copy + PartialOrd + fmt::Debug> Column<T> for &'a C {
|
||||
fn get_val(&self, idx: u32) -> T {
|
||||
(*self).get_val(idx)
|
||||
}
|
||||
|
||||
fn min_value(&self) -> T {
|
||||
(*self).min_value()
|
||||
}
|
||||
|
||||
fn max_value(&self) -> T {
|
||||
(*self).max_value()
|
||||
}
|
||||
|
||||
fn num_vals(&self) -> u32 {
|
||||
(*self).num_vals()
|
||||
}
|
||||
|
||||
fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = T> + 'b> {
|
||||
(*self).iter()
|
||||
}
|
||||
|
||||
fn get_range(&self, start: u64, output: &mut [T]) {
|
||||
(*self).get_range(start, output)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Copy + PartialOrd + Send + Sync + Debug> Column<T> for VecColumn<'a, T> {
|
||||
fn get_val(&self, position: u32) -> T {
|
||||
self.values[position as usize]
|
||||
|
||||
@@ -12,10 +12,10 @@
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
use columnar::{DynamicColumn, HasAssociatedColumnType};
|
||||
use fastfield_codecs::Column;
|
||||
|
||||
use crate::collector::{Collector, SegmentCollector};
|
||||
use crate::fastfield::FastValue;
|
||||
use crate::schema::Field;
|
||||
use crate::{Score, SegmentReader, TantivyError};
|
||||
|
||||
@@ -61,7 +61,7 @@ use crate::{Score, SegmentReader, TantivyError};
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct FilterCollector<TCollector, TPredicate, TPredicateValue: FastValue>
|
||||
pub struct FilterCollector<TCollector, TPredicate, TPredicateValue: Default>
|
||||
where TPredicate: 'static + Clone
|
||||
{
|
||||
field: Field,
|
||||
@@ -70,7 +70,7 @@ where TPredicate: 'static + Clone
|
||||
t_predicate_value: PhantomData<TPredicateValue>,
|
||||
}
|
||||
|
||||
impl<TCollector, TPredicate, TPredicateValue: FastValue>
|
||||
impl<TCollector, TPredicate, TPredicateValue: Default>
|
||||
FilterCollector<TCollector, TPredicate, TPredicateValue>
|
||||
where
|
||||
TCollector: Collector + Send + Sync,
|
||||
@@ -91,12 +91,13 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<TCollector, TPredicate, TPredicateValue: FastValue> Collector
|
||||
impl<TCollector, TPredicate, TPredicateValue: Default> Collector
|
||||
for FilterCollector<TCollector, TPredicate, TPredicateValue>
|
||||
where
|
||||
TCollector: Collector + Send + Sync,
|
||||
TPredicate: 'static + Fn(TPredicateValue) -> bool + Send + Sync + Clone,
|
||||
TPredicateValue: FastValue,
|
||||
TPredicateValue: HasAssociatedColumnType,
|
||||
DynamicColumn: Into<Option<columnar::Column<TPredicateValue>>>,
|
||||
{
|
||||
// That's the type of our result.
|
||||
// Our standard deviation will be a float.
|
||||
@@ -117,20 +118,10 @@ where
|
||||
field_entry.name()
|
||||
)));
|
||||
}
|
||||
let requested_type = TPredicateValue::to_type();
|
||||
let field_schema_type = field_entry.field_type().value_type();
|
||||
if requested_type != field_schema_type {
|
||||
return Err(TantivyError::SchemaError(format!(
|
||||
"Field {:?} is of type {:?}!={:?}",
|
||||
field_entry.name(),
|
||||
requested_type,
|
||||
field_schema_type
|
||||
)));
|
||||
}
|
||||
|
||||
let fast_field_reader = segment_reader
|
||||
.fast_fields()
|
||||
.typed_fast_field_reader(schema.get_field_name(self.field))?;
|
||||
.typed_column_first_or_default(schema.get_field_name(self.field))?;
|
||||
|
||||
let segment_collector = self
|
||||
.collector
|
||||
@@ -159,7 +150,7 @@ where
|
||||
pub struct FilterSegmentCollector<TSegmentCollector, TPredicate, TPredicateValue>
|
||||
where
|
||||
TPredicate: 'static,
|
||||
TPredicateValue: FastValue,
|
||||
DynamicColumn: Into<Option<columnar::Column<TPredicateValue>>>,
|
||||
{
|
||||
fast_field_reader: Arc<dyn Column<TPredicateValue>>,
|
||||
segment_collector: TSegmentCollector,
|
||||
@@ -171,8 +162,9 @@ impl<TSegmentCollector, TPredicate, TPredicateValue> SegmentCollector
|
||||
for FilterSegmentCollector<TSegmentCollector, TPredicate, TPredicateValue>
|
||||
where
|
||||
TSegmentCollector: SegmentCollector,
|
||||
TPredicateValue: HasAssociatedColumnType,
|
||||
TPredicate: 'static + Fn(TPredicateValue) -> bool + Send + Sync,
|
||||
TPredicateValue: FastValue,
|
||||
DynamicColumn: Into<Option<columnar::Column<TPredicateValue>>>,
|
||||
{
|
||||
type Fruit = TSegmentCollector::Fruit;
|
||||
|
||||
|
||||
@@ -104,7 +104,6 @@ pub use self::custom_score_top_collector::{CustomScorer, CustomSegmentScorer};
|
||||
|
||||
mod tweak_score_top_collector;
|
||||
pub use self::tweak_score_top_collector::{ScoreSegmentTweaker, ScoreTweaker};
|
||||
|
||||
// mod facet_collector;
|
||||
// pub use self::facet_collector::{FacetCollector, FacetCounts};
|
||||
use crate::query::Weight;
|
||||
|
||||
@@ -57,9 +57,10 @@ pub fn test_filter_collector() -> crate::Result<()> {
|
||||
|
||||
assert_eq!(filtered_top_docs.len(), 0);
|
||||
|
||||
fn date_filter(value: DateTime) -> bool {
|
||||
(value.into_utc() - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
|
||||
.whole_weeks()
|
||||
fn date_filter(value: columnar::DateTime) -> bool {
|
||||
(crate::DateTime::from(value).into_utc()
|
||||
- OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
|
||||
.whole_weeks()
|
||||
> 0
|
||||
}
|
||||
|
||||
@@ -164,7 +165,9 @@ pub struct FastFieldSegmentCollector {
|
||||
|
||||
impl FastFieldTestCollector {
|
||||
pub fn for_field(field: impl ToString) -> FastFieldTestCollector {
|
||||
FastFieldTestCollector { field: field.to_string() }
|
||||
FastFieldTestCollector {
|
||||
field: field.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,10 +154,11 @@ impl CustomScorer<u64> for ScorerByField {
|
||||
// mapping is monotonic, so it is sufficient to compute our top-K docs.
|
||||
//
|
||||
// The conversion will then happen only on the top-K docs.
|
||||
let ff_reader = segment_reader
|
||||
.fast_fields()
|
||||
.typed_fast_field_reader(segment_reader.schema().get_field_name(self.field))?;
|
||||
Ok(ScorerByFastFieldReader { ff_reader })
|
||||
todo!();
|
||||
// let ff_reader = segment_reader
|
||||
// .fast_fields()
|
||||
// .typed_column(&self.field)?;
|
||||
// Ok(ScorerByFastFieldReader { ff_reader })
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -153,8 +153,7 @@ impl SegmentReader {
|
||||
let schema = segment.schema();
|
||||
|
||||
let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?;
|
||||
let fast_fields_readers =
|
||||
Arc::new(FastFieldReaders::open(fast_fields_data)?);
|
||||
let fast_fields_readers = Arc::new(FastFieldReaders::open(fast_fields_data)?);
|
||||
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
|
||||
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
|
||||
|
||||
|
||||
@@ -27,7 +27,6 @@ pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveB
|
||||
// pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
// pub use self::facet_reader::FacetReader;
|
||||
|
||||
pub use self::readers::FastFieldReaders;
|
||||
pub use self::serializer::{Column, CompositeFastFieldSerializer};
|
||||
use self::writer::unexpected_value;
|
||||
@@ -171,9 +170,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
|
||||
use crate::merge_policy::NoMergePolicy;
|
||||
use crate::schema::{
|
||||
Document, Field, Schema, SchemaBuilder, FAST, INDEXED, STRING, TEXT,
|
||||
};
|
||||
use crate::schema::{Document, Field, Schema, SchemaBuilder, FAST, INDEXED, STRING, TEXT};
|
||||
use crate::time::OffsetDateTime;
|
||||
use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader};
|
||||
|
||||
@@ -184,7 +181,6 @@ mod tests {
|
||||
});
|
||||
pub static FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("field").unwrap());
|
||||
|
||||
|
||||
#[test]
|
||||
pub fn test_convert_i64_u64() {
|
||||
let datetime = DateTime::from_utc(OffsetDateTime::UNIX_EPOCH);
|
||||
@@ -207,27 +203,25 @@ mod tests {
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>2u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 164);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
// let column = fast_field_readers.u64("field").unwrap();
|
||||
// assert_eq!(column.get_val(0), 13u64);
|
||||
// assert_eq!(column.get_val(1), 14u64);
|
||||
// assert_eq!(column.get_val(2), 2u64);
|
||||
let column = fast_field_readers.u64("field").unwrap();
|
||||
assert_eq!(column.get_val(0), 13u64);
|
||||
assert_eq!(column.get_val(1), 14u64);
|
||||
assert_eq!(column.get_val(2), 2u64);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_intfastfield_large() -> crate::Result<()> {
|
||||
fn test_intfastfield_large() {
|
||||
let path = Path::new("test");
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test"))?;
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>4u64))
|
||||
@@ -256,36 +250,28 @@ mod tests {
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>215u64))
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut write, None)?;
|
||||
write.terminate()?;
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path)?;
|
||||
assert_eq!(file.len(), 62);
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&file)?;
|
||||
let data = fast_fields_composite
|
||||
.open_read(*FIELD)
|
||||
.unwrap()
|
||||
.read_bytes()?;
|
||||
let fast_field_reader = open::<u64>(data)?;
|
||||
assert_eq!(fast_field_reader.get_val(0), 4u64);
|
||||
assert_eq!(fast_field_reader.get_val(1), 14_082_001u64);
|
||||
assert_eq!(fast_field_reader.get_val(2), 3_052u64);
|
||||
assert_eq!(fast_field_reader.get_val(3), 9002u64);
|
||||
assert_eq!(fast_field_reader.get_val(4), 15_001u64);
|
||||
assert_eq!(fast_field_reader.get_val(5), 777u64);
|
||||
assert_eq!(fast_field_reader.get_val(6), 1_002u64);
|
||||
assert_eq!(fast_field_reader.get_val(7), 1_501u64);
|
||||
assert_eq!(fast_field_reader.get_val(8), 215u64);
|
||||
}
|
||||
Ok(())
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 192);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let col = fast_field_readers.u64("field").unwrap();
|
||||
assert_eq!(col.get_val(0), 4u64);
|
||||
assert_eq!(col.get_val(1), 14_082_001u64);
|
||||
assert_eq!(col.get_val(2), 3_052u64);
|
||||
assert_eq!(col.get_val(3), 9002u64);
|
||||
assert_eq!(col.get_val(4), 15_001u64);
|
||||
assert_eq!(col.get_val(5), 777u64);
|
||||
assert_eq!(col.get_val(6), 1_002u64);
|
||||
assert_eq!(col.get_val(7), 1_501u64);
|
||||
assert_eq!(col.get_val(8), 215u64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_intfastfield_null_amplitude() -> crate::Result<()> {
|
||||
fn test_intfastfield_null_amplitude() {
|
||||
let path = Path::new("test");
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
@@ -294,29 +280,20 @@ mod tests {
|
||||
.add_document(&doc!(*FIELD=>100_000u64))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 35);
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&file).unwrap();
|
||||
let data = fast_fields_composite
|
||||
.open_read(*FIELD)
|
||||
.unwrap()
|
||||
.read_bytes()?;
|
||||
let fast_field_reader = open::<u64>(data)?;
|
||||
for doc in 0..10_000 {
|
||||
assert_eq!(fast_field_reader.get_val(doc), 100_000u64);
|
||||
}
|
||||
assert_eq!(file.len(), 165);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_reader = fast_field_readers.u64("field").unwrap();
|
||||
for doc in 0..10_000 {
|
||||
assert_eq!(fast_field_reader.get_val(doc), 100_000u64);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_intfastfield_large_numbers() -> crate::Result<()> {
|
||||
fn test_intfastfield_large_numbers() {
|
||||
let path = Path::new("test");
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
|
||||
@@ -327,34 +304,23 @@ mod tests {
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>0u64))
|
||||
.unwrap();
|
||||
for doc_id in 1u64..10_001u64 {
|
||||
for doc_id in 1u64..10_000u64 {
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id as u64))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 80049);
|
||||
assert_eq!(file.len(), 80173);
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&file)?;
|
||||
let data = fast_fields_composite
|
||||
.open_read(*FIELD)
|
||||
.unwrap()
|
||||
.read_bytes()?;
|
||||
let fast_field_reader = open::<u64>(data)?;
|
||||
assert_eq!(fast_field_reader.get_val(0), 0u64);
|
||||
for doc in 1..10_001 {
|
||||
assert_eq!(
|
||||
fast_field_reader.get_val(doc),
|
||||
5_000_000_000_000_000_000u64 + doc as u64 - 1u64
|
||||
);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let col = fast_field_readers.u64("field").unwrap();
|
||||
for doc in 1..10_000 {
|
||||
assert_eq!(col.get_val(doc), 5_000_000_000_000_000_000u64 + doc as u64);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -373,29 +339,22 @@ mod tests {
|
||||
doc.add_i64(i64_field, i);
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 49_usize);
|
||||
assert_eq!(file.len(), 179_usize);
|
||||
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&file)?;
|
||||
let data = fast_fields_composite
|
||||
.open_read(i64_field)
|
||||
.unwrap()
|
||||
.read_bytes()?;
|
||||
let fast_field_reader = open::<i64>(data)?;
|
||||
|
||||
assert_eq!(fast_field_reader.min_value(), -100i64);
|
||||
assert_eq!(fast_field_reader.max_value(), 9_999i64);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let col = fast_field_readers.i64("field").unwrap();
|
||||
assert_eq!(col.min_value(), -100i64);
|
||||
assert_eq!(col.max_value(), 9_999i64);
|
||||
for (doc, i) in (-100i64..10_000i64).enumerate() {
|
||||
assert_eq!(fast_field_reader.get_val(doc as u32), i);
|
||||
assert_eq!(col.get_val(doc as u32), i);
|
||||
}
|
||||
let mut buffer = vec![0i64; 100];
|
||||
fast_field_reader.get_range(53, &mut buffer[..]);
|
||||
col.get_range(53, &mut buffer[..]);
|
||||
for i in 0..100 {
|
||||
assert_eq!(buffer[i], -100i64 + 53i64 + i as i64);
|
||||
}
|
||||
@@ -533,105 +492,103 @@ mod tests {
|
||||
// all
|
||||
// }
|
||||
|
||||
/*
|
||||
#[test]
|
||||
fn test_text_fastfield() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", TEXT | FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
{
|
||||
// first segment
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "BBBBB AAAAA", // term_ord 1,2
|
||||
))?;
|
||||
index_writer.add_document(doc!())?;
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "AAAAA", // term_ord 0
|
||||
))?;
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "AAAAA BBBBB", // term_ord 0
|
||||
))?;
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "zumberthree", // term_ord 2, after merge term_ord 3
|
||||
))?;
|
||||
|
||||
index_writer.add_document(doc!())?;
|
||||
index_writer.commit()?;
|
||||
|
||||
let reader = index.reader()?;
|
||||
let searcher = reader.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
|
||||
assert_eq!(
|
||||
get_vals_for_docs(&text_fast_field, 0..5),
|
||||
vec![1, 0, 0, 0, 1, 2]
|
||||
);
|
||||
|
||||
let mut out = vec![];
|
||||
text_fast_field.get_vals(3, &mut out);
|
||||
assert_eq!(out, vec![0, 1]);
|
||||
|
||||
let inverted_index = segment_reader.inverted_index(text_field)?;
|
||||
assert_eq!(inverted_index.terms().num_terms(), 3);
|
||||
let mut bytes = vec![];
|
||||
assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?);
|
||||
// default tokenizer applies lower case
|
||||
assert_eq!(bytes, "aaaaa".as_bytes());
|
||||
}
|
||||
|
||||
{
|
||||
// second segment
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "AAAAA", // term_ord 0
|
||||
))?;
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "CCCCC AAAAA", // term_ord 1, after merge 2
|
||||
))?;
|
||||
|
||||
index_writer.add_document(doc!())?;
|
||||
index_writer.commit()?;
|
||||
|
||||
let reader = index.reader()?;
|
||||
let searcher = reader.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 2);
|
||||
let segment_reader = searcher.segment_reader(1);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
|
||||
assert_eq!(get_vals_for_docs(&text_fast_field, 0..3), vec![0, 1, 0]);
|
||||
}
|
||||
// Merging the segments
|
||||
{
|
||||
let segment_ids = index.searchable_segment_ids()?;
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
index_writer.merge(&segment_ids).wait()?;
|
||||
index_writer.wait_merging_threads()?;
|
||||
}
|
||||
|
||||
let reader = index.reader()?;
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
|
||||
assert_eq!(
|
||||
get_vals_for_docs(&text_fast_field, 0..8),
|
||||
vec![1, 0, 0, 0, 1, 3 /* next segment */, 0, 2, 0]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
*/
|
||||
// #[test]
|
||||
// fn test_text_fastfield() -> crate::Result<()> {
|
||||
// let mut schema_builder = Schema::builder();
|
||||
// let text_field = schema_builder.add_text_field("text", TEXT | FAST);
|
||||
// let schema = schema_builder.build();
|
||||
// let index = Index::create_in_ram(schema);
|
||||
//
|
||||
// {
|
||||
// first segment
|
||||
// let mut index_writer = index.writer_for_tests()?;
|
||||
// index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "BBBBB AAAAA", // term_ord 1,2
|
||||
// ))?;
|
||||
// index_writer.add_document(doc!())?;
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "AAAAA", // term_ord 0
|
||||
// ))?;
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "AAAAA BBBBB", // term_ord 0
|
||||
// ))?;
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "zumberthree", // term_ord 2, after merge term_ord 3
|
||||
// ))?;
|
||||
//
|
||||
// index_writer.add_document(doc!())?;
|
||||
// index_writer.commit()?;
|
||||
//
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 1);
|
||||
// let segment_reader = searcher.segment_reader(0);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
//
|
||||
// assert_eq!(
|
||||
// get_vals_for_docs(&text_fast_field, 0..5),
|
||||
// vec![1, 0, 0, 0, 1, 2]
|
||||
// );
|
||||
//
|
||||
// let mut out = vec![];
|
||||
// text_fast_field.get_vals(3, &mut out);
|
||||
// assert_eq!(out, vec![0, 1]);
|
||||
//
|
||||
// let inverted_index = segment_reader.inverted_index(text_field)?;
|
||||
// assert_eq!(inverted_index.terms().num_terms(), 3);
|
||||
// let mut bytes = vec![];
|
||||
// assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?);
|
||||
// default tokenizer applies lower case
|
||||
// assert_eq!(bytes, "aaaaa".as_bytes());
|
||||
// }
|
||||
//
|
||||
// {
|
||||
// second segment
|
||||
// let mut index_writer = index.writer_for_tests()?;
|
||||
//
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "AAAAA", // term_ord 0
|
||||
// ))?;
|
||||
//
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "CCCCC AAAAA", // term_ord 1, after merge 2
|
||||
// ))?;
|
||||
//
|
||||
// index_writer.add_document(doc!())?;
|
||||
// index_writer.commit()?;
|
||||
//
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 2);
|
||||
// let segment_reader = searcher.segment_reader(1);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
//
|
||||
// assert_eq!(get_vals_for_docs(&text_fast_field, 0..3), vec![0, 1, 0]);
|
||||
// }
|
||||
// Merging the segments
|
||||
// {
|
||||
// let segment_ids = index.searchable_segment_ids()?;
|
||||
// let mut index_writer = index.writer_for_tests()?;
|
||||
// index_writer.merge(&segment_ids).wait()?;
|
||||
// index_writer.wait_merging_threads()?;
|
||||
// }
|
||||
//
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// let segment_reader = searcher.segment_reader(0);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
//
|
||||
// assert_eq!(
|
||||
// get_vals_for_docs(&text_fast_field, 0..8),
|
||||
// vec![1, 0, 0, 0, 1, 3 /* next segment */, 0, 2, 0]
|
||||
// );
|
||||
//
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_string_fastfield() -> crate::Result<()> {
|
||||
@@ -661,7 +618,6 @@ mod tests {
|
||||
// index_writer.add_document(doc!())?;
|
||||
// index_writer.commit()?;
|
||||
|
||||
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 1);
|
||||
@@ -693,7 +649,6 @@ mod tests {
|
||||
// index_writer.add_document(doc!())?;
|
||||
// index_writer.commit()?;
|
||||
|
||||
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 2);
|
||||
@@ -816,9 +771,7 @@ mod tests {
|
||||
fast_field_writers
|
||||
.add_document(&doc!(field=>false))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
@@ -853,9 +806,7 @@ mod tests {
|
||||
.add_document(&doc!(field=>false))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
@@ -898,10 +849,7 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_index(
|
||||
docs: &[crate::Document],
|
||||
schema: &Schema,
|
||||
) -> crate::Result<RamDirectory> {
|
||||
fn get_index(docs: &[crate::Document], schema: &Schema) -> crate::Result<RamDirectory> {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
@@ -909,9 +857,7 @@ mod tests {
|
||||
for (doc_id, doc) in docs.into_iter().enumerate() {
|
||||
fast_field_writers.add_document(doc).unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
Ok(directory)
|
||||
@@ -942,9 +888,7 @@ mod tests {
|
||||
})
|
||||
.take(1_000)
|
||||
.collect();
|
||||
let date_options = DateOptions::default()
|
||||
.set_fast()
|
||||
.set_precision(precision);
|
||||
let date_options = DateOptions::default().set_fast().set_precision(precision);
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let field = schema_builder.add_date_field("field", date_options);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
@@ -2,7 +2,9 @@ use std::io;
|
||||
use std::net::Ipv6Addr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use columnar::ColumnarReader;
|
||||
use columnar::{
|
||||
ColumnType, ColumnValues, ColumnarReader, DynamicColumn, HasAssociatedColumnType, NumericalType,
|
||||
};
|
||||
use fastfield_codecs::{open, open_u128, Column};
|
||||
|
||||
use crate::directory::{CompositeFile, FileSlice};
|
||||
@@ -19,73 +21,52 @@ use crate::{DateTime, TantivyError};
|
||||
pub struct FastFieldReaders {
|
||||
columnar: Arc<ColumnarReader>,
|
||||
}
|
||||
#[derive(Eq, PartialEq, Debug)]
|
||||
pub(crate) enum FastType {
|
||||
I64,
|
||||
U64,
|
||||
U128,
|
||||
F64,
|
||||
Bool,
|
||||
Date,
|
||||
}
|
||||
|
||||
pub(crate) fn type_and_cardinality(field_type: &FieldType) -> Option<FastType> {
|
||||
todo!();
|
||||
// match field_type {
|
||||
// FieldType::U64(options) => options
|
||||
// .get_fastfield_cardinality()
|
||||
// .map(|cardinality| (FastType::U64, cardinality)),
|
||||
// FieldType::I64(options) => options
|
||||
// .get_fastfield_cardinality()
|
||||
// .map(|cardinality| (FastType::I64, cardinality)),
|
||||
// FieldType::F64(options) => options
|
||||
// .get_fastfield_cardinality()
|
||||
// .map(|cardinality| (FastType::F64, cardinality)),
|
||||
// FieldType::Bool(options) => options
|
||||
// .get_fastfield_cardinality()
|
||||
// .map(|cardinality| (FastType::Bool, cardinality)),
|
||||
// FieldType::Date(options) => options
|
||||
// .get_fastfield_cardinality()
|
||||
// .map(|cardinality| (FastType::Date, cardinality)),
|
||||
// FieldType::Facet(_) => Some((FastType::U64, Cardinality::MultiValues)),
|
||||
// FieldType::Str(options) if options.is_fast() => {
|
||||
// Some((FastType::U64, Cardinality::MultiValues))
|
||||
// }
|
||||
// FieldType::IpAddr(options) => options
|
||||
// .get_fastfield_cardinality()
|
||||
// .map(|cardinality| (FastType::U128, cardinality)),
|
||||
// _ => None,
|
||||
// }
|
||||
}
|
||||
|
||||
impl FastFieldReaders {
|
||||
pub(crate) fn open(fast_field_file: FileSlice) -> io::Result<FastFieldReaders> {
|
||||
let columnar = Arc::new(ColumnarReader::open(fast_field_file)?);
|
||||
Ok(FastFieldReaders {
|
||||
columnar,
|
||||
})
|
||||
Ok(FastFieldReaders { columnar })
|
||||
}
|
||||
|
||||
pub(crate) fn space_usage(&self) -> PerFieldSpaceUsage {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn column(&self, column_name: &str) {
|
||||
todo!()
|
||||
// TODO make opt
|
||||
pub fn typed_column<T>(&self, field: &str) -> crate::Result<columnar::Column<T>>
|
||||
where
|
||||
T: PartialOrd + Copy + HasAssociatedColumnType + Send + Sync + Default + 'static,
|
||||
DynamicColumn: Into<Option<columnar::Column<T>>>,
|
||||
{
|
||||
let column_type = T::column_type();
|
||||
let Some(dynamic_column_handle) = self.columnar.read_columns(field)?
|
||||
.into_iter()
|
||||
.filter(|column| column.column_type() == column_type)
|
||||
.next() else {
|
||||
// TODO Option would make more sense.
|
||||
return Err(crate::TantivyError::SchemaError(format!("No fast field of with this name")));
|
||||
};
|
||||
let dynamic_column = dynamic_column_handle.open()?;
|
||||
let col: columnar::Column<T> = dynamic_column
|
||||
.into()
|
||||
.ok_or_else(|| crate::TantivyError::SchemaError(format!("Invalid type")))?;
|
||||
Ok(col)
|
||||
}
|
||||
|
||||
pub(crate) fn typed_fast_field_reader<TFastValue: FastValue>(
|
||||
&self,
|
||||
field_name: &str,
|
||||
) -> crate::Result<Arc<dyn Column<TFastValue>>> {
|
||||
todo!();
|
||||
pub fn typed_column_first_or_default<T>(&self, field: &str) -> crate::Result<Arc<dyn Column<T>>>
|
||||
where
|
||||
T: PartialOrd + Copy + HasAssociatedColumnType + Send + Sync + Default + 'static,
|
||||
DynamicColumn: Into<Option<columnar::Column<T>>>,
|
||||
{
|
||||
let col = self.typed_column(field)?;
|
||||
Ok(col.first_or_default_col(T::default()))
|
||||
}
|
||||
|
||||
/// Returns the `u64` fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a u64 fast field, this method returns an Error.
|
||||
pub fn u64(&self, field: &str) -> crate::Result<Arc<dyn Column<u64>>> {
|
||||
todo!();
|
||||
pub fn u64(&self, field: &str) -> crate::Result<Arc<dyn ColumnValues<u64>>> {
|
||||
self.typed_column_first_or_default(field)
|
||||
}
|
||||
|
||||
/// Returns the `ip` fast field reader reader associated to `field`.
|
||||
@@ -111,14 +92,15 @@ impl FastFieldReaders {
|
||||
/// If not, the fastfield reader will returns the u64-value associated with the original
|
||||
/// FastValue.
|
||||
pub fn u64_lenient(&self, field_name: &str) -> crate::Result<Arc<dyn Column<u64>>> {
|
||||
self.typed_fast_field_reader(field_name)
|
||||
todo!();
|
||||
// self.typed_fast_field_reader(field_name)
|
||||
}
|
||||
|
||||
/// Returns the `i64` fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a i64 fast field, this method returns an Error.
|
||||
pub fn i64(&self, field_name: &str) -> crate::Result<Arc<dyn Column<i64>>> {
|
||||
todo!()
|
||||
self.typed_column_first_or_default(field_name)
|
||||
}
|
||||
|
||||
/// Returns the `date` fast field reader reader associated with `field`.
|
||||
@@ -126,41 +108,42 @@ impl FastFieldReaders {
|
||||
/// If `field` is not a date fast field, this method returns an Error.
|
||||
pub fn date(&self, field_name: &str) -> crate::Result<Arc<dyn Column<DateTime>>> {
|
||||
todo!()
|
||||
// self.numerical_column(field_name)
|
||||
}
|
||||
|
||||
/// Returns the `f64` fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a f64 fast field, this method returns an Error.
|
||||
pub fn f64(&self, field_name: &str) -> crate::Result<Arc<dyn Column<f64>>> {
|
||||
todo!();
|
||||
self.typed_column_first_or_default(field_name)
|
||||
}
|
||||
|
||||
/// Returns the `bool` fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a bool fast field, this method returns an Error.
|
||||
pub fn bool(&self, field_name: &str) -> crate::Result<Arc<dyn Column<bool>>> {
|
||||
todo!()
|
||||
self.typed_column_first_or_default(field_name)
|
||||
}
|
||||
|
||||
// Returns the `bytes` fast field reader associated with `field`.
|
||||
//
|
||||
// If `field` is not a bytes fast field, returns an Error.
|
||||
// pub fn bytes(&self, field: Field) -> crate::Result<BytesFastFieldReader> {
|
||||
// let field_entry = self.schema.get_field_entry(field);
|
||||
// if let FieldType::Bytes(bytes_option) = field_entry.field_type() {
|
||||
// if !bytes_option.is_fast() {
|
||||
// return Err(crate::TantivyError::SchemaError(format!(
|
||||
// "Field {:?} is not a fast field.",
|
||||
// field_entry.name()
|
||||
// )));
|
||||
// }
|
||||
// let fast_field_idx_file = self.fast_field_data(field, 0)?;
|
||||
// let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?;
|
||||
// let idx_reader = open(fast_field_idx_bytes)?;
|
||||
// let data = self.fast_field_data(field, 1)?;
|
||||
// BytesFastFieldReader::open(idx_reader, data)
|
||||
// } else {
|
||||
// Err(FastFieldNotAvailableError::new(field_entry).into())
|
||||
// }
|
||||
// let field_entry = self.schema.get_field_entry(field);
|
||||
// if let FieldType::Bytes(bytes_option) = field_entry.field_type() {
|
||||
// if !bytes_option.is_fast() {
|
||||
// return Err(crate::TantivyError::SchemaError(format!(
|
||||
// "Field {:?} is not a fast field.",
|
||||
// field_entry.name()
|
||||
// )));
|
||||
// }
|
||||
// let fast_field_idx_file = self.fast_field_data(field, 0)?;
|
||||
// let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?;
|
||||
// let idx_reader = open(fast_field_idx_bytes)?;
|
||||
// let data = self.fast_field_data(field, 1)?;
|
||||
// BytesFastFieldReader::open(idx_reader, data)
|
||||
// } else {
|
||||
// Err(FastFieldNotAvailableError::new(field_entry).into())
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
use super::FastFieldType;
|
||||
use crate::fastfield::{CompositeFastFieldSerializer};
|
||||
use columnar::{ColumnarWriter, NumericalType, NumericalValue};
|
||||
use common;
|
||||
use fastfield_codecs::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
|
||||
use rustc_hash::FxHashMap;
|
||||
use tantivy_bitpacker::BlockedBitpacker;
|
||||
|
||||
use super::FastFieldType;
|
||||
use crate::fastfield::CompositeFastFieldSerializer;
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::postings::UnorderedTermId;
|
||||
use crate::schema::{Document, Field, FieldEntry, FieldType, Schema, Value};
|
||||
@@ -57,48 +57,46 @@ fn fast_numerical_type(field_type: &FieldType) -> Option<FastFieldTyp> {
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
}
|
||||
FieldType::I64(numerical_option) => {
|
||||
if numerical_option.is_fast() {
|
||||
Some(FastFieldTyp::Numerical(NumericalType::I64))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
}
|
||||
FieldType::F64(numerical_option) => {
|
||||
if numerical_option.is_fast() {
|
||||
Some(FastFieldTyp::Numerical(NumericalType::F64))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
}
|
||||
FieldType::Str(str_option) => {
|
||||
if str_option.is_fast() {
|
||||
Some(FastFieldTyp::Other)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
}
|
||||
FieldType::Bool(int_options) => {
|
||||
if int_options.is_fast() {
|
||||
Some(FastFieldTyp::Other)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
}
|
||||
FieldType::Date(date_options) => {
|
||||
if date_options.is_fast() {
|
||||
Some(FastFieldTyp::Other)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
}
|
||||
FieldType::Facet(_) => todo!(),
|
||||
FieldType::Bytes(_) => todo!(),
|
||||
FieldType::JsonObject(_) => todo!(),
|
||||
FieldType::IpAddr(_) => todo!(),
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,12 +107,12 @@ impl FastFieldsWriter {
|
||||
let mut fast_fields = vec![None; schema.num_fields()];
|
||||
// TODO see other types
|
||||
for (field, field_entry) in schema.fields() {
|
||||
if let Some(fast_field_typ) =fast_numerical_type(field_entry.field_type()) {
|
||||
if let Some(fast_field_typ) = fast_numerical_type(field_entry.field_type()) {
|
||||
match fast_field_typ {
|
||||
FastFieldTyp::Numerical(numerical_type) => {
|
||||
columnar_writer.force_numerical_type(field_entry.name(), numerical_type);
|
||||
},
|
||||
FastFieldTyp::Other => {},
|
||||
}
|
||||
FastFieldTyp::Other => {}
|
||||
}
|
||||
fast_fields[field.field_id() as usize] = Some(field_entry.name().to_string());
|
||||
}
|
||||
@@ -132,20 +130,34 @@ impl FastFieldsWriter {
|
||||
}
|
||||
|
||||
/// Indexes all of the fastfields of a new document.
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
let doc_id = self.num_docs;
|
||||
for field_value in doc.field_values() {
|
||||
if let Some(field_name) = self.fast_fields[field_value.field().field_id() as usize].as_ref() {
|
||||
if let Some(field_name) =
|
||||
self.fast_fields[field_value.field().field_id() as usize].as_ref()
|
||||
{
|
||||
match &field_value.value {
|
||||
Value::U64(u64_val) => {
|
||||
self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*u64_val));
|
||||
},
|
||||
self.columnar_writer.record_numerical(
|
||||
doc_id,
|
||||
field_name.as_str(),
|
||||
NumericalValue::from(*u64_val),
|
||||
);
|
||||
}
|
||||
Value::I64(i64_val) => {
|
||||
self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*i64_val));
|
||||
},
|
||||
self.columnar_writer.record_numerical(
|
||||
doc_id,
|
||||
field_name.as_str(),
|
||||
NumericalValue::from(*i64_val),
|
||||
);
|
||||
}
|
||||
Value::F64(f64_val) => {
|
||||
self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*f64_val));
|
||||
},
|
||||
self.columnar_writer.record_numerical(
|
||||
doc_id,
|
||||
field_name.as_str(),
|
||||
NumericalValue::from(*f64_val),
|
||||
);
|
||||
}
|
||||
Value::Str(_) => todo!(),
|
||||
Value::PreTokStr(_) => todo!(),
|
||||
Value::Bool(_) => todo!(),
|
||||
|
||||
@@ -115,8 +115,8 @@ pub(crate) fn get_doc_id_mapping_from_field(
|
||||
) -> crate::Result<DocIdMapping> {
|
||||
todo!()
|
||||
// let schema = segment_writer.segment_serializer.segment().schema();
|
||||
// let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect fastfield, but not strictly required
|
||||
// let fast_field = segment_writer
|
||||
// let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect
|
||||
// fastfield, but not strictly required let fast_field = segment_writer
|
||||
// .fast_field_writers
|
||||
// .get_field_writer(field_id)
|
||||
// .ok_or_else(|| {
|
||||
@@ -160,15 +160,11 @@ mod tests_indexsorting {
|
||||
|
||||
let my_text_field = schema_builder.add_text_field("text_field", text_field_options);
|
||||
let my_string_field = schema_builder.add_text_field("string_field", STRING | STORED);
|
||||
let my_number = schema_builder.add_u64_field(
|
||||
"my_number",
|
||||
NumericOptions::default().set_fast(),
|
||||
);
|
||||
let my_number =
|
||||
schema_builder.add_u64_field("my_number", NumericOptions::default().set_fast());
|
||||
|
||||
let multi_numbers = schema_builder.add_u64_field(
|
||||
"multi_numbers",
|
||||
NumericOptions::default().set_fast(),
|
||||
);
|
||||
let multi_numbers =
|
||||
schema_builder.add_u64_field("multi_numbers", NumericOptions::default().set_fast());
|
||||
|
||||
let schema = schema_builder.build();
|
||||
let mut index_builder = Index::builder().schema(schema);
|
||||
@@ -459,7 +455,6 @@ mod tests_indexsorting {
|
||||
// "my_number".to_string()
|
||||
// );
|
||||
|
||||
|
||||
// let searcher = index.reader()?.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 1);
|
||||
// let segment_reader = searcher.segment_reader(0);
|
||||
|
||||
@@ -813,8 +813,8 @@ mod tests {
|
||||
use crate::indexer::NoMergePolicy;
|
||||
use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery};
|
||||
use crate::schema::{
|
||||
self, IndexRecordOption, IpAddrOptions, NumericOptions,
|
||||
TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
|
||||
self, IndexRecordOption, IpAddrOptions, NumericOptions, TextFieldIndexing, TextOptions,
|
||||
FAST, INDEXED, STORED, STRING, TEXT,
|
||||
};
|
||||
use crate::store::DOCSTORE_CACHE_CAPACITY;
|
||||
use crate::{
|
||||
@@ -1636,7 +1636,8 @@ mod tests {
|
||||
// );
|
||||
|
||||
// let large_text_field = schema_builder.add_text_field("large_text_field", TEXT | STORED);
|
||||
// let multi_text_fields = schema_builder.add_text_field("multi_text_fields", TEXT | STORED);
|
||||
// let multi_text_fields = schema_builder.add_text_field("multi_text_fields", TEXT |
|
||||
// STORED);
|
||||
|
||||
// let multi_numbers = schema_builder.add_u64_field(
|
||||
// "multi_numbers",
|
||||
@@ -2038,8 +2039,8 @@ mod tests {
|
||||
|
||||
// // Test date
|
||||
// let term =
|
||||
// Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as i64));
|
||||
// assert_eq!(do_search2(term).len() as u64, 0);
|
||||
// Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as
|
||||
// i64)); assert_eq!(do_search2(term).len() as u64, 0);
|
||||
// }
|
||||
// // search ip address
|
||||
// //
|
||||
@@ -2194,39 +2195,38 @@ mod tests {
|
||||
// proptest! {
|
||||
// #![proptest_config(ProptestConfig::with_cases(20))]
|
||||
// #[test]
|
||||
// fn test_delete_with_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
|
||||
// assert!(test_operation_strategy(&ops[..], true, false).is_ok());
|
||||
// }
|
||||
// fn test_delete_with_sort_proptest_adding(ops in
|
||||
// proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert!
|
||||
// (test_operation_strategy(&ops[..], true, false).is_ok()); }
|
||||
// #[test]
|
||||
// fn test_delete_without_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
|
||||
// assert!(test_operation_strategy(&ops[..], false, false).is_ok());
|
||||
// }
|
||||
// fn test_delete_without_sort_proptest_adding(ops in
|
||||
// proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert!
|
||||
// (test_operation_strategy(&ops[..], false, false).is_ok()); }
|
||||
// #[test]
|
||||
// fn test_delete_with_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
|
||||
// assert!(test_operation_strategy(&ops[..], true, true).is_ok());
|
||||
// }
|
||||
// fn test_delete_with_sort_proptest_with_merge_adding(ops in
|
||||
// proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert!
|
||||
// (test_operation_strategy(&ops[..], true, true).is_ok()); }
|
||||
// #[test]
|
||||
// fn test_delete_without_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
|
||||
// assert!(test_operation_strategy(&ops[..], false, true).is_ok());
|
||||
// }
|
||||
// fn test_delete_without_sort_proptest_with_merge_adding(ops in
|
||||
// proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert!
|
||||
// (test_operation_strategy(&ops[..], false, true).is_ok()); }
|
||||
|
||||
// #[test]
|
||||
// fn test_delete_with_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
|
||||
// assert!(test_operation_strategy(&ops[..], true, false).is_ok());
|
||||
// }
|
||||
// fn test_delete_with_sort_proptest(ops in
|
||||
// proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert!
|
||||
// (test_operation_strategy(&ops[..], true, false).is_ok()); }
|
||||
// #[test]
|
||||
// fn test_delete_without_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
|
||||
// assert!(test_operation_strategy(&ops[..], false, false).is_ok());
|
||||
// }
|
||||
// fn test_delete_without_sort_proptest(ops in
|
||||
// proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert!
|
||||
// (test_operation_strategy(&ops[..], false, false).is_ok()); }
|
||||
// #[test]
|
||||
// fn test_delete_with_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
|
||||
// assert!(test_operation_strategy(&ops[..], true, true).is_ok());
|
||||
// }
|
||||
// fn test_delete_with_sort_proptest_with_merge(ops in
|
||||
// proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert!
|
||||
// (test_operation_strategy(&ops[..], true, true).is_ok()); }
|
||||
// #[test]
|
||||
// fn test_delete_without_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..100)) {
|
||||
// assert!(test_operation_strategy(&ops[..], false, true).is_ok());
|
||||
// }
|
||||
|
||||
// fn test_delete_without_sort_proptest_with_merge(ops in
|
||||
// proptest::collection::vec(balanced_operation_strategy(), 1..100)) { assert!
|
||||
// (test_operation_strategy(&ops[..], false, true).is_ok()); }
|
||||
|
||||
// }
|
||||
|
||||
|
||||
@@ -12,12 +12,9 @@ use crate::core::{Segment, SegmentReader};
|
||||
use crate::directory::WritePtr;
|
||||
use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::fastfield::{
|
||||
AliveBitSet, Column, CompositeFastFieldSerializer,
|
||||
};
|
||||
use crate::fastfield::{AliveBitSet, Column, CompositeFastFieldSerializer};
|
||||
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
|
||||
use crate::indexer::doc_id_mapping::SegmentDocIdMapping;
|
||||
use crate::indexer::sorted_doc_id_column::RemappedDocIdColumn;
|
||||
// use crate::indexer::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueColumn;
|
||||
use crate::indexer::SegmentSerializer;
|
||||
use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
|
||||
@@ -255,60 +252,57 @@ impl IndexMerger {
|
||||
) -> crate::Result<()> {
|
||||
debug_time!("wrie-fast-fields");
|
||||
todo!();
|
||||
/*
|
||||
|
||||
for (field, field_entry) in self.schema.fields() {
|
||||
let field_type = field_entry.field_type();
|
||||
match field_type {
|
||||
FieldType::Facet(_) | FieldType::Str(_) if field_type.is_fast() => {
|
||||
let term_ordinal_mapping = term_ord_mappings.remove(&field).expect(
|
||||
"Logic Error in Tantivy (Please report). Facet field should have required \
|
||||
a`term_ordinal_mapping`.",
|
||||
);
|
||||
self.write_term_id_fast_field(
|
||||
field,
|
||||
&term_ordinal_mapping,
|
||||
fast_field_serializer,
|
||||
doc_id_mapping,
|
||||
)?;
|
||||
}
|
||||
FieldType::U64(ref options)
|
||||
| FieldType::I64(ref options)
|
||||
| FieldType::F64(ref options)
|
||||
| FieldType::Bool(ref options) => {
|
||||
todo!()
|
||||
}
|
||||
FieldType::Date(ref options) => {
|
||||
if options.is_fast() {
|
||||
todo!();
|
||||
}
|
||||
// Some(Cardinality::SingleValue) => {
|
||||
// self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?;
|
||||
// }
|
||||
// Some(Cardinality::MultiValues) => {
|
||||
// self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?;
|
||||
// }
|
||||
// None => {}
|
||||
},
|
||||
FieldType::Bytes(byte_options) => {
|
||||
if byte_options.is_fast() {
|
||||
self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?;
|
||||
}
|
||||
}
|
||||
FieldType::IpAddr(options) => {
|
||||
if options.is_fast() {
|
||||
todo!();
|
||||
}
|
||||
},
|
||||
|
||||
FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => {
|
||||
// We don't handle json fast field for the moment
|
||||
// They can be implemented using what is done
|
||||
// for facets in the future
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
// for (field, field_entry) in self.schema.fields() {
|
||||
// let field_type = field_entry.field_type();
|
||||
// match field_type {
|
||||
// FieldType::Facet(_) | FieldType::Str(_) if field_type.is_fast() => {
|
||||
// let term_ordinal_mapping = term_ord_mappings.remove(&field).expect(
|
||||
// "Logic Error in Tantivy (Please report). Facet field should have required \
|
||||
// a`term_ordinal_mapping`.",
|
||||
// );
|
||||
// self.write_term_id_fast_field(
|
||||
// field,
|
||||
// &term_ordinal_mapping,
|
||||
// fast_field_serializer,
|
||||
// doc_id_mapping,
|
||||
// )?;
|
||||
// }
|
||||
// FieldType::U64(ref options)
|
||||
// | FieldType::I64(ref options)
|
||||
// | FieldType::F64(ref options)
|
||||
// | FieldType::Bool(ref options) => {
|
||||
// todo!()
|
||||
// }
|
||||
// FieldType::Date(ref options) => {
|
||||
// if options.is_fast() {
|
||||
// todo!();
|
||||
// }
|
||||
// Some(Cardinality::SingleValue) => {
|
||||
// self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?;
|
||||
// }
|
||||
// Some(Cardinality::MultiValues) => {
|
||||
// self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?;
|
||||
// }
|
||||
// None => {}
|
||||
// },
|
||||
// FieldType::Bytes(byte_options) => {
|
||||
// if byte_options.is_fast() {
|
||||
// self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?;
|
||||
// }
|
||||
// }
|
||||
// FieldType::IpAddr(options) => {
|
||||
// if options.is_fast() {
|
||||
// todo!();
|
||||
// }
|
||||
// },
|
||||
//
|
||||
// FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => {
|
||||
// We don't handle json fast field for the moment
|
||||
// They can be implemented using what is done
|
||||
// for facets in the future
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -356,12 +350,13 @@ impl IndexMerger {
|
||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||
doc_id_mapping: &SegmentDocIdMapping,
|
||||
) -> crate::Result<()> {
|
||||
let fast_field_accessor = RemappedDocIdColumn::new(
|
||||
&self.readers,
|
||||
doc_id_mapping,
|
||||
self.schema.get_field_name(field),
|
||||
);
|
||||
fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?;
|
||||
todo!();
|
||||
// let fast_field_accessor = RemappedDocIdColumn::new(
|
||||
// &self.readers,
|
||||
// doc_id_mapping,
|
||||
// self.schema.get_field_name(field),
|
||||
// );
|
||||
// fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -817,15 +812,13 @@ mod tests {
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
use schema::FAST;
|
||||
|
||||
use crate::collector::tests::{
|
||||
FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
|
||||
};
|
||||
use crate::collector::tests::{FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE};
|
||||
use crate::collector::Count;
|
||||
use crate::core::Index;
|
||||
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
|
||||
use crate::schema::{
|
||||
Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
|
||||
TextFieldIndexing, INDEXED, TEXT,
|
||||
Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term, TextFieldIndexing,
|
||||
INDEXED, TEXT,
|
||||
};
|
||||
use crate::time::OffsetDateTime;
|
||||
use crate::{
|
||||
@@ -1015,8 +1008,7 @@ mod tests {
|
||||
// }
|
||||
// scores
|
||||
// })
|
||||
searcher
|
||||
.search(&term_query, &collector)
|
||||
searcher.search(&term_query, &collector)
|
||||
};
|
||||
|
||||
let empty_vec = Vec::<u64>::new();
|
||||
@@ -1296,7 +1288,6 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
// TODO re-enable
|
||||
// #[test]
|
||||
// fn test_merge_facets_sort_none() {
|
||||
@@ -1316,8 +1307,8 @@ mod tests {
|
||||
// }),
|
||||
// true,
|
||||
// );
|
||||
// // In the merge case this will not go through the doc_id mapping code, because the data is
|
||||
// // sorted and disjunct
|
||||
// // In the merge case this will not go through the doc_id mapping code, because the data
|
||||
// // is sorted and disjoint
|
||||
// test_merge_facets(
|
||||
// Some(IndexSettings {
|
||||
// sort_by_field: Some(IndexSortByField {
|
||||
@@ -1343,8 +1334,8 @@ mod tests {
|
||||
// }),
|
||||
// true,
|
||||
// );
|
||||
// // In the merge case this will not go through the doc_id mapping code, because the data is
|
||||
// // sorted and disjunct
|
||||
// // In the merge case this will not go through the doc_id mapping code, because the data
|
||||
// // is sorted and disjoint
|
||||
// test_merge_facets(
|
||||
// Some(IndexSettings {
|
||||
// sort_by_field: Some(IndexSortByField {
|
||||
@@ -1359,8 +1350,8 @@ mod tests {
|
||||
|
||||
// force_segment_value_overlap forces the int value for sorting to have overlapping min and max
|
||||
// ranges between segments so that merge algorithm can't apply certain optimizations
|
||||
// fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap: bool) {
|
||||
// let mut schema_builder = schema::Schema::builder();
|
||||
// fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap:
|
||||
// bool) { let mut schema_builder = schema::Schema::builder();
|
||||
// let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
|
||||
// let int_options = NumericOptions::default()
|
||||
// .set_fast()
|
||||
@@ -1529,9 +1520,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_merge_multivalued_int_fields_all_deleted() -> crate::Result<()> {
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let int_options = NumericOptions::default()
|
||||
.set_fast()
|
||||
.set_indexed();
|
||||
let int_options = NumericOptions::default().set_fast().set_indexed();
|
||||
let int_field = schema_builder.add_u64_field("intvals", int_options);
|
||||
let index = Index::create_in_ram(schema_builder.build());
|
||||
let reader = index.reader()?;
|
||||
@@ -1566,9 +1555,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_merge_multivalued_int_fields_simple() -> crate::Result<()> {
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let int_options = NumericOptions::default()
|
||||
.set_fast()
|
||||
.set_indexed();
|
||||
let int_options = NumericOptions::default().set_fast().set_indexed();
|
||||
let int_field = schema_builder.add_u64_field("intvals", int_options);
|
||||
let index = Index::create_in_ram(schema_builder.build());
|
||||
|
||||
|
||||
@@ -12,9 +12,7 @@ mod tests {
|
||||
|
||||
fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let int_options = NumericOptions::default()
|
||||
.set_fast()
|
||||
.set_indexed();
|
||||
let int_options = NumericOptions::default().set_fast().set_indexed();
|
||||
let int_field = schema_builder.add_u64_field("intval", int_options);
|
||||
|
||||
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
|
||||
@@ -71,10 +69,8 @@ mod tests {
|
||||
let bytes_field = schema_builder.add_bytes_field("bytes", bytes_options);
|
||||
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
|
||||
|
||||
let multi_numbers = schema_builder.add_u64_field(
|
||||
"multi_numbers",
|
||||
NumericOptions::default().set_fast(),
|
||||
);
|
||||
let multi_numbers =
|
||||
schema_builder.add_u64_field("multi_numbers", NumericOptions::default().set_fast());
|
||||
let text_field_options = TextOptions::default()
|
||||
.set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
@@ -363,7 +359,6 @@ mod tests {
|
||||
// )
|
||||
// .unwrap();
|
||||
|
||||
|
||||
// let int_field = index.schema().get_field("intval").unwrap();
|
||||
// let multi_numbers = index.schema().get_field("multi_numbers").unwrap();
|
||||
// let bytes_field = index.schema().get_field("bytes").unwrap();
|
||||
@@ -490,9 +485,7 @@ mod bench_sorted_index_merge {
|
||||
use crate::{IndexSettings, IndexSortByField, IndexWriter, Order};
|
||||
fn create_index(sort_by_field: Option<IndexSortByField>) -> Index {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let int_options = NumericOptions::default()
|
||||
.set_fast()
|
||||
.set_indexed();
|
||||
let int_options = NumericOptions::default().set_fast().set_indexed();
|
||||
let int_field = schema_builder.add_u64_field("intval", int_options);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ mod segment_register;
|
||||
pub mod segment_serializer;
|
||||
pub mod segment_updater;
|
||||
mod segment_writer;
|
||||
mod sorted_doc_id_column;
|
||||
// mod sorted_doc_id_column;
|
||||
// mod sorted_doc_id_multivalue_column;
|
||||
mod stamper;
|
||||
|
||||
|
||||
@@ -408,10 +408,7 @@ fn remap_and_write(
|
||||
serializer.get_postings_serializer(),
|
||||
)?;
|
||||
debug!("fastfield-serialize");
|
||||
fast_field_writers.serialize(
|
||||
serializer.get_fast_field_write(),
|
||||
doc_id_map,
|
||||
)?;
|
||||
fast_field_writers.serialize(serializer.get_fast_field_write(), doc_id_map)?;
|
||||
|
||||
// finalize temp docstore and create version, which reflects the doc_id_map
|
||||
if let Some(doc_id_map) = doc_id_map {
|
||||
|
||||
@@ -147,6 +147,14 @@ pub struct DateTime {
|
||||
pub(crate) timestamp_micros: i64,
|
||||
}
|
||||
|
||||
// Conversion from the `columnar` crate's `DateTime` into this crate's `DateTime`.
impl From<columnar::DateTime> for DateTime {
|
||||
// Builds a `DateTime` by copying the `timestamp_micros` field of
// `columnar_datetime` unchanged; no other field is read or set here.
fn from(columnar_datetime: columnar::DateTime) -> Self {
|
||||
DateTime {
|
||||
timestamp_micros: columnar_datetime.timestamp_micros,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DateTime {
|
||||
/// Create new from UNIX timestamp in seconds
|
||||
pub const fn from_timestamp_secs(seconds: i64) -> Self {
|
||||
@@ -1166,5 +1174,4 @@ pub mod tests {
|
||||
);
|
||||
assert_eq!(dt_from_ts_nanos.to_hms_micro(), offset_dt.to_hms_micro());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -15,9 +15,17 @@ use crate::indexer::{
|
||||
};
|
||||
// use crate::query::range_query::is_type_valid_for_fastfield_range_query;
|
||||
use crate::query::{
|
||||
AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query,
|
||||
AllQuery,
|
||||
BooleanQuery,
|
||||
BoostQuery,
|
||||
EmptyQuery,
|
||||
FuzzyTermQuery,
|
||||
Occur,
|
||||
PhraseQuery,
|
||||
Query,
|
||||
// RangeQuery,
|
||||
TermQuery, TermSetQuery,
|
||||
TermQuery,
|
||||
TermSetQuery,
|
||||
};
|
||||
use crate::schema::{
|
||||
Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, JsonObjectOptions,
|
||||
@@ -336,91 +344,89 @@ impl QueryParser {
|
||||
phrase: &str,
|
||||
) -> Result<Term, QueryParserError> {
|
||||
todo!();
|
||||
/*
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
let field_type = field_entry.field_type();
|
||||
let field_supports_ff_range_queries = field_type.is_fast()
|
||||
&& is_type_valid_for_fastfield_range_query(field_type.value_type());
|
||||
|
||||
if !field_type.is_indexed() && !field_supports_ff_range_queries {
|
||||
return Err(QueryParserError::FieldNotIndexed(
|
||||
field_entry.name().to_string(),
|
||||
));
|
||||
}
|
||||
if !json_path.is_empty() && field_type.value_type() != Type::Json {
|
||||
return Err(QueryParserError::UnsupportedQuery(format!(
|
||||
"Json path is not supported for field {:?}",
|
||||
field_entry.name()
|
||||
)));
|
||||
}
|
||||
match *field_type {
|
||||
FieldType::U64(_) => {
|
||||
let val: u64 = u64::from_str(phrase)?;
|
||||
Ok(Term::from_field_u64(field, val))
|
||||
}
|
||||
FieldType::I64(_) => {
|
||||
let val: i64 = i64::from_str(phrase)?;
|
||||
Ok(Term::from_field_i64(field, val))
|
||||
}
|
||||
FieldType::F64(_) => {
|
||||
let val: f64 = f64::from_str(phrase)?;
|
||||
Ok(Term::from_field_f64(field, val))
|
||||
}
|
||||
FieldType::Bool(_) => {
|
||||
let val: bool = bool::from_str(phrase)?;
|
||||
Ok(Term::from_field_bool(field, val))
|
||||
}
|
||||
FieldType::Date(_) => {
|
||||
let dt = OffsetDateTime::parse(phrase, &Rfc3339)?;
|
||||
Ok(Term::from_field_date(field, DateTime::from_utc(dt)))
|
||||
}
|
||||
FieldType::Str(ref str_options) => {
|
||||
let option = str_options.get_indexing_options().ok_or_else(|| {
|
||||
// This should have been seen earlier really.
|
||||
QueryParserError::FieldNotIndexed(field_entry.name().to_string())
|
||||
})?;
|
||||
let text_analyzer =
|
||||
self.tokenizer_manager
|
||||
.get(option.tokenizer())
|
||||
.ok_or_else(|| QueryParserError::UnknownTokenizer {
|
||||
field: field_entry.name().to_string(),
|
||||
tokenizer: option.tokenizer().to_string(),
|
||||
})?;
|
||||
let mut terms: Vec<Term> = Vec::new();
|
||||
let mut token_stream = text_analyzer.token_stream(phrase);
|
||||
token_stream.process(&mut |token| {
|
||||
let term = Term::from_field_text(field, &token.text);
|
||||
terms.push(term);
|
||||
});
|
||||
if terms.len() != 1 {
|
||||
return Err(QueryParserError::UnsupportedQuery(format!(
|
||||
"Range query boundary cannot have multiple tokens: {phrase:?}."
|
||||
)));
|
||||
}
|
||||
Ok(terms.into_iter().next().unwrap())
|
||||
}
|
||||
FieldType::JsonObject(_) => {
|
||||
// Json range queries are not supported.
|
||||
Err(QueryParserError::UnsupportedQuery(
|
||||
"Range query are not supported on json field.".to_string(),
|
||||
))
|
||||
}
|
||||
FieldType::Facet(_) => match Facet::from_text(phrase) {
|
||||
Ok(facet) => Ok(Term::from_facet(field, &facet)),
|
||||
Err(e) => Err(QueryParserError::from(e)),
|
||||
},
|
||||
FieldType::Bytes(_) => {
|
||||
let bytes = BASE64
|
||||
.decode(phrase)
|
||||
.map_err(QueryParserError::ExpectedBase64)?;
|
||||
Ok(Term::from_field_bytes(field, &bytes))
|
||||
}
|
||||
FieldType::IpAddr(_) => {
|
||||
let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr();
|
||||
Ok(Term::from_field_ip_addr(field, ip_v6))
|
||||
}
|
||||
}
|
||||
*/
|
||||
// let field_entry = self.schema.get_field_entry(field);
|
||||
// let field_type = field_entry.field_type();
|
||||
// let field_supports_ff_range_queries = field_type.is_fast()
|
||||
// && is_type_valid_for_fastfield_range_query(field_type.value_type());
|
||||
//
|
||||
// if !field_type.is_indexed() && !field_supports_ff_range_queries {
|
||||
// return Err(QueryParserError::FieldNotIndexed(
|
||||
// field_entry.name().to_string(),
|
||||
// ));
|
||||
// }
|
||||
// if !json_path.is_empty() && field_type.value_type() != Type::Json {
|
||||
// return Err(QueryParserError::UnsupportedQuery(format!(
|
||||
// "Json path is not supported for field {:?}",
|
||||
// field_entry.name()
|
||||
// )));
|
||||
// }
|
||||
// match *field_type {
|
||||
// FieldType::U64(_) => {
|
||||
// let val: u64 = u64::from_str(phrase)?;
|
||||
// Ok(Term::from_field_u64(field, val))
|
||||
// }
|
||||
// FieldType::I64(_) => {
|
||||
// let val: i64 = i64::from_str(phrase)?;
|
||||
// Ok(Term::from_field_i64(field, val))
|
||||
// }
|
||||
// FieldType::F64(_) => {
|
||||
// let val: f64 = f64::from_str(phrase)?;
|
||||
// Ok(Term::from_field_f64(field, val))
|
||||
// }
|
||||
// FieldType::Bool(_) => {
|
||||
// let val: bool = bool::from_str(phrase)?;
|
||||
// Ok(Term::from_field_bool(field, val))
|
||||
// }
|
||||
// FieldType::Date(_) => {
|
||||
// let dt = OffsetDateTime::parse(phrase, &Rfc3339)?;
|
||||
// Ok(Term::from_field_date(field, DateTime::from_utc(dt)))
|
||||
// }
|
||||
// FieldType::Str(ref str_options) => {
|
||||
// let option = str_options.get_indexing_options().ok_or_else(|| {
|
||||
// This should have been seen earlier really.
|
||||
// QueryParserError::FieldNotIndexed(field_entry.name().to_string())
|
||||
// })?;
|
||||
// let text_analyzer =
|
||||
// self.tokenizer_manager
|
||||
// .get(option.tokenizer())
|
||||
// .ok_or_else(|| QueryParserError::UnknownTokenizer {
|
||||
// field: field_entry.name().to_string(),
|
||||
// tokenizer: option.tokenizer().to_string(),
|
||||
// })?;
|
||||
// let mut terms: Vec<Term> = Vec::new();
|
||||
// let mut token_stream = text_analyzer.token_stream(phrase);
|
||||
// token_stream.process(&mut |token| {
|
||||
// let term = Term::from_field_text(field, &token.text);
|
||||
// terms.push(term);
|
||||
// });
|
||||
// if terms.len() != 1 {
|
||||
// return Err(QueryParserError::UnsupportedQuery(format!(
|
||||
// "Range query boundary cannot have multiple tokens: {phrase:?}."
|
||||
// )));
|
||||
// }
|
||||
// Ok(terms.into_iter().next().unwrap())
|
||||
// }
|
||||
// FieldType::JsonObject(_) => {
|
||||
// Json range queries are not supported.
|
||||
// Err(QueryParserError::UnsupportedQuery(
|
||||
// "Range query are not supported on json field.".to_string(),
|
||||
// ))
|
||||
// }
|
||||
// FieldType::Facet(_) => match Facet::from_text(phrase) {
|
||||
// Ok(facet) => Ok(Term::from_facet(field, &facet)),
|
||||
// Err(e) => Err(QueryParserError::from(e)),
|
||||
// },
|
||||
// FieldType::Bytes(_) => {
|
||||
// let bytes = BASE64
|
||||
// .decode(phrase)
|
||||
// .map_err(QueryParserError::ExpectedBase64)?;
|
||||
// Ok(Term::from_field_bytes(field, &bytes))
|
||||
// }
|
||||
// FieldType::IpAddr(_) => {
|
||||
// let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr();
|
||||
// Ok(Term::from_field_ip_addr(field, ip_v6))
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
fn compute_logical_ast_for_leaf(
|
||||
@@ -744,11 +750,12 @@ fn convert_literal_to_query(
|
||||
value_type,
|
||||
lower,
|
||||
upper,
|
||||
} => { todo!();
|
||||
// Box::new(RangeQuery::new_term_bounds(
|
||||
// field, value_type, &lower, &upper,
|
||||
// ))
|
||||
} ,
|
||||
} => {
|
||||
todo!();
|
||||
// Box::new(RangeQuery::new_term_bounds(
|
||||
// field, value_type, &lower, &upper,
|
||||
// ))
|
||||
}
|
||||
LogicalLiteral::Set { elements, .. } => Box::new(TermSetQuery::new(elements)),
|
||||
LogicalLiteral::All => Box::new(AllQuery),
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ use serde_json::Value as JsonValue;
|
||||
use thiserror::Error;
|
||||
|
||||
use super::ip_options::IpAddrOptions;
|
||||
use super:: IntoIpv6Addr;
|
||||
use super::IntoIpv6Addr;
|
||||
use crate::schema::bytes_options::BytesOptions;
|
||||
use crate::schema::facet_options::FacetOptions;
|
||||
use crate::schema::{
|
||||
|
||||
@@ -87,7 +87,7 @@ impl IpAddrOptions {
|
||||
/// If more than one value is associated with a fast field, only the last one is
|
||||
/// kept.
|
||||
#[must_use]
|
||||
pub fn set_fast(mut self,) -> Self {
|
||||
pub fn set_fast(mut self) -> Self {
|
||||
self.fast = true;
|
||||
self
|
||||
}
|
||||
|
||||
@@ -141,9 +141,9 @@ pub use self::index_record_option::IndexRecordOption;
|
||||
pub use self::ip_options::{IntoIpv6Addr, IpAddrOptions};
|
||||
pub use self::json_object_options::JsonObjectOptions;
|
||||
pub use self::named_field_document::NamedFieldDocument;
|
||||
pub use self::numeric_options::NumericOptions;
|
||||
#[allow(deprecated)]
|
||||
pub use self::numeric_options::IntOptions;
|
||||
pub use self::numeric_options::NumericOptions;
|
||||
pub use self::schema::{DocParsingError, Schema, SchemaBuilder};
|
||||
pub use self::term::Term;
|
||||
pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT};
|
||||
|
||||
@@ -505,19 +505,13 @@ mod tests {
|
||||
#[test]
|
||||
pub fn test_schema_serialization() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let count_options = NumericOptions::default()
|
||||
.set_stored()
|
||||
.set_fast();
|
||||
let popularity_options = NumericOptions::default()
|
||||
.set_stored()
|
||||
.set_fast();
|
||||
let count_options = NumericOptions::default().set_stored().set_fast();
|
||||
let popularity_options = NumericOptions::default().set_stored().set_fast();
|
||||
let score_options = NumericOptions::default()
|
||||
.set_indexed()
|
||||
.set_fieldnorm()
|
||||
.set_fast();
|
||||
let is_read_options = NumericOptions::default()
|
||||
.set_stored()
|
||||
.set_fast();
|
||||
let is_read_options = NumericOptions::default().set_stored().set_fast();
|
||||
schema_builder.add_text_field("title", TEXT);
|
||||
schema_builder.add_text_field(
|
||||
"author",
|
||||
@@ -642,12 +636,8 @@ mod tests {
|
||||
#[test]
|
||||
pub fn test_document_to_json() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let count_options = NumericOptions::default()
|
||||
.set_stored()
|
||||
.set_fast();
|
||||
let is_read_options = NumericOptions::default()
|
||||
.set_stored()
|
||||
.set_fast();
|
||||
let count_options = NumericOptions::default().set_stored().set_fast();
|
||||
let is_read_options = NumericOptions::default().set_stored().set_fast();
|
||||
schema_builder.add_text_field("title", TEXT);
|
||||
schema_builder.add_text_field("author", STRING);
|
||||
schema_builder.add_u64_field("count", count_options);
|
||||
@@ -747,15 +737,9 @@ mod tests {
|
||||
#[test]
|
||||
pub fn test_parse_document() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let count_options = NumericOptions::default()
|
||||
.set_stored()
|
||||
.set_fast();
|
||||
let popularity_options = NumericOptions::default()
|
||||
.set_stored()
|
||||
.set_fast();
|
||||
let score_options = NumericOptions::default()
|
||||
.set_indexed()
|
||||
.set_fast();
|
||||
let count_options = NumericOptions::default().set_stored().set_fast();
|
||||
let popularity_options = NumericOptions::default().set_stored().set_fast();
|
||||
let score_options = NumericOptions::default().set_indexed().set_fast();
|
||||
let title_field = schema_builder.add_text_field("title", TEXT);
|
||||
let author_field = schema_builder.add_text_field("author", STRING);
|
||||
let count_field = schema_builder.add_u64_field("count", count_options);
|
||||
|
||||
Reference in New Issue
Block a user