quickwit-oss/tantivy
Improvements to doc linking, grammar, etc.
@@ -1,7 +1,7 @@
 //! Contains the aggregation request tree. Used to build an
-//! [AggregationCollector](super::AggregationCollector).
+//! [`AggregationCollector`](super::AggregationCollector).
 //!
-//! [Aggregations] is the top level entry point to create a request, which is a `HashMap<String,
+//! [`Aggregations`] is the top level entry point to create a request, which is a `HashMap<String,
 //! Aggregation>`.
 //!
 //! Requests are compatible with the json format of elasticsearch.
@@ -54,8 +54,8 @@ use super::bucket::{HistogramAggregation, TermsAggregation};
 use super::metric::{AverageAggregation, StatsAggregation};
 use super::VecWithNames;
 
-/// The top-level aggregation request structure, which contains [Aggregation] and their user defined
-/// names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
+/// The top-level aggregation request structure, which contains [`Aggregation`] and their user
+/// defined names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
 ///
 /// The key is the user defined name of the aggregation.
 pub type Aggregations = HashMap<String, Aggregation>;
@@ -139,15 +139,15 @@ pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet<String> {
     fast_field_names
 }
 
-/// Aggregation request of [BucketAggregation] or [MetricAggregation].
+/// Aggregation request of [`BucketAggregation`] or [`MetricAggregation`].
 ///
 /// An aggregation is either a bucket or a metric.
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum Aggregation {
-    /// Bucket aggregation, see [BucketAggregation] for details.
+    /// Bucket aggregation, see [`BucketAggregation`] for details.
     Bucket(BucketAggregation),
-    /// Metric aggregation, see [MetricAggregation] for details.
+    /// Metric aggregation, see [`MetricAggregation`] for details.
     Metric(MetricAggregation),
 }
 
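Since `Aggregation` derives `Deserialize` (see the hunk above) and requests are compatible with the Elasticsearch JSON format, a request can be parsed straight from JSON. A minimal sketch, assuming `serde_json` as the parser and a hypothetical `score` fast field:

```rust
use tantivy::aggregation::agg_req::Aggregations;

fn parse_request() -> serde_json::Result<Aggregations> {
    // One metric aggregation, named "average_score" by the user,
    // computing the average of the "score" fast field.
    serde_json::from_str(r#"{ "average_score": { "avg": { "field": "score" } } }"#)
}
```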
@@ -113,14 +113,14 @@ pub enum BucketResult {
         ///
         /// Whether there are holes depends on the request: if min_doc_count is 0, then there are no
         /// holes between the first and last bucket.
-        /// See [HistogramAggregation](super::bucket::HistogramAggregation)
+        /// See [`HistogramAggregation`](super::bucket::HistogramAggregation)
         buckets: BucketEntries<BucketEntry>,
     },
     /// This is the term result
     Terms {
         /// The buckets.
         ///
-        /// See [TermsAggregation](super::bucket::TermsAggregation)
+        /// See [`TermsAggregation`](super::bucket::TermsAggregation)
         buckets: Vec<BucketEntry>,
         /// The number of documents that didn’t make it into the top N due to shard_size or size
         sum_other_doc_count: u64,
@@ -234,10 +234,10 @@ pub struct RangeBucketEntry {
     #[serde(flatten)]
     /// sub-aggregations in this bucket.
     pub sub_aggregation: AggregationResults,
-    /// The from range of the bucket. Equals f64::MIN when None.
+    /// The from range of the bucket. Equals `f64::MIN` when `None`.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub from: Option<f64>,
-    /// The to range of the bucket. Equals f64::MAX when None.
+    /// The to range of the bucket. Equals `f64::MAX` when `None`.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub to: Option<f64>,
 }
@@ -37,14 +37,14 @@ use crate::{DocId, TantivyError};
 /// [hard_bounds](HistogramAggregation::hard_bounds).
 ///
 /// # Result
-/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
-/// [BucketEntry](crate::aggregation::agg_result::BucketEntry) on the
-/// AggregationCollector.
+/// Result type is [`BucketResult`](crate::aggregation::agg_result::BucketResult) with
+/// [`BucketEntry`](crate::aggregation::agg_result::BucketEntry) on the
+/// `AggregationCollector`.
 ///
 /// Result type is
-/// [IntermediateBucketResult](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
-/// [IntermediateHistogramBucketEntry](crate::aggregation::intermediate_agg_result::IntermediateHistogramBucketEntry) on the
-/// DistributedAggregationCollector.
+/// [`IntermediateBucketResult`](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
+/// [`IntermediateHistogramBucketEntry`](crate::aggregation::intermediate_agg_result::IntermediateHistogramBucketEntry) on the
+/// `DistributedAggregationCollector`.
 ///
 /// # Limitations/Compatibility
 ///
@@ -61,7 +61,7 @@ use crate::{DocId, TantivyError};
 /// ```
 ///
 /// Response
-/// See [BucketEntry](crate::aggregation::agg_result::BucketEntry)
+/// See [`BucketEntry`](crate::aggregation::agg_result::BucketEntry)
 
 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
 pub struct HistogramAggregation {
@@ -518,7 +518,7 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
 
 /// Applies req extended_bounds/hard_bounds on the min_max value
 ///
-/// May return (f64::MAX, f64::MIN), if there is no range.
+/// May return `(f64::MAX, f64::MIN)`, if there is no range.
 fn get_req_min_max(req: &HistogramAggregation, min_max: Option<(f64, f64)>) -> (f64, f64) {
     let (mut min, mut max) = min_max.unwrap_or((f64::MAX, f64::MIN));
 
@@ -1,11 +1,11 @@
 //! Module for all bucket aggregations.
 //!
 //! BucketAggregations create buckets of documents
-//! [BucketAggregation](super::agg_req::BucketAggregation).
+//! [`BucketAggregation`](super::agg_req::BucketAggregation).
 //!
-//! Results of final buckets are [BucketResult](super::agg_result::BucketResult).
+//! Results of final buckets are [`BucketResult`](super::agg_result::BucketResult).
 //! Results of intermediate buckets are
-//! [IntermediateBucketResult](super::intermediate_agg_result::IntermediateBucketResult)
+//! [`IntermediateBucketResult`](super::intermediate_agg_result::IntermediateBucketResult)
 
 mod histogram;
 mod range;
@@ -22,14 +22,14 @@ use crate::{DocId, TantivyError};
 /// against each bucket range. Note that this aggregation includes the from value and excludes the
 /// to value for each range.
 ///
-/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
-/// [RangeBucketEntry](crate::aggregation::agg_result::RangeBucketEntry) on the
-/// AggregationCollector.
+/// Result type is [`BucketResult`](crate::aggregation::agg_result::BucketResult) with
+/// [`RangeBucketEntry`](crate::aggregation::agg_result::RangeBucketEntry) on the
+/// `AggregationCollector`.
 ///
 /// Result type is
-/// [IntermediateBucketResult](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
-/// [IntermediateRangeBucketEntry](crate::aggregation::intermediate_agg_result::IntermediateRangeBucketEntry) on the
-/// DistributedAggregationCollector.
+/// [`IntermediateBucketResult`](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
+/// [`IntermediateRangeBucketEntry`](crate::aggregation::intermediate_agg_result::IntermediateRangeBucketEntry) on the
+/// `DistributedAggregationCollector`.
 ///
 /// # Limitations/Compatibility
 /// Overlapping ranges are not yet supported.
@@ -67,11 +67,11 @@ pub struct RangeAggregationRange {
     #[serde(skip_serializing_if = "Option::is_none", default)]
     pub key: Option<String>,
     /// The from range value, which is inclusive in the range.
-    /// None equals to an open ended interval.
+    /// `None` equals an open-ended interval.
     #[serde(skip_serializing_if = "Option::is_none", default)]
     pub from: Option<f64>,
     /// The to range value, which is not inclusive in the range.
-    /// None equals to an open ended interval.
+    /// `None` equals an open-ended interval.
     #[serde(skip_serializing_if = "Option::is_none", default)]
     pub to: Option<f64>,
 }
@@ -101,7 +101,7 @@ impl From<Range<f64>> for RangeAggregationRange {
 pub(crate) struct InternalRangeAggregationRange {
     /// Custom key for the range bucket
     key: Option<String>,
-    /// u64 range value
+    /// `u64` range value
     range: Range<u64>,
 }
 
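The `From<Range<f64>>` impl referenced in the hunk header above is what lets `(3f64..7f64).into()` stand in for a `RangeAggregationRange`. A sketch of a full range request, assuming these request types are re-exported from `agg_req` as in the crate's own examples, with a hypothetical `score` field:

```rust
use tantivy::aggregation::agg_req::{
    Aggregation, Aggregations, BucketAggregation, BucketAggregationType, RangeAggregation,
};

fn range_request() -> Aggregations {
    vec![(
        "score_ranges".to_string(),
        Aggregation::Bucket(BucketAggregation {
            bucket_agg: BucketAggregationType::Range(RangeAggregation {
                field: "score".to_string(),
                // [3, 7) and [7, 20): `from` is inclusive, `to` is exclusive.
                ranges: vec![(3f64..7f64).into(), (7f64..20f64).into()],
            }),
            // No sub-aggregations in this sketch.
            sub_aggregation: Default::default(),
        }),
    )]
    .into_iter()
    .collect()
}
```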
@@ -131,9 +131,9 @@ pub(crate) struct SegmentRangeBucketEntry {
     pub key: Key,
     pub doc_count: u64,
     pub sub_aggregation: Option<SegmentAggregationResultsCollector>,
-    /// The from range of the bucket. Equals f64::MIN when None.
+    /// The from range of the bucket. Equals `f64::MIN` when `None`.
     pub from: Option<f64>,
-    /// The to range of the bucket. Equals f64::MAX when None. Open interval, `to` is not
+    /// The to range of the bucket. Equals `f64::MAX` when `None`. Open interval, `to` is not
     /// inclusive.
     pub to: Option<f64>,
 }
@@ -261,7 +261,7 @@ impl SegmentRangeCollector {
         let accessor = bucket_with_accessor
             .accessor
             .as_single()
-            .expect("unexpected fast field cardinatility");
+            .expect("unexpected fast field cardinality");
         for docs in iter.by_ref() {
             let val1 = accessor.get_val(docs[0] as u64);
             let val2 = accessor.get_val(docs[1] as u64);
@@ -31,7 +31,7 @@ use crate::{DocId, TantivyError};
 ///
 /// Even with a larger `segment_size` value, doc_count values for a terms aggregation may be
 /// approximate. As a result, any sub-aggregations on the terms aggregation may also be approximate.
-/// `sum_other_doc_count` is the number of documents that didn’t make it into the the top size
+/// `sum_other_doc_count` is the number of documents that didn’t make it into the top size
 /// terms. If this is greater than 0, you can be sure that the terms agg had to throw away some
 /// buckets, either because they didn’t fit into size on the root node or they didn’t fit into
 /// `segment_size` on the segment node.
@@ -42,14 +42,14 @@ use crate::{DocId, TantivyError};
 /// each segment. It’s the sum of the size of the largest bucket on each segment that didn’t fit
 /// into segment_size.
 ///
-/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
-/// [TermBucketEntry](crate::aggregation::agg_result::BucketEntry) on the
-/// AggregationCollector.
+/// Result type is [`BucketResult`](crate::aggregation::agg_result::BucketResult) with
+/// [`TermBucketEntry`](crate::aggregation::agg_result::BucketEntry) on the
+/// `AggregationCollector`.
 ///
 /// Result type is
-/// [IntermediateBucketResult](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
-/// [IntermediateTermBucketEntry](crate::aggregation::intermediate_agg_result::IntermediateTermBucketEntry) on the
-/// DistributedAggregationCollector.
+/// [`IntermediateBucketResult`](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
+/// [`IntermediateTermBucketEntry`](crate::aggregation::intermediate_agg_result::IntermediateTermBucketEntry) on the
+/// `DistributedAggregationCollector`.
 ///
 /// # Limitations/Compatibility
 ///
@@ -131,7 +131,7 @@ fn merge_fruits(
     }
 }
 
-/// AggregationSegmentCollector does the aggregation collection on a segment.
+/// `AggregationSegmentCollector` does the aggregation collection on a segment.
 pub struct AggregationSegmentCollector {
     aggs_with_accessor: AggregationsWithAccessor,
     result: SegmentAggregationResultsCollector,
@@ -139,8 +139,8 @@ pub struct AggregationSegmentCollector {
 }
 
 impl AggregationSegmentCollector {
-    /// Creates an AggregationSegmentCollector from an [Aggregations] request and a segment reader.
-    /// Also includes validation, e.g. checking field types and existence.
+    /// Creates an `AggregationSegmentCollector` from an [`Aggregations`] request and a segment
+    /// reader. Also includes validation, e.g. checking field types and existence.
     pub fn from_agg_req_and_reader(
         agg: &Aggregations,
         reader: &SegmentReader,
@@ -108,10 +108,10 @@ impl IntermediateAggregationResults {
         Self { metrics, buckets }
     }
 
-    /// Merge an other intermediate aggregation result into this result.
+    /// Merge another intermediate aggregation result into this result.
     ///
     /// The order of the values needs to be the same on both results. This is ensured when the same
-    /// (key values) are present on the underlying VecWithNames struct.
+    /// (key values) are present on the underlying `VecWithNames` struct.
     pub fn merge_fruits(&mut self, other: IntermediateAggregationResults) {
         if let (Some(buckets_left), Some(buckets_right)) = (&mut self.buckets, other.buckets) {
             for (bucket_left, bucket_right) in
@@ -560,10 +560,10 @@ pub struct IntermediateRangeBucketEntry {
     pub doc_count: u64,
     /// The sub_aggregation in this bucket.
     pub sub_aggregation: IntermediateAggregationResults,
-    /// The from range of the bucket. Equals f64::MIN when None.
+    /// The from range of the bucket. Equals `f64::MIN` when `None`.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub from: Option<f64>,
-    /// The to range of the bucket. Equals f64::MAX when None.
+    /// The to range of the bucket. Equals `f64::MAX` when `None`.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub to: Option<f64>,
 }
@@ -7,8 +7,8 @@ use crate::{DocId, TantivyError};
 
 /// A multi-value metric aggregation that computes stats of numeric values that are
 /// extracted from the aggregated documents.
-/// Supported field types are u64, i64, and f64.
-/// See [Stats] for returned statistics.
+/// Supported field types are `u64`, `i64`, and `f64`.
+/// See [`Stats`] for returned statistics.
 ///
 /// # JSON Format
 /// ```json
@@ -43,13 +43,13 @@ pub struct Stats {
     pub count: usize,
     /// The sum of the fast field values.
     pub sum: f64,
-    /// The standard deviation of the fast field values. None for count == 0.
+    /// The standard deviation of the fast field values. `None` for count == 0.
     pub standard_deviation: Option<f64>,
     /// The min value of the fast field values.
     pub min: Option<f64>,
     /// The max value of the fast field values.
     pub max: Option<f64>,
-    /// The average of the values. None for count == 0.
+    /// The average of the values. `None` for count == 0.
     pub avg: Option<f64>,
 }
 
@@ -70,7 +70,7 @@ impl Stats {
     }
 }
 
-/// IntermediateStats contains the mergeable version for stats.
+/// `IntermediateStats` contains the mergeable version for stats.
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct IntermediateStats {
     count: usize,
@@ -14,13 +14,14 @@
 //!
 //!
 //! To use aggregations, build an aggregation request by constructing
-//! [Aggregations](agg_req::Aggregations).
-//! Create an [AggregationCollector] from this request. AggregationCollector implements the
-//! `Collector` trait and can be passed as collector into `searcher.search()`.
+//! [`Aggregations`](agg_req::Aggregations).
+//! Create an [`AggregationCollector`] from this request. `AggregationCollector` implements the
+//! [`Collector`](crate::collector::Collector) trait and can be passed as collector into
+//! [`Searcher::search()`](crate::Searcher::search).
 //!
 //! #### Limitations
 //!
-//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and
+//! Currently aggregations work only on single value fast fields of type `u64`, `f64`, `i64` and
 //! fast fields on text fields.
 //!
 //! # JSON Format
@@ -44,8 +45,8 @@
 //! - [Stats](metric::StatsAggregation)
 //!
 //! # Example
-//! Compute the average metric, by building [agg_req::Aggregations], which is built from an (String,
-//! [agg_req::Aggregation]) iterator.
+//! Compute the average metric by building [`agg_req::Aggregations`], which is built from an
+//! `(String, agg_req::Aggregation)` iterator.
 //!
 //! ```
 //! use tantivy::aggregation::agg_req::{Aggregations, Aggregation, MetricAggregation};
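The example code itself is elided by the hunk boundary. A condensed version of the construction it describes, built from a `(String, Aggregation)` iterator, assuming `AverageAggregation::from_field_name` and a `score` fast field as in the crate's example:

```rust
use tantivy::aggregation::agg_req::{Aggregation, Aggregations, MetricAggregation};
use tantivy::aggregation::metric::AverageAggregation;

fn average_request() -> Aggregations {
    vec![(
        // The key is the user-defined name of the aggregation.
        "average".to_string(),
        Aggregation::Metric(MetricAggregation::Average(
            AverageAggregation::from_field_name("score".to_string()),
        )),
    )]
    .into_iter()
    .collect()
}
```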
@@ -143,15 +144,15 @@
 //! ```
 //!
 //! # Distributed Aggregation
-//! When the data is distributed on different [crate::Index] instances, the
-//! [DistributedAggregationCollector] provides functionality to merge data between independent
+//! When the data is distributed on different [`Index`](crate::Index) instances, the
+//! [`DistributedAggregationCollector`] provides functionality to merge data between independent
 //! search calls by returning
-//! [IntermediateAggregationResults](intermediate_agg_result::IntermediateAggregationResults).
-//! IntermediateAggregationResults provides the
-//! [merge_fruits](intermediate_agg_result::IntermediateAggregationResults::merge_fruits) method to
-//! merge multiple results. The merged result can then be converted into
-//! [AggregationResults](agg_result::AggregationResults) via the
-//! [into_final_bucket_result](intermediate_agg_result::IntermediateAggregationResults::into_final_bucket_result) method.
+//! [`IntermediateAggregationResults`](intermediate_agg_result::IntermediateAggregationResults).
+//! `IntermediateAggregationResults` provides the
+//! [`merge_fruits`](intermediate_agg_result::IntermediateAggregationResults::merge_fruits) method
+//! to merge multiple results. The merged result can then be converted into
+//! [`AggregationResults`](agg_result::AggregationResults) via the
+//! [`into_final_bucket_result`](intermediate_agg_result::IntermediateAggregationResults::into_final_bucket_result) method.
 
 pub mod agg_req;
 mod agg_req_with_accessor;
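A sketch of the distributed flow just described. `merge_fruits(&mut self, other)` matches the signature shown in an earlier hunk; the exact signature and return type of `into_final_bucket_result` are assumptions here:

```rust
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::agg_result::AggregationResults;
use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults;

// Merge intermediate results returned by independent search calls,
// then convert the merged tree into the final, user-facing form.
fn merge_shards(
    mut acc: IntermediateAggregationResults,
    others: Vec<IntermediateAggregationResults>,
    agg_req: Aggregations,
) -> tantivy::Result<AggregationResults> {
    for other in others {
        // Both sides must stem from the same request, so keys line up.
        acc.merge_fruits(other);
    }
    acc.into_final_bucket_result(agg_req)
}
```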
@@ -259,7 +260,7 @@ impl<T: Clone> VecWithNames<T> {
     }
 }
 
-/// The serialized key is used in a HashMap.
+/// The serialized key is used in a `HashMap`.
 pub type SerializedKey = String;
 
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, PartialOrd)]
@@ -268,7 +269,7 @@ pub type SerializedKey = String;
 pub enum Key {
     /// String key
     Str(String),
-    /// f64 key
+    /// `f64` key
     F64(f64),
 }
 
@@ -281,10 +282,10 @@ impl Display for Key {
     }
 }
 
-/// Invert of to_fastfield_u64. Used to convert to f64 for metrics.
+/// Inverse of `to_fastfield_u64`. Used to convert to `f64` for metrics.
 ///
 /// # Panics
-/// Only u64, f64, i64 is supported
+/// Only `u64`, `f64`, and `i64` are supported.
 pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 {
     match field_type {
         Type::U64 => val as f64,
@@ -296,15 +297,15 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 {
     }
 }
 
-/// Converts the f64 value to fast field value space.
+/// Converts the `f64` value to fast field value space.
 ///
-/// If the fast field has u64, values are stored as u64 in the fast field.
-/// A f64 value of e.g. 2.0 therefore needs to be converted to 1u64
+/// If the fast field has `u64`, values are stored as `u64` in the fast field.
+/// A `f64` value of e.g. `2.0` therefore needs to be converted to `2u64`.
 ///
-/// If the fast field has f64 values are converted and stored to u64 using a
+/// If the fast field has `f64`, values are converted and stored to `u64` using a
 /// monotonic mapping.
-/// A f64 value of e.g. 2.0 needs to be converted using the same monotonic
+/// A `f64` value of e.g. `2.0` needs to be converted using the same monotonic
-/// conversion function, so that the value matches the u64 value stored in the fast
+/// conversion function, so that the value matches the `u64` value stored in the fast
 /// field.
 pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &Type) -> Option<u64> {
     match field_type {
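For intuition, an order-preserving `f64` to `u64` mapping typically flips the sign bit of positive floats and all bits of negative ones, so that integer order matches float order. This is a generic sketch of the technique, not necessarily tantivy's exact implementation:

```rust
/// Maps an f64 to a u64 such that `a < b` implies `map(a) <= map(b)`.
fn f64_to_u64_monotonic(val: f64) -> u64 {
    let bits = val.to_bits();
    if val >= 0.0 {
        bits ^ (1u64 << 63) // positive floats: set the sign bit
    } else {
        !bits // negative floats: invert all bits to reverse their order
    }
}

/// The inverse mapping.
fn u64_to_f64_monotonic(mapped: u64) -> f64 {
    if mapped & (1u64 << 63) != 0 {
        f64::from_bits(mapped ^ (1u64 << 63))
    } else {
        f64::from_bits(!mapped)
    }
}
```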
@@ -24,7 +24,7 @@ where TScore: Clone + PartialOrd
 /// A custom segment scorer makes it possible to define any kind of score
 /// for a given document belonging to a specific segment.
 ///
-/// It is the segment local version of the [`CustomScorer`](./trait.CustomScorer.html).
+/// It is the segment local version of the [`CustomScorer`].
 pub trait CustomSegmentScorer<TScore>: 'static {
     /// Computes the score of a specific `doc`.
     fn score(&mut self, doc: DocId) -> TScore;
@@ -36,7 +36,7 @@ pub trait CustomSegmentScorer<TScore>: 'static {
 /// Instead, it helps construct `Self::Child` instances that will compute
 /// the score at a segment scale.
 pub trait CustomScorer<TScore>: Sync {
-    /// Type of the associated [`CustomSegmentScorer`](./trait.CustomSegmentScorer.html).
+    /// Type of the associated [`CustomSegmentScorer`].
     type Child: CustomSegmentScorer<TScore>;
     /// Builds a child scorer for a specific segment. The child scorer is associated to
     /// a specific segment.
@@ -8,9 +8,9 @@
 //! - [the top 10 documents, by relevancy or by a fast field](crate::collector::TopDocs)
 //! - [facet counts](FacetCollector)
 //!
-//! At one point in your code, you will trigger the actual search operation by calling
-//! [the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).
-//! This call will look like this.
+//! At some point in your code, you will trigger the actual search operation by calling
+//! [`Searcher::search()`](crate::Searcher::search).
+//! This call will look like this:
 //!
 //! ```verbatim
 //! let fruit = searcher.search(&query, &collector)?;
@@ -64,7 +64,7 @@
 //!
 //! The `Collector` trait is implemented for up to 4 collectors.
 //! If you have more than 4 collectors, you can either group them into
-//! tuples of tuples `(a,(b,(c,d)))`, or rely on [`MultiCollector`](./struct.MultiCollector.html).
+//! tuples of tuples `(a,(b,(c,d)))`, or rely on [`MultiCollector`].
 //!
 //! # Combining several collectors dynamically
 //!
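To make the static option concrete before the dynamic one is discussed: a `Count` and a `TopDocs` collector grouped as a tuple run in one search pass, and the fruit is the tuple of their fruits. A minimal sketch; when the set of collectors is only known at runtime, `MultiCollector` plays the same role:

```rust
use tantivy::collector::{Count, TopDocs};
use tantivy::query::Query;
use tantivy::{DocAddress, Score, Searcher};

fn count_and_top10(
    searcher: &Searcher,
    query: &dyn Query,
) -> tantivy::Result<(usize, Vec<(Score, DocAddress)>)> {
    // The tuple of collectors yields the tuple of their results.
    searcher.search(query, &(Count, TopDocs::with_limit(10)))
}
```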
@@ -74,7 +74,7 @@
 //!
 //! Unfortunately it requires you to know your collector types at compile time.
 //! If on the other hand, the collectors depend on some query parameter,
-//! you can rely on `MultiCollector`'s.
+//! you can rely on [`MultiCollector`].
 //!
 //!
 //! # Implementing your own collectors.
@@ -287,7 +287,7 @@ impl TopDocs {
     /// # See also
     ///
     /// To comfortably work with `u64`s, `i64`s, `f64`s, or `date`s, please refer to
-    /// [.order_by_fast_field(...)](#method.order_by_fast_field) method.
+    /// the [.order_by_fast_field(...)](TopDocs::order_by_fast_field) method.
     pub fn order_by_u64_field(
         self,
         field: Field,
@@ -384,7 +384,7 @@ impl TopDocs {
     ///
     /// This method offers a convenient way to tweak or replace
    /// the document's score. As suggested by the prototype you can
-    /// manually define your own [`ScoreTweaker`](./trait.ScoreTweaker.html)
+    /// manually define your own [`ScoreTweaker`]
     /// and pass it as an argument, but there is a much simpler way to
     /// tweak your score: you can use a closure as in the following
     /// example.
@@ -401,7 +401,7 @@ impl TopDocs {
     /// In the following example we will tweak our ranking a bit by
     /// boosting popular products a notch.
     ///
-    /// In more serious application, this tweaking could involved running a
+    /// In a more serious application, this tweaking could involve running a
     /// learning-to-rank model over various features
     ///
     /// ```rust
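The doc example is elided here; a condensed sketch of the closure form it describes, boosting by a hypothetical `popularity` u64 fast field (the boost formula is illustrative):

```rust
use tantivy::collector::TopDocs;
use tantivy::schema::Field;
use tantivy::{DocId, Score, SegmentReader};

fn popularity_boosted_top10(popularity_field: Field) -> impl tantivy::collector::Collector {
    TopDocs::with_limit(10).tweak_score(move |segment_reader: &SegmentReader| {
        // One fast field reader per segment, captured by the per-doc closure.
        let popularity_reader = segment_reader
            .fast_fields()
            .u64(popularity_field)
            .expect("popularity must be a u64 fast field");
        move |doc: DocId, original_score: Score| {
            let popularity = popularity_reader.get_val(doc as u64);
            original_score * ((2u64 + popularity) as Score).log2()
        }
    })
}
```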
@@ -474,7 +474,7 @@ impl TopDocs {
     /// ```
     ///
     /// # See also
-    /// [custom_score(...)](#method.custom_score).
+    /// - [custom_score(...)](TopDocs::custom_score)
     pub fn tweak_score<TScore, TScoreSegmentTweaker, TScoreTweaker>(
         self,
         score_tweaker: TScoreTweaker,
@@ -491,8 +491,7 @@ impl TopDocs {
     ///
     /// This method offers a convenient way to use a different score.
     ///
-    /// As suggested by the prototype you can manually define your
-    /// own [`CustomScorer`](./trait.CustomScorer.html)
+    /// As suggested by the prototype you can manually define your own [`CustomScorer`]
     /// and pass it as an argument, but there is a much simpler way to
     /// tweak your score: you can use a closure as in the following
     /// example.
@@ -588,7 +587,7 @@ impl TopDocs {
     /// ```
     ///
     /// # See also
-    /// [tweak_score(...)](#method.tweak_score).
+    /// - [tweak_score(...)](TopDocs::tweak_score)
     pub fn custom_score<TScore, TCustomSegmentScorer, TCustomScorer>(
         self,
         custom_score: TCustomScorer,
@@ -24,7 +24,7 @@ where TScore: Clone + PartialOrd
 /// A `ScoreSegmentTweaker` makes it possible to modify the default score
 /// for a given document belonging to a specific segment.
 ///
-/// It is the segment local version of the [`ScoreTweaker`](./trait.ScoreTweaker.html).
+/// It is the segment local version of the [`ScoreTweaker`].
 pub trait ScoreSegmentTweaker<TScore>: 'static {
     /// Tweak the given `score` for the document `doc`.
     fn score(&mut self, doc: DocId, score: Score) -> TScore;
@@ -37,7 +37,7 @@ pub trait ScoreSegmentTweaker<TScore>: 'static {
 /// Instead, it helps construct `Self::Child` instances that will compute
 /// the score at a segment scale.
 pub trait ScoreTweaker<TScore>: Sync {
-    /// Type of the associated [`ScoreSegmentTweaker`](./trait.ScoreSegmentTweaker.html).
+    /// Type of the associated [`ScoreSegmentTweaker`].
     type Child: ScoreSegmentTweaker<TScore>;
 
     /// Builds a child tweaker for a specific segment. The child scorer is associated to
@@ -10,12 +10,12 @@ use crate::space_usage::SearcherSpaceUsage;
 use crate::store::{CacheStats, StoreReader};
 use crate::{DocAddress, Index, Opstamp, SegmentId, TrackedObject};
 
-/// Identifies the searcher generation accessed by a [Searcher].
+/// Identifies the searcher generation accessed by a [`Searcher`].
 ///
-/// While this might seem redundant, a [SearcherGeneration] contains
+/// While this might seem redundant, a [`SearcherGeneration`] contains
 /// both a `generation_id` AND a list of `(SegmentId, DeleteOpstamp)`.
 ///
-/// This is on purpose. This object is used by the `Warmer` API.
+/// This is on purpose. This object is used by the [`Warmer`](crate::reader::Warmer) API.
 /// Having both pieces of information makes it possible to identify which
 /// artifact should be refreshed or garbage collected.
 ///
@@ -74,12 +74,12 @@ impl Searcher {
         &self.inner.index
     }
 
-    /// [SearcherGeneration] which identifies the version of the snapshot held by this `Searcher`.
+    /// [`SearcherGeneration`] which identifies the version of the snapshot held by this `Searcher`.
     pub fn generation(&self) -> &SearcherGeneration {
         self.inner.generation.as_ref()
     }
 
-    /// Fetches a document from tantivy's store given a `DocAddress`.
+    /// Fetches a document from tantivy's store given a [`DocAddress`].
     ///
     /// The searcher uses the segment ordinal to route the
     /// request to the right `Segment`.
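A sketch of the store lookup described above; the `DocAddress` would typically come from a collector such as `TopDocs`:

```rust
use tantivy::{DocAddress, Document, Searcher};

fn fetch_hit(searcher: &Searcher, address: DocAddress) -> tantivy::Result<Document> {
    // The segment ordinal inside `address` routes the request to the right segment.
    searcher.doc(address)
}
```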
@@ -180,7 +180,7 @@ impl Searcher {
         self.search_with_executor(query, collector, executor)
     }
 
-    /// Same as [`search(...)`](#method.search) but multithreaded.
+    /// Same as [`search(...)`](Searcher::search) but multithreaded.
     ///
     /// The current implementation is rather naive:
     /// multithreading is by splitting search into as many task
@@ -27,7 +27,7 @@ pub(crate) fn make_io_err(msg: String) -> io::Error {
     io::Error::new(io::ErrorKind::Other, msg)
 }
 
-/// Returns None iff the file exists, can be read, but is empty (and hence
+/// Returns `None` iff the file exists, can be read, but is empty (and hence
 /// cannot be mmapped)
 fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
     let file = File::open(full_path).map_err(|io_err| {
@@ -3,10 +3,10 @@ use std::borrow::{Borrow, BorrowMut};
 use crate::fastfield::AliveBitSet;
 use crate::DocId;
 
-/// Sentinel value returned when a DocSet has been entirely consumed.
+/// Sentinel value returned when a [`DocSet`] has been entirely consumed.
 ///
-/// This is not u32::MAX as one would have expected, due to the lack of SSE2 instructions
-/// to compare [u32; 4].
+/// This is not `u32::MAX` as one would have expected, due to the lack of SSE2 instructions
+/// to compare `[u32; 4]`.
 pub const TERMINATED: DocId = i32::MAX as u32;
 
 /// Represents an iterable set of sorted doc ids.
@@ -20,21 +20,21 @@ pub trait DocSet: Send {
     /// assert_eq!(doc, docset.doc());
     /// ```
     ///
-    /// If we reached the end of the DocSet, TERMINATED should be returned.
+    /// If we reached the end of the `DocSet`, [`TERMINATED`] should be returned.
     ///
-    /// Calling `.advance()` on a terminated DocSet should be supported, and TERMINATED should
+    /// Calling `.advance()` on a terminated `DocSet` should be supported, and [`TERMINATED`] should
     /// be returned.
     fn advance(&mut self) -> DocId;
 
-    /// Advances the DocSet forward until reaching the target, or going to the
-    /// lowest DocId greater than the target.
+    /// Advances the `DocSet` forward until reaching the target, or going to the
+    /// lowest [`DocId`] greater than the target.
     ///
-    /// If the end of the DocSet is reached, TERMINATED is returned.
+    /// If the end of the `DocSet` is reached, [`TERMINATED`] is returned.
     ///
-    /// Calling `.seek(target)` on a terminated DocSet is legal. Implementation
-    /// of DocSet should support it.
+    /// Calling `.seek(target)` on a terminated `DocSet` is legal. Implementation
+    /// of `DocSet` should support it.
     ///
-    /// Calling `seek(TERMINATED)` is also legal and is the normal way to consume a DocSet.
+    /// Calling `seek(TERMINATED)` is also legal and is the normal way to consume a `DocSet`.
     fn seek(&mut self, target: DocId) -> DocId {
         let mut doc = self.doc();
         debug_assert!(doc <= target);
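The advance/TERMINATED contract above makes consuming a `DocSet` a simple loop. A minimal sketch:

```rust
use tantivy::{DocSet, TERMINATED};

fn count_remaining(docset: &mut dyn DocSet) -> u32 {
    let mut count = 0;
    let mut doc = docset.doc(); // a freshly created DocSet points at its first doc
    while doc != TERMINATED {
        count += 1;
        doc = docset.advance();
    }
    count
}
```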
@@ -73,9 +73,9 @@ pub trait DocSet: Send {
     }
 
     /// Returns the current document
-    /// Right after creating a new DocSet, the docset points to the first document.
+    /// Right after creating a new `DocSet`, the docset points to the first document.
     ///
-    /// If the DocSet is empty, .doc() should return `TERMINATED`.
+    /// If the `DocSet` is empty, `.doc()` should return [`TERMINATED`].
     fn doc(&self) -> DocId;
 
     /// Returns a best-effort hint of the
@@ -17,14 +17,12 @@ use crate::{DatePrecision, DocId};
 /// This `Writer` is only useful for advanced users.
 /// The normal way to get your multivalued int in your index
 /// is to
-/// - declare your field with fast set to `Cardinality::MultiValues`
-/// in your schema
+/// - declare your field with fast set to
+/// [`Cardinality::MultiValues`](crate::schema::Cardinality::MultiValues) in your schema
 /// - add your document simply by calling `.add_document(...)`.
 ///
-/// The `MultiValuedFastFieldWriter` can be acquired from the
-/// fastfield writer, by calling
-/// [`.get_multivalue_writer_mut(...)`](./struct.FastFieldsWriter.html#method.
-/// get_multivalue_writer_mut).
+/// The `MultiValuedFastFieldWriter` can be acquired from the fastfield writer, by calling
+/// [`FastFieldsWriter::get_multivalue_writer_mut()`](crate::fastfield::FastFieldsWriter::get_multivalue_writer_mut).
 ///
 /// Once acquired, writing is done by calling
 /// [`.add_document(&Document)`](MultiValuedFastFieldWriter::add_document) once per value.
@@ -168,7 +168,7 @@ impl FastFieldsWriter {
 
     /// Returns the fast field multi-value writer for the given field.
     ///
-    /// Returns None if the field does not exist, or is not
+    /// Returns `None` if the field does not exist, or is not
     /// configured as a multivalued fastfield in the schema.
     pub fn get_multivalue_writer_mut(
         &mut self,
@@ -182,7 +182,7 @@ impl FastFieldsWriter {
 
     /// Returns the bytes fast field writer for the given field.
     ///
-    /// Returns None if the field does not exist, or is not
+    /// Returns `None` if the field does not exist, or is not
     /// configured as a bytes fastfield in the schema.
     pub fn get_bytes_writer_mut(&mut self, field: Field) -> Option<&mut BytesFastFieldWriter> {
         // TODO optimize
@@ -178,7 +178,7 @@ pub struct DeleteCursor {
 impl DeleteCursor {
     /// Skips operations and positions it so that
     /// - either all of the delete operations currently in the queue are consumed and the next get
-    /// will return None.
+    /// will return `None`.
     /// - the next get will return the first operation with an
     /// `opstamp >= target_opstamp`.
     pub fn skip_to(&mut self, target_opstamp: Opstamp) {
@@ -15,7 +15,7 @@ impl IndexWriterStatus {
     }
 
     /// Returns a copy of the operation receiver.
-    /// If the index writer was killed, returns None.
+    /// If the index writer was killed, returns `None`.
     pub fn operation_receiver(&self) -> Option<AddBatchReceiver> {
         let rlock = self
             .inner
@@ -12,7 +12,7 @@ use crate::{DocId, DocSet, Score, TERMINATED};
 ///
 /// We always have `before_pivot_len` < `pivot_len`.
 ///
-/// None is returned if we establish that no document can exceed the threshold.
+/// `None` is returned if we establish that no document can exceed the threshold.
 fn find_pivot_doc(
     term_scorers: &[TermScorerWithMaxScore],
     threshold: Score,
@@ -15,38 +15,39 @@ use crate::{DocAddress, Term};
 /// - a set of documents
 /// - a way to score these documents
 ///
 /// When performing a [search](Searcher::search), these documents will then
-/// be pushed to a [Collector](../collector/trait.Collector.html),
+/// be pushed to a [`Collector`](crate::collector::Collector),
 /// which will in turn be in charge of deciding what to do with them.
 ///
 /// Concretely, this scored docset is represented by the
-/// [`Scorer`](./trait.Scorer.html) trait.
+/// [`Scorer`] trait.
 ///
 /// Because our index is actually split into segments, the
-/// query does not actually directly creates `DocSet` object.
-/// Instead, the query creates a [`Weight`](./trait.Weight.html)
-/// object for a given searcher.
+/// query does not actually directly create a [`DocSet`](crate::DocSet) object.
+/// Instead, the query creates a [`Weight`] object for a given searcher.
 ///
 /// The weight object, in turn, makes it possible to create
-/// a scorer for a specific [`SegmentReader`](../struct.SegmentReader.html).
+/// a scorer for a specific [`SegmentReader`].
 ///
 /// So to sum it up:
-/// - a `Query` is recipe to define a set of documents as well the way to score them.
-/// - a `Weight` is this recipe tied to a specific `Searcher`. It may for instance
+/// - a `Query` is a recipe to define a set of documents as well as the way to score them.
+/// - a [`Weight`] is this recipe tied to a specific [`Searcher`]. It may for instance
 /// hold statistics about the different terms of the query. It is created by the query.
-/// - a `Scorer` is a cursor over the set of matching documents, for a specific
-/// [`SegmentReader`](../struct.SegmentReader.html). It is created by the
-/// [`Weight`](./trait.Weight.html).
+/// - a [`Scorer`] is a cursor over the set of matching documents, for a specific
+/// [`SegmentReader`]. It is created by the [`Weight`].
 ///
 /// When implementing a new type of `Query`, it is normal to implement a
-/// dedicated `Query`, `Weight` and `Scorer`.
+/// dedicated `Query`, [`Weight`] and [`Scorer`].
+///
+/// [`Scorer`]: crate::query::Scorer
+/// [`SegmentReader`]: crate::SegmentReader
 pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug {
     /// Create the weight associated with a query.
     ///
     /// If scoring is not required, setting `scoring_enabled` to `false`
     /// can increase performance.
     ///
-    /// See [`Weight`](./trait.Weight.html).
+    /// See [`Weight`].
     fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>>;
 
     /// Returns an `Explanation` for the score of the document.
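A sketch of the recipe chain summed up above, using the `weight(...)` and `scorer(...)` signatures visible in these hunks (the counting logic is illustrative):

```rust
use tantivy::query::Query;
use tantivy::{DocSet, Searcher, TERMINATED};

fn count_matches(searcher: &Searcher, query: &dyn Query) -> tantivy::Result<u64> {
    // Query -> Weight: the recipe gets tied to this searcher's statistics.
    let weight = query.weight(searcher, /* scoring_enabled */ false)?;
    let mut total = 0u64;
    for segment_reader in searcher.segment_readers() {
        // Weight -> Scorer: a cursor over the matching docs of one segment.
        let mut scorer = weight.scorer(segment_reader, 1.0)?;
        while scorer.doc() != TERMINATED {
            total += 1;
            scorer.advance();
        }
    }
    Ok(total)
}
```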
@@ -113,8 +113,8 @@ fn trim_ast(logical_ast: LogicalAst) -> Option<LogicalAst> {
 /// The language covered by the current parser is extremely simple.
 ///
 /// * simple terms: e.g. `Barack Obama` are simply tokenized using tantivy's
-/// [`SimpleTokenizer`](../tokenizer/struct.SimpleTokenizer.html), hence becoming `["barack",
-/// "obama"]`. The terms are then searched within the default terms of the query parser.
+/// [`SimpleTokenizer`](crate::tokenizer::SimpleTokenizer), hence becoming `["barack", "obama"]`.
+/// The terms are then searched within the default terms of the query parser.
 ///
 /// e.g. If `body` and `title` are default fields, our example terms are
 /// `["title:barack", "body:barack", "title:obama", "body:obama"]`.
@@ -166,8 +166,8 @@ fn trim_ast(logical_ast: LogicalAst) -> Option<LogicalAst> {
 /// devops. Negative boosts are not allowed.
 ///
 /// It is also possible to define a boost for some specific field, at the query parser level.
-/// (See [`set_boost(...)`](#method.set_field_boost) ). Typically you may want to boost a title
-/// field.
+/// (See [`set_field_boost(...)`](QueryParser::set_field_boost)). Typically you may want to boost a
+/// title field.
 ///
 /// Phrase terms support the `~` slop operator which allows setting the phrase's matching
 /// distance in words. `"big wolf"~1` will return documents containing the phrase `"big bad wolf"`.
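A sketch of the field boost mentioned above, via `QueryParser::set_field_boost`; the index and the two fields are assumed to exist:

```rust
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use tantivy::Index;

fn boosted_parser(index: &Index, title: Field, body: Field) -> QueryParser {
    let mut query_parser = QueryParser::for_index(index, vec![title, body]);
    // Matches in `title` now weigh twice as much as matches in `body`.
    query_parser.set_field_boost(title, 2.0);
    query_parser
}
```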
@@ -7,7 +7,7 @@ use crate::Score;
 
 /// Scored set of documents matching a query within a specific segment.
 ///
-/// See [`Query`](./trait.Query.html).
+/// See [`Query`](crate::query::Query).
 pub trait Scorer: downcast_rs::Downcast + DocSet + 'static {
     /// Returns the score.
     ///
@@ -19,7 +19,7 @@ pub(crate) fn for_each_scorer<TScorer: Scorer + ?Sized>(
 /// Calls `callback` with all of the `(doc, score)` for which score
 /// is exceeding a given threshold.
 ///
-/// This method is useful for the TopDocs collector.
+/// This method is useful for the [`TopDocs`](crate::collector::TopDocs) collector.
 /// For all docsets, the blanket implementation has the benefit
 /// of prefiltering (doc, score) pairs, avoiding the
 /// virtual dispatch cost.
@@ -44,21 +44,19 @@ pub(crate) fn for_each_pruning_scorer<TScorer: Scorer + ?Sized>(
|
|||||||
/// A Weight is the specialization of a `Query`
|
/// A Weight is the specialization of a `Query`
|
||||||
/// for a given set of segments.
|
/// for a given set of segments.
|
||||||
///
|
///
|
||||||
/// See [`Query`].
|
/// See [`Query`](crate::query::Query).
|
||||||
///
|
|
||||||
/// [`Query`]: crate::query::Query
|
|
||||||
pub trait Weight: Send + Sync + 'static {
|
pub trait Weight: Send + Sync + 'static {
|
||||||
/// Returns the scorer for the given segment.
|
/// Returns the scorer for the given segment.
|
||||||
///
|
///
|
||||||
/// `boost` is a multiplier to apply to the score.
|
/// `boost` is a multiplier to apply to the score.
|
||||||
///
|
///
|
||||||
/// See [`Query`](./trait.Query.html).
|
/// See [`Query`](crate::query::Query).
|
||||||
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>>;
|
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>>;
|
||||||
|
|
||||||
/// Returns an `Explanation` for the given document.
|
/// Returns an [`Explanation`] for the given document.
|
||||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation>;
|
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation>;
|
||||||
|
|
||||||
/// Returns the number documents within the given `SegmentReader`.
|
/// Returns the number of documents within the given [`SegmentReader`].
|
||||||
fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
|
fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
|
||||||
let mut scorer = self.scorer(reader, 1.0)?;
|
let mut scorer = self.scorer(reader, 1.0)?;
|
||||||
if let Some(alive_bitset) = reader.alive_bitset() {
|
if let Some(alive_bitset) = reader.alive_bitset() {
|
||||||
@@ -83,7 +81,7 @@ pub trait Weight: Send + Sync + 'static {
|
|||||||
/// Calls `callback` with all of the `(doc, score)` for which score
|
/// Calls `callback` with all of the `(doc, score)` pairs for which the score
|
||||||
/// is exceeding a given threshold.
|
/// exceeds a given threshold.
|
||||||
///
|
///
|
||||||
/// This method is useful for the TopDocs collector.
|
/// This method is useful for the [`TopDocs`](crate::collector::TopDocs) collector.
|
||||||
/// For all docsets, the blanket implementation has the benefit
|
/// For all docsets, the blanket implementation has the benefit
|
||||||
/// of prefiltering (doc, score) pairs, avoiding the
|
/// of prefiltering (doc, score) pairs, avoiding the
|
||||||
/// virtual dispatch cost.
|
/// virtual dispatch cost.
|
||||||
|
|||||||
@@ -124,7 +124,7 @@ impl IndexReaderBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Set the [Warmer]s that are invoked when reloading searchable segments.
|
/// Set the [`Warmer`]s that are invoked when reloading searchable segments.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn warmers(mut self, warmers: Vec<Weak<dyn Warmer>>) -> IndexReaderBuilder {
|
pub fn warmers(mut self, warmers: Vec<Weak<dyn Warmer>>) -> IndexReaderBuilder {
|
||||||
self.warmers = warmers;
|
self.warmers = warmers;
|
||||||
|
|||||||
@@ -10,12 +10,12 @@ pub const GC_INTERVAL: Duration = Duration::from_secs(1);
|
|||||||
|
|
||||||
/// `Warmer` can be used to maintain segment-level state e.g. caches.
|
/// `Warmer` can be used to maintain segment-level state, e.g. caches.
|
||||||
///
|
///
|
||||||
/// They must be registered with the [super::IndexReaderBuilder].
|
/// They must be registered with the [`IndexReaderBuilder`](super::IndexReaderBuilder).
|
||||||
pub trait Warmer: Sync + Send {
|
pub trait Warmer: Sync + Send {
|
||||||
/// Perform any warming work using the provided [Searcher].
|
/// Perform any warming work using the provided [`Searcher`].
|
||||||
fn warm(&self, searcher: &Searcher) -> crate::Result<()>;
|
fn warm(&self, searcher: &Searcher) -> crate::Result<()>;
|
||||||
|
|
||||||
/// Discards internal state for any [SearcherGeneration] not provided.
|
/// Discards internal state for any [`SearcherGeneration`] not provided.
|
||||||
fn garbage_collect(&self, live_generations: &[&SearcherGeneration]);
|
fn garbage_collect(&self, live_generations: &[&SearcherGeneration]);
|
||||||
}
|
}
|
||||||
|
|
||||||
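A minimal sketch of a `Warmer` implementation using only the two methods shown in this hunk. The `BTreeMap` cache keyed by `SegmentId`, the import paths, and the blunt `garbage_collect` (which simply clears everything) are illustrative choices, not tantivy's:

```rust
use std::collections::BTreeMap;
use std::sync::RwLock;

use tantivy::reader::Warmer; // path assumed; adjust to the version at hand
use tantivy::{Searcher, SearcherGeneration, SegmentId};

/// Illustrative warmer: remembers the max doc id of every segment it has seen.
#[derive(Default)]
struct MaxDocCache {
    per_segment: RwLock<BTreeMap<SegmentId, u32>>,
}

impl Warmer for MaxDocCache {
    fn warm(&self, searcher: &Searcher) -> tantivy::Result<()> {
        let mut cache = self.per_segment.write().unwrap();
        for segment_reader in searcher.segment_readers() {
            cache.insert(segment_reader.segment_id(), segment_reader.max_doc());
        }
        Ok(())
    }

    fn garbage_collect(&self, _live_generations: &[&SearcherGeneration]) {
        // Deliberately blunt for the sketch: drop everything and let `warm`
        // repopulate. A real warmer would prune only dead generations.
        self.per_segment.write().unwrap().clear();
    }
}
```

Registration then goes through the builder method from the earlier hunk, keeping a strong reference alive so the `Weak` can be upgraded: `let warmer: Arc<dyn Warmer> = Arc::new(MaxDocCache::default());` followed by `index.reader_builder().warmers(vec![Arc::downgrade(&warmer)]).try_into()?`.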
@@ -38,11 +38,11 @@ impl WarmingState {
|
|||||||
}))))
|
}))))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Start tracking a new generation of [Searcher], and [Warmer::warm] it if there are active
|
/// Start tracking a new generation of [`Searcher`], and [`Warmer::warm`] it if there are active
|
||||||
/// warmers.
|
/// warmers.
|
||||||
///
|
///
|
||||||
/// A background GC thread for [Warmer::garbage_collect] calls is uniquely created if there are
|
/// A background GC thread for [`Warmer::garbage_collect`] calls is uniquely created if there
|
||||||
/// active warmers.
|
/// are active warmers.
|
||||||
pub fn warm_new_searcher_generation(&self, searcher: &Searcher) -> crate::Result<()> {
|
pub fn warm_new_searcher_generation(&self, searcher: &Searcher) -> crate::Result<()> {
|
||||||
self.0
|
self.0
|
||||||
.lock()
|
.lock()
|
||||||
@@ -90,7 +90,7 @@ impl WarmingStateInner {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Attempt to upgrade the weak Warmer references, pruning those which cannot be upgraded.
|
/// Attempt to upgrade the weak `Warmer` references, pruning those which cannot be upgraded.
|
||||||
/// Return the strong references.
|
/// Return the strong references.
|
||||||
fn pruned_warmers(&mut self) -> Vec<Arc<dyn Warmer>> {
|
fn pruned_warmers(&mut self) -> Vec<Arc<dyn Warmer>> {
|
||||||
let strong_warmers = self
|
let strong_warmers = self
|
||||||
@@ -102,7 +102,7 @@ impl WarmingStateInner {
|
|||||||
strong_warmers
|
strong_warmers
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [Warmer::garbage_collect] active warmers if some searcher generation is observed to have
|
/// [`Warmer::garbage_collect`] active warmers if some searcher generation is observed to have
|
||||||
/// been dropped.
|
/// been dropped.
|
||||||
fn gc_maybe(&mut self) -> bool {
|
fn gc_maybe(&mut self) -> bool {
|
||||||
let live_generations = self.searcher_generation_inventory.list();
|
let live_generations = self.searcher_generation_inventory.list();
|
||||||
@@ -144,8 +144,8 @@ impl WarmingStateInner {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Every [GC_INTERVAL] attempt to GC, with panics caught and logged using
|
/// Every [`GC_INTERVAL`], attempt to GC, with panics caught and logged using
|
||||||
/// [std::panic::catch_unwind].
|
/// [`std::panic::catch_unwind`].
|
||||||
fn gc_loop(inner: Weak<Mutex<WarmingStateInner>>) {
|
fn gc_loop(inner: Weak<Mutex<WarmingStateInner>>) {
|
||||||
for _ in crossbeam_channel::tick(GC_INTERVAL) {
|
for _ in crossbeam_channel::tick(GC_INTERVAL) {
|
||||||
if let Some(inner) = inner.upgrade() {
|
if let Some(inner) = inner.upgrade() {
|
||||||
|
|||||||
@@ -115,7 +115,7 @@ impl DateOptions {
|
|||||||
/// Returns the cardinality of the fastfield.
|
/// Returns the cardinality of the fastfield.
|
||||||
///
|
///
|
||||||
/// If the field has not been declared as a fastfield, then
|
/// If the field has not been declared as a fastfield, then
|
||||||
/// the method returns None.
|
/// the method returns `None`.
|
||||||
pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
|
pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
|
||||||
self.fast
|
self.fast
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ impl Type {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Interprets a 1byte code as a type.
|
/// Interprets a one-byte code as a type.
|
||||||
/// Returns None if the code is invalid.
|
/// Returns `None` if the code is invalid.
|
||||||
pub fn from_code(code: u8) -> Option<Self> {
|
pub fn from_code(code: u8) -> Option<Self> {
|
||||||
match code {
|
match code {
|
||||||
b's' => Some(Type::Str),
|
b's' => Some(Type::Str),
|
||||||
|
|||||||
@@ -1,18 +1,14 @@
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
/// `IndexRecordOption` describes an amount information associated
|
/// `IndexRecordOption` describes the amount of information associated
|
||||||
/// to a given indexed field.
|
/// with a given indexed field.
|
||||||
///
|
///
|
||||||
/// It is both used to:
|
/// It is both used to:
|
||||||
///
|
///
|
||||||
/// * describe in the schema the amount of information
|
/// * describe in the schema the amount of information that should be retained during indexing (See
|
||||||
/// that should be retained during indexing (See
|
/// [`TextFieldIndexing::set_index_option()`](crate::schema::TextFieldIndexing::set_index_option))
|
||||||
/// [`TextFieldIndexing.html.set_index_option`](
|
/// * request that a given amount of information be decoded as one goes through a posting list.
|
||||||
/// ../schema/struct.TextFieldIndexing.html#method.set_index_option))
|
/// (See [`InvertedIndexReader::read_postings()`](crate::InvertedIndexReader::read_postings))
|
||||||
/// * to request for a given
|
|
||||||
/// amount of information to be decoded as one goes through a posting list.
|
|
||||||
/// (See [`InvertedIndexReader.read_postings`](
|
|
||||||
/// ../struct.InvertedIndexReader.html#method.read_postings))
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)]
|
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)]
|
||||||
pub enum IndexRecordOption {
|
pub enum IndexRecordOption {
|
||||||
/// records only the `DocId`s
|
/// records only the `DocId`s
|
||||||
@@ -24,7 +20,7 @@ pub enum IndexRecordOption {
|
|||||||
WithFreqs,
|
WithFreqs,
|
||||||
/// records the document id, the term frequency and the positions of
|
/// records the document id, the term frequency and the positions of
|
||||||
/// the occurrences in the document.
|
/// the occurrences in the document.
|
||||||
/// Positions are required to run [PhraseQueries](../query/struct.PhraseQuery.html).
|
/// Positions are required to run a [`PhraseQuery`](crate::query::PhraseQuery).
|
||||||
#[serde(rename = "position")]
|
#[serde(rename = "position")]
|
||||||
WithFreqsAndPositions,
|
WithFreqsAndPositions,
|
||||||
}
|
}
|
||||||
|
|||||||
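The two uses listed above meet in the schema declaration. A sketch of a text field that keeps frequencies and positions, via the `set_index_option` method referenced in this hunk (field name and tokenizer are illustrative):

```rust
use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions};

fn main() {
    let mut schema_builder = Schema::builder();
    // Record doc ids, term frequencies and positions; positions are what
    // phrase queries need at search time.
    let indexing = TextFieldIndexing::default()
        .set_tokenizer("default")
        .set_index_option(IndexRecordOption::WithFreqsAndPositions);
    let options = TextOptions::default()
        .set_indexing_options(indexing)
        .set_stored();
    schema_builder.add_text_field("body", options);
    let _schema = schema_builder.build();
}
```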
@@ -5,9 +5,9 @@
|
|||||||
//! Tantivy has a very strict schema.
|
//! Tantivy has a very strict schema.
|
||||||
//! The schema defines information about the fields your index contains, that is, for each field:
|
//! The schema defines information about the fields your index contains, that is, for each field:
|
||||||
//!
|
//!
|
||||||
//! the field name (may only contain letters `[a-zA-Z]`, number `[0-9]`, and `_`)
|
//! - the field name (may only contain letters `[a-zA-Z]`, number `[0-9]`, and `_`)
|
||||||
//! the type of the field (currently only `text` and `u64` are supported)
|
//! - the type of the field (currently only `text` and `u64` are supported)
|
||||||
//! how the field should be indexed / stored.
|
//! - how the field should be indexed / stored.
|
||||||
//!
|
//!
|
||||||
//! This very last point is critical as it will enable / disable some of the functionality
|
//! This last point is critical, as it will enable or disable some of the functionality
|
||||||
//! for your index.
|
//! for your index.
|
||||||
@@ -36,21 +36,20 @@
|
|||||||
//! let schema = schema_builder.build();
|
//! let schema = schema_builder.build();
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
//! We can split the problem of generating a search result page into two phases :
|
//! We can split the problem of generating a search result page into two phases:
|
||||||
//!
|
//!
|
||||||
//! identifying the list of 10 or so documents to be displayed (Conceptually `query -> doc_ids[]`)
|
//! - identifying the list of 10 or so documents to be displayed (Conceptually `query -> doc_ids[]`)
|
||||||
//! for each of these documents, retrieving the information required to generate
|
//! - for each of these documents, retrieving the information required to generate the search
|
||||||
//! the search results page. (`doc_ids[] -> Document[]`)
|
//! results page. (`doc_ids[] -> Document[]`)
|
||||||
//!
|
//!
|
||||||
//! In the first phase, the ability to search for documents by the given field is determined by the
|
//! In the first phase, the ability to search for documents by the given field is determined by the
|
||||||
//! [`IndexRecordOption`](enum.IndexRecordOption.html) of our
|
//! [`IndexRecordOption`] of our [`TextOptions`].
|
||||||
//! [`TextOptions`](struct.TextOptions.html).
|
|
||||||
//!
|
//!
|
||||||
//! The effect of each possible setting is described more in detail
|
//! The effect of each possible setting is described in more detail in
|
||||||
//! [`TextIndexingOptions`](enum.TextIndexingOptions.html).
|
//! [`TextIndexingOptions`](enum.TextIndexingOptions.html).
|
||||||
//!
|
//!
|
||||||
//! On the other hand setting the field as stored or not determines whether the field should be
|
//! On the other hand, setting the field as stored or not determines whether the field should be
|
||||||
//! returned when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called.
|
//! returned when [`Searcher::doc()`](crate::Searcher::doc) is called.
|
||||||
//!
|
//!
|
||||||
//!
|
//!
|
||||||
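The two phases map directly onto the search API: a collector produces scored addresses, and `Searcher::doc` resolves them to stored documents. A small sketch (the page size of 10 is illustrative):

```rust
use tantivy::collector::TopDocs;
use tantivy::query::Query;
use tantivy::{Document, Searcher};

/// Phase 1: `query -> doc_ids[]`; phase 2: `doc_ids[] -> Document[]`.
fn search_page(searcher: &Searcher, query: &dyn Query) -> tantivy::Result<Vec<Document>> {
    let top_docs = searcher.search(query, &TopDocs::with_limit(10))?;
    top_docs
        .into_iter()
        .map(|(_score, doc_address)| searcher.doc(doc_address))
        .collect()
}
```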
//! ## Setting a u64, a i64 or a f64 field
|
//! ## Setting a u64, a i64 or a f64 field
|
||||||
@@ -69,7 +68,7 @@
|
|||||||
//!
|
//!
|
||||||
//! Just like for Text fields (see above),
|
//! Just like for Text fields (see above),
|
||||||
//! setting the field as stored defines whether the field will be
|
//! setting the field as stored defines whether the field will be
|
||||||
//! returned when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called,
|
//! returned when [`Searcher::doc()`](crate::Searcher::doc) is called,
|
||||||
//! and setting the field as indexed means that we will be able perform queries such as
|
//! and setting the field as indexed means that we will be able to perform queries such as
|
||||||
//! `num_stars:10`. Note that unlike text fields, numeric fields can only be indexed in one way for
|
//! `num_stars:10`. Note that unlike text fields, numeric fields can only be indexed in one way for
|
||||||
//! the moment.
|
//! the moment.
|
||||||
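A sketch of the numeric declaration this paragraph describes, with the hypothetical `num_stars` field indexed (so `num_stars:10` can match) and stored (so it comes back from `Searcher::doc`):

```rust
use tantivy::schema::{NumericOptions, Schema};

fn main() {
    let mut schema_builder = Schema::builder();
    let opts = NumericOptions::default()
        .set_indexed() // enables queries such as `num_stars:10`
        .set_stored(); // returned by `Searcher::doc`
    schema_builder.add_u64_field("num_stars", opts);
    let _schema = schema_builder.build();
}
```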
@@ -94,7 +93,7 @@
|
|||||||
//! ### Fast fields
|
//! ### Fast fields
|
||||||
//! This functionality is somewhat similar to Lucene's `DocValues`.
|
//! This functionality is somewhat similar to Lucene's `DocValues`.
|
||||||
//!
|
//!
|
||||||
//! Fields that are indexed as [FAST] will be stored in a special data structure that will
|
//! Fields that are indexed as [`FAST`] will be stored in a special data structure that will
|
||||||
//! make it possible to access the value given the doc id rapidly. This is useful if the value
|
//! make it possible to access the value given the doc id rapidly. This is useful if the value
|
||||||
//! of the field is required during scoring or collection for instance.
|
//! of the field is required during scoring or collection for instance.
|
||||||
//!
|
//!
|
||||||
|
|||||||
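Declaring a fast field is a one-flag affair; the column-oriented storage described above is what scorers and collectors read by doc id. A sketch (field name illustrative; the exact fast-field reader API varies by version, so access is left out):

```rust
use tantivy::schema::{Schema, FAST, INDEXED};

fn main() {
    let mut schema_builder = Schema::builder();
    // FAST lays values out by doc id, so scoring/collection can read them
    // without going through the document store.
    schema_builder.add_u64_field("popularity", FAST | INDEXED);
    let _schema = schema_builder.build();
}
```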
@@ -11,16 +11,16 @@ pub enum Cardinality {
|
|||||||
#[serde(rename = "single")]
|
#[serde(rename = "single")]
|
||||||
SingleValue,
|
SingleValue,
|
||||||
/// The document can have any number of values associated to the document.
|
/// The document can have any number of values associated with it.
|
||||||
/// This is more memory and CPU expensive than the SingleValue solution.
|
/// This is more memory and CPU expensive than the `SingleValue` solution.
|
||||||
#[serde(rename = "multi")]
|
#[serde(rename = "multi")]
|
||||||
MultiValues,
|
MultiValues,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[deprecated(since = "0.17.0", note = "Use NumericOptions instead.")]
|
#[deprecated(since = "0.17.0", note = "Use NumericOptions instead.")]
|
||||||
/// Deprecated use [NumericOptions] instead.
|
/// Deprecated, use [`NumericOptions`] instead.
|
||||||
pub type IntOptions = NumericOptions;
|
pub type IntOptions = NumericOptions;
|
||||||
|
|
||||||
/// Define how an u64, i64, of f64 field should be handled by tantivy.
|
/// Define how a `u64`, `i64`, or `f64` field should be handled by tantivy.
|
||||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
|
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||||
#[serde(from = "NumericOptionsDeser")]
|
#[serde(from = "NumericOptionsDeser")]
|
||||||
pub struct NumericOptions {
|
pub struct NumericOptions {
|
||||||
@@ -36,7 +36,7 @@ pub struct NumericOptions {
|
|||||||
/// lack of fieldnorms attribute as "true" if and only if indexed.
|
/// lack of fieldnorms attribute as "true" if and only if indexed.
|
||||||
///
|
///
|
||||||
/// (Downstream, for the moment, this attribute is not used anyway if not indexed...)
|
/// (Downstream, for the moment, this attribute is not used anyway if not indexed...)
|
||||||
/// Note that: newly serialized NumericOptions will include the new attribute.
|
/// Note that newly serialized `NumericOptions` will include the new attribute.
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct NumericOptionsDeser {
|
struct NumericOptionsDeser {
|
||||||
indexed: bool,
|
indexed: bool,
|
||||||
@@ -135,7 +135,7 @@ impl NumericOptions {
|
|||||||
/// Returns the cardinality of the fastfield.
|
/// Returns the cardinality of the fastfield.
|
||||||
///
|
///
|
||||||
/// If the field has not been declared as a fastfield, then
|
/// If the field has not been declared as a fastfield, then
|
||||||
/// the method returns None.
|
/// the method returns `None`.
|
||||||
pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
|
pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
|
||||||
self.fast
|
self.fast
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -236,7 +236,7 @@ where B: AsRef<[u8]>
|
|||||||
|
|
||||||
/// Returns the `u64` value stored in a term.
|
/// Returns the `u64` value stored in a term.
|
||||||
///
|
///
|
||||||
/// Returns None if the term is not of the u64 type, or if the term byte representation
|
/// Returns `None` if the term is not of the u64 type, or if the term byte representation
|
||||||
/// is invalid.
|
/// is invalid.
|
||||||
pub fn as_u64(&self) -> Option<u64> {
|
pub fn as_u64(&self) -> Option<u64> {
|
||||||
self.get_fast_type::<u64>()
|
self.get_fast_type::<u64>()
|
||||||
@@ -258,7 +258,7 @@ where B: AsRef<[u8]>
|
|||||||
|
|
||||||
/// Returns the `i64` value stored in a term.
|
/// Returns the `i64` value stored in a term.
|
||||||
///
|
///
|
||||||
/// Returns None if the term is not of the i64 type, or if the term byte representation
|
/// Returns `None` if the term is not of the i64 type, or if the term byte representation
|
||||||
/// is invalid.
|
/// is invalid.
|
||||||
pub fn as_i64(&self) -> Option<i64> {
|
pub fn as_i64(&self) -> Option<i64> {
|
||||||
self.get_fast_type::<i64>()
|
self.get_fast_type::<i64>()
|
||||||
@@ -266,7 +266,7 @@ where B: AsRef<[u8]>
|
|||||||
|
|
||||||
/// Returns the `f64` value stored in a term.
|
/// Returns the `f64` value stored in a term.
|
||||||
///
|
///
|
||||||
/// Returns None if the term is not of the f64 type, or if the term byte representation
|
/// Returns `None` if the term is not of the f64 type, or if the term byte representation
|
||||||
/// is invalid.
|
/// is invalid.
|
||||||
pub fn as_f64(&self) -> Option<f64> {
|
pub fn as_f64(&self) -> Option<f64> {
|
||||||
self.get_fast_type::<f64>()
|
self.get_fast_type::<f64>()
|
||||||
@@ -274,7 +274,7 @@ where B: AsRef<[u8]>
|
|||||||
|
|
||||||
/// Returns the `bool` value stored in a term.
|
/// Returns the `bool` value stored in a term.
|
||||||
///
|
///
|
||||||
/// Returns None if the term is not of the bool type, or if the term byte representation
|
/// Returns `None` if the term is not of the bool type, or if the term byte representation
|
||||||
/// is invalid.
|
/// is invalid.
|
||||||
pub fn as_bool(&self) -> Option<bool> {
|
pub fn as_bool(&self) -> Option<bool> {
|
||||||
self.get_fast_type::<bool>()
|
self.get_fast_type::<bool>()
|
||||||
@@ -282,7 +282,7 @@ where B: AsRef<[u8]>
|
|||||||
|
|
||||||
/// Returns the `Date` value stored in a term.
|
/// Returns the `Date` value stored in a term.
|
||||||
///
|
///
|
||||||
/// Returns None if the term is not of the Date type, or if the term byte representation
|
/// Returns `None` if the term is not of the Date type, or if the term byte representation
|
||||||
/// is invalid.
|
/// is invalid.
|
||||||
pub fn as_date(&self) -> Option<DateTime> {
|
pub fn as_date(&self) -> Option<DateTime> {
|
||||||
self.get_fast_type::<DateTime>()
|
self.get_fast_type::<DateTime>()
|
||||||
@@ -290,7 +290,7 @@ where B: AsRef<[u8]>
|
|||||||
|
|
||||||
/// Returns the text associated with the term.
|
/// Returns the text associated with the term.
|
||||||
///
|
///
|
||||||
/// Returns None if the field is not of string type
|
/// Returns `None` if the field is not of string type
|
||||||
/// or if the bytes are not valid utf-8.
|
/// or if the bytes are not valid utf-8.
|
||||||
pub fn as_str(&self) -> Option<&str> {
|
pub fn as_str(&self) -> Option<&str> {
|
||||||
if self.as_slice().len() < 5 {
|
if self.as_slice().len() < 5 {
|
||||||
@@ -304,7 +304,7 @@ where B: AsRef<[u8]>
|
|||||||
|
|
||||||
/// Returns the facet associated with the term.
|
/// Returns the facet associated with the term.
|
||||||
///
|
///
|
||||||
/// Returns None if the field is not of facet type
|
/// Returns `None` if the field is not of facet type
|
||||||
/// or if the bytes are not valid utf-8.
|
/// or if the bytes are not valid utf-8.
|
||||||
pub fn as_facet(&self) -> Option<Facet> {
|
pub fn as_facet(&self) -> Option<Facet> {
|
||||||
if self.as_slice().len() < 5 {
|
if self.as_slice().len() < 5 {
|
||||||
@@ -319,7 +319,7 @@ where B: AsRef<[u8]>
|
|||||||
|
|
||||||
/// Returns the bytes associated with the term.
|
/// Returns the bytes associated with the term.
|
||||||
///
|
///
|
||||||
/// Returns None if the field is not of bytes type.
|
/// Returns `None` if the field is not of bytes type.
|
||||||
pub fn as_bytes(&self) -> Option<&[u8]> {
|
pub fn as_bytes(&self) -> Option<&[u8]> {
|
||||||
if self.as_slice().len() < 5 {
|
if self.as_slice().len() < 5 {
|
||||||
return None;
|
return None;
|
||||||
@@ -366,7 +366,7 @@ fn get_fast_type<T: FastValue>(bytes: &[u8]) -> Option<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the json path (without non-human friendly separators, the type of the value, and the
|
/// Returns the json path (without non-human-friendly separators), the type of the value, and the
|
||||||
/// value bytes). Returns None if the value is not JSON or is not valid.
|
/// value bytes. Returns `None` if the value is not JSON or is not valid.
|
||||||
pub(crate) fn as_json_path_type_value_bytes(bytes: &[u8]) -> Option<(&str, Type, &[u8])> {
|
pub(crate) fn as_json_path_type_value_bytes(bytes: &[u8]) -> Option<(&str, Type, &[u8])> {
|
||||||
let pos = bytes.iter().cloned().position(|b| b == JSON_END_OF_PATH)?;
|
let pos = bytes.iter().cloned().position(|b| b == JSON_END_OF_PATH)?;
|
||||||
let json_path = str::from_utf8(&bytes[..pos]).ok()?;
|
let json_path = str::from_utf8(&bytes[..pos]).ok()?;
|
||||||
|
|||||||
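The typed accessors above are easy to exercise: a term built for a `u64` field answers `Some` only from `as_u64`. A minimal sketch:

```rust
use tantivy::schema::{Schema, INDEXED};
use tantivy::Term;

fn main() {
    let mut schema_builder = Schema::builder();
    let rank = schema_builder.add_u64_field("rank", INDEXED);
    let _schema = schema_builder.build();

    let term = Term::from_field_u64(rank, 42);
    assert_eq!(term.as_u64(), Some(42));
    // Mismatched accessors return None rather than reinterpreting the bytes.
    assert_eq!(term.as_str(), None);
    assert_eq!(term.as_i64(), None);
}
```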
@@ -99,7 +99,7 @@ impl TokenizerName {
|
|||||||
/// It defines
|
/// It defines
|
||||||
/// - The amount of information that should be stored about the presence of a term in a document.
|
/// - The amount of information that should be stored about the presence of a term in a document.
|
||||||
/// Essentially, should we store the term frequency and/or the positions (See
|
/// Essentially, should we store the term frequency and/or the positions (See
|
||||||
/// [`IndexRecordOption`](./enum.IndexRecordOption.html)).
|
/// [`IndexRecordOption`]).
|
||||||
/// - The name of the `Tokenizer` that should be used to process the field.
|
/// - The name of the `Tokenizer` that should be used to process the field.
|
||||||
/// - Flag indicating, if fieldnorms should be stored (See [fieldnorm](crate::fieldnorm)). Defaults
|
/// - A flag indicating whether fieldnorms should be stored (See [fieldnorm](crate::fieldnorm)). Defaults
|
||||||
/// to `true`.
|
/// to `true`.
|
||||||
@@ -147,14 +147,14 @@ impl TextFieldIndexing {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if and only if [fieldnorms](crate::fieldnorm)are stored.
|
/// Returns true if and only if [fieldnorms](crate::fieldnorm) are stored.
|
||||||
pub fn fieldnorms(&self) -> bool {
|
pub fn fieldnorms(&self) -> bool {
|
||||||
self.fieldnorms
|
self.fieldnorms
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets which information should be indexed with the tokens.
|
/// Sets which information should be indexed with the tokens.
|
||||||
///
|
///
|
||||||
/// See [IndexRecordOption](./enum.IndexRecordOption.html) for more detail.
|
/// See [`IndexRecordOption`] for more detail.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn set_index_option(mut self, index_option: IndexRecordOption) -> TextFieldIndexing {
|
pub fn set_index_option(mut self, index_option: IndexRecordOption) -> TextFieldIndexing {
|
||||||
self.record = index_option;
|
self.record = index_option;
|
||||||
@@ -163,7 +163,7 @@ impl TextFieldIndexing {
|
|||||||
|
|
||||||
/// Returns the indexing options associated to this field.
|
/// Returns the indexing options associated with this field.
|
||||||
///
|
///
|
||||||
/// See [IndexRecordOption](./enum.IndexRecordOption.html) for more detail.
|
/// See [`IndexRecordOption`] for more detail.
|
||||||
pub fn index_option(&self) -> IndexRecordOption {
|
pub fn index_option(&self) -> IndexRecordOption {
|
||||||
self.record
|
self.record
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ impl<'de> Deserialize<'de> for Value {
|
|||||||
|
|
||||||
impl Value {
|
impl Value {
|
||||||
/// Returns the text value, provided the value is of the `Str` type.
|
/// Returns the text value, provided the value is of the `Str` type.
|
||||||
/// (Returns None if the value is not of the `Str` type).
|
/// (Returns `None` if the value is not of the `Str` type).
|
||||||
pub fn as_text(&self) -> Option<&str> {
|
pub fn as_text(&self) -> Option<&str> {
|
||||||
if let Value::Str(text) = self {
|
if let Value::Str(text) = self {
|
||||||
Some(text)
|
Some(text)
|
||||||
@@ -107,7 +107,7 @@ impl Value {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the facet value, provided the value is of the `Facet` type.
|
/// Returns the facet value, provided the value is of the `Facet` type.
|
||||||
/// (Returns None if the value is not of the `Facet` type).
|
/// (Returns `None` if the value is not of the `Facet` type).
|
||||||
pub fn as_facet(&self) -> Option<&Facet> {
|
pub fn as_facet(&self) -> Option<&Facet> {
|
||||||
if let Value::Facet(facet) = self {
|
if let Value::Facet(facet) = self {
|
||||||
Some(facet)
|
Some(facet)
|
||||||
@@ -117,7 +117,7 @@ impl Value {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the tokenized text, provided the value is of the `PreTokStr` type.
|
/// Returns the tokenized text, provided the value is of the `PreTokStr` type.
|
||||||
/// (Returns None if the value is not of the `PreTokStr` type.)
|
/// (Returns `None` if the value is not of the `PreTokStr` type.)
|
||||||
pub fn tokenized_text(&self) -> Option<&PreTokenizedString> {
|
pub fn tokenized_text(&self) -> Option<&PreTokenizedString> {
|
||||||
if let Value::PreTokStr(tokenized_text) = self {
|
if let Value::PreTokStr(tokenized_text) = self {
|
||||||
Some(tokenized_text)
|
Some(tokenized_text)
|
||||||
@@ -127,7 +127,7 @@ impl Value {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the u64-value, provided the value is of the `U64` type.
|
/// Returns the u64-value, provided the value is of the `U64` type.
|
||||||
/// (Returns None if the value is not of the `U64` type)
|
/// (Returns `None` if the value is not of the `U64` type)
|
||||||
pub fn as_u64(&self) -> Option<u64> {
|
pub fn as_u64(&self) -> Option<u64> {
|
||||||
if let Value::U64(val) = self {
|
if let Value::U64(val) = self {
|
||||||
Some(*val)
|
Some(*val)
|
||||||
@@ -138,7 +138,7 @@ impl Value {
|
|||||||
|
|
||||||
/// Returns the i64-value, provided the value is of the `I64` type.
|
/// Returns the i64-value, provided the value is of the `I64` type.
|
||||||
///
|
///
|
||||||
/// Return None if the value is not of type `I64`.
|
/// Returns `None` if the value is not of type `I64`.
|
||||||
pub fn as_i64(&self) -> Option<i64> {
|
pub fn as_i64(&self) -> Option<i64> {
|
||||||
if let Value::I64(val) = self {
|
if let Value::I64(val) = self {
|
||||||
Some(*val)
|
Some(*val)
|
||||||
@@ -149,7 +149,7 @@ impl Value {
|
|||||||
|
|
||||||
/// Returns the f64-value, provided the value is of the `F64` type.
|
/// Returns the f64-value, provided the value is of the `F64` type.
|
||||||
///
|
///
|
||||||
/// Return None if the value is not of type `F64`.
|
/// Returns `None` if the value is not of type `F64`.
|
||||||
pub fn as_f64(&self) -> Option<f64> {
|
pub fn as_f64(&self) -> Option<f64> {
|
||||||
if let Value::F64(value) = self {
|
if let Value::F64(value) = self {
|
||||||
Some(*value)
|
Some(*value)
|
||||||
@@ -160,7 +160,7 @@ impl Value {
|
|||||||
|
|
||||||
/// Returns the bool value, provided the value is of the `Bool` type.
|
/// Returns the bool value, provided the value is of the `Bool` type.
|
||||||
///
|
///
|
||||||
/// Return None if the value is not of type `Bool`.
|
/// Returns `None` if the value is not of type `Bool`.
|
||||||
pub fn as_bool(&self) -> Option<bool> {
|
pub fn as_bool(&self) -> Option<bool> {
|
||||||
if let Value::Bool(value) = self {
|
if let Value::Bool(value) = self {
|
||||||
Some(*value)
|
Some(*value)
|
||||||
@@ -171,7 +171,7 @@ impl Value {
|
|||||||
|
|
||||||
/// Returns the Date-value, provided the value is of the `Date` type.
|
/// Returns the Date-value, provided the value is of the `Date` type.
|
||||||
///
|
///
|
||||||
/// Returns None if the value is not of type `Date`.
|
/// Returns `None` if the value is not of type `Date`.
|
||||||
pub fn as_date(&self) -> Option<DateTime> {
|
pub fn as_date(&self) -> Option<DateTime> {
|
||||||
if let Value::Date(date) = self {
|
if let Value::Date(date) = self {
|
||||||
Some(*date)
|
Some(*date)
|
||||||
@@ -182,7 +182,7 @@ impl Value {
|
|||||||
|
|
||||||
/// Returns the Bytes-value, provided the value is of the `Bytes` type.
|
/// Returns the Bytes-value, provided the value is of the `Bytes` type.
|
||||||
///
|
///
|
||||||
/// Returns None if the value is not of type `Bytes`.
|
/// Returns `None` if the value is not of type `Bytes`.
|
||||||
pub fn as_bytes(&self) -> Option<&[u8]> {
|
pub fn as_bytes(&self) -> Option<&[u8]> {
|
||||||
if let Value::Bytes(bytes) = self {
|
if let Value::Bytes(bytes) = self {
|
||||||
Some(bytes)
|
Some(bytes)
|
||||||
@@ -191,9 +191,9 @@ impl Value {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the json object, provided the value is of the JsonObject type.
|
/// Returns the json object, provided the value is of the `JsonObject` type.
|
||||||
///
|
///
|
||||||
/// Returns None if the value is not of type JsonObject.
|
/// Returns `None` if the value is not of type `JsonObject`.
|
||||||
pub fn as_json(&self) -> Option<&Map<String, serde_json::Value>> {
|
pub fn as_json(&self) -> Option<&Map<String, serde_json::Value>> {
|
||||||
if let Value::JsonObject(json) = self {
|
if let Value::JsonObject(json) = self {
|
||||||
Some(json)
|
Some(json)
|
||||||
|
|||||||
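Same pattern for `Value`: each accessor answers `Some` only for its own variant. A quick sketch:

```rust
use tantivy::schema::Value;

fn main() {
    let v = Value::U64(7);
    assert_eq!(v.as_u64(), Some(7));
    assert_eq!(v.as_i64(), None); // wrong variant -> None
    assert!(v.as_text().is_none());
}
```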
@@ -238,7 +238,7 @@ impl PerFieldSpaceUsage {
|
|||||||
/// Represents space usage of a given field, breaking it down into the (field, index) pairs that
|
/// Represents space usage of a given field, breaking it down into the (field, index) pairs that
|
||||||
/// comprise it.
|
/// comprise it.
|
||||||
///
|
///
|
||||||
/// See documentation for [PerFieldSpaceUsage] for slightly more information.
|
/// See documentation for [`PerFieldSpaceUsage`] for slightly more information.
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
pub struct FieldUsage {
|
pub struct FieldUsage {
|
||||||
field: Field,
|
field: Field,
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ impl LayerBuilder {
|
|||||||
/// Serializes the block, and return a checkpoint representing
|
/// Serializes the block, and returns a checkpoint representing
|
||||||
/// the entire block.
|
/// the entire block.
|
||||||
///
|
///
|
||||||
/// If the block was empty to begin with, simply return None.
|
/// If the block was empty to begin with, simply return `None`.
|
||||||
fn flush_block(&mut self) -> Option<Checkpoint> {
|
fn flush_block(&mut self) -> Option<Checkpoint> {
|
||||||
if let Some(doc_range) = self.block.doc_interval() {
|
if let Some(doc_range) = self.block.doc_interval() {
|
||||||
let start_offset = self.buffer.len();
|
let start_offset = self.buffer.len();
|
||||||
|
|||||||
@@ -36,10 +36,10 @@ impl<'a> TermMerger<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterator over (segment ordinal, [TermOrdinal]) sorted by segment ordinal
|
/// Iterator over `(segment ordinal, TermOrdinal)` pairs sorted by segment ordinal
|
||||||
///
|
///
|
||||||
/// This method may be called
|
/// This method may be called
|
||||||
/// if [Self::advance] has been called before
|
/// if [`Self::advance`] has been called before
|
||||||
/// and `true` was returned.
|
/// and `true` was returned.
|
||||||
pub fn matching_segments<'b: 'a>(&'b self) -> impl 'b + Iterator<Item = (usize, TermOrdinal)> {
|
pub fn matching_segments<'b: 'a>(&'b self) -> impl 'b + Iterator<Item = (usize, TermOrdinal)> {
|
||||||
self.current_segment_and_term_ordinals
|
self.current_segment_and_term_ordinals
|
||||||
@@ -68,17 +68,15 @@ impl<'a> TermMerger<'a> {
|
|||||||
|
|
||||||
/// Returns the current term.
|
/// Returns the current term.
|
||||||
///
|
///
|
||||||
/// This method may be called
|
/// This method may be called if [`Self::advance`] has been called before
|
||||||
/// if [Self::advance] has been called before
|
|
||||||
/// and `true` was returned.
|
/// and `true` was returned.
|
||||||
pub fn key(&self) -> &[u8] {
|
pub fn key(&self) -> &[u8] {
|
||||||
&self.current_key
|
&self.current_key
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterator over (segment ordinal, [TermInfo]) pairs iterator sorted by the ordinal.
|
/// Iterator over `(segment ordinal, TermInfo)` pairs sorted by the ordinal.
|
||||||
///
|
///
|
||||||
/// This method may be called
|
/// This method may be called if [`Self::advance`] has been called before
|
||||||
/// if [Self::advance] has been called before
|
|
||||||
/// and `true` was returned.
|
/// and `true` was returned.
|
||||||
pub fn current_segment_ords_and_term_infos<'b: 'a>(
|
pub fn current_segment_ords_and_term_infos<'b: 'a>(
|
||||||
&'b self,
|
&'b self,
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
//! The term dictionary main role is to associate the sorted [`Term`s](../struct.Term.html) to
|
//! The term dictionary's main role is to associate the sorted [`Term`](crate::Term)s with
|
||||||
//! a [`TermInfo`](../postings/struct.TermInfo.html) struct that contains some meta-information
|
//! a [`TermInfo`](crate::postings::TermInfo) struct that contains some meta-information
|
||||||
//! about the term.
|
//! about the term.
|
||||||
//!
|
//!
|
||||||
//! Internally, the term dictionary relies on the `fst` crate to store
|
//! Internally, the term dictionary relies on the `fst` crate to store
|
||||||
//! a sorted mapping that associate each term to its rank in the lexicographical order.
|
//! a sorted mapping that associates each term to its rank in the lexicographical order.
|
||||||
//! For instance, in a dictionary containing the sorted terms "abba", "bjork", "blur" and "donovan",
|
//! For instance, in a dictionary containing the sorted terms "abba", "bjork", "blur" and "donovan",
|
||||||
//! the `TermOrdinal` are respectively `0`, `1`, `2`, and `3`.
|
//! the `TermOrdinal`s are respectively `0`, `1`, `2`, and `3`.
|
||||||
//!
|
//!
|
||||||
@@ -17,7 +17,7 @@
|
|||||||
//! as `u64`.
|
//! as `u64`.
|
||||||
//!
|
//!
|
||||||
//! A second datastructure makes it possible to access a
|
//! A second data structure makes it possible to access a
|
||||||
//! [`TermInfo`](../postings/struct.TermInfo.html).
|
//! [`TermInfo`](crate::postings::TermInfo).
|
||||||
mod merger;
|
mod merger;
|
||||||
mod streamer;
|
mod streamer;
|
||||||
mod term_info_store;
|
mod term_info_store;
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
//! The term dictionary main role is to associate the sorted [`Term`s](../struct.Term.html) to
|
//! The term dictionary's main role is to associate the sorted [`Term`s](crate::Term) with
|
||||||
//! a [`TermInfo`](../postings/struct.TermInfo.html) struct that contains some meta-information
|
//! a [`TermInfo`](crate::postings::TermInfo) struct that contains some meta-information
|
||||||
//! about the term.
|
//! about the term.
|
||||||
//!
|
//!
|
||||||
//! Internally, the term dictionary relies on the `fst` crate to store
|
//! Internally, the term dictionary relies on the `fst` crate to store
|
||||||
//! a sorted mapping that associate each term to its rank in the lexicographical order.
|
//! a sorted mapping that associates each term to its rank in the lexicographical order.
|
||||||
//! For instance, in a dictionary containing the sorted terms "abba", "bjork", "blur" and "donovan",
|
//! For instance, in a dictionary containing the sorted terms "abba", "bjork", "blur" and "donovan",
|
||||||
//! the [TermOrdinal] are respectively `0`, `1`, `2`, and `3`.
|
//! the [`TermOrdinal`]s are respectively `0`, `1`, `2`, and `3`.
|
||||||
//!
|
//!
|
||||||
//! For `u64`-terms, tantivy explicitly uses a `BigEndian` representation to ensure that the
|
//! For `u64`-terms, tantivy explicitly uses a `BigEndian` representation to ensure that the
|
||||||
//! lexicographical order matches the natural order of integers.
|
//! lexicographical order matches the natural order of integers.
|
||||||
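The `BigEndian` remark can be checked in isolation: lexicographic byte order agrees with numeric order only for big-endian encodings. A self-contained check:

```rust
fn main() {
    // Big-endian: byte-wise order matches numeric order.
    assert!(1u64.to_be_bytes() < 256u64.to_be_bytes());
    // Little-endian breaks it: 256 = [0, 1, ..] sorts before 1 = [1, 0, ..].
    assert!(256u64.to_le_bytes() < 1u64.to_le_bytes());
}
```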
@@ -17,7 +17,7 @@
|
|||||||
//! as `u64`.
|
//! as `u64`.
|
||||||
//!
|
//!
|
||||||
//! A second datastructure makes it possible to access a
|
//! A second data structure makes it possible to access a
|
||||||
//! [`TermInfo`](../postings/struct.TermInfo.html).
|
//! [`TermInfo`](crate::postings::TermInfo).
|
||||||
|
|
||||||
#[cfg(not(feature = "quickwit"))]
|
#[cfg(not(feature = "quickwit"))]
|
||||||
mod fst_termdict;
|
mod fst_termdict;
|
||||||
|
|||||||
@@ -55,9 +55,9 @@
|
|||||||
//!
|
//!
|
||||||
//! # Custom tokenizers
|
//! # Custom tokenizers
|
||||||
//!
|
//!
|
||||||
//! You can write your own tokenizer by implementing the [`Tokenizer`](./trait.Tokenizer.html)
|
//! You can write your own tokenizer by implementing the [`Tokenizer`] trait
|
||||||
//! or you can extend an existing [`Tokenizer`](./trait.Tokenizer.html) by chaining it several
|
//! or you can extend an existing [`Tokenizer`] by chaining it with several
|
||||||
//! [`TokenFilter`s](./trait.TokenFilter.html).
|
//! [`TokenFilter`]s.
|
||||||
//!
|
//!
|
||||||
//! For instance, the `en_stem` is defined as follows.
|
//! For instance, the `en_stem` tokenizer is defined as follows.
|
||||||
//!
|
//!
|
||||||
@@ -71,7 +71,7 @@
|
|||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
//! Once your tokenizer is defined, you need to
|
//! Once your tokenizer is defined, you need to
|
||||||
//! register it with a name in your index's [`TokenizerManager`](./struct.TokenizerManager.html).
|
//! register it with a name in your index's [`TokenizerManager`].
|
||||||
//!
|
//!
|
||||||
//! ```rust
|
//! ```rust
|
||||||
//! # use tantivy::schema::Schema;
|
//! # use tantivy::schema::Schema;
|
||||||
@@ -89,7 +89,7 @@
|
|||||||
//! could like this for instance.
|
//! could look like this, for instance.
|
||||||
//!
|
//!
|
||||||
//! Note that tokens with a len greater or equal to
|
//! Note that tokens with a length greater than or equal to
|
||||||
//! [`MAX_TOKEN_LEN`](./constant.MAX_TOKEN_LEN.html).
|
//! [`MAX_TOKEN_LEN`] are dropped.
|
||||||
//!
|
//!
|
||||||
//! # Example
|
//! # Example
|
||||||
//!
|
//!
|
||||||
|
|||||||
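For completeness, a sketch of the chain-and-register flow described in this hunk, mirroring the `en_stem` chain; the registered name `custom_en_stem` and the builder-style `TextAnalyzer::from(...).filter(...)` calls assume the API of the version this commit targets:

```rust
use tantivy::tokenizer::{
    Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer, TextAnalyzer,
};
use tantivy::Index;

fn register_custom_tokenizer(index: &Index) {
    let analyzer = TextAnalyzer::from(SimpleTokenizer)
        .filter(RemoveLongFilter::limit(40)) // drop overly long tokens
        .filter(LowerCaser)
        .filter(Stemmer::new(Language::English));
    index.tokenizers().register("custom_en_stem", analyzer);
}
```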
@@ -54,7 +54,7 @@ impl Language {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// `Stemmer` token filter. Several languages are supported, see [Language] for the available
|
/// `Stemmer` token filter. Several languages are supported, see [`Language`] for the available
|
||||||
/// languages.
|
/// languages.
|
||||||
/// Tokens are expected to be lowercased beforehand.
|
/// Tokens are expected to be lowercased beforehand.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -63,7 +63,7 @@ pub struct Stemmer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Stemmer {
|
impl Stemmer {
|
||||||
/// Creates a new Stemmer `TokenFilter` for a given language algorithm.
|
/// Creates a new `Stemmer` [`TokenFilter`] for a given language algorithm.
|
||||||
pub fn new(language: Language) -> Stemmer {
|
pub fn new(language: Language) -> Stemmer {
|
||||||
Stemmer {
|
Stemmer {
|
||||||
stemmer_algorithm: language.algorithm(),
|
stemmer_algorithm: language.algorithm(),
|
||||||
@@ -72,7 +72,7 @@ impl Stemmer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Stemmer {
|
impl Default for Stemmer {
|
||||||
/// Creates a new Stemmer `TokenFilter` for English.
|
/// Creates a new `Stemmer` [`TokenFilter`] for [`Language::English`].
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Stemmer::new(Language::English)
|
Stemmer::new(Language::English)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ impl PartialOrd for PreTokenizedString {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TokenStream implementation which wraps [PreTokenizedString]
|
/// [`TokenStream`] implementation which wraps [`PreTokenizedString`]
|
||||||
pub struct PreTokenizedStream {
|
pub struct PreTokenizedStream {
|
||||||
tokenized_string: PreTokenizedString,
|
tokenized_string: PreTokenizedString,
|
||||||
current_token: i64,
|
current_token: i64,
|
||||||
|
|||||||
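`PreTokenizedStream` replays tokens that were produced outside tantivy. A sketch of building such input and attaching it to a document (token values are illustrative; `add_pre_tokenized_text` is assumed available on `Document`):

```rust
use tantivy::schema::Field;
use tantivy::tokenizer::{PreTokenizedString, Token};
use tantivy::Document;

fn add_pretokenized(doc: &mut Document, field: Field) {
    let pre_tokenized = PreTokenizedString {
        text: "Hello, world!".to_string(),
        tokens: vec![
            Token {
                offset_from: 0,
                offset_to: 5,
                position: 0,
                text: "hello".to_string(),
                position_length: 1,
            },
            Token {
                offset_from: 7,
                offset_to: 12,
                position: 1,
                text: "world".to_string(),
                position_length: 1,
            },
        ],
    };
    doc.add_pre_tokenized_text(field, pre_tokenized);
}
```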
@@ -116,7 +116,7 @@ impl Clone for TextAnalyzer {
|
|||||||
/// `Tokenizer` are in charge of splitting text into a stream of token
|
/// `Tokenizer`s are in charge of splitting text into a stream of tokens
|
||||||
/// before indexing.
|
/// before indexing.
|
||||||
///
|
///
|
||||||
/// See the [module documentation](./index.html) for more detail.
|
/// See the [module documentation](crate::tokenizer) for more detail.
|
||||||
///
|
///
|
||||||
/// # Warning
|
/// # Warning
|
||||||
///
|
///
|
||||||
@@ -155,7 +155,7 @@ impl<'a> TokenStream for Box<dyn TokenStream + 'a> {
|
|||||||
|
|
||||||
/// Simple wrapper of `Box<dyn TokenStream + 'a>`.
|
/// Simple wrapper of `Box<dyn TokenStream + 'a>`.
|
||||||
///
|
///
|
||||||
/// See [TokenStream] for more information.
|
/// See [`TokenStream`] for more information.
|
||||||
pub struct BoxTokenStream<'a>(Box<dyn TokenStream + 'a>);
|
pub struct BoxTokenStream<'a>(Box<dyn TokenStream + 'a>);
|
||||||
|
|
||||||
impl<'a, T> From<T> for BoxTokenStream<'a>
|
impl<'a, T> From<T> for BoxTokenStream<'a>
|
||||||
@@ -181,7 +181,7 @@ impl<'a> DerefMut for BoxTokenStream<'a> {
|
|||||||
|
|
||||||
/// Simple wrapper of `Box<dyn TokenFilter + 'a>`.
|
/// Simple wrapper of `Box<dyn TokenFilter + 'a>`.
|
||||||
///
|
///
|
||||||
/// See [TokenStream] for more information.
|
/// See [`TokenFilter`] for more information.
|
||||||
pub struct BoxTokenFilter(Box<dyn TokenFilter>);
|
pub struct BoxTokenFilter(Box<dyn TokenFilter>);
|
||||||
|
|
||||||
impl Deref for BoxTokenFilter {
|
impl Deref for BoxTokenFilter {
|
||||||
|
|||||||