mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-27 20:42:54 +00:00
Compare commits
1 Commits
columnar-c
...
issue/922
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7cb018c640 |
@@ -7,6 +7,7 @@ Tantivy 0.14.0
|
||||
- Added support for Brotli compression in the DocStore. (@ppodolsky)
|
||||
- Added helper for building intersections and unions in BooleanQuery (@guilload)
|
||||
- Bugfix in `Query::explain`
|
||||
- Making it possible to opt out of the generation of fieldnorms information for indexed fields. This change breaks compatibility, as the meta.json file format is slightly changed. (#922, @pmasurel)
|
||||
|
||||
Tantivy 0.13.2
|
||||
===================
|
||||
|
||||
@@ -301,7 +301,7 @@ mod tests {
|
||||
let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
|
||||
assert_eq!(
|
||||
json,
|
||||
r#"{"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","tokenizer":"default"},"stored":false}}],"opstamp":0}"#
|
||||
r#"{"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","tokenizer":"default","fieldnorms":true},"stored":false}}],"opstamp":0}"#
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,10 +98,9 @@ mod tests {
|
||||
let field = searcher.schema().get_field("string_bytes").unwrap();
|
||||
let term = Term::from_field_bytes(field, b"lucene".as_ref());
|
||||
let term_query = TermQuery::new(term, IndexRecordOption::Basic);
|
||||
let term_weight = term_query.specialized_weight(&searcher, false)?;
|
||||
let term_scorer_err = term_weight.specialized_scorer(searcher.segment_reader(0), 1.0f32);
|
||||
let term_weight_res = term_query.specialized_weight(&searcher, false);
|
||||
assert!(matches!(
|
||||
term_scorer_err,
|
||||
term_weight_res,
|
||||
Err(crate::TantivyError::SchemaError(_))
|
||||
));
|
||||
Ok(())
|
||||
|
||||
@@ -49,7 +49,7 @@ impl FieldNormReaders {
|
||||
///
|
||||
/// This metric is important to compute the score of a
|
||||
/// document: a document having a query word in one of its short fields
|
||||
/// (e.g. title) is likely to be more relevant than in one of its longer field
|
||||
/// (e.g. title) is likely to be more relevant than in one of its longer fields
|
||||
/// (e.g. body).
|
||||
///
|
||||
/// tantivy encodes `fieldnorm` on one byte with some precision loss,
|
||||
@@ -61,20 +61,31 @@ impl FieldNormReaders {
|
||||
/// precompute computationally expensive functions of the fieldnorm
|
||||
/// in a very short array.
|
||||
#[derive(Clone)]
|
||||
pub struct FieldNormReader {
|
||||
data: OwnedBytes,
|
||||
pub enum FieldNormReader {
|
||||
ConstFieldNorm { fieldnorm_id: u8, num_docs: u32 },
|
||||
OneByte(OwnedBytes),
|
||||
}
|
||||
|
||||
impl FieldNormReader {
|
||||
pub fn const_fieldnorm_id(fieldnorm_id: u8, num_docs: u32) -> FieldNormReader {
|
||||
FieldNormReader::ConstFieldNorm {
|
||||
fieldnorm_id,
|
||||
num_docs,
|
||||
}
|
||||
}
|
||||
|
||||
/// Opens a field norm reader given its file.
|
||||
pub fn open(fieldnorm_file: FileSlice) -> crate::Result<Self> {
|
||||
let data = fieldnorm_file.read_bytes()?;
|
||||
Ok(FieldNormReader { data })
|
||||
Ok(FieldNormReader::OneByte(data))
|
||||
}
|
||||
|
||||
/// Returns the number of documents in this segment.
|
||||
pub fn num_docs(&self) -> u32 {
|
||||
self.data.len() as u32
|
||||
match self {
|
||||
Self::ConstFieldNorm { num_docs, .. } => *num_docs,
|
||||
FieldNormReader::OneByte(vals) => vals.len() as u32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the `fieldnorm` associated to a doc id.
|
||||
@@ -86,6 +97,7 @@ impl FieldNormReader {
|
||||
///
|
||||
/// The fieldnorm is effectively decoded from the
|
||||
/// `fieldnorm_id` by doing a simple table lookup.
|
||||
#[inline(always)]
|
||||
pub fn fieldnorm(&self, doc_id: DocId) -> u32 {
|
||||
let fieldnorm_id = self.fieldnorm_id(doc_id);
|
||||
id_to_fieldnorm(fieldnorm_id)
|
||||
@@ -94,7 +106,11 @@ impl FieldNormReader {
|
||||
/// Returns the `fieldnorm_id` associated to a document.
|
||||
#[inline(always)]
|
||||
pub fn fieldnorm_id(&self, doc_id: DocId) -> u8 {
|
||||
self.data.as_slice()[doc_id as usize]
|
||||
match self {
|
||||
FieldNormReader::ConstFieldNorm { fieldnorm_id, .. } => *fieldnorm_id,
|
||||
|
||||
FieldNormReader::OneByte(data) => data.as_slice()[doc_id as usize],
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a `fieldnorm_id` into a fieldnorm.
|
||||
@@ -118,9 +134,7 @@ impl FieldNormReader {
|
||||
.map(FieldNormReader::fieldnorm_to_id)
|
||||
.collect::<Vec<u8>>();
|
||||
let field_norms_data = OwnedBytes::new(field_norms_id);
|
||||
FieldNormReader {
|
||||
data: field_norms_data,
|
||||
}
|
||||
FieldNormReader::OneByte(field_norms_data)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ use super::fieldnorm_to_id;
|
||||
use super::FieldNormsSerializer;
|
||||
use crate::schema::Field;
|
||||
use crate::schema::Schema;
|
||||
use std::{io, iter};
|
||||
use std::io;
|
||||
|
||||
/// The `FieldNormsWriter` is in charge of tracking the fieldnorm byte
|
||||
/// of each document for each field with field norms.
|
||||
@@ -13,7 +13,7 @@ use std::{io, iter};
|
||||
/// byte per document per field.
|
||||
pub struct FieldNormsWriter {
|
||||
fields: Vec<Field>,
|
||||
fieldnorms_buffer: Vec<Vec<u8>>,
|
||||
fieldnorms_buffer: Vec<Option<Vec<u8>>>,
|
||||
}
|
||||
|
||||
impl FieldNormsWriter {
|
||||
@@ -23,7 +23,7 @@ impl FieldNormsWriter {
|
||||
schema
|
||||
.fields()
|
||||
.filter_map(|(field, field_entry)| {
|
||||
if field_entry.is_indexed() {
|
||||
if field_entry.has_fieldnorms() {
|
||||
Some(field)
|
||||
} else {
|
||||
None
|
||||
@@ -36,17 +36,14 @@ impl FieldNormsWriter {
|
||||
/// specified in the schema.
|
||||
pub fn for_schema(schema: &Schema) -> FieldNormsWriter {
|
||||
let fields = FieldNormsWriter::fields_with_fieldnorm(schema);
|
||||
let max_field = fields
|
||||
.iter()
|
||||
.map(Field::field_id)
|
||||
.max()
|
||||
.map(|max_field_id| max_field_id as usize + 1)
|
||||
.unwrap_or(0);
|
||||
let num_fields = schema.num_fields();
|
||||
let mut fieldnorms_buffer: Vec<Option<Vec<u8>>> = vec![None; num_fields];
|
||||
for field in &fields {
|
||||
fieldnorms_buffer[field.field_id() as usize] = Some(Vec::new());
|
||||
}
|
||||
FieldNormsWriter {
|
||||
fields,
|
||||
fieldnorms_buffer: iter::repeat_with(Vec::new)
|
||||
.take(max_field)
|
||||
.collect::<Vec<_>>(),
|
||||
fieldnorms_buffer,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,8 +52,10 @@ impl FieldNormsWriter {
|
||||
///
|
||||
/// Will extend with 0-bytes for documents that have not been seen.
|
||||
pub fn fill_up_to_max_doc(&mut self, max_doc: DocId) {
|
||||
for field in self.fields.iter() {
|
||||
self.fieldnorms_buffer[field.field_id() as usize].resize(max_doc as usize, 0u8);
|
||||
for buffer_opt in self.fieldnorms_buffer.iter_mut() {
|
||||
if let Some(buffer) = buffer_opt {
|
||||
buffer.resize(max_doc as usize, 0u8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,21 +68,22 @@ impl FieldNormsWriter {
|
||||
/// * field - the field being set
|
||||
/// * fieldnorm - the number of terms present in document `doc` in field `field`
|
||||
pub fn record(&mut self, doc: DocId, field: Field, fieldnorm: u32) {
|
||||
let fieldnorm_buffer: &mut Vec<u8> = &mut self.fieldnorms_buffer[field.field_id() as usize];
|
||||
assert!(
|
||||
fieldnorm_buffer.len() <= doc as usize,
|
||||
"Cannot register a given fieldnorm twice"
|
||||
);
|
||||
// we fill intermediary `DocId` as having a fieldnorm of 0.
|
||||
fieldnorm_buffer.resize(doc as usize + 1, 0u8);
|
||||
fieldnorm_buffer[doc as usize] = fieldnorm_to_id(fieldnorm);
|
||||
if let Some(fieldnorm_buffer) = self.fieldnorms_buffer[field.field_id() as usize].as_mut() {
|
||||
assert!(
|
||||
fieldnorm_buffer.len() <= doc as usize,
|
||||
"Cannot register a given fieldnorm twice" // we fill intermediary `DocId` as having a fieldnorm of 0.
|
||||
);
|
||||
fieldnorm_buffer.resize(doc as usize + 1, 0u8);
|
||||
fieldnorm_buffer[doc as usize] = fieldnorm_to_id(fieldnorm);
|
||||
}
|
||||
}
|
||||
|
||||
/// Serialize the seen fieldnorm values to the serializer for all fields.
|
||||
pub fn serialize(&self, mut fieldnorms_serializer: FieldNormsSerializer) -> io::Result<()> {
|
||||
for &field in self.fields.iter() {
|
||||
let fieldnorm_values: &[u8] = &self.fieldnorms_buffer[field.field_id() as usize][..];
|
||||
fieldnorms_serializer.serialize_field(field, fieldnorm_values)?;
|
||||
if let Some(buffer) = self.fieldnorms_buffer[field.field_id() as usize].as_ref() {
|
||||
fieldnorms_serializer.serialize_field(field, &buffer[..])?;
|
||||
}
|
||||
}
|
||||
fieldnorms_serializer.close()?;
|
||||
Ok(())
|
||||
|
||||
@@ -322,9 +322,8 @@ pub struct PostingsSerializer<W: Write> {
|
||||
|
||||
bm25_weight: Option<BM25Weight>,
|
||||
|
||||
num_docs: u32, // Number of docs in the segment
|
||||
avg_fieldnorm: Score, // Average number of term in the field for that segment.
|
||||
// this value is used to compute the block wand information.
|
||||
// this value is used to compute the block wand information.
|
||||
}
|
||||
|
||||
impl<W: Write> PostingsSerializer<W> {
|
||||
@@ -334,10 +333,6 @@ impl<W: Write> PostingsSerializer<W> {
|
||||
mode: IndexRecordOption,
|
||||
fieldnorm_reader: Option<FieldNormReader>,
|
||||
) -> PostingsSerializer<W> {
|
||||
let num_docs = fieldnorm_reader
|
||||
.as_ref()
|
||||
.map(|fieldnorm_reader| fieldnorm_reader.num_docs())
|
||||
.unwrap_or(0u32);
|
||||
PostingsSerializer {
|
||||
output_write: CountingWriter::wrap(write),
|
||||
|
||||
@@ -353,20 +348,25 @@ impl<W: Write> PostingsSerializer<W> {
|
||||
fieldnorm_reader,
|
||||
bm25_weight: None,
|
||||
|
||||
num_docs,
|
||||
avg_fieldnorm,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of documents in the segment currently being serialized.
|
||||
/// This function may return `None` if there are no fieldnorms for that field.
|
||||
fn num_docs_in_segment(&self) -> Option<u32> {
|
||||
self.fieldnorm_reader
|
||||
.as_ref()
|
||||
.map(|reader| reader.num_docs())
|
||||
}
|
||||
|
||||
pub fn new_term(&mut self, term_doc_freq: u32) {
|
||||
if self.mode.has_freq() && self.num_docs > 0 {
|
||||
let bm25_weight = BM25Weight::for_one_term(
|
||||
term_doc_freq as u64,
|
||||
self.num_docs as u64,
|
||||
self.avg_fieldnorm,
|
||||
);
|
||||
self.bm25_weight = Some(bm25_weight);
|
||||
if self.mode.has_freq() {
|
||||
return;
|
||||
}
|
||||
self.bm25_weight = self.num_docs_in_segment().map(|num_docs| {
|
||||
BM25Weight::for_one_term(term_doc_freq as u64, num_docs as u64, self.avg_fieldnorm)
|
||||
});
|
||||
}
|
||||
|
||||
fn write_block(&mut self) {
|
||||
|
||||
@@ -92,6 +92,17 @@ impl TermQuery {
|
||||
searcher: &Searcher,
|
||||
scoring_enabled: bool,
|
||||
) -> crate::Result<TermWeight> {
|
||||
let field_entry = searcher
|
||||
.schema()
|
||||
.get_field_entry(self.term.field());
|
||||
if !field_entry.is_indexed() {
|
||||
let error_msg = format!("Field {:?} is not indexed.", field_entry.name());
|
||||
return Err(crate::TantivyError::SchemaError(error_msg));
|
||||
}
|
||||
let has_fieldnorms = searcher
|
||||
.schema()
|
||||
.get_field_entry(self.term.field())
|
||||
.has_fieldnorms();
|
||||
let term = self.term.clone();
|
||||
let bm25_weight = BM25Weight::for_terms(searcher, &[term])?;
|
||||
let index_record_option = if scoring_enabled {
|
||||
@@ -103,6 +114,7 @@ impl TermQuery {
|
||||
self.term.clone(),
|
||||
index_record_option,
|
||||
bm25_weight,
|
||||
has_fieldnorms,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use super::term_scorer::TermScorer;
|
||||
use crate::core::SegmentReader;
|
||||
use crate::docset::DocSet;
|
||||
use crate::fieldnorm::FieldNormReader;
|
||||
use crate::postings::SegmentPostings;
|
||||
use crate::query::bm25::BM25Weight;
|
||||
use crate::query::explanation::does_not_match;
|
||||
@@ -15,6 +16,7 @@ pub struct TermWeight {
|
||||
term: Term,
|
||||
index_record_option: IndexRecordOption,
|
||||
similarity_weight: BM25Weight,
|
||||
has_fieldnorms: bool,
|
||||
}
|
||||
|
||||
impl Weight for TermWeight {
|
||||
@@ -87,11 +89,13 @@ impl TermWeight {
|
||||
term: Term,
|
||||
index_record_option: IndexRecordOption,
|
||||
similarity_weight: BM25Weight,
|
||||
has_fieldnorms: bool,
|
||||
) -> TermWeight {
|
||||
TermWeight {
|
||||
term,
|
||||
index_record_option,
|
||||
similarity_weight,
|
||||
has_fieldnorms,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,7 +106,11 @@ impl TermWeight {
|
||||
) -> crate::Result<TermScorer> {
|
||||
let field = self.term.field();
|
||||
let inverted_index = reader.inverted_index(field)?;
|
||||
let fieldnorm_reader = reader.get_fieldnorms_reader(field)?;
|
||||
let fieldnorm_reader = if self.has_fieldnorms {
|
||||
reader.get_fieldnorms_reader(field)?
|
||||
} else {
|
||||
FieldNormReader::const_fieldnorm_id(1u8, reader.num_docs())
|
||||
};
|
||||
let similarity_weight = self.similarity_weight.boost_by(boost);
|
||||
let postings_opt: Option<SegmentPostings> =
|
||||
inverted_index.read_postings(&self.term, self.index_record_option)?;
|
||||
|
||||
@@ -112,6 +112,21 @@ impl FieldEntry {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_fieldnorms(&self) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::Str(ref options) => options
|
||||
.get_indexing_options()
|
||||
.map(|options| options.fieldnorms())
|
||||
.unwrap_or(false),
|
||||
FieldType::U64(ref options)
|
||||
| FieldType::I64(ref options)
|
||||
| FieldType::F64(ref options)
|
||||
| FieldType::Date(ref options) => options.index_option().has_fieldnorms(),
|
||||
FieldType::HierarchicalFacet => false,
|
||||
FieldType::Bytes(ref _options) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true iff the field is an int (signed or unsigned) fast field
|
||||
pub fn is_fast(&self) -> bool {
|
||||
match self.field_type {
|
||||
@@ -272,7 +287,8 @@ impl<'de> Deserialize<'de> for FieldEntry {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::schema::TEXT;
|
||||
use crate::schema::{Schema, STRING, TEXT};
|
||||
use crate::Index;
|
||||
use serde_json;
|
||||
|
||||
#[test]
|
||||
@@ -291,7 +307,8 @@ mod tests {
|
||||
"options": {
|
||||
"indexing": {
|
||||
"record": "position",
|
||||
"tokenizer": "default"
|
||||
"tokenizer": "default",
|
||||
"fieldnorms": true
|
||||
},
|
||||
"stored": false
|
||||
}
|
||||
@@ -309,4 +326,19 @@ mod tests {
|
||||
_ => panic!("expected FieldType::Str"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fieldnorms() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text = schema_builder.add_text_field("text", STRING);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
index_writer.add_document(doc!(text=>"abc"));
|
||||
index_writer.commit()?;
|
||||
let searcher = index.reader()?.searcher();
|
||||
let err = searcher.segment_reader(0u32).get_fieldnorms_reader(text);
|
||||
assert!(matches!(err, Err(crate::TantivyError::SchemaError(_))));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,10 +14,50 @@ pub enum Cardinality {
|
||||
MultiValues,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum IntOptionIndex {
|
||||
#[serde(rename = "no_index")]
|
||||
NoIndex,
|
||||
#[serde(rename = "index_no_fieldnorms")]
|
||||
IndexNoFieldnorms,
|
||||
#[serde(rename = "index_with_fieldnorms")]
|
||||
IndexWithFieldnorms,
|
||||
}
|
||||
|
||||
impl BitOr<IntOptionIndex> for IntOptionIndex {
|
||||
type Output = IntOptionIndex;
|
||||
|
||||
fn bitor(self, other: IntOptionIndex) -> IntOptionIndex {
|
||||
match (self, other) {
|
||||
(_, Self::IndexWithFieldnorms) | (Self::IndexWithFieldnorms, _) => {
|
||||
Self::IndexWithFieldnorms
|
||||
}
|
||||
(_, Self::IndexNoFieldnorms) | (Self::IndexNoFieldnorms, _) => Self::IndexNoFieldnorms,
|
||||
(Self::NoIndex, Self::NoIndex) => Self::NoIndex,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IntOptionIndex {
|
||||
pub fn is_indexed(&self) -> bool {
|
||||
match *self {
|
||||
Self::NoIndex => false,
|
||||
Self::IndexNoFieldnorms | Self::IndexWithFieldnorms => true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_fieldnorms(&self) -> bool {
|
||||
match *self {
|
||||
Self::NoIndex | Self::IndexNoFieldnorms => false,
|
||||
Self::IndexWithFieldnorms => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines how a u64, i64, or f64 field should be handled by tantivy.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct IntOptions {
|
||||
indexed: bool,
|
||||
indexed: IntOptionIndex,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
fast: Option<Cardinality>,
|
||||
stored: bool,
|
||||
@@ -31,7 +71,7 @@ impl IntOptions {
|
||||
|
||||
/// Returns true iff the value is indexed.
|
||||
pub fn is_indexed(&self) -> bool {
|
||||
self.indexed
|
||||
self.indexed.is_indexed()
|
||||
}
|
||||
|
||||
/// Returns true iff the value is a fast field.
|
||||
@@ -48,12 +88,21 @@ impl IntOptions {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn index_option(&self) -> &IntOptionIndex {
|
||||
&self.indexed
|
||||
}
|
||||
|
||||
pub fn set_indexed(mut self) -> IntOptions {
|
||||
self.indexed = IntOptionIndex::IndexWithFieldnorms;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the field as indexed.
|
||||
///
|
||||
/// Setting an integer as indexed will generate
|
||||
/// a posting list for each value taken by the integer.
|
||||
pub fn set_indexed(mut self) -> IntOptions {
|
||||
self.indexed = true;
|
||||
pub fn set_index_option(mut self, int_option_index: IntOptionIndex) -> IntOptions {
|
||||
self.indexed = int_option_index;
|
||||
self
|
||||
}
|
||||
|
||||
@@ -80,7 +129,7 @@ impl IntOptions {
|
||||
impl Default for IntOptions {
|
||||
fn default() -> IntOptions {
|
||||
IntOptions {
|
||||
indexed: false,
|
||||
indexed: IntOptionIndex::NoIndex,
|
||||
stored: false,
|
||||
fast: None,
|
||||
}
|
||||
@@ -96,7 +145,7 @@ impl From<()> for IntOptions {
|
||||
impl From<FastFlag> for IntOptions {
|
||||
fn from(_: FastFlag) -> Self {
|
||||
IntOptions {
|
||||
indexed: false,
|
||||
indexed: IntOptionIndex::NoIndex,
|
||||
stored: false,
|
||||
fast: Some(Cardinality::SingleValue),
|
||||
}
|
||||
@@ -106,7 +155,7 @@ impl From<FastFlag> for IntOptions {
|
||||
impl From<StoredFlag> for IntOptions {
|
||||
fn from(_: StoredFlag) -> Self {
|
||||
IntOptions {
|
||||
indexed: false,
|
||||
indexed: IntOptionIndex::NoIndex,
|
||||
stored: true,
|
||||
fast: None,
|
||||
}
|
||||
@@ -116,7 +165,7 @@ impl From<StoredFlag> for IntOptions {
|
||||
impl From<IndexedFlag> for IntOptions {
|
||||
fn from(_: IndexedFlag) -> Self {
|
||||
IntOptions {
|
||||
indexed: true,
|
||||
indexed: IntOptionIndex::IndexWithFieldnorms,
|
||||
stored: false,
|
||||
fast: None,
|
||||
}
|
||||
|
||||
@@ -231,6 +231,10 @@ impl Schema {
|
||||
&self.0.fields[field.field_id() as usize]
|
||||
}
|
||||
|
||||
pub fn num_fields(&self) -> usize {
|
||||
self.0.fields.len()
|
||||
}
|
||||
|
||||
/// Return the field name for a given `Field`.
|
||||
pub fn get_field_name(&self, field: Field) -> &str {
|
||||
self.get_field_entry(field).name()
|
||||
@@ -444,7 +448,8 @@ mod tests {
|
||||
"options": {
|
||||
"indexing": {
|
||||
"record": "position",
|
||||
"tokenizer": "default"
|
||||
"tokenizer": "default",
|
||||
"fieldnorms": true
|
||||
},
|
||||
"stored": false
|
||||
}
|
||||
@@ -455,7 +460,8 @@ mod tests {
|
||||
"options": {
|
||||
"indexing": {
|
||||
"record": "basic",
|
||||
"tokenizer": "raw"
|
||||
"tokenizer": "raw",
|
||||
"fieldnorms": false
|
||||
},
|
||||
"stored": false
|
||||
}
|
||||
@@ -464,7 +470,7 @@ mod tests {
|
||||
"name": "count",
|
||||
"type": "u64",
|
||||
"options": {
|
||||
"indexed": false,
|
||||
"indexed": "no_index",
|
||||
"fast": "single",
|
||||
"stored": true
|
||||
}
|
||||
@@ -473,7 +479,7 @@ mod tests {
|
||||
"name": "popularity",
|
||||
"type": "i64",
|
||||
"options": {
|
||||
"indexed": false,
|
||||
"indexed": "no_index",
|
||||
"fast": "single",
|
||||
"stored": true
|
||||
}
|
||||
@@ -482,7 +488,7 @@ mod tests {
|
||||
"name": "score",
|
||||
"type": "f64",
|
||||
"options": {
|
||||
"indexed": true,
|
||||
"indexed": "index_with_fieldnorms",
|
||||
"fast": "single",
|
||||
"stored": false
|
||||
}
|
||||
@@ -747,7 +753,8 @@ mod tests {
|
||||
"options": {
|
||||
"indexing": {
|
||||
"record": "position",
|
||||
"tokenizer": "default"
|
||||
"tokenizer": "default",
|
||||
"fieldnorms": true
|
||||
},
|
||||
"stored": false
|
||||
}
|
||||
@@ -756,7 +763,7 @@ mod tests {
|
||||
"name": "popularity",
|
||||
"type": "i64",
|
||||
"options": {
|
||||
"indexed": false,
|
||||
"indexed": "no_index",
|
||||
"fast": "single",
|
||||
"stored": true
|
||||
}
|
||||
@@ -777,7 +784,8 @@ mod tests {
|
||||
"options": {
|
||||
"indexing": {
|
||||
"record": "basic",
|
||||
"tokenizer": "raw"
|
||||
"tokenizer": "raw",
|
||||
"fieldnorms": false
|
||||
},
|
||||
"stored": true
|
||||
}
|
||||
@@ -786,7 +794,7 @@ mod tests {
|
||||
"name": "_timestamp",
|
||||
"type": "date",
|
||||
"options": {
|
||||
"indexed": true,
|
||||
"indexed": "index_with_fieldnorms",
|
||||
"fast": "single",
|
||||
"stored": true
|
||||
}
|
||||
@@ -797,7 +805,8 @@ mod tests {
|
||||
"options": {
|
||||
"indexing": {
|
||||
"record": "position",
|
||||
"tokenizer": "default"
|
||||
"tokenizer": "default",
|
||||
"fieldnorms": true
|
||||
},
|
||||
"stored": false
|
||||
}
|
||||
@@ -806,7 +815,7 @@ mod tests {
|
||||
"name": "popularity",
|
||||
"type": "i64",
|
||||
"options": {
|
||||
"indexed": false,
|
||||
"indexed": "no_index",
|
||||
"fast": "single",
|
||||
"stored": true
|
||||
}
|
||||
|
||||
@@ -55,6 +55,7 @@ impl Default for TextOptions {
|
||||
pub struct TextFieldIndexing {
|
||||
record: IndexRecordOption,
|
||||
tokenizer: Cow<'static, str>,
|
||||
fieldnorms: bool,
|
||||
}
|
||||
|
||||
impl Default for TextFieldIndexing {
|
||||
@@ -62,6 +63,7 @@ impl Default for TextFieldIndexing {
|
||||
TextFieldIndexing {
|
||||
tokenizer: Cow::Borrowed("default"),
|
||||
record: IndexRecordOption::Basic,
|
||||
fieldnorms: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -78,6 +80,15 @@ impl TextFieldIndexing {
|
||||
&self.tokenizer
|
||||
}
|
||||
|
||||
pub fn set_fieldnorms(mut self, fieldnorms: bool) -> TextFieldIndexing {
|
||||
self.fieldnorms = fieldnorms;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn fieldnorms(&self) -> bool {
|
||||
self.fieldnorms
|
||||
}
|
||||
|
||||
/// Sets which information should be indexed with the tokens.
|
||||
///
|
||||
/// See [IndexRecordOption](./enum.IndexRecordOption.html) for more detail.
|
||||
@@ -99,6 +110,7 @@ pub const STRING: TextOptions = TextOptions {
|
||||
indexing: Some(TextFieldIndexing {
|
||||
tokenizer: Cow::Borrowed("raw"),
|
||||
record: IndexRecordOption::Basic,
|
||||
fieldnorms: false,
|
||||
}),
|
||||
stored: false,
|
||||
};
|
||||
@@ -108,6 +120,7 @@ pub const TEXT: TextOptions = TextOptions {
|
||||
indexing: Some(TextFieldIndexing {
|
||||
tokenizer: Cow::Borrowed("default"),
|
||||
record: IndexRecordOption::WithFreqsAndPositions,
|
||||
fieldnorms: true,
|
||||
}),
|
||||
stored: false,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user