mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-22 18:19:58 +00:00
@@ -11,9 +11,10 @@ use columnar::Column;
|
||||
// ---
|
||||
// Importing tantivy...
|
||||
use tantivy::collector::{Collector, SegmentCollector};
|
||||
use tantivy::index::SegmentReader;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
|
||||
use tantivy::{doc, Index, IndexWriter, Score, SegmentReader};
|
||||
use tantivy::{doc, Index, IndexWriter, Score};
|
||||
|
||||
#[derive(Default)]
|
||||
struct Stats {
|
||||
|
||||
@@ -13,7 +13,7 @@ fn main() -> tantivy::Result<()> {
|
||||
let opts = DateOptions::from(INDEXED)
|
||||
.set_stored()
|
||||
.set_fast()
|
||||
.set_precision(tantivy::DateTimePrecision::Seconds);
|
||||
.set_precision(tantivy::schema::DateTimePrecision::Seconds);
|
||||
// Add `occurred_at` date field type
|
||||
let occurred_at = schema_builder.add_date_field("occurred_at", opts);
|
||||
let event_type = schema_builder.add_text_field("event", STRING | STORED);
|
||||
|
||||
@@ -7,10 +7,11 @@
|
||||
// the list of documents containing a term, getting
|
||||
// its term frequency, and accessing its positions.
|
||||
|
||||
use tantivy::postings::Postings;
|
||||
// ---
|
||||
// Importing tantivy...
|
||||
use tantivy::schema::*;
|
||||
use tantivy::{doc, DocSet, Index, IndexWriter, Postings, TERMINATED};
|
||||
use tantivy::{doc, DocSet, Index, IndexWriter, TERMINATED};
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// We first create a schema for the sake of the
|
||||
|
||||
@@ -3,10 +3,11 @@ use std::collections::{HashMap, HashSet};
|
||||
use std::sync::{Arc, RwLock, Weak};
|
||||
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::index::SegmentId;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::{Schema, FAST, TEXT};
|
||||
use tantivy::{
|
||||
doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration, SegmentId,
|
||||
doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration,
|
||||
SegmentReader, Warmer,
|
||||
};
|
||||
|
||||
|
||||
@@ -17,7 +17,8 @@ use super::metric::{
|
||||
use super::segment_agg_result::AggregationLimits;
|
||||
use super::VecWithNames;
|
||||
use crate::aggregation::{f64_to_fastfield_u64, Key};
|
||||
use crate::{SegmentOrdinal, SegmentReader};
|
||||
use crate::index::SegmentReader;
|
||||
use crate::SegmentOrdinal;
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct AggregationsWithAccessor {
|
||||
|
||||
@@ -8,7 +8,8 @@ use super::segment_agg_result::{
|
||||
};
|
||||
use crate::aggregation::agg_req_with_accessor::get_aggs_with_segment_accessor_and_validate;
|
||||
use crate::collector::{Collector, SegmentCollector};
|
||||
use crate::{DocId, SegmentOrdinal, SegmentReader, TantivyError};
|
||||
use crate::index::SegmentReader;
|
||||
use crate::{DocId, SegmentOrdinal, TantivyError};
|
||||
|
||||
/// The default max bucket count, before the aggregation fails.
|
||||
pub const DEFAULT_BUCKET_LIMIT: u32 = 65000;
|
||||
|
||||
@@ -4,7 +4,8 @@ use std::marker::PhantomData;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::top_score_collector::TopNComputer;
|
||||
use crate::{DocAddress, DocId, SegmentOrdinal, SegmentReader};
|
||||
use crate::index::SegmentReader;
|
||||
use crate::{DocAddress, DocId, SegmentOrdinal};
|
||||
|
||||
/// Contains a feature (field, score, etc.) of a document along with the document address.
|
||||
///
|
||||
|
||||
@@ -4,13 +4,13 @@ use std::{fmt, io};
|
||||
|
||||
use crate::collector::Collector;
|
||||
use crate::core::Executor;
|
||||
use crate::index::SegmentReader;
|
||||
use crate::index::{SegmentId, SegmentReader};
|
||||
use crate::query::{Bm25StatisticsProvider, EnableScoring, Query};
|
||||
use crate::schema::document::DocumentDeserialize;
|
||||
use crate::schema::{Schema, Term};
|
||||
use crate::space_usage::SearcherSpaceUsage;
|
||||
use crate::store::{CacheStats, StoreReader};
|
||||
use crate::{DocAddress, Index, Opstamp, SegmentId, TrackedObject};
|
||||
use crate::{DocAddress, Index, Opstamp, TrackedObject};
|
||||
|
||||
/// Identifies the searcher generation accessed by a [`Searcher`].
|
||||
///
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
use crate::collector::Count;
|
||||
use crate::directory::{RamDirectory, WatchCallback};
|
||||
use crate::index::SegmentId;
|
||||
use crate::indexer::{LogMergePolicy, NoMergePolicy};
|
||||
use crate::postings::Postings;
|
||||
use crate::query::TermQuery;
|
||||
use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, STRING, TEXT};
|
||||
use crate::tokenizer::TokenizerManager;
|
||||
use crate::{
|
||||
Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, Postings,
|
||||
ReloadPolicy, SegmentId, TantivyDocument, Term,
|
||||
Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, ReloadPolicy,
|
||||
TantivyDocument, Term,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -80,7 +80,7 @@ mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
use columnar::StrColumn;
|
||||
use common::{ByteCount, HasLen, TerminatingWrite};
|
||||
use common::{ByteCount, DateTimePrecision, HasLen, TerminatingWrite};
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::rngs::StdRng;
|
||||
@@ -88,14 +88,15 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::directory::{Directory, RamDirectory, WritePtr};
|
||||
use crate::index::SegmentId;
|
||||
use crate::merge_policy::NoMergePolicy;
|
||||
use crate::schema::{
|
||||
Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder, TantivyDocument,
|
||||
TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
|
||||
DateOptions, Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder,
|
||||
TantivyDocument, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
|
||||
};
|
||||
use crate::time::OffsetDateTime;
|
||||
use crate::tokenizer::{LowerCaser, RawTokenizer, TextAnalyzer, TokenizerManager};
|
||||
use crate::{DateOptions, DateTimePrecision, Index, IndexWriter, SegmentId, SegmentReader};
|
||||
use crate::{Index, IndexWriter, SegmentReader};
|
||||
|
||||
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
use std::io;
|
||||
|
||||
use columnar::{ColumnarWriter, NumericalValue};
|
||||
use common::JsonPathWriter;
|
||||
use common::{DateTimePrecision, JsonPathWriter};
|
||||
use tokenizer_api::Token;
|
||||
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::schema::document::{Document, ReferenceValue, ReferenceValueLeaf, Value};
|
||||
use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type};
|
||||
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
|
||||
use crate::{DateTimePrecision, DocId, TantivyError};
|
||||
use crate::{DocId, TantivyError};
|
||||
|
||||
/// Only index JSON down to a depth of 20.
|
||||
/// This is mostly to guard us from a stack overflow triggered by malicious input.
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
//! # Index Module
|
||||
//!
|
||||
//! The `index` module in Tantivy contains core components to read and write indexes.
|
||||
//!
|
||||
//! It contains `Index` and `Segment`, where an `Index` consists of one or more `Segment`s.
|
||||
|
||||
@@ -246,8 +246,9 @@ impl DeleteCursor {
|
||||
mod tests {
|
||||
|
||||
use super::{DeleteOperation, DeleteQueue};
|
||||
use crate::index::SegmentReader;
|
||||
use crate::query::{Explanation, Scorer, Weight};
|
||||
use crate::{DocId, Score, SegmentReader};
|
||||
use crate::{DocId, Score};
|
||||
|
||||
struct DummyWeight;
|
||||
impl Weight for DummyWeight {
|
||||
|
||||
@@ -144,9 +144,9 @@ mod tests {
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use super::*;
|
||||
use crate::index::SegmentMetaInventory;
|
||||
use crate::index::{SegmentId, SegmentMetaInventory};
|
||||
use crate::schema;
|
||||
use crate::schema::INDEXED;
|
||||
use crate::{schema, SegmentId};
|
||||
|
||||
static INVENTORY: Lazy<SegmentMetaInventory> = Lazy::new(SegmentMetaInventory::default);
|
||||
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use std::collections::HashSet;
|
||||
use std::ops::Deref;
|
||||
|
||||
use crate::{Inventory, Opstamp, SegmentId, TrackedObject};
|
||||
use crate::index::SegmentId;
|
||||
use crate::{Inventory, Opstamp, TrackedObject};
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct MergeOperationInventory(Inventory<InnerMergeOperation>);
|
||||
|
||||
@@ -13,7 +13,7 @@ use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::fastfield::{AliveBitSet, FastFieldNotAvailableError};
|
||||
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
|
||||
use crate::index::{Segment, SegmentReader};
|
||||
use crate::index::{Segment, SegmentComponent, SegmentReader};
|
||||
use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
|
||||
use crate::indexer::SegmentSerializer;
|
||||
use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
|
||||
@@ -21,8 +21,7 @@ use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
|
||||
use crate::store::StoreWriter;
|
||||
use crate::termdict::{TermMerger, TermOrdinal};
|
||||
use crate::{
|
||||
DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
|
||||
SegmentComponent, SegmentOrdinal,
|
||||
DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order, SegmentOrdinal,
|
||||
};
|
||||
|
||||
/// Segment's max doc must be `< MAX_DOC_LIMIT`.
|
||||
@@ -794,7 +793,7 @@ mod tests {
|
||||
BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
|
||||
};
|
||||
use crate::collector::{Count, FacetCollector};
|
||||
use crate::index::Index;
|
||||
use crate::index::{Index, SegmentId};
|
||||
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
|
||||
use crate::schema::document::Value;
|
||||
use crate::schema::{
|
||||
@@ -804,7 +803,7 @@ mod tests {
|
||||
use crate::time::OffsetDateTime;
|
||||
use crate::{
|
||||
assert_nearly_equals, schema, DateTime, DocAddress, DocId, DocSet, IndexSettings,
|
||||
IndexSortByField, IndexWriter, Order, Searcher, SegmentId,
|
||||
IndexSortByField, IndexWriter, Order, Searcher,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -3,6 +3,7 @@ mod tests {
|
||||
use crate::collector::TopDocs;
|
||||
use crate::fastfield::AliveBitSet;
|
||||
use crate::index::Index;
|
||||
use crate::postings::Postings;
|
||||
use crate::query::QueryParser;
|
||||
use crate::schema::document::Value;
|
||||
use crate::schema::{
|
||||
@@ -10,8 +11,8 @@ mod tests {
|
||||
TextFieldIndexing, TextOptions,
|
||||
};
|
||||
use crate::{
|
||||
DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, Postings,
|
||||
TantivyDocument, Term,
|
||||
DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, TantivyDocument,
|
||||
Term,
|
||||
};
|
||||
|
||||
fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
|
||||
|
||||
@@ -7,7 +7,7 @@ use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
|
||||
use super::operation::AddOperation;
|
||||
use crate::fastfield::FastFieldsWriter;
|
||||
use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
|
||||
use crate::index::Segment;
|
||||
use crate::index::{Segment, SegmentComponent};
|
||||
use crate::indexer::segment_serializer::SegmentSerializer;
|
||||
use crate::json_utils::{index_json_value, IndexingPositionsPerPath};
|
||||
use crate::postings::{
|
||||
@@ -18,7 +18,7 @@ use crate::schema::document::{Document, Value};
|
||||
use crate::schema::{FieldEntry, FieldType, Schema, Term, DATE_TIME_PRECISION_INDEXED};
|
||||
use crate::store::{StoreReader, StoreWriter};
|
||||
use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
|
||||
use crate::{DocId, Opstamp, SegmentComponent, TantivyError};
|
||||
use crate::{DocId, Opstamp, TantivyError};
|
||||
|
||||
/// Computes the initial size of the hash table.
|
||||
///
|
||||
@@ -498,7 +498,7 @@ mod tests {
|
||||
use crate::collector::{Count, TopDocs};
|
||||
use crate::directory::RamDirectory;
|
||||
use crate::fastfield::FastValue;
|
||||
use crate::postings::TermInfo;
|
||||
use crate::postings::{Postings, TermInfo};
|
||||
use crate::query::{PhraseQuery, QueryParser};
|
||||
use crate::schema::document::Value;
|
||||
use crate::schema::{
|
||||
@@ -510,8 +510,8 @@ mod tests {
|
||||
use crate::time::OffsetDateTime;
|
||||
use crate::tokenizer::{PreTokenizedString, Token};
|
||||
use crate::{
|
||||
DateTime, Directory, DocAddress, DocSet, Index, IndexWriter, Postings, TantivyDocument,
|
||||
Term, TERMINATED,
|
||||
DateTime, Directory, DocAddress, DocSet, Index, IndexWriter, TantivyDocument, Term,
|
||||
TERMINATED,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
||||
20
src/lib.rs
20
src/lib.rs
@@ -216,11 +216,6 @@ use once_cell::sync::Lazy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
|
||||
#[deprecated(
|
||||
since = "0.22.0",
|
||||
note = "Will be removed in tantivy 0.23. Use export from snippet module instead"
|
||||
)]
|
||||
pub use self::snippet::{Snippet, SnippetGenerator};
|
||||
#[doc(hidden)]
|
||||
pub use crate::core::json_utils;
|
||||
pub use crate::core::{Executor, Searcher, SearcherGeneration};
|
||||
@@ -228,16 +223,10 @@ pub use crate::directory::Directory;
|
||||
#[allow(deprecated)] // Remove with index sorting
|
||||
pub use crate::index::{
|
||||
Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
|
||||
Segment, SegmentComponent, SegmentId, SegmentMeta, SegmentReader,
|
||||
Segment, SegmentMeta, SegmentReader,
|
||||
};
|
||||
#[deprecated(
|
||||
since = "0.22.0",
|
||||
note = "Will be removed in tantivy 0.23. Use export from indexer module instead"
|
||||
)]
|
||||
pub use crate::indexer::PreparedCommit;
|
||||
pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
|
||||
pub use crate::postings::Postings;
|
||||
pub use crate::schema::{DateOptions, DateTimePrecision, Document, TantivyDocument, Term};
|
||||
pub use crate::schema::{Document, TantivyDocument, Term};
|
||||
|
||||
/// Index format version.
|
||||
const INDEX_FORMAT_VERSION: u32 = 6;
|
||||
@@ -392,9 +381,10 @@ pub mod tests {
|
||||
use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::index::SegmentReader;
|
||||
use crate::merge_policy::NoMergePolicy;
|
||||
use crate::postings::Postings;
|
||||
use crate::query::BooleanQuery;
|
||||
use crate::schema::*;
|
||||
use crate::{DateTime, DocAddress, Index, IndexWriter, Postings, ReloadPolicy};
|
||||
use crate::{DateTime, DocAddress, Index, IndexWriter, ReloadPolicy};
|
||||
|
||||
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
|
||||
let mut buffer = Vec::new();
|
||||
@@ -1109,9 +1099,9 @@ pub mod tests {
|
||||
#[test]
|
||||
fn test_update_via_delete_insert() -> crate::Result<()> {
|
||||
use crate::collector::Count;
|
||||
use crate::index::SegmentId;
|
||||
use crate::indexer::NoMergePolicy;
|
||||
use crate::query::AllQuery;
|
||||
use crate::SegmentId;
|
||||
|
||||
const DOC_COUNT: u64 = 2u64;
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ pub struct InvertedIndexSerializer {
|
||||
impl InvertedIndexSerializer {
|
||||
/// Open a new `InvertedIndexSerializer` for the given segment
|
||||
pub fn open(segment: &mut Segment) -> crate::Result<InvertedIndexSerializer> {
|
||||
use crate::SegmentComponent::{Positions, Postings, Terms};
|
||||
use crate::index::SegmentComponent::{Positions, Postings, Terms};
|
||||
let inv_index_serializer = InvertedIndexSerializer {
|
||||
terms_write: CompositeWrite::wrap(segment.open_write(Terms)?),
|
||||
postings_write: CompositeWrite::wrap(segment.open_write(Postings)?),
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use super::Scorer;
|
||||
use crate::docset::TERMINATED;
|
||||
use crate::index::SegmentReader;
|
||||
use crate::query::explanation::does_not_match;
|
||||
use crate::query::{EnableScoring, Explanation, Query, Weight};
|
||||
use crate::{DocId, DocSet, Score, Searcher, SegmentReader};
|
||||
use crate::{DocId, DocSet, Score, Searcher};
|
||||
|
||||
/// `EmptyQuery` is a dummy `Query` in which no document matches.
|
||||
///
|
||||
|
||||
@@ -127,6 +127,7 @@ impl Scorer for TermScorer {
|
||||
mod tests {
|
||||
use proptest::prelude::*;
|
||||
|
||||
use crate::index::SegmentId;
|
||||
use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
|
||||
use crate::merge_policy::NoMergePolicy;
|
||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||
@@ -134,8 +135,7 @@ mod tests {
|
||||
use crate::query::{Bm25Weight, EnableScoring, Scorer, TermQuery};
|
||||
use crate::schema::{IndexRecordOption, Schema, TEXT};
|
||||
use crate::{
|
||||
assert_nearly_equals, DocId, DocSet, Index, IndexWriter, Score, Searcher, SegmentId, Term,
|
||||
TERMINATED,
|
||||
assert_nearly_equals, DocId, DocSet, Index, IndexWriter, Score, Searcher, Term, TERMINATED,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -179,9 +179,10 @@ mod tests {
|
||||
use super::Warmer;
|
||||
use crate::core::searcher::SearcherGeneration;
|
||||
use crate::directory::RamDirectory;
|
||||
use crate::index::SegmentId;
|
||||
use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
|
||||
use crate::schema::{Schema, INDEXED};
|
||||
use crate::{Index, IndexSettings, ReloadPolicy, Searcher, SegmentId};
|
||||
use crate::{Index, IndexSettings, ReloadPolicy, Searcher};
|
||||
|
||||
#[derive(Default)]
|
||||
struct TestWarmer {
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::ops::BitOr;
|
||||
|
||||
use crate::schema::{NumericOptions, TextOptions};
|
||||
use crate::DateOptions;
|
||||
use crate::schema::{DateOptions, NumericOptions, TextOptions};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct StoredFlag;
|
||||
|
||||
@@ -12,8 +12,8 @@ use std::collections::HashMap;
|
||||
use common::ByteCount;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::index::SegmentComponent;
|
||||
use crate::schema::Field;
|
||||
use crate::SegmentComponent;
|
||||
|
||||
/// Enum containing any of the possible space usage results for segment components.
|
||||
pub enum ComponentSpaceUsage {
|
||||
@@ -115,7 +115,7 @@ impl SegmentSpaceUsage {
|
||||
/// Use the components directly if this is somehow in performance critical code.
|
||||
pub fn component(&self, component: SegmentComponent) -> ComponentSpaceUsage {
|
||||
use self::ComponentSpaceUsage::*;
|
||||
use crate::SegmentComponent::*;
|
||||
use crate::index::SegmentComponent::*;
|
||||
match component {
|
||||
Postings => PerField(self.postings().clone()),
|
||||
Positions => PerField(self.positions().clone()),
|
||||
|
||||
Reference in New Issue
Block a user