From 548eb6ebc7bf146055e27c632ebe6574e017e2ed Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 2 Apr 2026 15:30:56 +0800 Subject: [PATCH] Remove Index::tokenizer_for_field, fix BlockSegmentPostings doc comment tokenizer_for_field is a duplicate of SearcherContext::tokenizer_for_field. --- src/core/tests.rs | 14 ----------- src/index/index.rs | 34 ++------------------------ src/postings/block_segment_postings.rs | 2 +- 3 files changed, 3 insertions(+), 47 deletions(-) diff --git a/src/core/tests.rs b/src/core/tests.rs index d97e65884..9c6e8b500 100644 --- a/src/core/tests.rs +++ b/src/core/tests.rs @@ -11,20 +11,6 @@ use crate::{ ReloadPolicy, Searcher, SearcherContext, TantivyDocument, Term, }; -#[test] -fn test_indexer_for_field() { - let mut schema_builder = Schema::builder(); - let num_likes_field = schema_builder.add_u64_field("num_likes", INDEXED); - let body_field = schema_builder.add_text_field("body", TEXT); - let schema = schema_builder.build(); - let index = Index::create_in_ram(schema); - assert!(index.tokenizer_for_field(body_field).is_ok()); - assert_eq!( - format!("{:?}", index.tokenizer_for_field(num_likes_field).err()), - "Some(SchemaError(\"\\\"num_likes\\\" is not a text field.\"))" - ); -} - #[test] fn test_set_tokenizer_manager() { let mut schema_builder = Schema::builder(); diff --git a/src/index/index.rs b/src/index/index.rs index 462c0b8fb..2f6c3e19d 100644 --- a/src/index/index.rs +++ b/src/index/index.rs @@ -23,8 +23,8 @@ use crate::indexer::segment_updater::save_metas; use crate::indexer::{IndexWriter, SingleSegmentIndexWriter}; use crate::reader::{IndexReader, IndexReaderBuilder}; use crate::schema::document::Document; -use crate::schema::{Field, FieldType, Schema}; -use crate::tokenizer::{TextAnalyzer, TokenizerManager}; +use crate::schema::Schema; +use crate::tokenizer::TokenizerManager; fn load_metas( directory: &dyn Directory, @@ -417,36 +417,6 @@ impl Index { &self.fast_field_tokenizers } - /// Get the tokenizer associated with 
a specific field. - pub fn tokenizer_for_field(&self, field: Field) -> crate::Result<TextAnalyzer> { - let field_entry = self.schema.get_field_entry(field); - let field_type = field_entry.field_type(); - let tokenizer_manager: &TokenizerManager = self.tokenizers(); - let indexing_options_opt = match field_type { - FieldType::JsonObject(options) => options.get_text_indexing_options(), - FieldType::Str(options) => options.get_indexing_options(), - _ => { - return Err(TantivyError::SchemaError(format!( - "{:?} is not a text field.", - field_entry.name() - ))) - } - }; - let indexing_options = indexing_options_opt.ok_or_else(|| { - TantivyError::InvalidArgument(format!( - "No indexing options set for field {field_entry:?}" - )) - })?; - - tokenizer_manager - .get(indexing_options.tokenizer()) - .ok_or_else(|| { - TantivyError::InvalidArgument(format!( - "No Tokenizer found for field {field_entry:?}" - )) - }) - } - /// Create a default [`IndexReader`] for the given index. /// /// See [`Index.reader_builder()`]. diff --git a/src/postings/block_segment_postings.rs b/src/postings/block_segment_postings.rs index f527d46c0..f25b183c1 100644 --- a/src/postings/block_segment_postings.rs +++ b/src/postings/block_segment_postings.rs @@ -79,7 +79,7 @@ fn split_into_skips_and_postings( } impl BlockSegmentPostings { - /// Opens a `StandardPostingsReader`. + /// Opens a `BlockSegmentPostings`. /// `doc_freq` is the number of documents in the posting list. /// `record_option` represents the amount of data available according to the schema. /// `requested_option` is the amount of data requested by the user.