From fbda511a1a0e7ce494ab70ce91c16ad718bce049 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 20 Apr 2023 11:37:45 +0900 Subject: [PATCH] Making more things public for quickwit. (#2005) --- src/core/json_utils.rs | 5 ++--- src/core/mod.rs | 1 + src/lib.rs | 2 ++ src/query/range_query/range_query.rs | 24 ++++++++++++++++++++++++ src/schema/term.rs | 3 ++- src/tokenizer/simple_tokenizer.rs | 1 + 6 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/core/json_utils.rs b/src/core/json_utils.rs index 749045ea0..7f3cb2c8f 100644 --- a/src/core/json_utils.rs +++ b/src/core/json_utils.rs @@ -201,7 +201,7 @@ fn infer_type_from_str(text: &str) -> TextOrDateTime { } // Tries to infer a JSON type from a string. -pub(crate) fn convert_to_fast_value_and_get_term( +pub fn convert_to_fast_value_and_get_term( json_term_writer: &mut JsonTermWriter, phrase: &str, ) -> Option<Term> { @@ -405,8 +405,7 @@ impl<'a> JsonTermWriter<'a> { .append_bytes(value.to_be_bytes().as_slice()); } - #[cfg(test)] - pub(crate) fn set_str(&mut self, text: &str) { + pub fn set_str(&mut self, text: &str) { self.close_path_and_set_type(Type::Str); self.term_buffer.append_bytes(text.as_bytes()); } diff --git a/src/core/mod.rs b/src/core/mod.rs index 38976378d..b0b674c2e 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -2,6 +2,7 @@ mod executor; pub mod index; mod index_meta; mod inverted_index_reader; +#[doc(hidden)] pub mod json_utils; pub mod searcher; mod segment; diff --git a/src/lib.rs b/src/lib.rs index ac3c8e2b7..0940aa8b7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -173,6 +173,8 @@ use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; pub use self::docset::{DocSet, TERMINATED}; +#[doc(hidden)] +pub use crate::core::json_utils; pub use crate::core::{ Executor, Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader, Order, Searcher, SearcherGeneration, Segment, SegmentComponent, SegmentId, SegmentMeta, diff --git 
a/src/query/range_query/range_query.rs b/src/query/range_query/range_query.rs index 83bf6ae8e..61b65f34c 100644 --- a/src/query/range_query/range_query.rs +++ b/src/query/range_query/range_query.rs @@ -1,4 +1,5 @@ use std::io; +use std::net::Ipv6Addr; use std::ops::{Bound, Range}; use common::{BinarySerializable, BitSet}; @@ -196,6 +197,29 @@ impl RangeQuery { } } + /// Create a new `RangeQuery` over a `ip` field. + /// + /// If the field is not of the type `ip`, tantivy + /// will panic when the `Weight` object is created. + pub fn new_ip_bounds( + field: String, + left_bound: Bound<Ipv6Addr>, + right_bound: Bound<Ipv6Addr>, + ) -> RangeQuery { + let make_term_val = |val: &Ipv6Addr| { + Term::from_field_ip_addr(Field::from_field_id(0), *val) + .value_bytes() + .to_owned() + }; + RangeQuery { + field, + value_type: Type::IpAddr, + left_bound: map_bound(&left_bound, &make_term_val), + right_bound: map_bound(&right_bound, &make_term_val), + limit: None, + } + } + /// Create a new `RangeQuery` over a `u64` field. /// /// If the field is not of the type `u64`, tantivy diff --git a/src/schema/term.rs b/src/schema/term.rs index 747cb434e..da5d236e9 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -30,7 +30,8 @@ where B: AsRef<[u8]>; const TERM_METADATA_LENGTH: usize = 5; impl Term { - pub(crate) fn with_capacity(capacity: usize) -> Term { + /// Create a new Term with a buffer with a given capacity. + pub fn with_capacity(capacity: usize) -> Term { let mut data = Vec::with_capacity(TERM_METADATA_LENGTH + capacity); data.resize(TERM_METADATA_LENGTH, 0u8); Term(data) diff --git a/src/tokenizer/simple_tokenizer.rs b/src/tokenizer/simple_tokenizer.rs index 2b9163b23..dc9a3b126 100644 --- a/src/tokenizer/simple_tokenizer.rs +++ b/src/tokenizer/simple_tokenizer.rs @@ -6,6 +6,7 @@ use super::{Token, TokenStream, Tokenizer}; #[derive(Clone)] pub struct SimpleTokenizer; +/// TokenStream produced by the `SimpleTokenizer`. 
pub struct SimpleTokenStream<'a> { text: &'a str, chars: CharIndices<'a>,