mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 16:22:55 +00:00
Compare commits
1 Commits
1686a/pars ... dependabot

| Author | SHA1 | Date |
|---|---|---|
| | 809b9ba100 | |
@@ -94,14 +94,14 @@ impl BitUnpacker {
 #[inline]
 pub fn get(&self, idx: u32, data: &[u8]) -> u64 {
-    let addr_in_bits = idx as usize * self.num_bits as usize;
-    let addr = addr_in_bits >> 3;
+    let addr_in_bits = idx * self.num_bits;
+    let addr = (addr_in_bits >> 3) as usize;
     if addr + 8 > data.len() {
         if self.num_bits == 0 {
             return 0;
         }
         let bit_shift = addr_in_bits & 7;
-        return self.get_slow_path(addr, bit_shift as u32, data);
+        return self.get_slow_path(addr, bit_shift, data);
     }
     let bit_shift = addr_in_bits & 7;
     let bytes: [u8; 8] = (&data[addr..addr + 8]).try_into().unwrap();

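Note: the two variants of this hunk differ in where the bit-address multiplication is widened. Done in 32-bit arithmetic, `idx * num_bits` can wrap once the column exceeds `u32::MAX` bits; widening both operands to `usize` first cannot overflow on 64-bit targets. A standalone sketch of the widened computation (hypothetical helper, not tantivy's API):

fn bit_addr(idx: u32, num_bits: u8) -> (usize, u32) {
    // Widen *before* multiplying so the product cannot wrap on 64-bit targets.
    let addr_in_bits = idx as usize * num_bits as usize;
    (addr_in_bits >> 3, (addr_in_bits & 7) as u32)
}

fn main() {
    // 2^31 - 1 values at 3 bits each exceeds u32::MAX bits...
    assert!(2_147_483_647u32.checked_mul(3).is_none());
    // ...but the widened computation stays exact.
    let (byte_addr, bit_shift) = bit_addr(2_147_483_647, 3);
    println!("byte addr {byte_addr}, bit shift {bit_shift}");
}
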
@@ -1,18 +0,0 @@
-[package]
-name = "tantivy-columnar-inspect"
-version = "0.1.0"
-edition = "2021"
-license = "MIT"
-
-[dependencies]
-tantivy = {path="../..", package="tantivy"}
-columnar = {path="../", package="tantivy-columnar"}
-common = {path="../../common", package="tantivy-common"}
-
-[workspace]
-members = []
-
-[profile.release]
-debug = true
-#debug-assertions = true
-#overflow-checks = true

@@ -1,54 +0,0 @@
-use columnar::ColumnarReader;
-use common::file_slice::{FileSlice, WrapFile};
-use std::io;
-use std::path::Path;
-use tantivy::directory::footer::Footer;
-
-fn main() -> io::Result<()> {
-    println!("Opens a columnar file written by tantivy and validates it.");
-    let path = std::env::args().nth(1).unwrap();
-
-    let path = Path::new(&path);
-    println!("Reading {:?}", path);
-    let _reader = open_and_validate_columnar(path.to_str().unwrap())?;
-
-    Ok(())
-}
-
-pub fn validate_columnar_reader(reader: &ColumnarReader) {
-    let num_rows = reader.num_rows();
-    println!("num_rows: {}", num_rows);
-    let columns = reader.list_columns().unwrap();
-    println!("num columns: {:?}", columns.len());
-    for (col_name, dynamic_column_handle) in columns {
-        let col = dynamic_column_handle.open().unwrap();
-        match col {
-            columnar::DynamicColumn::Bool(_)
-            | columnar::DynamicColumn::I64(_)
-            | columnar::DynamicColumn::U64(_)
-            | columnar::DynamicColumn::F64(_)
-            | columnar::DynamicColumn::IpAddr(_)
-            | columnar::DynamicColumn::DateTime(_)
-            | columnar::DynamicColumn::Bytes(_) => {}
-            columnar::DynamicColumn::Str(str_column) => {
-                let num_vals = str_column.ords().values.num_vals();
-                let num_terms_dict = str_column.num_terms() as u64;
-                let max_ord = str_column.ords().values.iter().max().unwrap_or_default();
-                println!("{col_name:35} num_vals {num_vals:10} \t num_terms_dict {num_terms_dict:8} max_ord: {max_ord:8}",);
-                for ord in str_column.ords().values.iter() {
-                    assert!(ord < num_terms_dict);
-                }
-            }
-        }
-    }
-}
-
-/// Opens a columnar file that was written by tantivy and validates it.
-pub fn open_and_validate_columnar(path: &str) -> io::Result<ColumnarReader> {
-    let wrap_file = WrapFile::new(std::fs::File::open(path)?)?;
-    let slice = FileSlice::new(std::sync::Arc::new(wrap_file));
-    let (_footer, slice) = Footer::extract_footer(slice.clone()).unwrap();
-    let reader = ColumnarReader::open(slice).unwrap();
-    validate_columnar_reader(&reader);
-    Ok(reader)
-}

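Note: the deleted inspector took one CLI argument, the path to a columnar file (presumably a segment's `.fast` file; that extension is my assumption, not stated in the diff), stripped the tantivy footer, opened every column, and for `Str` columns asserted the dictionary-encoding invariant. In isolation that invariant is just:

// Hypothetical standalone version of the check: every stored ordinal must
// index into the term dictionary.
fn check_ords(ords: impl Iterator<Item = u64>, num_terms_dict: u64) {
    for ord in ords {
        assert!(ord < num_terms_dict, "ord {ord} out of bounds");
    }
}

fn main() {
    check_ords([0u64, 2, 1].into_iter(), 3); // passes
}
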
@@ -1,6 +1,5 @@
-use std::fs::File;
 use std::ops::{Deref, Range, RangeBounds};
 use std::path::Path;
 use std::sync::Arc;
 use std::{fmt, io};
 
@@ -178,12 +177,6 @@ fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_range: R)
 }
 
 impl FileSlice {
-    /// Creates a FileSlice from a path.
-    pub fn open(path: &Path) -> io::Result<FileSlice> {
-        let wrap_file = WrapFile::new(File::open(path)?)?;
-        Ok(FileSlice::new(Arc::new(wrap_file)))
-    }
-
     /// Wraps a FileHandle.
     pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
         let num_bytes = file_handle.len();

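Note: callers that relied on the removed `FileSlice::open` can rebuild it from the pieces this diff keeps public, exactly as the deleted inspector did. A hedged sketch using only names visible above (the `common::file_slice` import path comes from the deleted tool):

use std::path::Path;
use std::sync::Arc;

use common::file_slice::{FileSlice, WrapFile};

// Equivalent of the removed convenience constructor: WrapFile adapts a
// std::fs::File into a FileHandle, which FileSlice::new accepts.
fn open_slice(path: &Path) -> std::io::Result<FileSlice> {
    let wrap_file = WrapFile::new(std::fs::File::open(path)?)?;
    Ok(FileSlice::new(Arc::new(wrap_file)))
}
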
@@ -12,4 +12,4 @@ keywords = ["search", "information", "retrieval"]
 edition = "2021"
 
 [dependencies]
-nom = "7"
+nom = "8"

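Note: nom 7 to 8 is a breaking upgrade, not just a version bump. To the best of my recollection (verify against the nom 8 migration notes), combinator results no longer implement `Fn`, so the `combinator(args)(input)` call style seen in the hunks below becomes `combinator(args).parse(input)`. A rough sketch under that assumption:

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::IResult;
use nom::Parser; // brings .parse() into scope for nom 8

fn a_or_b(input: &str) -> IResult<&str, &str> {
    // nom 7 style was: alt((tag("a"), tag("b")))(input)
    alt((tag("a"), tag("b"))).parse(input)
}
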
@@ -321,17 +321,7 @@ fn exists(inp: &str) -> IResult<&str, UserInputLeaf> {
         UserInputLeaf::Exists {
             field: String::new(),
         },
-        tuple((
-            multispace0,
-            char('*'),
-            peek(alt((
-                value(
-                    "",
-                    satisfy(|c: char| c.is_whitespace() || ESCAPE_IN_WORD.contains(&c)),
-                ),
-                eof,
-            ))),
-        )),
+        tuple((multispace0, char('*'))),
     )(inp)
 }

@@ -341,14 +331,7 @@ fn exists_precond(inp: &str) -> IResult<&str, (), ()> {
     peek(tuple((
         field_name,
         multispace0,
-        char('*'),
-        peek(alt((
-            value(
-                "",
-                satisfy(|c: char| c.is_whitespace() || ESCAPE_IN_WORD.contains(&c)),
-            ),
-            eof,
-        ))), // we need to check this isn't a wildcard query
+        char('*'), // when we are here, we know it can't be anything but a exists
     ))),
     )(inp)
     .map_err(|e| e.map(|_| ()))
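Note: the lookahead both hunks delete is what told `a:*` (an exists query) apart from `a:*b` (a leading-wildcard term): the `*` counted as "exists" only when followed by whitespace, an escapable character, or end of input. A self-contained nom 7-style sketch of that idea, matching the call style in the hunk (hypothetical names, character class simplified to whitespace only):

use nom::branch::alt;
use nom::character::complete::{char, multispace0, satisfy};
use nom::combinator::{eof, peek, value};
use nom::sequence::tuple;
use nom::IResult;

// Succeeds on "*" and "* foo" but not on "*b": the peeked lookahead requires
// whitespace or end-of-input right after the star, without consuming it.
fn exists_star(inp: &str) -> IResult<&str, ()> {
    value(
        (),
        tuple((
            multispace0,
            char('*'),
            peek(alt((value("", satisfy(char::is_whitespace)), eof))),
        )),
    )(inp)
}

fn main() {
    assert!(exists_star("*").is_ok());
    assert!(exists_star("* foo").is_ok());
    assert!(exists_star("*b").is_err());
}
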
@@ -1641,19 +1624,13 @@ mod test {
 
 #[test]
 fn test_exist_query() {
-    test_parse_query_to_ast_helper("a:*", "$exists(\"a\")");
-    test_parse_query_to_ast_helper("a: *", "$exists(\"a\")");
+    test_parse_query_to_ast_helper("a:*", "\"a\":*");
+    test_parse_query_to_ast_helper("a: *", "\"a\":*");
+    // an exist followed by default term being b
+    test_is_parse_err("a:*b", "(*\"a\":* *b)");
 
-    test_parse_query_to_ast_helper(
-        "(hello AND toto:*) OR happy",
-        "(?(+hello +$exists(\"toto\")) ?happy)",
-    );
-    test_parse_query_to_ast_helper("(a:*)", "$exists(\"a\")");
-
-    // these are term/wildcard query (not a phrase prefix)
+    // this is a term query (not a phrase prefix)
     test_parse_query_to_ast_helper("a:b*", "\"a\":b*");
-    test_parse_query_to_ast_helper("a:*b", "\"a\":*b");
-    test_parse_query_to_ast_helper(r#"a:*def*"#, "\"a\":*def*");
 }
 
 #[test]

@@ -101,7 +101,7 @@ impl Debug for UserInputLeaf {
         }
         UserInputLeaf::All => write!(formatter, "*"),
         UserInputLeaf::Exists { field } => {
-            write!(formatter, "$exists(\"{field}\")")
+            write!(formatter, "\"{field}\":*")
         }
     }
 }

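Note: `UserInputLeaf`'s `Debug` output doubles as the expected-AST syntax in the parser tests, which is why the `test_exist_query` expectations above change in lockstep with this one `write!` call. A toy reproduction with a stand-in type (not the real enum):

use std::fmt;

struct Exists {
    field: String,
}

impl fmt::Debug for Exists {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let field = &self.field;
        write!(f, "\"{field}\":*")
    }
}

fn main() {
    assert_eq!(format!("{:?}", Exists { field: "a".into() }), "\"a\":*");
}
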
@@ -125,26 +125,9 @@ impl MetricResult {
 }
 
 /// BucketEntry holds bucket aggregation result types.
-// the order of fields is important to deserialize properly
-// Terms must be first because all Terms are valid Range (we ignore unknown fields)
-// Range and Histogram are always ambiguous, they contain the same 3 required fields, and all else
-// is optional Having Range is usually more useful (contains more fields, missing field from
-// Histogram can be obtained by key.to_string())
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum BucketResult {
-    /// This is the term result
-    Terms {
-        /// The buckets.
-        ///
-        /// See [`TermsAggregation`](super::bucket::TermsAggregation)
-        buckets: Vec<BucketEntry>,
-        /// The number of documents that didn’t make it into to TOP N due to shard_size or size
-        sum_other_doc_count: u64,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        /// The upper bound error for the doc count of each term.
-        doc_count_error_upper_bound: Option<u64>,
-    },
     /// This is the range entry for a bucket, which contains a key, count, from, to, and optionally
     /// sub-aggregations.
     Range {

@@ -161,6 +144,18 @@ pub enum BucketResult {
         /// See [`HistogramAggregation`](super::bucket::HistogramAggregation)
         buckets: BucketEntries<BucketEntry>,
     },
+    /// This is the term result
+    Terms {
+        /// The buckets.
+        ///
+        /// See [`TermsAggregation`](super::bucket::TermsAggregation)
+        buckets: Vec<BucketEntry>,
+        /// The number of documents that didn’t make it into to TOP N due to shard_size or size
+        sum_other_doc_count: u64,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        /// The upper bound error for the doc count of each term.
+        doc_count_error_upper_bound: Option<u64>,
+    },
 }
 
 impl BucketResult {

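Note: moving `Terms` from the first variant to the last is behaviorally significant for a `#[serde(untagged)]` enum: serde tries variants in declaration order and keeps the first that fits, and unknown fields do not disqualify a variant. The deleted comment block explained exactly this. A minimal illustration with stand-in types (not tantivy's):

use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum Bucket {
    Terms { buckets: Vec<u64>, sum_other_doc_count: u64 },
    Range { buckets: Vec<u64> },
}

fn main() {
    // Both variants accept this JSON (untagged ignores the extra field for
    // Range), so declaration order decides: here Terms wins.
    let json = r#"{ "buckets": [1, 2], "sum_other_doc_count": 0 }"#;
    let bucket: Bucket = serde_json::from_str(json).unwrap();
    println!("{bucket:?}");
}
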
@@ -34,10 +34,10 @@ use crate::aggregation::*;
 pub struct DateHistogramAggregationReq {
     #[doc(hidden)]
     /// Only for validation
-    pub interval: Option<String>,
+    interval: Option<String>,
     #[doc(hidden)]
     /// Only for validation
-    pub calendar_interval: Option<String>,
+    calendar_interval: Option<String>,
     /// The field to aggregate on.
     pub field: String,
     /// The format to format dates. Unsupported currently.

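Note: making `interval` and `calendar_interval` private does not change the wire format; the fields exist only so that deserialization can see an unsupported key and reject it during validation. A hypothetical sketch of that pattern (field names from the diff, error text invented):

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct DateHistogramReq {
    // Only for validation: accepted by serde, rejected afterwards.
    interval: Option<String>,
    field: String,
}

impl DateHistogramReq {
    fn validate(&self) -> Result<(), String> {
        if self.interval.is_some() {
            return Err("`interval` is unsupported, use a fixed interval".to_string());
        }
        Ok(())
    }
}

fn main() {
    let req: DateHistogramReq =
        serde_json::from_str(r#"{ "field": "ts", "interval": "1d" }"#).unwrap();
    assert!(req.validate().is_err());
}
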
@@ -1,9 +1,3 @@
-//! The footer is a small metadata structure that is appended at the end of every file.
-//!
-//! The footer is used to store a checksum of the file content.
-//! The footer also stores the version of the index format.
-//! This version is used to detect incompatibility between the index and the library version.
-
 use std::io;
 use std::io::Write;
 
@@ -26,22 +20,20 @@ type CrcHashU32 = u32;
 /// A Footer is appended to every file
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct Footer {
-    /// The version of the index format
     pub version: Version,
-    /// The crc32 hash of the body
     pub crc: CrcHashU32,
 }
 
 impl Footer {
-    pub(crate) fn new(crc: CrcHashU32) -> Self {
+    pub fn new(crc: CrcHashU32) -> Self {
         let version = crate::VERSION.clone();
         Footer { version, crc }
     }
 
-    pub(crate) fn crc(&self) -> CrcHashU32 {
+    pub fn crc(&self) -> CrcHashU32 {
         self.crc
     }
-    pub(crate) fn append_footer<W: io::Write>(&self, mut write: &mut W) -> io::Result<()> {
+    pub fn append_footer<W: io::Write>(&self, mut write: &mut W) -> io::Result<()> {
         let mut counting_write = CountingWriter::wrap(&mut write);
         counting_write.write_all(serde_json::to_string(&self)?.as_ref())?;
         let footer_payload_len = counting_write.written_bytes();

@@ -50,7 +42,6 @@ impl Footer {
         Ok(())
     }
 
-    /// Extracts the tantivy Footer from the file and returns the footer and the rest of the file
     pub fn extract_footer(file: FileSlice) -> io::Result<(Footer, FileSlice)> {
         if file.len() < 4 {
             return Err(io::Error::new(

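Note: these hunks imply the footer layout: a JSON-serialized `Footer` payload written at the end of the file, followed by its length, which `extract_footer` reads back, hence the `file.len() < 4` guard. A hedged standalone sketch of that layout; the trailing little-endian `u32` is my assumption, only the JSON payload and the 4-byte minimum appear in the diff:

use std::io::{self, Write};

fn append_footer_sketch<W: Write>(footer_json: &str, write: &mut W) -> io::Result<()> {
    write.write_all(footer_json.as_bytes())?;
    let footer_payload_len = footer_json.len() as u32;
    write.write_all(&footer_payload_len.to_le_bytes())?; // assumed LE u32 tail
    Ok(())
}

fn extract_footer_sketch(file: &[u8]) -> io::Result<(&[u8], &[u8])> {
    if file.len() < 4 {
        return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "file too short"));
    }
    let (rest, len_bytes) = file.split_at(file.len() - 4);
    let footer_len = u32::from_le_bytes(len_bytes.try_into().unwrap()) as usize;
    if footer_len > rest.len() {
        return Err(io::Error::new(io::ErrorKind::InvalidData, "bad footer length"));
    }
    let (body, footer_json) = rest.split_at(rest.len() - footer_len);
    Ok((footer_json, body))
}

fn main() -> io::Result<()> {
    let mut file = b"body bytes".to_vec();
    append_footer_sketch(r#"{"version":1,"crc":42}"#, &mut file)?;
    let (footer_json, body) = extract_footer_sketch(&file)?;
    assert_eq!(body, b"body bytes");
    println!("footer: {}", String::from_utf8_lossy(footer_json));
    Ok(())
}
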
@@ -6,7 +6,7 @@ mod mmap_directory;
 mod directory;
 mod directory_lock;
 mod file_watcher;
-pub mod footer;
+mod footer;
 mod managed_directory;
 mod ram_directory;
 mod watch_event_router;

@@ -1,19 +1,79 @@
 use std::ops::Range;
-use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::atomic::Ordering;
 use std::sync::Arc;
 
 use crate::Opstamp;
 
+#[cfg(not(target_arch = "arm"))]
+mod atomic_impl {
+    use std::sync::atomic::{AtomicU64, Ordering};
+
+    use crate::Opstamp;
+
+    #[derive(Default)]
+    pub struct AtomicU64Wrapper(AtomicU64);
+
+    impl AtomicU64Wrapper {
+        pub fn new(first_opstamp: Opstamp) -> AtomicU64Wrapper {
+            AtomicU64Wrapper(AtomicU64::new(first_opstamp))
+        }
+
+        pub fn fetch_add(&self, val: u64, order: Ordering) -> u64 {
+            self.0.fetch_add(val, order)
+        }
+
+        pub fn revert(&self, val: u64, order: Ordering) -> u64 {
+            self.0.store(val, order);
+            val
+        }
+    }
+}
+
+#[cfg(target_arch = "arm")]
+mod atomic_impl {
+    /// Under other architecture, we rely on a mutex.
+    use std::sync::atomic::Ordering;
+    use std::sync::RwLock;
+
+    use crate::Opstamp;
+
+    #[derive(Default)]
+    pub struct AtomicU64Wrapper(RwLock<u64>);
+
+    impl AtomicU64Wrapper {
+        pub fn new(first_opstamp: Opstamp) -> AtomicU64Wrapper {
+            AtomicU64Wrapper(RwLock::new(first_opstamp))
+        }
+
+        pub fn fetch_add(&self, incr: u64, _order: Ordering) -> u64 {
+            let mut lock = self.0.write().unwrap();
+            let previous_val = *lock;
+            *lock = previous_val + incr;
+            previous_val
+        }
+
+        pub fn revert(&self, val: u64, _order: Ordering) -> u64 {
+            let mut lock = self.0.write().unwrap();
+            *lock = val;
+            val
+        }
+    }
+}
+
+use self::atomic_impl::AtomicU64Wrapper;
+
 /// Stamper provides Opstamps, which is just an auto-increment id to label
 /// an operation.
 ///
 /// Cloning does not "fork" the stamp generation. The stamper actually wraps an `Arc`.
 #[derive(Clone, Default)]
-pub struct Stamper(Arc<AtomicU64>);
+pub struct Stamper(Arc<AtomicU64Wrapper>);
 
 impl Stamper {
     pub fn new(first_opstamp: Opstamp) -> Stamper {
-        Stamper(Arc::new(AtomicU64::new(first_opstamp)))
+        Stamper(Arc::new(AtomicU64Wrapper::new(first_opstamp)))
     }
 
     pub fn stamp(&self) -> Opstamp {

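Note: the point of the wrapper is that 32-bit ARM lacks native 64-bit atomics, so the `cfg(target_arch = "arm")` build falls back to an `RwLock<u64>` while keeping the same three-method surface. Either way the contract mirrors `AtomicU64`, which a few asserts pin down (plain std, nothing tantivy-specific):

use std::sync::atomic::{AtomicU64, Ordering};

fn main() {
    let stamp = AtomicU64::new(10);
    // fetch_add hands out the *previous* value, like Stamper::stamp().
    assert_eq!(stamp.fetch_add(1, Ordering::SeqCst), 10);
    assert_eq!(stamp.fetch_add(1, Ordering::SeqCst), 11);
    // revert() is a plain store that returns the value it stored.
    stamp.store(5, Ordering::SeqCst);
    assert_eq!(stamp.fetch_add(1, Ordering::SeqCst), 5);
}
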
@@ -32,8 +92,7 @@ impl Stamper {
 
     /// Reverts the stamper to a given `Opstamp` value and returns it
     pub fn revert(&self, to_opstamp: Opstamp) -> Opstamp {
-        self.0.store(to_opstamp, Ordering::SeqCst);
-        to_opstamp
+        self.0.revert(to_opstamp, Ordering::SeqCst)
     }
 }