Compare commits


13 Commits
0.15 ... 0.15.1

Author                 SHA1        Message                                                             Date
PSeitz                 5209238c1b  use github actions for tests                                        2021-06-14 12:51:46 +02:00
Paul Masurel           7ef25ec400  Bump to 0.15.1 to publish bugfix                                    2021-06-14 18:45:38 +09:00
PSeitz                 221e7cbb55  Merge pull request #1076 from appaquet/fix/store-reader-iterator   2021-06-14 11:22:58 +02:00
                                   Fix panic in store reader raw document iterator during segment merge
Pascal Seitz           873ac1a3ac  cleanup import                                                      2021-06-14 10:31:45 +02:00
Pascal Seitz           ebe55a7ae1  refactor test, fixes #1077                                          2021-06-14 10:10:05 +02:00
                                   replace test with smaller test in doc_store
Bernard Swart          9f32d40b27  Misspelling of misspelled was fixed (#1078)                         2021-06-14 16:29:12 +09:00
Andre-Philippe Paquet  8ae10a930a  fix formatting                                                      2021-06-13 17:23:40 -04:00
Andre-Philippe Paquet  473a346814  remove debugging                                                    2021-06-13 16:49:44 -04:00
Andre-Philippe Paquet  3a8a0fe79a  add fuzzy merge test                                                2021-06-13 16:42:24 -04:00
Andre-Philippe Paquet  511dc8f87f  fix store reader iterator                                           2021-06-13 16:00:13 -04:00
Paul Masurel           3901295329  Bumped query-grammar version                                        2021-06-07 10:00:14 +09:00
Paul Masurel           f5918c6c74  Completed bitpacker README                                          2021-06-07 09:57:17 +09:00
Paul Masurel           abe6b4baec  Bumped tantivy version to 0.15                                      2021-06-07 09:52:48 +09:00
8 changed files with 82 additions and 8 deletions

.github/workflows/test.yml (new file, +24)

@@ -0,0 +1,24 @@
+name: Rust
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Build
+      run: cargo build --verbose --workspace
+    - name: Run tests
+      run: cargo test --verbose --workspace
+    - name: Check Formatting
+      run: cargo fmt --all -- --check

CHANGELOG.md

@@ -1,3 +1,7 @@
+Tantivy 0.15.1
+=========================
+- Major bugfix. DocStore panics when first block is deleted. (@appaquet) #1077
+
 Tantivy 0.15.0
 =========================
 - API Changes. Using Range instead of (start, end) in the API and internals (`FileSlice`, `OwnedBytes`, `Snippets`, ...)
@@ -8,7 +12,7 @@ Tantivy 0.15.0
 - Bugfix consistent tie break handling in facet's topk (@hardikpnsp) #357
 - Date field support for range queries (@rihardsk) #516
 - Added lz4-flex as the default compression scheme in tantivy (@PSeitz) #1009
-- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@pmasurel)
+- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@fulmicoton)
 - Simplified positions index format (@fulmicoton) #1022
 - Moved bitpacking to bitpacker subcrate and add BlockedBitpacker, which bitpacks blocks of 128 elements (@PSeitz) #1030
 - Added support for more-like-this query in tantivy (@evanxg852000) #1011
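
For context on the 0.15.1 entry above: the panic fired when `StoreReader::iter` crossed into a doc-store block whose leading documents were all deleted. The sketch below condenses the regression test added further down in this compare; `write_lorem_ipsum_store`, `DeleteBitSet::for_test`, and `Compressor` are crate-internal test helpers visible in that diff, so this is an illustration that only compiles inside tantivy's own store test module.

    // Sketch of the #1077 failure mode, condensed from the regression test
    // in this compare (crate-internal helpers; illustration only).
    fn iter_store_with_deleted_leading_block() -> crate::Result<()> {
        let num_docs: u32 = 1_000;
        // Delete a run of docs that spans a checkpoint boundary, so one
        // doc-store block begins with deleted documents.
        let deleted_docids: Vec<u32> = (200..300).collect();
        let delete_bitset = DeleteBitSet::for_test(&deleted_docids, num_docs);

        let directory = RamDirectory::create();
        let store_wrt = directory.open_write(Path::new("store"))?;
        write_lorem_ipsum_store(store_wrt, num_docs as usize, Compressor::Lz4);

        let store = StoreReader::open(directory.open_read(Path::new("store"))?)?;
        // Before 511dc8f87f this iterator panicked once it reached the block
        // whose first documents were deleted.
        for doc in store.iter(Some(&delete_bitset)) {
            doc?; // every alive document must still decode
        }
        Ok(())
    }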

Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.14.0"
+version = "0.15.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -33,7 +33,7 @@ levenshtein_automata = "0.2"
 uuid = { version = "0.8.2", features = ["v4", "serde"] }
 crossbeam = "0.8"
 futures = { version = "0.3.15", features = ["thread-pool"] }
-tantivy-query-grammar = { version="0.14.0", path="./query-grammar" }
+tantivy-query-grammar = { version="0.15.0", path="./query-grammar" }
 tantivy-bitpacker = { version="0.1", path="./bitpacker" }
 stable_deref_trait = "1.2"
 rust-stemmers = "1.2"

bitpacker/Cargo.toml

@@ -2,6 +2,13 @@
name = "tantivy-bitpacker"
version = "0.1.0"
edition = "2018"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = []
description = """Tantivy-sub crate: bitpacking"""
repository = "https://github.com/tantivy-search/tantivy"
keywords = []
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

examples/deleting_updating_documents.rs

@@ -90,7 +90,7 @@ fn main() -> tantivy::Result<()> {
     let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711");
-    // Oops our frankenstein doc seems mispelled
+    // Oops our frankenstein doc seems misspelled
     let frankenstein_doc_misspelled = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap();
     assert_eq!(
         schema.to_json(&frankenstein_doc_misspelled),
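
For orientation, the `extract_doc_given_isbn` helper used in this example is presumably a term lookup that returns the stored document. A hedged sketch of such a helper, assuming tantivy 0.15's public API: the name and signature are taken from the call site above, but the body is guessed, not taken from this diff.

    use tantivy::collector::TopDocs;
    use tantivy::query::TermQuery;
    use tantivy::schema::{Document, IndexRecordOption};
    use tantivy::{IndexReader, Term};

    // Guessed implementation: fetch the single stored document whose
    // `isbn` field matches the given term, or None if no doc matches.
    fn extract_doc_given_isbn(
        reader: &IndexReader,
        isbn_term: &Term,
    ) -> tantivy::Result<Option<Document>> {
        let searcher = reader.searcher();
        let query = TermQuery::new(isbn_term.clone(), IndexRecordOption::Basic);
        let top_docs = searcher.search(&query, &TopDocs::with_limit(1))?;
        if let Some((_score, doc_address)) = top_docs.first() {
            Ok(Some(searcher.doc(*doc_address)?))
        } else {
            Ok(None)
        }
    }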

query-grammar/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-query-grammar"
-version = "0.14.0"
+version = "0.15.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]

src/store/mod.rs

@@ -57,6 +57,7 @@ pub mod tests {
     use futures::executor::block_on;
     use super::*;
+    use crate::fastfield::DeleteBitSet;
     use crate::schema::{self, FieldValue, TextFieldIndexing, STORED, TEXT};
     use crate::schema::{Document, TextOptions};
     use crate::{
@@ -107,15 +108,51 @@ pub mod tests {
         schema
     }
 
+    const NUM_DOCS: usize = 1_000;
+
+    #[test]
+    fn test_doc_store_iter_with_delete_bug_1077() -> crate::Result<()> {
+        // this will cover deletion of the first element in a checkpoint
+        let deleted_docids = (200..300).collect::<Vec<_>>();
+        let delete_bitset = DeleteBitSet::for_test(&deleted_docids, NUM_DOCS as u32);
+        let path = Path::new("store");
+        let directory = RamDirectory::create();
+        let store_wrt = directory.open_write(path)?;
+        let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, Compressor::Lz4);
+        let field_title = schema.get_field("title").unwrap();
+        let store_file = directory.open_read(path)?;
+        let store = StoreReader::open(store_file)?;
+        for i in 0..NUM_DOCS as u32 {
+            assert_eq!(
+                *store
+                    .get(i)?
+                    .get_first(field_title)
+                    .unwrap()
+                    .text()
+                    .unwrap(),
+                format!("Doc {}", i)
+            );
+        }
+
+        for (_, doc) in store.iter(Some(&delete_bitset)).enumerate() {
+            let doc = doc?;
+            let title_content = doc.get_first(field_title).unwrap().text().unwrap();
+            if !title_content.starts_with("Doc ") {
+                panic!("unexpected title_content {}", title_content);
+            }
+        }
+        Ok(())
+    }
+
     fn test_store(compressor: Compressor) -> crate::Result<()> {
         let path = Path::new("store");
         let directory = RamDirectory::create();
         let store_wrt = directory.open_write(path)?;
-        let schema = write_lorem_ipsum_store(store_wrt, 1_000, compressor);
+        let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, compressor);
         let field_title = schema.get_field("title").unwrap();
         let store_file = directory.open_read(path)?;
         let store = StoreReader::open(store_file)?;
-        for i in 0..1_000 {
+        for i in 0..NUM_DOCS as u32 {
             assert_eq!(
                 *store
                     .get(i)?

src/store/reader.rs

@@ -166,6 +166,7 @@ impl StoreReader {
             .map(|checkpoint| self.read_block(&checkpoint).map_err(|e| e.kind())); // map error in order to enable cloning
         let mut block_start_pos = 0;
         let mut num_skipped = 0;
+        let mut reset_block_pos = false;
         (0..last_docid)
             .filter_map(move |doc_id| {
                 // filter_map is only used to resolve lifetime issues between the two closures on
@@ -175,8 +176,8 @@ impl StoreReader {
                     // we keep the number of skipped documents to move forward in the map block
                     num_skipped += 1;
                 }
 
                 // check move to next checkpoint
-                let mut reset_block_pos = false;
                 if doc_id >= curr_checkpoint.as_ref().unwrap().doc_range.end {
                     curr_checkpoint = checkpoint_block_iter.next();
                     curr_block = curr_checkpoint
@@ -190,6 +191,7 @@ impl StoreReader {
                     let ret = Some((curr_block.clone(), num_skipped, reset_block_pos));
                     // the map block will move over the num_skipped, so we reset to 0
                     num_skipped = 0;
+                    reset_block_pos = false;
                     ret
                 } else {
                     None
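
The essence of the fix in this last file: `reset_block_pos` was previously declared inside the `filter_map` closure, so it was re-initialized to `false` on every call, and a block crossing detected while skipping deleted documents was forgotten before the next alive document could consume it. Hoisting the declaration out makes the flag persist across calls. A standalone toy sketch of the pattern, using no tantivy types:

    // Toy illustration of the fix: a flag recording an event (crossing a
    // block boundary) must outlive individual closure calls, because the
    // doc on which the crossing happens may itself be skipped (deleted).
    fn main() {
        let docs = [0u32, 1, 2, 3, 4, 5];
        let block_boundary = 4; // pretend docs 0..4 are block 0, docs 4..6 block 1
        let is_deleted = |doc_id: u32| doc_id % 2 == 0; // docs 0, 2, 4 deleted

        let mut reset_block_pos = false; // hoisted, as in the fix
        let emitted: Vec<(u32, bool)> = docs
            .iter()
            .filter_map(move |&doc_id| {
                // Buggy variant: `let mut reset_block_pos = false;` HERE
                // would clear the flag on every call, losing the crossing
                // recorded while the deleted doc 4 was being skipped.
                if doc_id == block_boundary {
                    reset_block_pos = true; // crossed into a new block
                }
                if is_deleted(doc_id) {
                    return None; // skipped doc: the flag must be remembered
                }
                let ret = Some((doc_id, reset_block_pos));
                reset_block_pos = false; // reset only after it is consumed
                ret
            })
            .collect();

        // Doc 5 is the first alive doc of the new block and must see the flag.
        assert_eq!(emitted, vec![(1, false), (3, false), (5, true)]);
    }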