Compare commits


13 Commits
0.15 ... 0.15.1

Author                 SHA1        Message                                                             Date
PSeitz                 5209238c1b  use github actions for tests                                        2021-06-14 12:51:46 +02:00
Paul Masurel           7ef25ec400  Bump to 0.15.1 to publish bugfix                                    2021-06-14 18:45:38 +09:00
PSeitz                 221e7cbb55  Merge pull request #1076 from appaquet/fix/store-reader-iterator   2021-06-14 11:22:58 +02:00
                                   Fix panic in store reader raw document iterator during segment merge
Pascal Seitz           873ac1a3ac  cleanup import                                                      2021-06-14 10:31:45 +02:00
Pascal Seitz           ebe55a7ae1  refactor test, fixes #1077                                          2021-06-14 10:10:05 +02:00
                                   replace test with smaller test in doc_store
Bernard Swart          9f32d40b27  Misspelling of misspelled was fixed (#1078)                         2021-06-14 16:29:12 +09:00
Andre-Philippe Paquet  8ae10a930a  fix formatting                                                      2021-06-13 17:23:40 -04:00
Andre-Philippe Paquet  473a346814  remove debugging                                                    2021-06-13 16:49:44 -04:00
Andre-Philippe Paquet  3a8a0fe79a  add fuzzy merge test                                                2021-06-13 16:42:24 -04:00
Andre-Philippe Paquet  511dc8f87f  fix store reader iterator                                           2021-06-13 16:00:13 -04:00
Paul Masurel           3901295329  Bumped query-grammar version                                        2021-06-07 10:00:14 +09:00
Paul Masurel           f5918c6c74  Completed bitpacker README                                          2021-06-07 09:57:17 +09:00
Paul Masurel           abe6b4baec  Bumped tantivy version to 0.15                                      2021-06-07 09:52:48 +09:00
8 changed files with 82 additions and 8 deletions

.github/workflows/test.yml (new file, +24)

@@ -0,0 +1,24 @@
+name: Rust
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Build
+      run: cargo build --verbose --workspace
+    - name: Run tests
+      run: cargo test --verbose --workspace
+    - name: Check Formatting
+      run: cargo fmt --all -- --check

CHANGELOG.md

@@ -1,3 +1,7 @@
+Tantivy 0.15.1
+=========================
+- Major bugfix. DocStore panics when first block is deleted. (@appaquet) #1077
+
 Tantivy 0.15.0
 =========================
 - API Changes. Using Range instead of (start, end) in the API and internals (`FileSlice`, `OwnedBytes`, `Snippets`, ...)
@@ -8,7 +12,7 @@ Tantivy 0.15.0
 - Bugfix consistent tie break handling in facet's topk (@hardikpnsp) #357
 - Date field support for range queries (@rihardsk) #516
 - Added lz4-flex as the default compression scheme in tantivy (@PSeitz) #1009
-- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@pmasurel)
+- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@fulmicoton)
 - Simplified positions index format (@fulmicoton) #1022
 - Moved bitpacking to bitpacker subcrate and add BlockedBitpacker, which bitpacks blocks of 128 elements (@PSeitz) #1030
 - Added support for more-like-this query in tantivy (@evanxg852000) #1011
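
For context on the 0.15.1 entry above: the panic fired when `StoreReader::iter` crossed into a doc-store block whose leading documents were all deleted. The sketch below condenses the regression test added further down in this compare; `write_lorem_ipsum_store`, `DeleteBitSet::for_test`, and `Compressor` are crate-internal test helpers visible in that diff, so this is an illustration that only compiles inside tantivy's own store test module.

    // Sketch of the #1077 failure mode, condensed from the regression test
    // in this compare (crate-internal helpers; illustration only).
    fn iter_store_with_deleted_leading_block() -> crate::Result<()> {
        let num_docs: u32 = 1_000;
        // Delete a run of docs that spans a checkpoint boundary, so one
        // doc-store block begins with deleted documents.
        let deleted_docids: Vec<u32> = (200..300).collect();
        let delete_bitset = DeleteBitSet::for_test(&deleted_docids, num_docs);

        let directory = RamDirectory::create();
        let store_wrt = directory.open_write(Path::new("store"))?;
        write_lorem_ipsum_store(store_wrt, num_docs as usize, Compressor::Lz4);

        let store = StoreReader::open(directory.open_read(Path::new("store"))?)?;
        // Before 511dc8f87f this iterator panicked once it reached the block
        // whose first documents were deleted.
        for doc in store.iter(Some(&delete_bitset)) {
            doc?; // every alive document must still decode
        }
        Ok(())
    }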

Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.14.0"
+version = "0.15.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -33,7 +33,7 @@ levenshtein_automata = "0.2"
 uuid = { version = "0.8.2", features = ["v4", "serde"] }
 crossbeam = "0.8"
 futures = { version = "0.3.15", features = ["thread-pool"] }
-tantivy-query-grammar = { version="0.14.0", path="./query-grammar" }
+tantivy-query-grammar = { version="0.15.0", path="./query-grammar" }
 tantivy-bitpacker = { version="0.1", path="./bitpacker" }
 stable_deref_trait = "1.2"
 rust-stemmers = "1.2"

bitpacker/Cargo.toml

@@ -2,6 +2,13 @@
name = "tantivy-bitpacker"
version = "0.1.0"
edition = "2018"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = []
description = """Tantivy-sub crate: bitpacking"""
repository = "https://github.com/tantivy-search/tantivy"
keywords = []
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

examples/deleting_updating_documents.rs

@@ -90,7 +90,7 @@ fn main() -> tantivy::Result<()> {
     let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711");
-    // Oops our frankenstein doc seems mispelled
+    // Oops our frankenstein doc seems misspelled
     let frankenstein_doc_misspelled = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap();
     assert_eq!(
         schema.to_json(&frankenstein_doc_misspelled),
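
For orientation, the `extract_doc_given_isbn` helper used in this example is presumably a term lookup that returns the stored document. A hedged sketch of such a helper, assuming tantivy 0.15's public API: the name and signature are taken from the call site above, but the body is guessed, not taken from this diff.

    use tantivy::collector::TopDocs;
    use tantivy::query::TermQuery;
    use tantivy::schema::{Document, IndexRecordOption};
    use tantivy::{IndexReader, Term};

    // Guessed implementation: fetch the single stored document whose
    // `isbn` field matches the given term, or None if no doc matches.
    fn extract_doc_given_isbn(
        reader: &IndexReader,
        isbn_term: &Term,
    ) -> tantivy::Result<Option<Document>> {
        let searcher = reader.searcher();
        let query = TermQuery::new(isbn_term.clone(), IndexRecordOption::Basic);
        let top_docs = searcher.search(&query, &TopDocs::with_limit(1))?;
        if let Some((_score, doc_address)) = top_docs.first() {
            Ok(Some(searcher.doc(*doc_address)?))
        } else {
            Ok(None)
        }
    }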

query-grammar/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-query-grammar"
-version = "0.14.0"
+version = "0.15.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]

src/store/mod.rs

@@ -57,6 +57,7 @@ pub mod tests {
     use futures::executor::block_on;
     use super::*;
+    use crate::fastfield::DeleteBitSet;
     use crate::schema::{self, FieldValue, TextFieldIndexing, STORED, TEXT};
     use crate::schema::{Document, TextOptions};
     use crate::{
@@ -107,15 +108,51 @@ pub mod tests {
         schema
     }
 
+    const NUM_DOCS: usize = 1_000;
+
+    #[test]
+    fn test_doc_store_iter_with_delete_bug_1077() -> crate::Result<()> {
+        // this will cover deletion of the first element in a checkpoint
+        let deleted_docids = (200..300).collect::<Vec<_>>();
+        let delete_bitset = DeleteBitSet::for_test(&deleted_docids, NUM_DOCS as u32);
+        let path = Path::new("store");
+        let directory = RamDirectory::create();
+        let store_wrt = directory.open_write(path)?;
+        let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, Compressor::Lz4);
+        let field_title = schema.get_field("title").unwrap();
+        let store_file = directory.open_read(path)?;
+        let store = StoreReader::open(store_file)?;
+        for i in 0..NUM_DOCS as u32 {
+            assert_eq!(
+                *store
+                    .get(i)?
+                    .get_first(field_title)
+                    .unwrap()
+                    .text()
+                    .unwrap(),
+                format!("Doc {}", i)
+            );
+        }
+
+        for (_, doc) in store.iter(Some(&delete_bitset)).enumerate() {
+            let doc = doc?;
+            let title_content = doc.get_first(field_title).unwrap().text().unwrap();
+            if !title_content.starts_with("Doc ") {
+                panic!("unexpected title_content {}", title_content);
+            }
+        }
+        Ok(())
+    }
+
     fn test_store(compressor: Compressor) -> crate::Result<()> {
         let path = Path::new("store");
         let directory = RamDirectory::create();
         let store_wrt = directory.open_write(path)?;
-        let schema = write_lorem_ipsum_store(store_wrt, 1_000, compressor);
+        let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, compressor);
         let field_title = schema.get_field("title").unwrap();
         let store_file = directory.open_read(path)?;
         let store = StoreReader::open(store_file)?;
-        for i in 0..1_000 {
+        for i in 0..NUM_DOCS as u32 {
             assert_eq!(
                 *store
                     .get(i)?

src/store/reader.rs

@@ -166,6 +166,7 @@ impl StoreReader {
             .map(|checkpoint| self.read_block(&checkpoint).map_err(|e| e.kind())); // map error in order to enable cloning
         let mut block_start_pos = 0;
         let mut num_skipped = 0;
+        let mut reset_block_pos = false;
         (0..last_docid)
             .filter_map(move |doc_id| {
                 // filter_map is only used to resolve lifetime issues between the two closures on
@@ -175,8 +176,8 @@ impl StoreReader {
                     // we keep the number of skipped documents to move forward in the map block
                     num_skipped += 1;
                 }
 
                 // check move to next checkpoint
-                let mut reset_block_pos = false;
                 if doc_id >= curr_checkpoint.as_ref().unwrap().doc_range.end {
                     curr_checkpoint = checkpoint_block_iter.next();
                     curr_block = curr_checkpoint
@@ -190,6 +191,7 @@ impl StoreReader {
                     let ret = Some((curr_block.clone(), num_skipped, reset_block_pos));
                     // the map block will move over the num_skipped, so we reset to 0
                     num_skipped = 0;
+                    reset_block_pos = false;
                     ret
                 } else {
                     None
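
The essence of the fix in this last file: `reset_block_pos` was previously declared inside the `filter_map` closure, so it was re-initialized to `false` on every call, and a block crossing detected while skipping deleted documents was forgotten before the next alive document could consume it. Hoisting the declaration out makes the flag persist across calls. A standalone toy sketch of the pattern, using no tantivy types:

    // Toy illustration of the fix: a flag recording an event (crossing a
    // block boundary) must outlive individual closure calls, because the
    // doc on which the crossing happens may itself be skipped (deleted).
    fn main() {
        let docs = [0u32, 1, 2, 3, 4, 5];
        let block_boundary = 4; // pretend docs 0..4 are block 0, docs 4..6 block 1
        let is_deleted = |doc_id: u32| doc_id % 2 == 0; // docs 0, 2, 4 deleted

        let mut reset_block_pos = false; // hoisted, as in the fix
        let emitted: Vec<(u32, bool)> = docs
            .iter()
            .filter_map(move |&doc_id| {
                // Buggy variant: `let mut reset_block_pos = false;` HERE
                // would clear the flag on every call, losing the crossing
                // recorded while the deleted doc 4 was being skipped.
                if doc_id == block_boundary {
                    reset_block_pos = true; // crossed into a new block
                }
                if is_deleted(doc_id) {
                    return None; // skipped doc: the flag must be remembered
                }
                let ret = Some((doc_id, reset_block_pos));
                reset_block_pos = false; // reset only after it is consumed
                ret
            })
            .collect();

        // Doc 5 is the first alive doc of the new block and must see the flag.
        assert_eq!(emitted, vec![(1, false), (3, false), (5, true)]);
    }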