mirror of https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 17:22:54 +00:00

Compare commits: hotfix-108...0.15 (1 commit)

| Author | SHA1 | Date |
|---|---|---|
| | 3c3493cf15 | |

.github/workflows/test.yml (vendored): 24 lines
@@ -1,24 +0,0 @@
-name: Rust
-
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
-
-env:
-  CARGO_TERM_COLOR: always
-
-jobs:
-  build:
-
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v2
-    - name: Build
-      run: cargo build --verbose --workspace
-    - name: Run tests
-      run: cargo test --verbose --workspace
-    - name: Check Formatting
-      run: cargo fmt --all -- --check
CHANGELOG.md: 10 lines

@@ -1,11 +1,3 @@
-Tantivy 0.15.2
-========================
-- Major bugfix. DocStore still panics when a deleted doc is at the beginning of a block. (@appaquet) #1088
-
-Tantivy 0.15.1
-=========================
-- Major bugfix. DocStore panics when first block is deleted. (@appaquet) #1077
-
 Tantivy 0.15.0
 =========================
 - API Changes. Using Range instead of (start, end) in the API and internals (`FileSlice`, `OwnedBytes`, `Snippets`, ...)
@@ -16,7 +8,7 @@ Tantivy 0.15.0
 - Bugfix consistent tie break handling in facet's topk (@hardikpnsp) #357
 - Date field support for range queries (@rihardsk) #516
 - Added lz4-flex as the default compression scheme in tantivy (@PSeitz) #1009
-- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@fulmicoton)
+- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@pmasurel)
 - Simplified positions index format (@fulmicoton) #1022
 - Moved bitpacking to bitpacker subcrate and add BlockedBitpacker, which bitpacks blocks of 128 elements (@PSeitz) #1030
 - Added support for more-like-this query in tantivy (@evanxg852000) #1011
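Both 0.15.x changelog entries concern the same code path: `StoreReader::iter` takes an optional delete bitset and must skip deleted documents correctly even when a deleted document sits at the start of a block. A minimal consumer-side sketch, assuming a `store: StoreReader` and a `delete_bitset: DeleteBitSet` built as in the test visible further down in this diff (setup elided):

```rust
// Sketch only: iterate the document store, skipping deleted documents.
// `store` and `delete_bitset` are assumed to be constructed as in
// test_doc_store_iter_with_delete_bug_1077, shown later in this diff.
for doc in store.iter(Some(&delete_bitset)) {
    let doc = doc?; // reading a block can fail, so each item is a Result
    // ... only alive documents are yielded here ...
}
```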
Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.15.2"
+version = "0.15.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -33,7 +33,7 @@ levenshtein_automata = "0.2"
 uuid = { version = "0.8.2", features = ["v4", "serde"] }
 crossbeam = "0.8"
 futures = { version = "0.3.15", features = ["thread-pool"] }
-tantivy-query-grammar = { version="0.15.0", path="./query-grammar" }
+tantivy-query-grammar = { version="0.14.0", path="./query-grammar" }
 tantivy-bitpacker = { version="0.1", path="./bitpacker" }
 stable_deref_trait = "1.2"
 rust-stemmers = "1.2"
bitpacker/Cargo.toml

@@ -2,13 +2,6 @@
 name = "tantivy-bitpacker"
 version = "0.1.0"
 edition = "2018"
-authors = ["Paul Masurel <paul.masurel@gmail.com>"]
-license = "MIT"
-categories = []
-description = """Tantivy-sub crate: bitpacking"""
-repository = "https://github.com/tantivy-search/tantivy"
-keywords = []
-
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
@@ -90,7 +90,7 @@ fn main() -> tantivy::Result<()> {
 
     let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711");
 
-    // Oops our frankenstein doc seems misspelled
+    // Oops our frankenstein doc seems mispelled
     let frankenstein_doc_misspelled = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap();
     assert_eq!(
         schema.to_json(&frankenstein_doc_misspelled),
query-grammar/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-query-grammar"
-version = "0.15.0"
+version = "0.14.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -57,7 +57,6 @@ pub mod tests {
     use futures::executor::block_on;
 
     use super::*;
-    use crate::fastfield::DeleteBitSet;
     use crate::schema::{self, FieldValue, TextFieldIndexing, STORED, TEXT};
     use crate::schema::{Document, TextOptions};
     use crate::{
@@ -108,61 +107,15 @@ pub mod tests {
         schema
     }
 
-    const NUM_DOCS: usize = 1_000;
-    #[test]
-    fn test_doc_store_iter_with_delete_bug_1077() -> crate::Result<()> {
-        // this will cover deletion of the first element in a checkpoint
-        let deleted_docids = (200..300).collect::<Vec<_>>();
-        let delete_bitset = DeleteBitSet::for_test(&deleted_docids, NUM_DOCS as u32);
-
-        let path = Path::new("store");
-        let directory = RamDirectory::create();
-        let store_wrt = directory.open_write(path)?;
-        let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, Compressor::Lz4);
-        let field_title = schema.get_field("title").unwrap();
-        let store_file = directory.open_read(path)?;
-        let store = StoreReader::open(store_file)?;
-        for i in 0..NUM_DOCS as u32 {
-            assert_eq!(
-                *store
-                    .get(i)?
-                    .get_first(field_title)
-                    .unwrap()
-                    .text()
-                    .unwrap(),
-                format!("Doc {}", i)
-            );
-        }
-
-        for (_, doc) in store.iter(Some(&delete_bitset)).enumerate() {
-            let doc = doc?;
-            let title_content = doc.get_first(field_title).unwrap().text().unwrap();
-            if !title_content.starts_with("Doc ") {
-                panic!("unexpected title_content {}", title_content);
-            }
-
-            let id = title_content
-                .strip_prefix("Doc ")
-                .unwrap()
-                .parse::<u32>()
-                .unwrap();
-            if delete_bitset.is_deleted(id) {
-                panic!("unexpected deleted document {}", id);
-            }
-        }
-
-        Ok(())
-    }
-
     fn test_store(compressor: Compressor) -> crate::Result<()> {
         let path = Path::new("store");
         let directory = RamDirectory::create();
         let store_wrt = directory.open_write(path)?;
-        let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, compressor);
+        let schema = write_lorem_ipsum_store(store_wrt, 1_000, compressor);
         let field_title = schema.get_field("title").unwrap();
         let store_file = directory.open_read(path)?;
         let store = StoreReader::open(store_file)?;
-        for i in 0..NUM_DOCS as u32 {
+        for i in 0..1_000 {
             assert_eq!(
                 *store
                     .get(i)?
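The removed test drove the store iterator with an explicit `DeleteBitSet`. A condensed sketch of its setup, reusing only calls visible in the diff above (`for_test`, `is_deleted`) and the test's own constants; the 200..300 range is chosen, per the test's comment, so that a checkpoint begins on a deleted document:

```rust
// Condensed from the removed test: mark docs 200..300 as deleted out of
// 1_000, so that at least one store checkpoint starts on a deleted doc.
let deleted_docids: Vec<u32> = (200..300).collect();
let delete_bitset = DeleteBitSet::for_test(&deleted_docids, 1_000);
assert!(delete_bitset.is_deleted(200)); // first element of the deleted range
assert!(!delete_bitset.is_deleted(199));
```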
@@ -166,13 +166,17 @@ impl StoreReader {
             .map(|checkpoint| self.read_block(&checkpoint).map_err(|e| e.kind())); // map error in order to enable cloning
         let mut block_start_pos = 0;
         let mut num_skipped = 0;
-        let mut reset_block_pos = false;
         (0..last_docid)
             .filter_map(move |doc_id| {
                 // filter_map is only used to resolve lifetime issues between the two closures on
                 // the outer variables
+                let alive = delete_bitset.map_or(true, |bitset| bitset.is_alive(doc_id));
+                if !alive {
+                    // we keep the number of skipped documents to move forward in the map block
+                    num_skipped += 1;
+                }
                 // check move to next checkpoint
+                let mut reset_block_pos = false;
                 if doc_id >= curr_checkpoint.as_ref().unwrap().doc_range.end {
                     curr_checkpoint = checkpoint_block_iter.next();
                     curr_block = curr_checkpoint
@@ -182,16 +186,12 @@ impl StoreReader {
                     num_skipped = 0;
                 }
 
-                let alive = delete_bitset.map_or(true, |bitset| bitset.is_alive(doc_id));
                 if alive {
                     let ret = Some((curr_block.clone(), num_skipped, reset_block_pos));
                     // the map block will move over the num_skipped, so we reset to 0
                     num_skipped = 0;
-                    reset_block_pos = false;
                     ret
                 } else {
-                    // we keep the number of skipped documents to move forward in the map block
-                    num_skipped += 1;
                     None
                 }
             })
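The two hunks above move the alive check and the `reset_block_pos` flag across the branch, but the core mechanism is unchanged: inside `filter_map`, deleted doc ids yield nothing while a running `num_skipped` count travels with the next alive item and is then reset. A self-contained sketch of that pattern, with plain integers standing in for documents (illustrative names, no tantivy types):

```rust
// Standalone illustration of the skip-counting pattern used in the
// StoreReader iterator above: deleted ids are swallowed, but the number
// of skipped ids is handed to the consumer so it can advance its cursor.
fn main() {
    let deleted = [1u32, 2, 5];
    let mut num_skipped = 0usize;
    let items: Vec<(u32, usize)> = (0u32..8)
        .filter_map(|doc_id| {
            if deleted.contains(&doc_id) {
                // keep the number of skipped documents to move forward later
                num_skipped += 1;
                None
            } else {
                let ret = Some((doc_id, num_skipped));
                // the consumer moves over num_skipped, so reset it to 0
                num_skipped = 0;
                ret
            }
        })
        .collect();
    assert_eq!(items, vec![(0, 0), (3, 2), (4, 0), (6, 1), (7, 0)]);
}
```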