mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-03 15:52:55 +00:00
blop
This commit is contained in:
@@ -10,3 +10,4 @@ memmap = "0.2.3"
|
||||
lazy_static = "0.1.*"
|
||||
regex = "0.1"
|
||||
fst = "0.1.26"
|
||||
rand = "0.3.13"
|
||||
|
||||
@@ -9,9 +9,19 @@ use std::rc::Rc;
|
||||
use std::ops::Deref;
|
||||
use std::cell::RefCell;
|
||||
use std::sync::Arc;
|
||||
use rand::{thread_rng, Rng};
|
||||
|
||||
|
||||
/// Identifier of a segment, wrapping the segment's name.
///
/// The inner `String` is public so that callers (including tests living
/// outside of this crate) can destructure the identifier to read the name.
#[derive(Clone, Debug)]
pub struct SegmentId(pub String);
|
||||
|
||||
pub fn generate_segment_name() -> SegmentId {
|
||||
static CHARS: &'static [u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
|
||||
let random_name: String = (0..8)
|
||||
.map(|_| thread_rng().choose(CHARS).unwrap().clone() as char)
|
||||
.collect();
|
||||
SegmentId( String::from("_") + &random_name)
|
||||
}
|
||||
|
||||
pub trait Dir {
|
||||
fn get_data(&self, segment_id: &SegmentId, component: SegmentComponent) -> Result<SharedMmapMemory, io::Error>; // {
|
||||
@@ -30,6 +40,10 @@ impl Directory {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_segment(&self,) -> Segment {
|
||||
self.segment(&generate_segment_name())
|
||||
}
|
||||
|
||||
fn from<T: Dir + 'static>(directory: T) -> Directory {
|
||||
Directory {
|
||||
dir: Rc::new(directory),
|
||||
|
||||
@@ -31,32 +31,25 @@ impl PostingsWriter for SimplePostingsWriter {
|
||||
}
|
||||
}
|
||||
|
||||
impl Flushable for SimplePostingsWriter {
|
||||
fn flush<W: Write>(&self, writer: &mut W) -> Result<usize, io::Error> {
|
||||
let num_docs = self.doc_ids.len() as u64;
|
||||
writer.write_u64::<NativeEndian>(num_docs);
|
||||
for &doc_id in self.doc_ids.iter() {
|
||||
writer.write_u64::<NativeEndian>(doc_id as u64);
|
||||
}
|
||||
Ok(1)
|
||||
}
|
||||
}
|
||||
|
||||
struct FieldWriter {
|
||||
postings: Vec<SimplePostingsWriter>,
|
||||
term_index: BTreeMap<String, usize>,
|
||||
}
|
||||
//
|
||||
// impl Flushable for FieldWriter {
|
||||
// fn flush<W: Write>(&self, writer: &mut W) -> Result<usize, io::Error> {
|
||||
// let num_docs = self.doc_ids.len() as u64;
|
||||
// writer.write_u64::<NativeEndian>(num_docs);
|
||||
// for &doc_id in self.doc_ids.iter() {
|
||||
// writer.write_u64::<NativeEndian>(doc_id as u64);
|
||||
// }
|
||||
// Ok(1)
|
||||
// }
|
||||
// }
|
||||
|
||||
impl Flushable for SimplePostingsWriter {
|
||||
fn flush<W: Write>(&self, writer: &mut W) -> Result<usize, io::Error> {
|
||||
let mut num_bytes_written = 0;
|
||||
let num_docs = self.doc_ids.len() as u64;
|
||||
writer.write_u64::<NativeEndian>(num_docs);
|
||||
num_bytes_written += 8;
|
||||
for &doc_id in self.doc_ids.iter() {
|
||||
writer.write_u64::<NativeEndian>(doc_id as u64);
|
||||
num_bytes_written += 8;
|
||||
}
|
||||
Ok(num_bytes_written)
|
||||
}
|
||||
}
|
||||
|
||||
impl FieldWriter {
|
||||
pub fn new() -> FieldWriter {
|
||||
@@ -120,6 +113,7 @@ impl IndexWriter {
|
||||
}
|
||||
|
||||
pub fn sync(&mut self,) -> Result<(), io::Error> {
|
||||
self.directory.new_segment();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
extern crate byteorder;
|
||||
|
||||
extern crate rand;
|
||||
extern crate regex;
|
||||
|
||||
pub mod core;
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
extern crate tantivy;
|
||||
extern crate itertools;
|
||||
extern crate byteorder;
|
||||
extern crate regex;
|
||||
|
||||
use tantivy::core::DocId;
|
||||
use tantivy::core::postings::{VecPostings, intersection};
|
||||
use tantivy::core::postings::Postings;
|
||||
use tantivy::core::analyzer::tokenize;
|
||||
use tantivy::core::writer::IndexWriter;
|
||||
use tantivy::core::directory::Directory;
|
||||
use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
|
||||
use tantivy::core::schema::{Field, Document};
|
||||
use tantivy::core::reader::IndexReader;
|
||||
use tantivy::core::writer::SimplePostingsWriter;
|
||||
use tantivy::core::postings::PostingsWriter;
|
||||
use tantivy::core::global::Flushable;
|
||||
use std::io::{ BufWriter, Write };
|
||||
use std::io::{ BufWriter, Write};
|
||||
use regex::Regex;
|
||||
use std::convert::From;
|
||||
|
||||
#[test]
|
||||
@@ -58,3 +60,10 @@ fn test_postings_writer() {
|
||||
postings_writer.flush(&mut buffer);
|
||||
assert_eq!(buffer.len(), 5 * 8);
|
||||
}
|
||||
|
||||
/// Checks that a generated segment name is an underscore followed by
/// exactly eight lowercase alphanumeric characters.
#[test]
fn test_new_segment() {
    let expected_shape = Regex::new(r"^_[a-z0-9]{8}$").unwrap();
    let segment_id = generate_segment_name();
    assert!(expected_shape.is_match(&segment_id.0));
}
|
||||
|
||||
Reference in New Issue
Block a user