mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
* fix windows build (#1) * Fix windows build * Add doc traits * Add field value iter * Add value and serialization * Adjust order * Fix bug * Correct type * Fix generic bugs * Reformat code * Add generic to index writer which I forgot about * Fix missing generics on single segment writer * Add missing type export * Add default methods for convenience * Cleanup * Fix more-like-this query to use standard types * Update API and fix tests * Add doc traits * Add field value iter * Add value and serialization * Adjust order * Fix bug * Correct type * Rebase main and fix conflicts * Reformat code * Merge upstream * Fix missing generics on single segment writer * Add missing type export * Add default methods for convenience * Cleanup * Fix more-like-this query to use standard types * Update API and fix tests * Add tokenizer improvements from previous commits * Add tokenizer improvements from previous commits * Reformat * Fix unit tests * Fix unit tests * Use enum in changes * Stage changes * Add new deserializer logic * Add serializer integration * Add document deserializer * Implement new (de)serialization api for existing types * Fix bugs and type errors * Add helper implementations * Fix errors * Reformat code * Add unit tests and some code organisation for serialization * Add unit tests to deserializer * Add some small docs * Add support for deserializing serde values * Reformat * Fix typo * Fix typo * Change repr of facet * Remove unused trait methods * Add child value type * Resolve comments * Fix build * Fix more build errors * Fix more build errors * Fix the tests I missed * Fix examples * fix numerical order, serialize PreTok Str * fix coverage * rename Document to TantivyDocument, rename DocumentAccess to Document add Binary prefix to binary de/serialization * fix coverage --------- Co-authored-by: Pascal Seitz <pascal.seitz@gmail.com>
108 lines
3.9 KiB
Rust
108 lines
3.9 KiB
Rust
// # Json field example
|
|
//
|
|
// This example shows how the json field can be used
|
|
// to make tantivy partially schemaless by setting it as
|
|
// a default query parser field.
|
|
|
|
use tantivy::collector::{Count, TopDocs};
|
|
use tantivy::query::QueryParser;
|
|
use tantivy::schema::{Schema, FAST, STORED, STRING, TEXT};
|
|
use tantivy::{Index, IndexWriter, TantivyDocument};
|
|
|
|
fn main() -> tantivy::Result<()> {
|
|
// # Defining the schema
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_date_field("timestamp", FAST | STORED);
|
|
let event_type = schema_builder.add_text_field("event_type", STRING | STORED);
|
|
let attributes = schema_builder.add_json_field("attributes", STORED | TEXT);
|
|
let schema = schema_builder.build();
|
|
|
|
// # Indexing documents
|
|
let index = Index::create_in_ram(schema.clone());
|
|
|
|
let mut index_writer: IndexWriter = index.writer(50_000_000)?;
|
|
let doc = TantivyDocument::parse_json(
|
|
&schema,
|
|
r#"{
|
|
"timestamp": "2022-02-22T23:20:50.53Z",
|
|
"event_type": "click",
|
|
"attributes": {
|
|
"target": "submit-button",
|
|
"cart": {"product_id": 103},
|
|
"description": "the best vacuum cleaner ever"
|
|
}
|
|
}"#,
|
|
)?;
|
|
index_writer.add_document(doc)?;
|
|
let doc = TantivyDocument::parse_json(
|
|
&schema,
|
|
r#"{
|
|
"timestamp": "2022-02-22T23:20:51.53Z",
|
|
"event_type": "click",
|
|
"attributes": {
|
|
"target": "submit-button",
|
|
"cart": {"product_id": 133},
|
|
"description": "das keyboard",
|
|
"event_type": "holiday-sale"
|
|
}
|
|
}"#,
|
|
)?;
|
|
index_writer.add_document(doc)?;
|
|
index_writer.commit()?;
|
|
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
|
|
// # Default fields: event_type and attributes
|
|
// By setting attributes as a default field it allows omitting attributes itself, e.g. "target",
|
|
// instead of "attributes.target"
|
|
let query_parser = QueryParser::for_index(&index, vec![event_type, attributes]);
|
|
{
|
|
let query = query_parser.parse_query("target:submit-button")?;
|
|
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
|
|
assert_eq!(count_docs.len(), 2);
|
|
}
|
|
{
|
|
let query = query_parser.parse_query("target:submit")?;
|
|
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
|
|
assert_eq!(count_docs.len(), 2);
|
|
}
|
|
{
|
|
let query = query_parser.parse_query("cart.product_id:103")?;
|
|
let count_docs = searcher.search(&*query, &Count)?;
|
|
assert_eq!(count_docs, 1);
|
|
}
|
|
{
|
|
let query = query_parser.parse_query("click AND cart.product_id:133")?;
|
|
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
|
|
assert_eq!(hits.len(), 1);
|
|
}
|
|
{
|
|
// The sub-fields in the json field marked as default field still need to be explicitly
|
|
// addressed
|
|
let query = query_parser.parse_query("click AND 133")?;
|
|
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
|
|
assert_eq!(hits.len(), 0);
|
|
}
|
|
{
|
|
// Default json fields are ignored if they collide with the schema
|
|
let query = query_parser.parse_query("event_type:holiday-sale")?;
|
|
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
|
|
assert_eq!(hits.len(), 0);
|
|
}
|
|
// # Query via full attribute path
|
|
{
|
|
// This only searches in our schema's `event_type` field
|
|
let query = query_parser.parse_query("event_type:click")?;
|
|
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
|
|
assert_eq!(hits.len(), 2);
|
|
}
|
|
{
|
|
// Default json fields can still be accessed by full path
|
|
let query = query_parser.parse_query("attributes.event_type:holiday-sale")?;
|
|
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
|
|
assert_eq!(hits.len(), 1);
|
|
}
|
|
Ok(())
|
|
}
|