Merge pull request #2606 from katlim-br/add_serde_serialize

Add serde json serialize to UserInputAst
This commit is contained in:
Paul Masurel
2025-04-03 15:57:03 +02:00
committed by GitHub
6 changed files with 227 additions and 12 deletions

View File

@@ -31,8 +31,8 @@ lz4_flex = { version = "0.11", default-features = false, optional = true }
zstd = { version = "0.13", optional = true, default-features = false }
tempfile = { version = "3.12.0", optional = true }
log = "0.4.16"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.79"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
fs4 = { version = "0.8.0", optional = true }
levenshtein_automata = "0.2.1"
uuid = { version = "1.0.0", features = ["v4", "serde"] }
@@ -60,7 +60,7 @@ bon = "3.3.1"
columnar = { version = "0.3", path = "./columnar", package = "tantivy-columnar" }
sstable = { version = "0.3", path = "./sstable", package = "tantivy-sstable", optional = true }
stacker = { version = "0.3", path = "./stacker", package = "tantivy-stacker" }
query-grammar = { version = "0.22.0", path = "./query-grammar", package = "tantivy-query-grammar" }
query-grammar = { version = "0.23.0", path = "./query-grammar", package = "tantivy-query-grammar" }
tantivy-bitpacker = { version = "0.6", path = "./bitpacker" }
common = { version = "0.7", path = "./common/", package = "tantivy-common" }
tokenizer-api = { version = "0.3", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-query-grammar"
version = "0.22.0"
version = "0.23.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -13,3 +13,5 @@ edition = "2021"
[dependencies]
nom = "7"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"

View File

@@ -3,6 +3,7 @@
use std::convert::Infallible;
use nom::{AsChar, IResult, InputLength, InputTakeAtPosition};
use serde::Serialize;
pub(crate) type ErrorList = Vec<LenientErrorInternal>;
pub(crate) type JResult<I, O> = IResult<I, (O, ErrorList), Infallible>;
@@ -15,7 +16,8 @@ pub(crate) struct LenientErrorInternal {
}
/// A recoverable error and the position it happened at
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct LenientError {
pub pos: usize,
pub message: String,
@@ -353,3 +355,21 @@ where
{
move |i: I| l.choice(i.clone()).unwrap_or_else(|| default.parse(i))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_lenient_error_serialization() {
let error = LenientError {
pos: 42,
message: "test error message".to_string(),
};
assert_eq!(
serde_json::to_string(&error).unwrap(),
"{\"pos\":42,\"message\":\"test error message\"}"
);
}
}

View File

@@ -1,5 +1,7 @@
#![allow(clippy::derive_partial_eq_without_eq)]
use serde::Serialize;
mod infallible;
mod occur;
mod query_grammar;
@@ -12,6 +14,8 @@ pub use crate::user_input_ast::{
Delimiter, UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral,
};
#[derive(Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct Error;
/// Parse a query
@@ -24,3 +28,31 @@ pub fn parse_query(query: &str) -> Result<UserInputAst, Error> {
pub fn parse_query_lenient(query: &str) -> (UserInputAst, Vec<LenientError>) {
parse_to_ast_lenient(query)
}
#[cfg(test)]
mod tests {
use crate::{parse_query, parse_query_lenient};
#[test]
fn test_parse_query_serialization() {
let ast = parse_query("title:hello OR title:x").unwrap();
let json = serde_json::to_string(&ast).unwrap();
assert_eq!(
json,
r#"{"type":"bool","clauses":[["should",{"type":"literal","field_name":"title","phrase":"hello","delimiter":"none","slop":0,"prefix":false}],["should",{"type":"literal","field_name":"title","phrase":"x","delimiter":"none","slop":0,"prefix":false}]]}"#
);
}
#[test]
fn test_parse_query_wrong_query() {
assert!(parse_query("title:").is_err());
}
#[test]
fn test_parse_query_lenient_wrong_query() {
let (_, errors) = parse_query_lenient("title:");
assert!(errors.len() == 1);
let json = serde_json::to_string(&errors).unwrap();
assert_eq!(json, r#"[{"pos":6,"message":"expected word"}]"#);
}
}

View File

@@ -1,9 +1,12 @@
use std::fmt;
use std::fmt::Write;
use serde::Serialize;
/// Defines whether a term in a query must be present,
/// should be present or must not be present.
#[derive(Debug, Clone, Hash, Copy, Eq, PartialEq)]
#[derive(Debug, Clone, Hash, Copy, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Occur {
/// For a given document to be considered for scoring,
/// at least one of the queries with the Should or the Must

View File

@@ -1,9 +1,13 @@
use std::fmt;
use std::fmt::{Debug, Formatter};
use serde::Serialize;
use crate::Occur;
#[derive(PartialEq, Clone)]
#[derive(PartialEq, Clone, Serialize)]
#[serde(tag = "type")]
#[serde(rename_all = "snake_case")]
pub enum UserInputLeaf {
Literal(UserInputLiteral),
All,
@@ -107,14 +111,16 @@ impl Debug for UserInputLeaf {
}
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[derive(Copy, Clone, Eq, PartialEq, Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Delimiter {
SingleQuotes,
DoubleQuotes,
None,
}
#[derive(PartialEq, Clone)]
#[derive(PartialEq, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct UserInputLiteral {
pub field_name: Option<String>,
pub phrase: String,
@@ -152,7 +158,9 @@ impl fmt::Debug for UserInputLiteral {
}
}
#[derive(PartialEq, Debug, Clone)]
#[derive(PartialEq, Debug, Clone, Serialize)]
#[serde(tag = "type", content = "value")]
#[serde(rename_all = "snake_case")]
pub enum UserInputBound {
Inclusive(String),
Exclusive(String),
@@ -187,11 +195,38 @@ impl UserInputBound {
}
}
#[derive(PartialEq, Clone)]
#[derive(PartialEq, Clone, Serialize)]
#[serde(into = "UserInputAstSerde")]
pub enum UserInputAst {
Clause(Vec<(Option<Occur>, UserInputAst)>),
Leaf(Box<UserInputLeaf>),
Boost(Box<UserInputAst>, f64),
Leaf(Box<UserInputLeaf>),
}
#[derive(Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum UserInputAstSerde {
Bool {
clauses: Vec<(Option<Occur>, UserInputAst)>,
},
Boost {
underlying: Box<UserInputAst>,
boost: f64,
},
#[serde(untagged)]
Leaf(Box<UserInputLeaf>),
}
impl From<UserInputAst> for UserInputAstSerde {
fn from(ast: UserInputAst) -> Self {
match ast {
UserInputAst::Clause(clause) => UserInputAstSerde::Bool { clauses: clause },
UserInputAst::Boost(underlying, boost) => {
UserInputAstSerde::Boost { underlying, boost }
}
UserInputAst::Leaf(leaf) => UserInputAstSerde::Leaf(leaf),
}
}
}
impl UserInputAst {
@@ -285,3 +320,126 @@ impl fmt::Debug for UserInputAst {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_all_leaf_serialization() {
let ast = UserInputAst::Leaf(Box::new(UserInputLeaf::All));
let json = serde_json::to_string(&ast).unwrap();
assert_eq!(json, r#"{"type":"all"}"#);
}
#[test]
fn test_literal_leaf_serialization() {
let literal = UserInputLiteral {
field_name: Some("title".to_string()),
phrase: "hello".to_string(),
delimiter: Delimiter::None,
slop: 0,
prefix: false,
};
let ast = UserInputAst::Leaf(Box::new(UserInputLeaf::Literal(literal)));
let json = serde_json::to_string(&ast).unwrap();
assert_eq!(
json,
r#"{"type":"literal","field_name":"title","phrase":"hello","delimiter":"none","slop":0,"prefix":false}"#
);
}
#[test]
fn test_range_leaf_serialization() {
let range = UserInputLeaf::Range {
field: Some("price".to_string()),
lower: UserInputBound::Inclusive("10".to_string()),
upper: UserInputBound::Exclusive("100".to_string()),
};
let ast = UserInputAst::Leaf(Box::new(range));
let json = serde_json::to_string(&ast).unwrap();
assert_eq!(
json,
r#"{"type":"range","field":"price","lower":{"type":"inclusive","value":"10"},"upper":{"type":"exclusive","value":"100"}}"#
);
}
#[test]
fn test_range_leaf_unbounded_serialization() {
let range = UserInputLeaf::Range {
field: Some("price".to_string()),
lower: UserInputBound::Inclusive("10".to_string()),
upper: UserInputBound::Unbounded,
};
let ast = UserInputAst::Leaf(Box::new(range));
let json = serde_json::to_string(&ast).unwrap();
assert_eq!(
json,
r#"{"type":"range","field":"price","lower":{"type":"inclusive","value":"10"},"upper":{"type":"unbounded"}}"#
);
}
#[test]
fn test_boost_serialization() {
let inner_ast = UserInputAst::Leaf(Box::new(UserInputLeaf::All));
let boost_ast = UserInputAst::Boost(Box::new(inner_ast), 2.5);
let json = serde_json::to_string(&boost_ast).unwrap();
assert_eq!(
json,
r#"{"type":"boost","underlying":{"type":"all"},"boost":2.5}"#
);
}
#[test]
fn test_boost_serialization2() {
let boost_ast = UserInputAst::Boost(
Box::new(UserInputAst::Clause(vec![
(
Some(Occur::Must),
UserInputAst::Leaf(Box::new(UserInputLeaf::All)),
),
(
Some(Occur::Should),
UserInputAst::Leaf(Box::new(UserInputLeaf::Literal(UserInputLiteral {
field_name: Some("title".to_string()),
phrase: "hello".to_string(),
delimiter: Delimiter::None,
slop: 0,
prefix: false,
}))),
),
])),
2.5,
);
let json = serde_json::to_string(&boost_ast).unwrap();
assert_eq!(
json,
r#"{"type":"boost","underlying":{"type":"bool","clauses":[["must",{"type":"all"}],["should",{"type":"literal","field_name":"title","phrase":"hello","delimiter":"none","slop":0,"prefix":false}]]},"boost":2.5}"#
);
}
#[test]
fn test_clause_serialization() {
let clause = UserInputAst::Clause(vec![
(
Some(Occur::Must),
UserInputAst::Leaf(Box::new(UserInputLeaf::All)),
),
(
Some(Occur::Should),
UserInputAst::Leaf(Box::new(UserInputLeaf::Literal(UserInputLiteral {
field_name: Some("title".to_string()),
phrase: "hello".to_string(),
delimiter: Delimiter::None,
slop: 0,
prefix: false,
}))),
),
]);
let json = serde_json::to_string(&clause).unwrap();
assert_eq!(
json,
r#"{"type":"bool","clauses":[["must",{"type":"all"}],["should",{"type":"literal","field_name":"title","phrase":"hello","delimiter":"none","slop":0,"prefix":false}]]}"#
);
}
}