Skip to main content

sql/parsers/create_parser/
json.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use snafu::{ResultExt, ensure};
16use sqlparser::ast::{DataType, ExactNumberInfo, Expr, ObjectName, UnaryOperator};
17use sqlparser::dialect::keywords::Keyword;
18use sqlparser::parser::Parser;
19use sqlparser::tokenizer::Token;
20
21use crate::ast::Ident;
22use crate::error::{InvalidSqlSnafu, Result, SyntaxSnafu};
23use crate::parsers::create_parser::{INVERTED, SKIPPING};
24use crate::statements::create::JsonTypeHint;
25use crate::statements::transform::type_alias::get_type_by_alias;
26
/// Name of the `JSON2` column type: matched ASCII case-insensitively against the next
/// unquoted word when parsing a column type, and used as the name of the resulting
/// custom data type.
const JSON2_TYPE_NAME: &str = "JSON2";
28
29pub(super) fn parse_json2_type_and_hints(
30    parser: &mut Parser<'_>,
31) -> Result<Option<(DataType, Vec<JsonTypeHint>)>> {
32    let token = parser.peek_token();
33    let Token::Word(word) = &token.token else {
34        return Ok(None);
35    };
36
37    if !word.value.eq_ignore_ascii_case(JSON2_TYPE_NAME) || word.quote_style.is_some() {
38        return Ok(None);
39    }
40
41    parser.next_token();
42    let data_type = DataType::Custom(ObjectName::from(vec![Ident::new(JSON2_TYPE_NAME)]), vec![]);
43    let type_hints = if parser.consume_token(&Token::LParen) {
44        parse_json2_type_hints(parser)?
45    } else {
46        vec![]
47    };
48
49    Ok(Some((data_type, type_hints)))
50}
51
52fn parse_json2_type_hints(parser: &mut Parser<'_>) -> Result<Vec<JsonTypeHint>> {
53    let mut hints = Vec::new();
54
55    if parser.consume_token(&Token::RParen) {
56        return Ok(hints);
57    }
58
59    loop {
60        let hint = parse_json2_type_hint(parser)?;
61        ensure_no_path_conflict(&hints, &hint.path)?;
62        hints.push(hint);
63
64        if parser.consume_token(&Token::Comma) {
65            if parser.consume_token(&Token::RParen) {
66                break;
67            }
68        } else {
69            parser.expect_token(&Token::RParen).context(SyntaxSnafu)?;
70            break;
71        }
72    }
73
74    Ok(hints)
75}
76
77fn parse_json2_type_hint(parser: &mut Parser<'_>) -> Result<JsonTypeHint> {
78    let path = parse_json2_path(parser)?;
79    let data_type = parser.parse_data_type().context(SyntaxSnafu)?;
80    let data_type = normalize_json2_type_hint_type(data_type)?;
81
82    let mut nullable = true;
83    let mut nullable_set = false;
84    let mut default = None;
85    let mut inverted_index = false;
86
87    loop {
88        if parser.parse_keywords(&[Keyword::NOT, Keyword::NULL]) {
89            ensure!(
90                !nullable_set,
91                InvalidSqlSnafu {
92                    msg: format!(
93                        "NULL/NOT NULL option already specified for JSON2 type hint '{}'",
94                        path.join(".")
95                    )
96                }
97            );
98            nullable = false;
99            nullable_set = true;
100        } else if parser.parse_keyword(Keyword::NULL) {
101            ensure!(
102                !nullable_set,
103                InvalidSqlSnafu {
104                    msg: format!(
105                        "NULL/NOT NULL option already specified for JSON2 type hint '{}'",
106                        path.join(".")
107                    )
108                }
109            );
110            nullable = true;
111            nullable_set = true;
112        } else if parser.parse_keyword(Keyword::DEFAULT) {
113            ensure!(
114                default.is_none(),
115                InvalidSqlSnafu {
116                    msg: format!(
117                        "duplicated DEFAULT option for JSON2 type hint '{}'",
118                        path.join(".")
119                    )
120                }
121            );
122            let expr = parser.parse_expr().context(SyntaxSnafu)?;
123            ensure_json2_default_expr_is_literal(&expr)?;
124            default = Some(expr);
125        } else if let Token::Word(word) = parser.peek_token().token
126            && word.value.eq_ignore_ascii_case(INVERTED)
127        {
128            parser.next_token();
129            ensure!(
130                parser.parse_keyword(Keyword::INDEX),
131                InvalidSqlSnafu {
132                    msg: format!(
133                        "expect INDEX after INVERTED keyword for JSON2 type hint '{}'",
134                        path.join(".")
135                    )
136                }
137            );
138            ensure!(
139                !inverted_index,
140                InvalidSqlSnafu {
141                    msg: format!(
142                        "duplicated INVERTED INDEX option for JSON2 type hint '{}'",
143                        path.join(".")
144                    )
145                }
146            );
147            inverted_index = true;
148        } else if let Token::Word(word) = parser.peek_token().token
149            && word.value.eq_ignore_ascii_case(SKIPPING)
150        {
151            return InvalidSqlSnafu {
152                msg: "JSON2 type hint SKIPPING INDEX is not supported yet".to_string(),
153            }
154            .fail();
155        } else if matches!(parser.peek_token().token, Token::Comma | Token::RParen) {
156            break;
157        } else {
158            return parser
159                .expected("JSON2 type hint option", parser.peek_token())
160                .context(SyntaxSnafu);
161        }
162    }
163
164    Ok(JsonTypeHint {
165        path,
166        data_type,
167        nullable,
168        default,
169        inverted_index,
170    })
171}
172
173fn parse_json2_path(parser: &mut Parser<'_>) -> Result<Vec<String>> {
174    let first = parser.parse_identifier().context(SyntaxSnafu)?;
175    let mut path = vec![first.value];
176
177    while parser.consume_token(&Token::Period) {
178        let segment = parser.parse_identifier().context(SyntaxSnafu)?;
179        path.push(segment.value);
180    }
181
182    ensure!(
183        !path.iter().any(|segment| segment.is_empty()),
184        InvalidSqlSnafu {
185            msg: "JSON2 type hint path segment cannot be empty".to_string(),
186        }
187    );
188
189    Ok(path)
190}
191
192fn normalize_json2_type_hint_type(data_type: DataType) -> Result<DataType> {
193    let data_type = get_type_by_alias(&data_type).unwrap_or(data_type);
194    let normalized = match data_type {
195        DataType::String(_) | DataType::Text | DataType::Varchar(_) | DataType::Char(_) => {
196            DataType::String(None)
197        }
198        DataType::TinyInt(_)
199        | DataType::SmallInt(_)
200        | DataType::Int(_)
201        | DataType::Integer(_)
202        | DataType::BigInt(_) => DataType::BigInt(None),
203        DataType::TinyIntUnsigned(_)
204        | DataType::SmallIntUnsigned(_)
205        | DataType::IntUnsigned(_)
206        | DataType::UnsignedInteger
207        | DataType::BigIntUnsigned(_) => DataType::BigIntUnsigned(None),
208        DataType::Float(_) | DataType::Real | DataType::Double(_) => {
209            DataType::Double(ExactNumberInfo::None)
210        }
211        DataType::Boolean => DataType::Boolean,
212        _ => {
213            return InvalidSqlSnafu {
214                msg: format!("unsupported JSON2 type hint data type: {data_type}"),
215            }
216            .fail();
217        }
218    };
219
220    Ok(normalized)
221}
222
223fn ensure_json2_default_expr_is_literal(expr: &Expr) -> Result<()> {
224    let is_literal = match expr {
225        Expr::Value(_) => true,
226        Expr::UnaryOp { op, expr } => {
227            matches!(op, UnaryOperator::Plus | UnaryOperator::Minus)
228                && matches!(expr.as_ref(), Expr::Value(_))
229        }
230        _ => false,
231    };
232    ensure!(
233        is_literal,
234        InvalidSqlSnafu {
235            msg: "JSON2 type hint DEFAULT only supports literal values",
236        }
237    );
238    Ok(())
239}
240
241fn ensure_no_path_conflict(hints: &[JsonTypeHint], path: &[String]) -> Result<()> {
242    for hint in hints {
243        ensure!(
244            hint.path != path,
245            InvalidSqlSnafu {
246                msg: format!("duplicated JSON2 type hint path '{}'", path.join("."))
247            }
248        );
249        ensure!(
250            !hint.path.starts_with(path) && !path.starts_with(&hint.path),
251            InvalidSqlSnafu {
252                msg: format!(
253                    "JSON2 type hint path '{}' conflicts with '{}'",
254                    path.join("."),
255                    hint.path.join(".")
256                )
257            }
258        );
259    }
260    Ok(())
261}
262
#[cfg(test)]
mod tests {
    use sqlparser::ast::{DataType, ExactNumberInfo};

    use crate::dialect::GreptimeDbDialect;
    use crate::parser::{ParseOptions, ParserContext};
    use crate::statements::create::Column;
    use crate::statements::statement::Statement;

    /// Parses `sql` and returns the first column of its first statement, which must be a
    /// `CREATE TABLE`.
    fn parse_json2_column(sql: &str) -> Column {
        let mut statements =
            ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
                .unwrap();

        match statements.remove(0) {
            Statement::CreateTable(mut create_table) => create_table.columns.remove(0),
            _ => unreachable!(),
        }
    }

    /// Parses `sql`, which is expected to be rejected, and returns the rendered error.
    fn parse_error_message(sql: &str) -> String {
        ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
            .unwrap_err()
            .to_string()
    }

    /// Hints with a mix of options, a quoted dotted segment and a trailing comma.
    #[test]
    fn test_parse_json2_type_hints() {
        let column = parse_json2_column(
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (
        "service.name" STRING NOT NULL DEFAULT 'null' INVERTED INDEX,
        http.method STRING NOT NULL,
        status_code INT64 NOT NULL,
        comment STRING NULL,
    ),
    ts TIMESTAMP TIME INDEX,
)"#,
        );

        assert!(matches!(
            column.column_def.data_type,
            DataType::Custom(_, _)
        ));
        let hints = column.extensions.json_type_hints;
        assert_eq!(hints.len(), 4);

        let service_name = &hints[0];
        assert_eq!(service_name.path, vec!["service.name"]);
        assert_eq!(service_name.data_type, DataType::String(None));
        assert!(!service_name.nullable);
        assert_eq!(
            service_name
                .default
                .as_ref()
                .map(ToString::to_string)
                .as_deref(),
            Some("'null'")
        );
        assert!(service_name.inverted_index);

        let http_method = &hints[1];
        assert_eq!(http_method.path, vec!["http", "method"]);
        assert_eq!(http_method.data_type, DataType::String(None));
        assert!(!http_method.nullable);
        assert!(!http_method.inverted_index);

        let status_code = &hints[2];
        assert_eq!(status_code.path, vec!["status_code"]);
        assert_eq!(status_code.data_type, DataType::BigInt(None));
        assert!(!status_code.nullable);

        let comment = &hints[3];
        assert_eq!(comment.path, vec!["comment"]);
        assert_eq!(comment.data_type, DataType::String(None));
        assert!(comment.nullable);
    }

    /// A hint without a nullability option is nullable.
    #[test]
    fn test_parse_json2_type_hint_default_nullable() {
        let column = parse_json2_column(
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (http.method STRING),
    ts TIMESTAMP TIME INDEX,
)"#,
        );

        let hints = column.extensions.json_type_hints;
        assert_eq!(hints.len(), 1);
        assert!(hints[0].nullable);
    }

    /// Quoted segments keep embedded dots; unquoted dots separate segments.
    #[test]
    fn test_parse_json2_type_hint_quoted_path_segments() {
        let column = parse_json2_column(
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (
        "a".b STRING,
        "x"."y" STRING,
        "a.b"."c" STRING,
        a."b.c" STRING
    ),
    ts TIMESTAMP TIME INDEX,
)"#,
        );

        let hints = column.extensions.json_type_hints;
        assert_eq!(hints.len(), 4);

        let expected_paths = [
            vec!["a", "b"],
            vec!["x", "y"],
            vec!["a.b", "c"],
            vec!["a", "b.c"],
        ];
        for (hint, expected) in hints.iter().zip(expected_paths) {
            assert_eq!(hint.path, expected);
        }
    }

    /// Integer and float spellings collapse onto the widest type of their family.
    #[test]
    fn test_parse_json2_type_hint_normalizes_numeric_types() {
        let column = parse_json2_column(
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (
        tinyint_value TINYINT,
        smallint_value SMALLINT,
        int_value INT,
        integer_value INTEGER,
        bigint_value BIGINT,
        int64_value INT64,
        tinyuint_value TINYINT UNSIGNED,
        smalluint_value SMALLINT UNSIGNED,
        uint_value INT UNSIGNED,
        uint64_value UINT64,
        float_value FLOAT,
        real_value REAL,
        double_value DOUBLE,
        float64_value FLOAT64
    ),
    ts TIMESTAMP TIME INDEX,
)"#,
        );

        let hints = column.extensions.json_type_hints;
        assert_eq!(hints.len(), 14);

        // The length check above guarantees these slice bounds are valid.
        for signed in &hints[..6] {
            assert_eq!(signed.data_type, DataType::BigInt(None));
        }
        for unsigned in &hints[6..10] {
            assert_eq!(unsigned.data_type, DataType::BigIntUnsigned(None));
        }
        for float in &hints[10..] {
            assert_eq!(float.data_type, DataType::Double(ExactNumberInfo::None));
        }
    }

    /// Literals carrying an explicit sign are accepted as defaults.
    #[test]
    fn test_parse_json2_type_hint_default_accepts_signed_literals() {
        let column = parse_json2_column(
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (
        negative_int INT64 DEFAULT -5,
        positive_float FLOAT64 DEFAULT +1.5
    ),
    ts TIMESTAMP TIME INDEX,
)"#,
        );

        let hints = column.extensions.json_type_hints;
        assert_eq!(hints.len(), 2);

        for (hint, expected) in hints.iter().zip(["-5", "+1.5"]) {
            assert_eq!(
                hint.default.as_ref().map(ToString::to_string).as_deref(),
                Some(expected)
            );
        }
    }

    /// A function call is not a literal and is rejected as a default.
    #[test]
    fn test_parse_json2_type_hint_default_rejects_function() {
        let message = parse_error_message(
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (status_code INT64 DEFAULT abs(-1)),
    ts TIMESTAMP TIME INDEX,
)"#,
        );

        assert!(message.contains("DEFAULT only supports literal values"));
    }

    /// The same path may not be hinted twice.
    #[test]
    fn test_parse_json2_type_hint_rejects_duplicate_path() {
        let message = parse_error_message(
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (a.b STRING, a.b INT64),
    ts TIMESTAMP TIME INDEX,
)"#,
        );

        assert!(message.contains("duplicated"));
    }

    /// A path and one of its descendants may not both be hinted.
    #[test]
    fn test_parse_json2_type_hint_rejects_parent_child_path() {
        let message = parse_error_message(
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (a STRING, a.b INT64),
    ts TIMESTAMP TIME INDEX,
)"#,
        );

        assert!(message.contains("conflicts"));
    }

    /// Any second nullability option is rejected, whether or not it agrees with the first.
    #[test]
    fn test_parse_json2_type_hint_rejects_duplicated_nullability() {
        let rejected = [
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (a STRING NULL NULL),
    ts TIMESTAMP TIME INDEX,
)"#,
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (a STRING NOT NULL NOT NULL),
    ts TIMESTAMP TIME INDEX,
)"#,
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (a STRING NOT NULL NULL),
    ts TIMESTAMP TIME INDEX,
)"#,
            r#"
CREATE TABLE traces (
    log_json_data JSON2 (a STRING NULL NOT NULL),
    ts TIMESTAMP TIME INDEX,
)"#,
        ];

        for sql in rejected {
            let message = parse_error_message(sql);
            assert!(message.contains("NULL/NOT NULL option already specified"));
        }
    }
}