mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-05 21:02:58 +00:00
feat: new datatypes subcrate based on the official arrow (#705)
* feat: Init datatypes2 crate * chore: Remove some unimplemented types * feat: Implements PrimitiveType and PrimitiveVector for datatypes2 (#633) * feat: Implement primitive types and vectors * feat: Implement a wrapper type * feat: Remove VectorType from ScalarRef * feat: Move some trait bound from NativeType to WrapperType * feat: pub use primitive vectors and builders * feat: Returns error in try_from when type mismatch * feat: Impl PartialEq for some vectors * test: Pass vector tests * chore: Add license header * test: Pass more vector tests * feat: Implement some methods of vector Helper * test: Pass more tests * style: Fix clippy * chore: Add license header * feat: Remove IntoValueRef trait * feat: Add NativeType trait bound to WrapperType::Native * docs: Explain what is wrapper type * chore: Fix typos * refactor: LogicalPrimitiveType::type_name returns str * feat: Implements DateType and DateVector (#651) * feat: Implement DateType and DateVector * test: Pass more value and data type tests * chore: Address CR comments * test: Skip list value test * feat: datatypes2 datetime (#661) * feat: impl DateTime type and vector * fix: add license header * fix: CR comments and add more tests * fix: customized serialization for wrapper type * feat: Implements NullType and NullVector (#658) * feat: Implements NullType and NullVector * chore: Address CR comment Co-authored-by: Ruihang Xia <waynestxia@gmail.com> * chore: Address CR comment Co-authored-by: Ruihang Xia <waynestxia@gmail.com> * feat: Implements StringType and StringVector (#659) * feat: implement string vector Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * add more test and from Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix clippy Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * cover NUL Signed-off-by: Ruihang Xia <waynestxia@gmail.com> Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * feat: impl datatypes2/timestamp (#686) * feat: add timestamp datatype and vectors * fix: cr comments and 
reformat code * chore: add some tests * feat: Implements ListType and ListVector (#681) * feat: Implement ListType and ListVector * test: Pass more tests * style: Fix clippy * chore: Fix comment * chore: Address CR comments * feat: impl constant vector (#680) * feat: impl constant vector Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix tests Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * Apply suggestions from code review Co-authored-by: Yingwen <realevenyag@gmail.com> * rename fn names Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * remove println Signed-off-by: Ruihang Xia <waynestxia@gmail.com> Signed-off-by: Ruihang Xia <waynestxia@gmail.com> Co-authored-by: Yingwen <realevenyag@gmail.com> * feat: Implements Validity (#684) * feat: Implements Validity * chore: remove pub from sub mod in vectors * feat: Implements schema for datatypes2 (#695) * feat: Add is_timestamp_compatible to DataType * feat: Implement ColumnSchema and Schema * feat: Impl RawSchema * chore: Remove useless codes and run more tests * chore: Fix clippy * feat: Impl from_arrow_time_unit and pass schema tests * chore: add more tests for timestamp (#702) * chore: add more tests for timestamp * chore: add replicate test for timestamps * feat: Implements helper methods for vectors/values (#703) * feat: Implement helper methods for vectors/values * chore: Address CR comments * chore: add more test for timestamp Signed-off-by: Ruihang Xia <waynestxia@gmail.com> Co-authored-by: evenyag <realevenyag@gmail.com> Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com> Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
248
Cargo.lock
generated
248
Cargo.lock
generated
@@ -40,6 +40,19 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"const-random",
|
||||
"getrandom 0.2.7",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.19"
|
||||
@@ -182,8 +195,8 @@ dependencies = [
|
||||
"bitflags",
|
||||
"chrono",
|
||||
"csv",
|
||||
"flatbuffers",
|
||||
"half",
|
||||
"flatbuffers 2.1.1",
|
||||
"half 1.8.2",
|
||||
"hex",
|
||||
"indexmap",
|
||||
"lazy_static",
|
||||
@@ -197,6 +210,72 @@ dependencies = [
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e24e2bcd431a4aa0ff003fdd2dc21c78cfb42f31459c89d2312c2746fe17a5ac"
|
||||
dependencies = [
|
||||
"ahash 0.8.2",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"bitflags",
|
||||
"chrono",
|
||||
"csv",
|
||||
"flatbuffers 22.9.29",
|
||||
"half 2.1.0",
|
||||
"hashbrown",
|
||||
"indexmap",
|
||||
"lazy_static",
|
||||
"lexical-core",
|
||||
"multiversion",
|
||||
"num",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-array"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9044300874385f19e77cbf90911e239bd23630d8f23bb0f948f9067998a13b7"
|
||||
dependencies = [
|
||||
"ahash 0.8.2",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"half 2.1.0",
|
||||
"hashbrown",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-buffer"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78476cbe9e3f808dcecab86afe42d573863c63e149c62e6e379ed2522743e626"
|
||||
dependencies = [
|
||||
"half 2.1.0",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-data"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d916feee158c485dad4f701cba31bc9a90a8db87d9df8e2aa8adc0c20a2bbb9"
|
||||
dependencies = [
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
"half 2.1.0",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-format"
|
||||
version = "0.4.0"
|
||||
@@ -207,13 +286,32 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-schema"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f9406eb7834ca6bd8350d1baa515d18b9fcec487eddacfb62f5e19511f7bd37"
|
||||
|
||||
[[package]]
|
||||
name = "arrow-select"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6593a01586751c74498495d2f5a01fcd438102b52965c11dd98abf4ebcacef37"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow2"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2e387b20dd573a96f36b173d9027483898f944d696521afd74e2caa3c813d86e"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
"arrow-format",
|
||||
"base64",
|
||||
"bytemuck",
|
||||
@@ -551,7 +649,7 @@ checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
||||
name = "benchmarks"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow 10.0.0",
|
||||
"clap 4.0.18",
|
||||
"client",
|
||||
"indicatif",
|
||||
@@ -961,7 +1059,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"half",
|
||||
"half 1.8.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1207,7 +1305,7 @@ dependencies = [
|
||||
"common-function-macro",
|
||||
"common-query",
|
||||
"common-time",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datatypes",
|
||||
"libc",
|
||||
"num",
|
||||
@@ -1283,7 +1381,7 @@ dependencies = [
|
||||
"common-recordbatch",
|
||||
"common-time",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datafusion-expr",
|
||||
"datatypes",
|
||||
"snafu",
|
||||
@@ -1297,7 +1395,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datatypes",
|
||||
"futures",
|
||||
"paste",
|
||||
@@ -1412,6 +1510,28 @@ dependencies = [
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-random"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e"
|
||||
dependencies = [
|
||||
"const-random-macro",
|
||||
"proc-macro-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-random-macro"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb"
|
||||
dependencies = [
|
||||
"getrandom 0.2.7",
|
||||
"once_cell",
|
||||
"proc-macro-hack",
|
||||
"tiny-keccak",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.5"
|
||||
@@ -1724,12 +1844,12 @@ name = "datafusion"
|
||||
version = "7.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?branch=arrow2#744b2626081db95a254fc882820fc7812f95aa51"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
"arrow2",
|
||||
"async-trait",
|
||||
"chrono",
|
||||
"comfy-table 5.0.1",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datafusion-expr",
|
||||
"datafusion-physical-expr",
|
||||
"futures",
|
||||
@@ -1744,7 +1864,7 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
"rand 0.8.5",
|
||||
"smallvec",
|
||||
"sqlparser",
|
||||
"sqlparser 0.15.0",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -1758,7 +1878,19 @@ dependencies = [
|
||||
"arrow2",
|
||||
"ordered-float 2.10.0",
|
||||
"parquet2",
|
||||
"sqlparser",
|
||||
"sqlparser 0.15.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-common"
|
||||
version = "14.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "15f1ffcbc1f040c9ab99f41db1c743d95aff267bb2e7286aaa010738b7402251"
|
||||
dependencies = [
|
||||
"arrow 26.0.0",
|
||||
"chrono",
|
||||
"ordered-float 3.1.0",
|
||||
"sqlparser 0.26.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1766,10 +1898,10 @@ name = "datafusion-expr"
|
||||
version = "7.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?branch=arrow2#744b2626081db95a254fc882820fc7812f95aa51"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
"arrow2",
|
||||
"datafusion-common",
|
||||
"sqlparser",
|
||||
"datafusion-common 7.0.0",
|
||||
"sqlparser 0.15.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1777,12 +1909,12 @@ name = "datafusion-physical-expr"
|
||||
version = "7.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?branch=arrow2#744b2626081db95a254fc882820fc7812f95aa51"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
"arrow2",
|
||||
"blake2",
|
||||
"blake3",
|
||||
"chrono",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datafusion-expr",
|
||||
"hashbrown",
|
||||
"lazy_static",
|
||||
@@ -1818,7 +1950,7 @@ dependencies = [
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datatypes",
|
||||
"frontend",
|
||||
"futures",
|
||||
@@ -1857,7 +1989,26 @@ dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
"common-time",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"enum_dispatch",
|
||||
"num",
|
||||
"num-traits",
|
||||
"ordered-float 3.1.0",
|
||||
"paste",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"snafu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datatypes2"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"arrow 26.0.0",
|
||||
"common-base",
|
||||
"common-error",
|
||||
"common-time",
|
||||
"datafusion-common 14.0.0",
|
||||
"enum_dispatch",
|
||||
"num",
|
||||
"num-traits",
|
||||
@@ -2159,6 +2310,16 @@ dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flatbuffers"
|
||||
version = "22.9.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ce016b9901aef3579617931fbb2df8fc9a9f7cb95a16eb8acc8148209bb9e70"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.24"
|
||||
@@ -2215,7 +2376,7 @@ dependencies = [
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datafusion-expr",
|
||||
"datanode",
|
||||
"datatypes",
|
||||
@@ -2235,7 +2396,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu",
|
||||
"sql",
|
||||
"sqlparser",
|
||||
"sqlparser 0.15.0",
|
||||
"store-api",
|
||||
"table",
|
||||
"tempdir",
|
||||
@@ -2517,6 +2678,16 @@ version = "1.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554"
|
||||
dependencies = [
|
||||
"crunchy",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hash_hasher"
|
||||
version = "2.0.3"
|
||||
@@ -2529,7 +2700,7 @@ version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3218,7 +3389,7 @@ version = "0.20.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b9b8653cec6897f73b519a43fba5ee3d50f62fe9af80b428accdcc093b4a849"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
"metrics-macros",
|
||||
"portable-atomic",
|
||||
]
|
||||
@@ -3324,7 +3495,7 @@ dependencies = [
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datatypes",
|
||||
"futures",
|
||||
"log-store",
|
||||
@@ -3884,7 +4055,7 @@ version = "10.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53e9c8fc20af9b92d85d42ec86e5217b2eaf1340fbba75c4b4296de764ea7921"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow 10.0.0",
|
||||
"base64",
|
||||
"brotli",
|
||||
"byteorder",
|
||||
@@ -4504,7 +4675,7 @@ dependencies = [
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datafusion-physical-expr",
|
||||
"datatypes",
|
||||
"format_num",
|
||||
@@ -5035,7 +5206,7 @@ name = "rustpython-compiler-core"
|
||||
version = "0.1.2"
|
||||
source = "git+https://github.com/RustPython/RustPython?rev=02a1d1d#02a1d1d7db57afbb78049599c2585cc7cd59e6d3"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
"indexmap",
|
||||
"itertools",
|
||||
"log",
|
||||
@@ -5077,7 +5248,7 @@ name = "rustpython-parser"
|
||||
version = "0.1.2"
|
||||
source = "git+https://github.com/RustPython/RustPython?rev=02a1d1d#02a1d1d7db57afbb78049599c2585cc7cd59e6d3"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
"lalrpop-util",
|
||||
"log",
|
||||
"num-bigint",
|
||||
@@ -5106,7 +5277,7 @@ version = "0.1.2"
|
||||
source = "git+https://github.com/RustPython/RustPython?rev=02a1d1d#02a1d1d7db57afbb78049599c2585cc7cd59e6d3"
|
||||
dependencies = [
|
||||
"adler32",
|
||||
"ahash",
|
||||
"ahash 0.7.6",
|
||||
"ascii",
|
||||
"atty",
|
||||
"bitflags",
|
||||
@@ -5118,7 +5289,7 @@ dependencies = [
|
||||
"exitcode",
|
||||
"flate2",
|
||||
"getrandom 0.2.7",
|
||||
"half",
|
||||
"half 1.8.2",
|
||||
"hex",
|
||||
"hexf-parse",
|
||||
"indexmap",
|
||||
@@ -5343,7 +5514,7 @@ dependencies = [
|
||||
"common-time",
|
||||
"console",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datafusion-expr",
|
||||
"datafusion-physical-expr",
|
||||
"datatypes",
|
||||
@@ -5428,7 +5599,7 @@ version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
|
||||
dependencies = [
|
||||
"half",
|
||||
"half 1.8.2",
|
||||
"serde",
|
||||
]
|
||||
|
||||
@@ -5775,7 +5946,7 @@ dependencies = [
|
||||
"mito",
|
||||
"once_cell",
|
||||
"snafu",
|
||||
"sqlparser",
|
||||
"sqlparser 0.15.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5813,6 +5984,15 @@ dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sre-engine"
|
||||
version = "0.1.2"
|
||||
@@ -6118,7 +6298,7 @@ dependencies = [
|
||||
"common-recordbatch",
|
||||
"common-telemetry",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-common 7.0.0",
|
||||
"datafusion-expr",
|
||||
"datatypes",
|
||||
"derive_builder",
|
||||
|
||||
@@ -20,6 +20,7 @@ members = [
|
||||
"src/common/time",
|
||||
"src/datanode",
|
||||
"src/datatypes",
|
||||
"src/datatypes2",
|
||||
"src/frontend",
|
||||
"src/log-store",
|
||||
"src/meta-client",
|
||||
|
||||
@@ -23,6 +23,7 @@ use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
|
||||
// TODO(yingwen): We should hold vectors in the RecordBatch.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct RecordBatch {
|
||||
pub schema: SchemaRef,
|
||||
@@ -103,6 +104,7 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
|
||||
} else {
|
||||
let mut row = Vec::with_capacity(self.columns);
|
||||
|
||||
// TODO(yingwen): Get from the vector if RecordBatch also holds vectors.
|
||||
for col in 0..self.columns {
|
||||
let column_array = self.record_batch.df_recordbatch.column(col);
|
||||
match arrow_array_get(column_array.as_ref(), self.row_cursor)
|
||||
|
||||
@@ -147,6 +147,18 @@ impl From<i64> for Timestamp {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Timestamp> for i64 {
|
||||
fn from(t: Timestamp) -> Self {
|
||||
t.value
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a [`Timestamp`] into a JSON string value.
///
/// The string content is whatever `Timestamp::to_iso8601_string` produces.
impl From<Timestamp> for serde_json::Value {
    fn from(ts: Timestamp) -> Self {
        let iso8601 = ts.to_iso8601_string();
        Self::String(iso8601)
    }
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum TimeUnit {
|
||||
Second,
|
||||
@@ -197,6 +209,7 @@ impl Hash for Timestamp {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use chrono::Offset;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -318,4 +331,39 @@ mod tests {
|
||||
let ts = Timestamp::from_millis(ts_millis);
|
||||
assert_eq!("1969-12-31 23:59:58.999+0000", ts.to_iso8601_string());
|
||||
}
|
||||
|
||||
/// A `Timestamp` with value 1 converts into a JSON string holding the expected
/// ISO 8601 text for each supported time unit.
#[test]
fn test_serialize_to_json_value() {
    // (expected rendering, unit of the timestamp whose value is 1)
    let cases = [
        ("1970-01-01 00:00:01+0000", TimeUnit::Second),
        ("1970-01-01 00:00:00.001+0000", TimeUnit::Millisecond),
        ("1970-01-01 00:00:00.000001+0000", TimeUnit::Microsecond),
        ("1970-01-01 00:00:00.000000001+0000", TimeUnit::Nanosecond),
    ];

    for &(expected, unit) in &cases {
        let rendered = match serde_json::Value::from(Timestamp::new(1, unit)) {
            Value::String(s) => s,
            // The conversion only ever builds `Value::String`.
            _ => unreachable!(),
        };
        assert_eq!(expected, rendered);
    }
}
|
||||
}
|
||||
|
||||
24
src/datatypes2/Cargo.toml
Normal file
24
src/datatypes2/Cargo.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "datatypes2"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
test = []
|
||||
|
||||
[dependencies]
|
||||
common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion-common = "14.0"
|
||||
enum_dispatch = "0.3"
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
ordered-float = { version = "3.0", features = ["serde"] }
|
||||
paste = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
arrow = "26.0"
|
||||
242
src/datatypes2/src/arrow_array.rs
Normal file
242
src/datatypes2/src/arrow_array.rs
Normal file
@@ -0,0 +1,242 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::array::{
|
||||
Array, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
|
||||
Int32Array, Int64Array, Int8Array, ListArray, UInt16Array, UInt32Array, UInt64Array,
|
||||
UInt8Array,
|
||||
};
|
||||
use arrow::datatypes::DataType;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{ConversionSnafu, Result};
|
||||
use crate::value::{ListValue, Value};
|
||||
|
||||
/// Alias for Arrow's `LargeBinaryArray`.
pub type BinaryArray = arrow::array::LargeBinaryArray;
|
||||
/// Alias for Arrow's `LargeBinaryBuilder`, the builder counterpart of [`BinaryArray`].
pub type MutableBinaryArray = arrow::array::LargeBinaryBuilder;
|
||||
/// Alias for Arrow's `StringArray`.
pub type StringArray = arrow::array::StringArray;
|
||||
/// Alias for Arrow's `StringBuilder`, the builder counterpart of [`StringArray`].
pub type MutableStringArray = arrow::array::StringBuilder;
|
||||
|
||||
/// Downcasts `$arr` to a reference of the concrete array type `$CastType`.
///
/// `$arr` must expose `as_any()` and `data_type()`. When the downcast fails, the
/// enclosing function returns early (through `?`) with a `ConversionSnafu` error
/// whose `from` field is the debug rendering of the array's data type.
macro_rules! cast_array {
    ($arr: ident, $CastType: ty) => {{
        let downcast = $arr.as_any().downcast_ref::<$CastType>();
        downcast.with_context(|| ConversionSnafu {
            from: format!("{:?}", $arr.data_type()),
        })?
    }};
}
|
||||
|
||||
// TODO(yingwen): Remove this function.
|
||||
pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
|
||||
if array.is_null(idx) {
|
||||
return Ok(Value::Null);
|
||||
}
|
||||
|
||||
let result = match array.data_type() {
|
||||
DataType::Null => Value::Null,
|
||||
DataType::Boolean => Value::Boolean(cast_array!(array, BooleanArray).value(idx)),
|
||||
DataType::Binary => Value::Binary(cast_array!(array, BinaryArray).value(idx).into()),
|
||||
DataType::Int8 => Value::Int8(cast_array!(array, Int8Array).value(idx)),
|
||||
DataType::Int16 => Value::Int16(cast_array!(array, Int16Array).value(idx)),
|
||||
DataType::Int32 => Value::Int32(cast_array!(array, Int32Array).value(idx)),
|
||||
DataType::Int64 => Value::Int64(cast_array!(array, Int64Array).value(idx)),
|
||||
DataType::UInt8 => Value::UInt8(cast_array!(array, UInt8Array).value(idx)),
|
||||
DataType::UInt16 => Value::UInt16(cast_array!(array, UInt16Array).value(idx)),
|
||||
DataType::UInt32 => Value::UInt32(cast_array!(array, UInt32Array).value(idx)),
|
||||
DataType::UInt64 => Value::UInt64(cast_array!(array, UInt64Array).value(idx)),
|
||||
DataType::Float32 => Value::Float32(cast_array!(array, Float32Array).value(idx).into()),
|
||||
DataType::Float64 => Value::Float64(cast_array!(array, Float64Array).value(idx).into()),
|
||||
DataType::Utf8 => Value::String(cast_array!(array, StringArray).value(idx).into()),
|
||||
DataType::Date32 => Value::Date(cast_array!(array, Date32Array).value(idx).into()),
|
||||
DataType::Date64 => Value::DateTime(cast_array!(array, Date64Array).value(idx).into()),
|
||||
DataType::Timestamp(t, _) => match t {
|
||||
arrow::datatypes::TimeUnit::Second => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampSecondArray).value(idx),
|
||||
TimeUnit::Second,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Millisecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampMillisecondArray).value(idx),
|
||||
TimeUnit::Millisecond,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Microsecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampMicrosecondArray).value(idx),
|
||||
TimeUnit::Microsecond,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Nanosecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampNanosecondArray).value(idx),
|
||||
TimeUnit::Nanosecond,
|
||||
)),
|
||||
},
|
||||
DataType::List(_) => {
|
||||
let array = cast_array!(array, ListArray).value(idx);
|
||||
let item_type = ConcreteDataType::try_from(array.data_type())?;
|
||||
let values = (0..array.len())
|
||||
.map(|i| arrow_array_get(&*array, i))
|
||||
.collect::<Result<Vec<Value>>>()?;
|
||||
Value::List(ListValue::new(Some(Box::new(values)), item_type))
|
||||
}
|
||||
_ => unimplemented!("Arrow array datatype: {:?}", array.data_type()),
|
||||
};
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// Unit tests for `arrow_array_get`.
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow::array::{
        BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
        LargeBinaryArray, TimestampMicrosecondArray, TimestampMillisecondArray,
        TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array,
        UInt8Array,
    };
    use arrow::datatypes::Int32Type;
    use common_time::timestamp::{TimeUnit, Timestamp};
    use paste::paste;

    use super::*;
    use crate::data_type::ConcreteDataType;
    use crate::types::TimestampType;

    // For every listed time unit: builds a three-element timestamp array of that unit,
    // reads element 1 through `arrow_array_get` and checks the data type of the result.
    macro_rules! test_arrow_array_get_for_timestamps {
        ( $($unit: ident), *) => {
            $(
                paste! {
                    let mut ts_builder = arrow::array::[<Timestamp $unit Array>]::builder(3);
                    ts_builder.append_value(1);
                    ts_builder.append_value(0);
                    ts_builder.append_value(-1);
                    let ts_array: Arc<dyn Array> = Arc::new(ts_builder.finish());
                    let value = arrow_array_get(&ts_array, 1).unwrap();
                    let expected_type = ConcreteDataType::Timestamp(TimestampType::$unit(
                        $crate::types::[<Timestamp $unit Type>]::default(),
                    ));
                    assert_eq!(expected_type, value.data_type());
                }
            )*
        };
    }

    #[test]
    fn test_timestamp_array() {
        test_arrow_array_get_for_timestamps![Second, Millisecond, Microsecond, Nanosecond];
    }

    #[test]
    fn test_arrow_array_access() {
        // Checks that a list value has Int32 items equal to `expected`.
        fn assert_int32_list(value: Value, expected: Vec<Value>) {
            match value {
                Value::List(list) => {
                    assert!(matches!(list.datatype(), ConcreteDataType::Int32(_)));
                    let items = list.items().as_ref().unwrap();
                    assert_eq!(**items, expected);
                }
                _ => unreachable!(),
            }
        }

        // Builds `[1, 2, 3, 4]` for each integer array type and expects element 1 to be 2.
        macro_rules! assert_integer_get {
            ($($Array: ident => $Variant: ident),* $(,)?) => {
                $(
                    let integers = $Array::from(vec![1, 2, 3, 4]);
                    assert_eq!(Value::$Variant(2), arrow_array_get(&integers, 1).unwrap());
                )*
            };
        }

        // Builds `[1, 2, 3]` for each timestamp array type and expects element 1 to be 2.
        macro_rules! assert_timestamp_get {
            ($($Array: ident => $unit: ident),* $(,)?) => {
                $(
                    let timestamps = $Array::from(vec![1, 2, 3]);
                    let value = arrow_array_get(&timestamps, 1).unwrap();
                    assert_eq!(
                        value,
                        Value::Timestamp(Timestamp::new(2, TimeUnit::$unit))
                    );
                )*
            };
        }

        let booleans = BooleanArray::from(vec![true, true, false, false]);
        assert_eq!(Value::Boolean(true), arrow_array_get(&booleans, 1).unwrap());

        assert_integer_get!(
            Int8Array => Int8,
            UInt8Array => UInt8,
            Int16Array => Int16,
            UInt16Array => UInt16,
            Int32Array => Int32,
            UInt32Array => UInt32,
            Int64Array => Int64,
            UInt64Array => UInt64,
        );

        let floats32 = Float32Array::from(vec![1f32, 2f32, 3f32, 4f32]);
        assert_eq!(
            Value::Float32(2f32.into()),
            arrow_array_get(&floats32, 1).unwrap()
        );
        let floats64 = Float64Array::from(vec![1f64, 2f64, 3f64, 4f64]);
        assert_eq!(
            Value::Float64(2f64.into()),
            arrow_array_get(&floats64, 1).unwrap()
        );

        let strings = StringArray::from(vec![Some("hello"), None, Some("world")]);
        assert_eq!(
            Value::String("hello".into()),
            arrow_array_get(&strings, 0).unwrap()
        );
        assert_eq!(Value::Null, arrow_array_get(&strings, 1).unwrap());

        // Only the null slot of the binary array is checked here.
        let binaries = LargeBinaryArray::from(vec![
            Some("hello".as_bytes()),
            None,
            Some("world".as_bytes()),
        ]);
        assert_eq!(Value::Null, arrow_array_get(&binaries, 1).unwrap());

        assert_timestamp_get!(
            TimestampSecondArray => Second,
            TimestampMillisecondArray => Millisecond,
            TimestampMicrosecondArray => Microsecond,
            TimestampNanosecondArray => Nanosecond,
        );

        // test list array
        let list_data = vec![
            Some(vec![Some(1), Some(2), Some(3)]),
            None,
            Some(vec![Some(4), None, Some(6)]),
        ];
        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(list_data);

        let first = arrow_array_get(&list_array, 0).unwrap();
        assert_int32_list(
            first,
            vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)],
        );

        assert_eq!(Value::Null, arrow_array_get(&list_array, 1).unwrap());

        let third = arrow_array_get(&list_array, 2).unwrap();
        assert_int32_list(third, vec![Value::Int32(4), Value::Null, Value::Int32(6)]);
    }
}
|
||||
486
src/datatypes2/src/data_type.rs
Normal file
486
src/datatypes2/src/data_type.rs
Normal file
@@ -0,0 +1,486 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{
|
||||
BinaryType, BooleanType, DateTimeType, DateType, Float32Type, Float64Type, Int16Type,
|
||||
Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::MutableVector;
|
||||
|
||||
/// The set of concrete data types supported by this crate.
///
/// Each variant wraps the matching type struct imported from `crate::types`.
/// The `enum_dispatch` attribute below names the `DataType` trait, so the
/// trait's methods can be called directly on a `ConcreteDataType` value.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[enum_dispatch::enum_dispatch(DataType)]
|
||||
pub enum ConcreteDataType {
|
||||
Null(NullType),
|
||||
Boolean(BooleanType),
|
||||
|
||||
// Numeric types:
|
||||
Int8(Int8Type),
|
||||
Int16(Int16Type),
|
||||
Int32(Int32Type),
|
||||
Int64(Int64Type),
|
||||
UInt8(UInt8Type),
|
||||
UInt16(UInt16Type),
|
||||
UInt32(UInt32Type),
|
||||
UInt64(UInt64Type),
|
||||
Float32(Float32Type),
|
||||
Float64(Float64Type),
|
||||
|
||||
// String types:
|
||||
Binary(BinaryType),
|
||||
String(StringType),
|
||||
|
||||
// Date types:
|
||||
Date(DateType),
|
||||
DateTime(DateTimeType),
// `TimestampType` is itself an enum with one variant per time unit
// (see the `timestamp_*_datatype()` constructors).
Timestamp(TimestampType),
|
||||
|
||||
// Compound types:
|
||||
// `ListType` is constructed from the data type of its items.
List(ListType),
|
||||
}
|
||||
|
||||
// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
|
||||
// returning all these properties to the `DataType` trait
|
||||
impl ConcreteDataType {
|
||||
pub fn is_float(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn is_boolean(&self) -> bool {
|
||||
matches!(self, ConcreteDataType::Boolean(_))
|
||||
}
|
||||
|
||||
pub fn is_stringifiable(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ConcreteDataType::String(_)
|
||||
| ConcreteDataType::Date(_)
|
||||
| ConcreteDataType::DateTime(_)
|
||||
| ConcreteDataType::Timestamp(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn is_signed(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ConcreteDataType::Int8(_)
|
||||
| ConcreteDataType::Int16(_)
|
||||
| ConcreteDataType::Int32(_)
|
||||
| ConcreteDataType::Int64(_)
|
||||
| ConcreteDataType::Date(_)
|
||||
| ConcreteDataType::DateTime(_)
|
||||
| ConcreteDataType::Timestamp(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn is_unsigned(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ConcreteDataType::UInt8(_)
|
||||
| ConcreteDataType::UInt16(_)
|
||||
| ConcreteDataType::UInt32(_)
|
||||
| ConcreteDataType::UInt64(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn numerics() -> Vec<ConcreteDataType> {
|
||||
vec![
|
||||
ConcreteDataType::int8_datatype(),
|
||||
ConcreteDataType::int16_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::uint8_datatype(),
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
]
|
||||
}
|
||||
|
||||
/// Convert arrow data type to [ConcreteDataType].
|
||||
///
|
||||
/// # Panics
|
||||
/// Panic if given arrow data type is not supported.
|
||||
pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
|
||||
ConcreteDataType::try_from(dt).expect("Unimplemented type")
|
||||
}
|
||||
|
||||
pub fn is_null(&self) -> bool {
|
||||
matches!(self, ConcreteDataType::Null(NullType))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ArrowDataType> for ConcreteDataType {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
|
||||
let concrete_type = match dt {
|
||||
ArrowDataType::Null => Self::null_datatype(),
|
||||
ArrowDataType::Boolean => Self::boolean_datatype(),
|
||||
ArrowDataType::UInt8 => Self::uint8_datatype(),
|
||||
ArrowDataType::UInt16 => Self::uint16_datatype(),
|
||||
ArrowDataType::UInt32 => Self::uint32_datatype(),
|
||||
ArrowDataType::UInt64 => Self::uint64_datatype(),
|
||||
ArrowDataType::Int8 => Self::int8_datatype(),
|
||||
ArrowDataType::Int16 => Self::int16_datatype(),
|
||||
ArrowDataType::Int32 => Self::int32_datatype(),
|
||||
ArrowDataType::Int64 => Self::int64_datatype(),
|
||||
ArrowDataType::Float32 => Self::float32_datatype(),
|
||||
ArrowDataType::Float64 => Self::float64_datatype(),
|
||||
ArrowDataType::Date32 => Self::date_datatype(),
|
||||
ArrowDataType::Date64 => Self::datetime_datatype(),
|
||||
ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
|
||||
ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
|
||||
ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
|
||||
ArrowDataType::List(field) => Self::List(ListType::new(
|
||||
ConcreteDataType::from_arrow_type(field.data_type()),
|
||||
)),
|
||||
_ => {
|
||||
return error::UnsupportedArrowTypeSnafu {
|
||||
arrow_type: dt.clone(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(concrete_type)
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates one constructor on `ConcreteDataType` per given variant name.
///
/// For every `$Type` in the comma separated list this expands (via `paste!`)
/// to a function named after the lower-cased variant followed by `_datatype`,
/// e.g. `Int8` produces `int8_datatype()`. The generated function wraps the
/// `Default` value of the type struct `<$Type>Type` in the `$Type` variant.
macro_rules! impl_new_concrete_type_functions {
|
||||
($($Type: ident), +) => {
|
||||
paste! {
|
||||
impl ConcreteDataType {
|
||||
$(
|
||||
pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
|
||||
ConcreteDataType::$Type([<$Type Type>]::default())
|
||||
}
|
||||
)+
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Constructors for the variants whose type struct is built with `Default`.
// `Timestamp` and `List` are not listed here; their constructors are written
// by hand in a separate `impl ConcreteDataType` block.
impl_new_concrete_type_functions!(
|
||||
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
|
||||
Binary, Date, DateTime, String
|
||||
);
|
||||
|
||||
impl ConcreteDataType {
|
||||
pub fn timestamp_second_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType::default()))
|
||||
}
|
||||
|
||||
pub fn timestamp_millisecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Millisecond(
|
||||
TimestampMillisecondType::default(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn timestamp_microsecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Microsecond(
|
||||
TimestampMicrosecondType::default(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn timestamp_nanosecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType::default()))
|
||||
}
|
||||
|
||||
pub fn timestamp_datatype(unit: TimeUnit) -> Self {
|
||||
match unit {
|
||||
TimeUnit::Second => Self::timestamp_second_datatype(),
|
||||
TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
|
||||
TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
|
||||
TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts from arrow timestamp unit to
|
||||
pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
|
||||
match t {
|
||||
ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
|
||||
ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
|
||||
ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
|
||||
ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(item_type))
|
||||
}
|
||||
}
|
||||
|
||||
/// Data type abstraction.
///
/// Implementors must be `Debug + Send + Sync`. The trait is annotated with
/// `enum_dispatch`, and `ConcreteDataType` is declared with
/// `enum_dispatch(DataType)`, so these methods are callable on that enum.
|
||||
#[enum_dispatch::enum_dispatch]
|
||||
pub trait DataType: std::fmt::Debug + Send + Sync {
|
||||
/// Name of this data type.
|
||||
fn name(&self) -> &str;
|
||||
|
||||
/// Returns id of the Logical data type.
|
||||
fn logical_type_id(&self) -> LogicalTypeId;
|
||||
|
||||
/// Returns the default value of this type.
|
||||
fn default_value(&self) -> Value;
|
||||
|
||||
/// Converts this type to the corresponding [arrow::datatypes::DataType].
|
||||
fn as_arrow_type(&self) -> ArrowDataType;
|
||||
|
||||
/// Creates a mutable vector with given `capacity` of this type.
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
|
||||
|
||||
/// Returns true if the data type is compatible with timestamp type so we can
|
||||
/// use it as a timestamp.
|
||||
fn is_timestamp_compatible(&self) -> bool;
|
||||
}
|
||||
|
||||
/// Shared, reference counted handle to a [DataType] trait object.
pub type DataTypeRef = Arc<dyn DataType>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::datatypes::Field;

    use super::*;

    /// Asserts that `from_arrow_type()` maps the arrow type to the given
    /// `ConcreteDataType` variant.
    macro_rules! assert_from_arrow {
        ($arrow_type:expr, $variant:ident) => {
            assert!(matches!(
                ConcreteDataType::from_arrow_type(&$arrow_type),
                ConcreteDataType::$variant(_)
            ))
        };
    }

    #[test]
    fn test_concrete_type_as_datatype_trait() {
        // The `DataType` trait methods are callable on the enum itself.
        let boolean_type = ConcreteDataType::boolean_datatype();

        assert_eq!("Boolean", boolean_type.name());
        assert_eq!(Value::Boolean(false), boolean_type.default_value());
        assert_eq!(LogicalTypeId::Boolean, boolean_type.logical_type_id());
        assert_eq!(ArrowDataType::Boolean, boolean_type.as_arrow_type());
    }

    #[test]
    fn test_from_arrow_type() {
        assert_from_arrow!(ArrowDataType::Null, Null);
        assert_from_arrow!(ArrowDataType::Boolean, Boolean);
        assert_from_arrow!(ArrowDataType::Binary, Binary);
        assert_from_arrow!(ArrowDataType::LargeBinary, Binary);
        assert_from_arrow!(ArrowDataType::Int8, Int8);
        assert_from_arrow!(ArrowDataType::Int16, Int16);
        assert_from_arrow!(ArrowDataType::Int32, Int32);
        assert_from_arrow!(ArrowDataType::Int64, Int64);
        assert_from_arrow!(ArrowDataType::UInt8, UInt8);
        assert_from_arrow!(ArrowDataType::UInt16, UInt16);
        assert_from_arrow!(ArrowDataType::UInt32, UInt32);
        assert_from_arrow!(ArrowDataType::UInt64, UInt64);
        assert_from_arrow!(ArrowDataType::Float32, Float32);
        assert_from_arrow!(ArrowDataType::Float64, Float64);
        assert_from_arrow!(ArrowDataType::Utf8, String);

        // A list is converted together with its item type.
        let int32_list =
            ArrowDataType::List(Box::new(Field::new("item", ArrowDataType::Int32, true)));
        assert_eq!(
            ConcreteDataType::from_arrow_type(&int32_list),
            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
        );

        assert_from_arrow!(ArrowDataType::Date32, Date);
    }

    #[test]
    fn test_from_arrow_timestamp() {
        let cases = [
            (
                ConcreteDataType::timestamp_millisecond_datatype(),
                ArrowTimeUnit::Millisecond,
            ),
            (
                ConcreteDataType::timestamp_microsecond_datatype(),
                ArrowTimeUnit::Microsecond,
            ),
            (
                ConcreteDataType::timestamp_nanosecond_datatype(),
                ArrowTimeUnit::Nanosecond,
            ),
            (
                ConcreteDataType::timestamp_second_datatype(),
                ArrowTimeUnit::Second,
            ),
        ];

        for (expected, arrow_unit) in cases {
            assert_eq!(
                expected,
                ConcreteDataType::from_arrow_time_unit(&arrow_unit)
            );
        }
    }

    #[test]
    fn test_is_timestamp_compatible() {
        let compatible = [
            ConcreteDataType::timestamp_datatype(TimeUnit::Second),
            ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond),
            ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond),
            ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::timestamp_nanosecond_datatype(),
            ConcreteDataType::int64_datatype(),
        ];
        for data_type in compatible {
            assert!(data_type.is_timestamp_compatible());
        }

        let incompatible = [
            ConcreteDataType::null_datatype(),
            ConcreteDataType::binary_datatype(),
            ConcreteDataType::boolean_datatype(),
            ConcreteDataType::date_datatype(),
            ConcreteDataType::datetime_datatype(),
            ConcreteDataType::string_datatype(),
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::uint64_datatype(),
        ];
        for data_type in incompatible {
            assert!(!data_type.is_timestamp_compatible());
        }
    }

    #[test]
    fn test_is_null() {
        let null_type = ConcreteDataType::null_datatype();
        let int32_type = ConcreteDataType::int32_datatype();

        assert!(null_type.is_null());
        assert!(!int32_type.is_null());
    }

    #[test]
    fn test_is_float() {
        let int32_type = ConcreteDataType::int32_datatype();
        assert!(!int32_type.is_float());

        for data_type in [
            ConcreteDataType::float32_datatype(),
            ConcreteDataType::float64_datatype(),
        ] {
            assert!(data_type.is_float());
        }
    }

    #[test]
    fn test_is_boolean() {
        for data_type in [
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::float32_datatype(),
        ] {
            assert!(!data_type.is_boolean());
        }

        let boolean_type = ConcreteDataType::boolean_datatype();
        assert!(boolean_type.is_boolean());
    }

    #[test]
    fn test_is_stringifiable() {
        for data_type in [
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::float32_datatype(),
        ] {
            assert!(!data_type.is_stringifiable());
        }

        for data_type in [
            ConcreteDataType::string_datatype(),
            ConcreteDataType::date_datatype(),
            ConcreteDataType::datetime_datatype(),
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::timestamp_nanosecond_datatype(),
        ] {
            assert!(data_type.is_stringifiable());
        }
    }

    #[test]
    fn test_is_signed() {
        for data_type in [
            ConcreteDataType::int8_datatype(),
            ConcreteDataType::int16_datatype(),
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::int64_datatype(),
            ConcreteDataType::date_datatype(),
            ConcreteDataType::datetime_datatype(),
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::timestamp_nanosecond_datatype(),
        ] {
            assert!(data_type.is_signed());
        }

        for data_type in [
            ConcreteDataType::uint8_datatype(),
            ConcreteDataType::uint16_datatype(),
            ConcreteDataType::uint32_datatype(),
            ConcreteDataType::uint64_datatype(),
        ] {
            assert!(!data_type.is_signed());
        }

        for data_type in [
            ConcreteDataType::float32_datatype(),
            ConcreteDataType::float64_datatype(),
        ] {
            assert!(!data_type.is_signed());
        }
    }

    #[test]
    fn test_is_unsigned() {
        for data_type in [
            ConcreteDataType::int8_datatype(),
            ConcreteDataType::int16_datatype(),
            ConcreteDataType::int32_datatype(),
            ConcreteDataType::int64_datatype(),
            ConcreteDataType::date_datatype(),
            ConcreteDataType::datetime_datatype(),
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::timestamp_nanosecond_datatype(),
        ] {
            assert!(!data_type.is_unsigned());
        }

        for data_type in [
            ConcreteDataType::uint8_datatype(),
            ConcreteDataType::uint16_datatype(),
            ConcreteDataType::uint32_datatype(),
            ConcreteDataType::uint64_datatype(),
        ] {
            assert!(data_type.is_unsigned());
        }

        for data_type in [
            ConcreteDataType::float32_datatype(),
            ConcreteDataType::float64_datatype(),
        ] {
            assert!(!data_type.is_unsigned());
        }
    }

    #[test]
    fn test_numerics() {
        assert_eq!(10, ConcreteDataType::numerics().len());
    }
}
|
||||
144
src/datatypes2/src/error.rs
Normal file
144
src/datatypes2/src/error.rs
Normal file
@@ -0,0 +1,144 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_error::prelude::{ErrorCompat, ErrorExt, Snafu, StatusCode};
|
||||
use snafu::Backtrace;
|
||||
|
||||
/// Errors raised by this crate.
///
/// The enum derives `Snafu`; each variant's user facing message is given by
/// its `#[snafu(display(...))]` attribute. Every variant carries a
/// `backtrace` field, and variants that wrap an underlying error keep it in
/// their `source` field.
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to serialize data, source: {}", source))]
|
||||
Serialize {
|
||||
source: serde_json::Error,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to deserialize data, source: {}, json: {}", source, json))]
|
||||
Deserialize {
|
||||
source: serde_json::Error,
|
||||
backtrace: Backtrace,
|
||||
json: String,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert datafusion type: {}", from))]
|
||||
Conversion { from: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Bad array access, Index out of bounds: {}, size: {}", index, size))]
|
||||
BadArrayAccess {
|
||||
index: usize,
|
||||
size: usize,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Unknown vector, {}", msg))]
|
||||
UnknownVector { msg: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unsupported arrow data type, type: {:?}", arrow_type))]
|
||||
UnsupportedArrowType {
|
||||
arrow_type: arrow::datatypes::DataType,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Timestamp column {} not found", name,))]
|
||||
TimestampNotFound { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to parse version in schema meta, value: {}, source: {}",
|
||||
value,
|
||||
source
|
||||
))]
|
||||
ParseSchemaVersion {
|
||||
value: String,
|
||||
source: std::num::ParseIntError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid timestamp index: {}", index))]
|
||||
InvalidTimestampIndex { index: usize, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Duplicate timestamp index, exists: {}, new: {}", exists, new))]
|
||||
DuplicateTimestampIndex {
|
||||
exists: usize,
|
||||
new: usize,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("{}", msg))]
|
||||
CastType { msg: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Arrow failed to compute, source: {}", source))]
|
||||
ArrowCompute {
|
||||
source: arrow::error::ArrowError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported column default constraint expression: {}", expr))]
|
||||
UnsupportedDefaultExpr { expr: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Default value should not be null for non null column"))]
|
||||
NullDefault { backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Incompatible default value type, reason: {}", reason))]
|
||||
DefaultValueType {
|
||||
reason: String,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Duplicated metadata for {}", key))]
|
||||
DuplicateMeta { key: String, backtrace: Backtrace },
|
||||
}
|
||||
|
||||
// Integrates `Error` with the common error extension trait.
impl ErrorExt for Error {
|
||||
// Every variant is reported with the same status code.
fn status_code(&self) -> StatusCode {
|
||||
// Inner encoding and decoding error should not be exposed to users.
|
||||
StatusCode::Internal
|
||||
}
|
||||
|
||||
// Delegates to the backtrace accessor that `ErrorCompat` provides.
fn backtrace_opt(&self) -> Option<&Backtrace> {
|
||||
ErrorCompat::backtrace(self)
|
||||
}
|
||||
|
||||
// Exposes the error as `&dyn Any`.
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Result type whose error is this module's [Error].
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use snafu::ResultExt;

    use super::*;

    #[test]
    pub fn test_error() {
        // The test expects serializing this bool-keyed map to JSON to fail.
        let map = HashMap::from([(true, 1), (false, 2)]);
        let serialize = || serde_json::to_string(&map).context(SerializeSnafu);

        let result = serialize();
        assert!(result.is_err(), "serialize result is: {:?}", result);

        // The resulting error carries a backtrace and maps to `Internal`.
        let err = serialize().unwrap_err();
        assert!(err.backtrace_opt().is_some());
        assert_eq!(StatusCode::Internal, err.status_code());
    }
}
|
||||
33
src/datatypes2/src/lib.rs
Normal file
33
src/datatypes2/src/lib.rs
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(generic_associated_types)]
|
||||
#![feature(assert_matches)]
|
||||
|
||||
pub mod arrow_array;
|
||||
pub mod data_type;
|
||||
pub mod error;
|
||||
pub mod macros;
|
||||
pub mod prelude;
|
||||
mod scalars;
|
||||
pub mod schema;
|
||||
pub mod serialize;
|
||||
mod timestamp;
|
||||
pub mod type_id;
|
||||
pub mod types;
|
||||
pub mod value;
|
||||
pub mod vectors;
|
||||
|
||||
pub use arrow;
|
||||
pub use error::{Error, Result};
|
||||
68
src/datatypes2/src/macros.rs
Normal file
68
src/datatypes2/src/macros.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Some helper macros for datatypes, copied from databend.
|
||||
|
||||
/// Apply the macro rules to all primitive types.
///
/// Invokes `$macro!` once, passing the extra arguments `$x` as a bracketed
/// list followed by one braced entry per primitive Rust type
/// (`i8` through `i64`, `u8` through `u64`, `f32` and `f64`).
|
||||
#[macro_export]
|
||||
macro_rules! for_all_primitive_types {
|
||||
($macro:tt $(, $x:tt)*) => {
|
||||
$macro! {
|
||||
[$($x),*],
|
||||
{ i8 },
|
||||
{ i16 },
|
||||
{ i32 },
|
||||
{ i64 },
|
||||
{ u8 },
|
||||
{ u16 },
|
||||
{ u32 },
|
||||
{ u64 },
|
||||
{ f32 },
|
||||
{ f64 }
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Match the logical type and apply `$body` to all primitive types and
|
||||
/// `$nbody` to other types.
///
/// `$key_type` is an expression evaluating to a `LogicalTypeId`. The
/// `| $_ $T |` part names the identifier that `$body` uses for the type:
/// a local helper macro is defined with that identifier as its parameter and
/// is then invoked with the matching logical primitive type (`Int8Type`,
/// `UInt32Type`, `Float64Type`, ...) in each arm. Any other
/// `LogicalTypeId` evaluates `$nbody`.
|
||||
#[macro_export]
|
||||
macro_rules! with_match_primitive_type_id {
|
||||
($key_type:expr, | $_:tt $T:ident | $body:tt, $nbody:tt) => {{
|
||||
macro_rules! __with_ty__ {
|
||||
( $_ $T:ident ) => {
|
||||
$body
|
||||
};
|
||||
}
|
||||
|
||||
use $crate::type_id::LogicalTypeId;
|
||||
use $crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
|
||||
UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
match $key_type {
|
||||
LogicalTypeId::Int8 => __with_ty__! { Int8Type },
|
||||
LogicalTypeId::Int16 => __with_ty__! { Int16Type },
|
||||
LogicalTypeId::Int32 => __with_ty__! { Int32Type },
|
||||
LogicalTypeId::Int64 => __with_ty__! { Int64Type },
|
||||
LogicalTypeId::UInt8 => __with_ty__! { UInt8Type },
|
||||
LogicalTypeId::UInt16 => __with_ty__! { UInt16Type },
|
||||
LogicalTypeId::UInt32 => __with_ty__! { UInt32Type },
|
||||
LogicalTypeId::UInt64 => __with_ty__! { UInt64Type },
|
||||
LogicalTypeId::Float32 => __with_ty__! { Float32Type },
|
||||
LogicalTypeId::Float64 => __with_ty__! { Float64Type },
|
||||
|
||||
_ => $nbody,
|
||||
}
|
||||
}};
|
||||
}
|
||||
20
src/datatypes2/src/prelude.rs
Normal file
20
src/datatypes2/src/prelude.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub use crate::data_type::{ConcreteDataType, DataType, DataTypeRef};
|
||||
pub use crate::macros::*;
|
||||
pub use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
|
||||
pub use crate::type_id::LogicalTypeId;
|
||||
pub use crate::value::{Value, ValueRef};
|
||||
pub use crate::vectors::{MutableVector, Validity, Vector, VectorRef};
|
||||
443
src/datatypes2/src/scalars.rs
Normal file
443
src/datatypes2/src/scalars.rs
Normal file
@@ -0,0 +1,443 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
|
||||
UInt64Type, UInt8Type,
|
||||
};
|
||||
use crate::value::{ListValue, ListValueRef, Value};
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, ListVector, MutableVector,
|
||||
PrimitiveVector, StringVector, Vector,
|
||||
};
|
||||
|
||||
/// Chooses an initial builder capacity from the iterator's `size_hint()`.
///
/// The upper bound is used whenever the iterator reports one. Without an
/// upper bound the lower bound is used, except that a lower bound of zero
/// falls back to 1024.
fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    if let Some(upper) = upper {
        return upper;
    }

    if lower == 0 {
        1024
    } else {
        lower
    }
}
|
||||
|
||||
/// Owned scalar value
|
||||
/// primitive types, bool, Vec<u8> ...
///
/// The `where` clause ties the borrowed type of the scalar (`RefType`) to the
/// reference item type of its vector (`VectorType::RefItem`): both must be
/// the same type for every lifetime.
|
||||
pub trait Scalar: 'static + Sized + Default + Any
|
||||
where
|
||||
for<'a> Self::VectorType: ScalarVector<RefItem<'a> = Self::RefType<'a>>,
|
||||
{
|
||||
/// The vector type whose owned item is this scalar.
type VectorType: ScalarVector<OwnedItem = Self>;
|
||||
/// The borrowed counterpart of this scalar.
type RefType<'a>: ScalarRef<'a, ScalarType = Self>
|
||||
where
|
||||
Self: 'a;
|
||||
/// Get a reference of the current value.
|
||||
fn as_scalar_ref(&self) -> Self::RefType<'_>;
|
||||
|
||||
/// Upcast GAT type's lifetime.
///
/// Converts a reference with the longer lifetime `'long` into one with
/// the shorter lifetime `'short`.
|
||||
fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short>;
|
||||
}
|
||||
|
||||
/// Borrowed counterpart of a [`Scalar`].
///
/// Implementors are required to be `Copy`, so references can be passed
/// around by value.
pub trait ScalarRef<'a>: std::fmt::Debug + Clone + Copy + Send + 'a {
|
||||
/// The corresponding [`Scalar`] type.
|
||||
type ScalarType: Scalar<RefType<'a> = Self>;
|
||||
|
||||
/// Convert the reference into an owned value.
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType;
|
||||
}
|
||||
|
||||
/// A sub trait of Vector to add scalar operation support.
|
||||
// This implementation refers to Datebend's [ScalarColumn](https://github.com/datafuselabs/databend/blob/main/common/datavalues/src/scalars/type_.rs)
|
||||
// and skyzh's [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust).
|
||||
pub trait ScalarVector: Vector + Send + Sync + Sized + 'static
|
||||
where
|
||||
for<'a> Self::OwnedItem: Scalar<RefType<'a> = Self::RefItem<'a>>,
|
||||
{
|
||||
type OwnedItem: Scalar<VectorType = Self>;
|
||||
/// The reference item of this vector.
|
||||
type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem>
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
/// Iterator type of this vector.
|
||||
type Iter<'a>: Iterator<Item = Option<Self::RefItem<'a>>>
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
/// Builder type to build this vector.
|
||||
type Builder: ScalarVectorBuilder<VectorType = Self>;
|
||||
|
||||
/// Returns the reference to an element at given position.
|
||||
///
|
||||
/// Note: `get()` has bad performance, avoid call this function inside loop.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `idx >= self.len()`.
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;
|
||||
|
||||
/// Returns iterator of current vector.
|
||||
fn iter_data(&self) -> Self::Iter<'_>;
|
||||
|
||||
fn from_slice(data: &[Self::RefItem<'_>]) -> Self {
|
||||
let mut builder = Self::Builder::with_capacity(data.len());
|
||||
for item in data {
|
||||
builder.push(Some(*item));
|
||||
}
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
fn from_iterator<'a>(it: impl Iterator<Item = Self::RefItem<'a>>) -> Self {
|
||||
let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
|
||||
for item in it {
|
||||
builder.push(Some(item));
|
||||
}
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
fn from_owned_iterator(it: impl Iterator<Item = Option<Self::OwnedItem>>) -> Self {
|
||||
let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
|
||||
for item in it {
|
||||
match item {
|
||||
Some(item) => builder.push(Some(item.as_scalar_ref())),
|
||||
None => builder.push(None),
|
||||
}
|
||||
}
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
fn from_vec<I: Into<Self::OwnedItem>>(values: Vec<I>) -> Self {
|
||||
let it = values.into_iter();
|
||||
let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
|
||||
for item in it {
|
||||
builder.push(Some(item.into().as_scalar_ref()));
|
||||
}
|
||||
builder.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait over all vector builders.
|
||||
pub trait ScalarVectorBuilder: MutableVector {
|
||||
type VectorType: ScalarVector<Builder = Self>;
|
||||
|
||||
/// Create a new builder with initial `capacity`.
|
||||
fn with_capacity(capacity: usize) -> Self;
|
||||
|
||||
/// Push a value into the builder.
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>);
|
||||
|
||||
/// Finish build and return a new vector.
|
||||
fn finish(&mut self) -> Self::VectorType;
|
||||
}
|
||||
|
||||
macro_rules! impl_scalar_for_native {
|
||||
($Native: ident, $DataType: ident) => {
|
||||
impl Scalar for $Native {
|
||||
type VectorType = PrimitiveVector<$DataType>;
|
||||
type RefType<'a> = $Native;
|
||||
|
||||
#[inline]
|
||||
fn as_scalar_ref(&self) -> $Native {
|
||||
*self
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_lifetimes)]
|
||||
#[inline]
|
||||
fn upcast_gat<'short, 'long: 'short>(long: $Native) -> $Native {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement [`ScalarRef`] for primitive types. Note that primitive types are both [`Scalar`] and [`ScalarRef`].
|
||||
impl<'a> ScalarRef<'a> for $Native {
|
||||
type ScalarType = $Native;
|
||||
|
||||
#[inline]
|
||||
fn to_owned_scalar(&self) -> $Native {
|
||||
*self
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_scalar_for_native!(u8, UInt8Type);
|
||||
impl_scalar_for_native!(u16, UInt16Type);
|
||||
impl_scalar_for_native!(u32, UInt32Type);
|
||||
impl_scalar_for_native!(u64, UInt64Type);
|
||||
impl_scalar_for_native!(i8, Int8Type);
|
||||
impl_scalar_for_native!(i16, Int16Type);
|
||||
impl_scalar_for_native!(i32, Int32Type);
|
||||
impl_scalar_for_native!(i64, Int64Type);
|
||||
impl_scalar_for_native!(f32, Float32Type);
|
||||
impl_scalar_for_native!(f64, Float64Type);
|
||||
|
||||
impl Scalar for bool {
|
||||
type VectorType = BooleanVector;
|
||||
type RefType<'a> = bool;
|
||||
|
||||
#[inline]
|
||||
fn as_scalar_ref(&self) -> bool {
|
||||
*self
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_lifetimes)]
|
||||
#[inline]
|
||||
fn upcast_gat<'short, 'long: 'short>(long: bool) -> bool {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for bool {
|
||||
type ScalarType = bool;
|
||||
|
||||
#[inline]
|
||||
fn to_owned_scalar(&self) -> bool {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
impl Scalar for String {
|
||||
type VectorType = StringVector;
|
||||
type RefType<'a> = &'a str;
|
||||
|
||||
#[inline]
|
||||
fn as_scalar_ref(&self) -> &str {
|
||||
self
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn upcast_gat<'short, 'long: 'short>(long: &'long str) -> &'short str {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for &'a str {
|
||||
type ScalarType = String;
|
||||
|
||||
#[inline]
|
||||
fn to_owned_scalar(&self) -> String {
|
||||
self.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Scalar for Vec<u8> {
|
||||
type VectorType = BinaryVector;
|
||||
type RefType<'a> = &'a [u8];
|
||||
|
||||
#[inline]
|
||||
fn as_scalar_ref(&self) -> &[u8] {
|
||||
self
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn upcast_gat<'short, 'long: 'short>(long: &'long [u8]) -> &'short [u8] {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for &'a [u8] {
|
||||
type ScalarType = Vec<u8>;
|
||||
|
||||
#[inline]
|
||||
fn to_owned_scalar(&self) -> Vec<u8> {
|
||||
self.to_vec()
|
||||
}
|
||||
}
|
||||
|
||||
impl Scalar for Date {
|
||||
type VectorType = DateVector;
|
||||
type RefType<'a> = Date;
|
||||
|
||||
fn as_scalar_ref(&self) -> Self::RefType<'_> {
|
||||
*self
|
||||
}
|
||||
|
||||
fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
/// `Date` acts as both the owned scalar and its reference.
impl<'a> ScalarRef<'a> for Date {
    type ScalarType = Date;

    fn to_owned_scalar(&self) -> Date {
        *self
    }
}
|
||||
|
||||
impl Scalar for DateTime {
|
||||
type VectorType = DateTimeVector;
|
||||
type RefType<'a> = DateTime;
|
||||
|
||||
fn as_scalar_ref(&self) -> Self::RefType<'_> {
|
||||
*self
|
||||
}
|
||||
|
||||
fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
/// `DateTime` acts as both the owned scalar and its reference.
impl<'a> ScalarRef<'a> for DateTime {
    type ScalarType = DateTime;

    fn to_owned_scalar(&self) -> DateTime {
        *self
    }
}
|
||||
|
||||
// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.
|
||||
|
||||
impl Scalar for ListValue {
|
||||
type VectorType = ListVector;
|
||||
type RefType<'a> = ListValueRef<'a>;
|
||||
|
||||
fn as_scalar_ref(&self) -> Self::RefType<'_> {
|
||||
ListValueRef::Ref { val: self }
|
||||
}
|
||||
|
||||
fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for ListValueRef<'a> {
|
||||
type ScalarType = ListValue;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
match self {
|
||||
ListValueRef::Indexed { vector, idx } => match vector.get(*idx) {
|
||||
// Normally should not get `Value::Null` if the `ListValueRef` comes
|
||||
// from the iterator of the ListVector, but we avoid panic and just
|
||||
// returns a default list value in such case since `ListValueRef` may
|
||||
// be constructed manually.
|
||||
Value::Null => ListValue::default(),
|
||||
Value::List(v) => v,
|
||||
_ => unreachable!(),
|
||||
},
|
||||
ListValueRef::Ref { val } => (*val).clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::ConcreteDataType;
    use crate::timestamp::TimestampSecond;
    use crate::vectors::{BinaryVector, Int32Vector, ListVectorBuilder, TimestampSecondVector};

    /// Builds a vector of type `T` by pushing every (possibly null) item in order.
    fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
        let mut builder = T::Builder::with_capacity(items.len());
        for &item in items {
            builder.push(item);
        }
        builder.finish()
    }

    /// Asserts that `vector` yields the items of `expect` pairwise, in order.
    fn assert_vector_eq<'a, T: ScalarVector>(expect: &[Option<T::RefItem<'a>>], vector: &'a T)
    where
        T::RefItem<'a>: PartialEq + std::fmt::Debug,
    {
        for (expected, actual) in expect.iter().zip(vector.iter_data()) {
            assert_eq!(*expected, actual);
        }
    }

    #[test]
    fn test_build_i32_vector() {
        let expect = vec![Some(1), Some(2), Some(3), None, Some(5)];
        let vector: Int32Vector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_binary_vector() {
        let expect: Vec<Option<&'static [u8]>> = vec![
            Some(b"a"),
            Some(b"b"),
            Some(b"c"),
            None,
            Some(b"e"),
            Some(b""),
        ];
        let vector: BinaryVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_date_vector() {
        let expect: Vec<Option<Date>> = vec![
            Some(Date::new(0)),
            Some(Date::new(-1)),
            None,
            Some(Date::new(1)),
        ];
        let vector: DateVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_date_scalar() {
        let date = Date::new(1);
        assert_eq!(date, date.as_scalar_ref());
        assert_eq!(date, date.to_owned_scalar());
    }

    #[test]
    fn test_datetime_scalar() {
        let dt = DateTime::new(123);
        assert_eq!(dt, dt.as_scalar_ref());
        assert_eq!(dt, dt.to_owned_scalar());
    }

    #[test]
    fn test_list_value_scalar() {
        let list_value = ListValue::new(
            Some(Box::new(vec![Value::Int32(123)])),
            ConcreteDataType::int32_datatype(),
        );
        let list_ref = ListValueRef::Ref { val: &list_value };
        assert_eq!(list_ref, list_value.as_scalar_ref());
        assert_eq!(list_value, list_ref.to_owned_scalar());

        // Slot 0 is null, slot 1 holds `list_value`.
        let mut builder =
            ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1);
        builder.push(None);
        builder.push(Some(list_value.as_scalar_ref()));
        let vector = builder.finish();

        let null_slot = ListValueRef::Indexed {
            vector: &vector,
            idx: 0,
        };
        assert_eq!(ListValue::default(), null_slot.to_owned_scalar());

        let value_slot = ListValueRef::Indexed {
            vector: &vector,
            idx: 1,
        };
        assert_eq!(list_value, value_slot.to_owned_scalar());
    }

    #[test]
    fn test_build_timestamp_vector() {
        let expect: Vec<Option<TimestampSecond>> = vec![Some(10.into()), None, Some(42.into())];
        let vector: TimestampSecondVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);

        let val = vector.get_data(0).unwrap();
        assert_eq!(val, val.as_scalar_ref());
        assert_eq!(TimestampSecond::from(10), val.to_owned_scalar());
    }
}
|
||||
430
src/datatypes2/src/schema.rs
Normal file
430
src/datatypes2/src/schema.rs
Normal file
@@ -0,0 +1,430 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod column_schema;
|
||||
mod constraint;
|
||||
mod raw;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::{Field, Schema as ArrowSchema};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::error::{self, Error, Result};
|
||||
pub use crate::schema::column_schema::{ColumnSchema, Metadata};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
|
||||
/// Metadata key under which the schema's version number is stored in the
/// arrow schema's metadata.
const VERSION_KEY: &str = "greptime:version";
|
||||
|
||||
/// A common schema, should be immutable.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Schema {
|
||||
column_schemas: Vec<ColumnSchema>,
|
||||
name_to_index: HashMap<String, usize>,
|
||||
arrow_schema: Arc<ArrowSchema>,
|
||||
/// Index of the timestamp key column.
|
||||
///
|
||||
/// Timestamp key column is the column holds the timestamp and forms part of
|
||||
/// the primary key. None means there is no timestamp key column.
|
||||
timestamp_index: Option<usize>,
|
||||
/// Version of the schema.
|
||||
///
|
||||
/// Initial value is zero. The version should bump after altering schema.
|
||||
version: u32,
|
||||
}
|
||||
|
||||
impl Schema {
|
||||
/// Initial version of the schema.
|
||||
pub const INITIAL_VERSION: u32 = 0;
|
||||
|
||||
/// Create a schema from a vector of [ColumnSchema].
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics when ColumnSchema's `default_constraint` can't be serialized into json.
|
||||
pub fn new(column_schemas: Vec<ColumnSchema>) -> Schema {
|
||||
// Builder won't fail in this case
|
||||
SchemaBuilder::try_from(column_schemas)
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Try to Create a schema from a vector of [ColumnSchema].
|
||||
pub fn try_new(column_schemas: Vec<ColumnSchema>) -> Result<Schema> {
|
||||
SchemaBuilder::try_from(column_schemas)?.build()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn arrow_schema(&self) -> &Arc<ArrowSchema> {
|
||||
&self.arrow_schema
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn column_schemas(&self) -> &[ColumnSchema] {
|
||||
&self.column_schemas
|
||||
}
|
||||
|
||||
pub fn column_schema_by_name(&self, name: &str) -> Option<&ColumnSchema> {
|
||||
self.name_to_index
|
||||
.get(name)
|
||||
.map(|index| &self.column_schemas[*index])
|
||||
}
|
||||
|
||||
/// Retrieve the column's name by index
|
||||
/// # Panics
|
||||
/// This method **may** panic if the index is out of range of column schemas.
|
||||
#[inline]
|
||||
pub fn column_name_by_index(&self, idx: usize) -> &str {
|
||||
&self.column_schemas[idx].name
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn column_index_by_name(&self, name: &str) -> Option<usize> {
|
||||
self.name_to_index.get(name).copied()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn contains_column(&self, name: &str) -> bool {
|
||||
self.name_to_index.contains_key(name)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn num_columns(&self) -> usize {
|
||||
self.column_schemas.len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.column_schemas.is_empty()
|
||||
}
|
||||
|
||||
/// Returns index of the timestamp key column.
|
||||
#[inline]
|
||||
pub fn timestamp_index(&self) -> Option<usize> {
|
||||
self.timestamp_index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn timestamp_column(&self) -> Option<&ColumnSchema> {
|
||||
self.timestamp_index.map(|idx| &self.column_schemas[idx])
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn version(&self) -> u32 {
|
||||
self.version
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn metadata(&self) -> &HashMap<String, String> {
|
||||
&self.arrow_schema.metadata
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SchemaBuilder {
|
||||
column_schemas: Vec<ColumnSchema>,
|
||||
name_to_index: HashMap<String, usize>,
|
||||
fields: Vec<Field>,
|
||||
timestamp_index: Option<usize>,
|
||||
version: u32,
|
||||
metadata: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl TryFrom<Vec<ColumnSchema>> for SchemaBuilder {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(column_schemas: Vec<ColumnSchema>) -> Result<SchemaBuilder> {
|
||||
SchemaBuilder::try_from_columns(column_schemas)
|
||||
}
|
||||
}
|
||||
|
||||
impl SchemaBuilder {
|
||||
pub fn try_from_columns(column_schemas: Vec<ColumnSchema>) -> Result<Self> {
|
||||
let FieldsAndIndices {
|
||||
fields,
|
||||
name_to_index,
|
||||
timestamp_index,
|
||||
} = collect_fields(&column_schemas)?;
|
||||
|
||||
Ok(Self {
|
||||
column_schemas,
|
||||
name_to_index,
|
||||
fields,
|
||||
timestamp_index,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn version(mut self, version: u32) -> Self {
|
||||
self.version = version;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add key value pair to metadata.
|
||||
///
|
||||
/// Old metadata with same key would be overwritten.
|
||||
pub fn add_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.metadata.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(mut self) -> Result<Schema> {
|
||||
if let Some(timestamp_index) = self.timestamp_index {
|
||||
validate_timestamp_index(&self.column_schemas, timestamp_index)?;
|
||||
}
|
||||
|
||||
self.metadata
|
||||
.insert(VERSION_KEY.to_string(), self.version.to_string());
|
||||
|
||||
let arrow_schema = ArrowSchema::new(self.fields).with_metadata(self.metadata);
|
||||
|
||||
Ok(Schema {
|
||||
column_schemas: self.column_schemas,
|
||||
name_to_index: self.name_to_index,
|
||||
arrow_schema: Arc::new(arrow_schema),
|
||||
timestamp_index: self.timestamp_index,
|
||||
version: self.version,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct FieldsAndIndices {
|
||||
fields: Vec<Field>,
|
||||
name_to_index: HashMap<String, usize>,
|
||||
timestamp_index: Option<usize>,
|
||||
}
|
||||
|
||||
fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
|
||||
let mut fields = Vec::with_capacity(column_schemas.len());
|
||||
let mut name_to_index = HashMap::with_capacity(column_schemas.len());
|
||||
let mut timestamp_index = None;
|
||||
for (index, column_schema) in column_schemas.iter().enumerate() {
|
||||
if column_schema.is_time_index() {
|
||||
ensure!(
|
||||
timestamp_index.is_none(),
|
||||
error::DuplicateTimestampIndexSnafu {
|
||||
exists: timestamp_index.unwrap(),
|
||||
new: index,
|
||||
}
|
||||
);
|
||||
timestamp_index = Some(index);
|
||||
}
|
||||
let field = Field::try_from(column_schema)?;
|
||||
fields.push(field);
|
||||
name_to_index.insert(column_schema.name.clone(), index);
|
||||
}
|
||||
|
||||
Ok(FieldsAndIndices {
|
||||
fields,
|
||||
name_to_index,
|
||||
timestamp_index,
|
||||
})
|
||||
}
|
||||
|
||||
fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: usize) -> Result<()> {
|
||||
ensure!(
|
||||
timestamp_index < column_schemas.len(),
|
||||
error::InvalidTimestampIndexSnafu {
|
||||
index: timestamp_index,
|
||||
}
|
||||
);
|
||||
|
||||
let column_schema = &column_schemas[timestamp_index];
|
||||
ensure!(
|
||||
column_schema.data_type.is_timestamp_compatible(),
|
||||
error::InvalidTimestampIndexSnafu {
|
||||
index: timestamp_index,
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
column_schema.is_time_index(),
|
||||
error::InvalidTimestampIndexSnafu {
|
||||
index: timestamp_index,
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// A reference-counted ([`Arc`]) pointer to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
|
||||
|
||||
impl TryFrom<Arc<ArrowSchema>> for Schema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(arrow_schema: Arc<ArrowSchema>) -> Result<Schema> {
|
||||
let mut column_schemas = Vec::with_capacity(arrow_schema.fields.len());
|
||||
let mut name_to_index = HashMap::with_capacity(arrow_schema.fields.len());
|
||||
for field in &arrow_schema.fields {
|
||||
let column_schema = ColumnSchema::try_from(field)?;
|
||||
name_to_index.insert(field.name().to_string(), column_schemas.len());
|
||||
column_schemas.push(column_schema);
|
||||
}
|
||||
|
||||
let mut timestamp_index = None;
|
||||
for (index, column_schema) in column_schemas.iter().enumerate() {
|
||||
if column_schema.is_time_index() {
|
||||
validate_timestamp_index(&column_schemas, index)?;
|
||||
ensure!(
|
||||
timestamp_index.is_none(),
|
||||
error::DuplicateTimestampIndexSnafu {
|
||||
exists: timestamp_index.unwrap(),
|
||||
new: index,
|
||||
}
|
||||
);
|
||||
timestamp_index = Some(index);
|
||||
}
|
||||
}
|
||||
|
||||
let version = try_parse_version(&arrow_schema.metadata, VERSION_KEY)?;
|
||||
|
||||
Ok(Self {
|
||||
column_schemas,
|
||||
name_to_index,
|
||||
arrow_schema,
|
||||
timestamp_index,
|
||||
version,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<ArrowSchema> for Schema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(arrow_schema: ArrowSchema) -> Result<Schema> {
|
||||
let arrow_schema = Arc::new(arrow_schema);
|
||||
|
||||
Schema::try_from(arrow_schema)
|
||||
}
|
||||
}
|
||||
|
||||
fn try_parse_version(metadata: &HashMap<String, String>, key: &str) -> Result<u32> {
|
||||
if let Some(value) = metadata.get(key) {
|
||||
let version = value
|
||||
.parse()
|
||||
.context(error::ParseSchemaVersionSnafu { value })?;
|
||||
|
||||
Ok(version)
|
||||
} else {
|
||||
Ok(Schema::INITIAL_VERSION)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::ConcreteDataType;

    #[test]
    fn test_build_empty_schema() {
        let schema = SchemaBuilder::default().build().unwrap();
        assert_eq!(0, schema.num_columns());
        assert!(schema.is_empty());
    }

    #[test]
    fn test_schema_no_timestamp() {
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), false),
            ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), true),
        ];
        let schema = Schema::new(column_schemas.clone());

        assert_eq!(2, schema.num_columns());
        assert!(!schema.is_empty());
        assert!(schema.timestamp_index().is_none());
        assert!(schema.timestamp_column().is_none());
        assert_eq!(Schema::INITIAL_VERSION, schema.version());

        for column_schema in &column_schemas {
            let found = schema.column_schema_by_name(&column_schema.name).unwrap();
            assert_eq!(column_schema, found);
        }
        assert!(schema.column_schema_by_name("col3").is_none());

        // Round trip through the arrow schema.
        let new_schema = Schema::try_from(schema.arrow_schema().clone()).unwrap();

        assert_eq!(schema, new_schema);
        assert_eq!(column_schemas, schema.column_schemas());
    }

    #[test]
    fn test_metadata() {
        let column_schemas = vec![ColumnSchema::new(
            "col1",
            ConcreteDataType::int32_datatype(),
            false,
        )];
        let schema = SchemaBuilder::try_from(column_schemas)
            .unwrap()
            .add_metadata("k1", "v1")
            .build()
            .unwrap();

        assert_eq!("v1", schema.metadata().get("k1").unwrap());
    }

    #[test]
    fn test_schema_with_timestamp() {
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
            ColumnSchema::new(
                "ts",
                ConcreteDataType::timestamp_millisecond_datatype(),
                false,
            )
            .with_time_index(true),
        ];
        let schema = SchemaBuilder::try_from(column_schemas.clone())
            .unwrap()
            .version(123)
            .build()
            .unwrap();

        assert_eq!(1, schema.timestamp_index().unwrap());
        assert_eq!(&column_schemas[1], schema.timestamp_column().unwrap());
        assert_eq!(123, schema.version());

        // Round trip through the arrow schema; the time index must survive, so
        // check the converted schema rather than the original one again.
        let new_schema = Schema::try_from(schema.arrow_schema().clone()).unwrap();
        assert_eq!(1, new_schema.timestamp_index().unwrap());
        assert_eq!(schema, new_schema);
    }

    #[test]
    fn test_schema_wrong_timestamp() {
        // Time index on a column that is not timestamp compatible.
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true)
                .with_time_index(true),
            ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), false),
        ];
        assert!(SchemaBuilder::try_from(column_schemas)
            .unwrap()
            .build()
            .is_err());

        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
            ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), false)
                .with_time_index(true),
        ];

        assert!(SchemaBuilder::try_from(column_schemas)
            .unwrap()
            .build()
            .is_err());
    }
}
|
||||
305
src/datatypes2/src/schema/column_schema.rs
Normal file
305
src/datatypes2/src/schema/column_schema.rs
Normal file
@@ -0,0 +1,305 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use arrow::datatypes::Field;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
use crate::vectors::VectorRef;
|
||||
|
||||
/// Key-value metadata attached to a column.
pub type Metadata = BTreeMap<String, String>;

/// Metadata key that marks a column as time index in the arrow field's metadata.
const TIME_INDEX_KEY: &str = "greptime:time_index";
/// Metadata key under which the default constraint is stored in the arrow
/// field's metadata.
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
|
||||
|
||||
/// Schema of a column, used as an immutable struct.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ColumnSchema {
|
||||
pub name: String,
|
||||
pub data_type: ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
is_time_index: bool,
|
||||
default_constraint: Option<ColumnDefaultConstraint>,
|
||||
metadata: Metadata,
|
||||
}
|
||||
|
||||
impl ColumnSchema {
|
||||
pub fn new<T: Into<String>>(
|
||||
name: T,
|
||||
data_type: ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
) -> ColumnSchema {
|
||||
ColumnSchema {
|
||||
name: name.into(),
|
||||
data_type,
|
||||
is_nullable,
|
||||
is_time_index: false,
|
||||
default_constraint: None,
|
||||
metadata: Metadata::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_time_index(&self) -> bool {
|
||||
self.is_time_index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_nullable(&self) -> bool {
|
||||
self.is_nullable
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
|
||||
self.default_constraint.as_ref()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn metadata(&self) -> &Metadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
pub fn with_time_index(mut self, is_time_index: bool) -> Self {
|
||||
self.is_time_index = is_time_index;
|
||||
if is_time_index {
|
||||
self.metadata
|
||||
.insert(TIME_INDEX_KEY.to_string(), "true".to_string());
|
||||
} else {
|
||||
self.metadata.remove(TIME_INDEX_KEY);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_default_constraint(
|
||||
mut self,
|
||||
default_constraint: Option<ColumnDefaultConstraint>,
|
||||
) -> Result<Self> {
|
||||
if let Some(constraint) = &default_constraint {
|
||||
constraint.validate(&self.data_type, self.is_nullable)?;
|
||||
}
|
||||
|
||||
self.default_constraint = default_constraint;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Creates a new [`ColumnSchema`] with given metadata.
|
||||
pub fn with_metadata(mut self, metadata: Metadata) -> Self {
|
||||
self.metadata = metadata;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
|
||||
match &self.default_constraint {
|
||||
Some(c) => c
|
||||
.create_default_vector(&self.data_type, self.is_nullable, num_rows)
|
||||
.map(Some),
|
||||
None => {
|
||||
if self.is_nullable {
|
||||
// No default constraint, use null as default value.
|
||||
// TODO(yingwen): Use NullVector once it supports setting logical type.
|
||||
ColumnDefaultConstraint::null_value()
|
||||
.create_default_vector(&self.data_type, self.is_nullable, num_rows)
|
||||
.map(Some)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&Field> for ColumnSchema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(field: &Field) -> Result<ColumnSchema> {
|
||||
let data_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
let mut metadata = field.metadata().cloned().unwrap_or_default();
|
||||
let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
|
||||
Some(json) => {
|
||||
Some(serde_json::from_str(&json).context(error::DeserializeSnafu { json })?)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
let is_time_index = metadata.contains_key(TIME_INDEX_KEY);
|
||||
|
||||
Ok(ColumnSchema {
|
||||
name: field.name().clone(),
|
||||
data_type,
|
||||
is_nullable: field.is_nullable(),
|
||||
is_time_index,
|
||||
default_constraint,
|
||||
metadata,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ColumnSchema> for Field {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
|
||||
let mut metadata = column_schema.metadata.clone();
|
||||
if let Some(value) = &column_schema.default_constraint {
|
||||
// Adds an additional metadata to store the default constraint.
|
||||
let old = metadata.insert(
|
||||
DEFAULT_CONSTRAINT_KEY.to_string(),
|
||||
serde_json::to_string(&value).context(error::SerializeSnafu)?,
|
||||
);
|
||||
|
||||
ensure!(
|
||||
old.is_none(),
|
||||
error::DuplicateMetaSnafu {
|
||||
key: DEFAULT_CONSTRAINT_KEY,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Field::new(
|
||||
&column_schema.name,
|
||||
column_schema.data_type.as_arrow_type(),
|
||||
column_schema.is_nullable(),
|
||||
)
|
||||
.with_metadata(Some(metadata)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::datatypes::DataType as ArrowDataType;

    use super::*;
    use crate::value::Value;

    /// A plain column schema survives a round trip through an arrow `Field`.
    #[test]
    fn test_column_schema() {
        let original = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);

        let arrow_field = Field::try_from(&original).unwrap();
        assert_eq!("test", arrow_field.name());
        assert_eq!(ArrowDataType::Int32, *arrow_field.data_type());
        assert!(arrow_field.is_nullable());

        let restored = ColumnSchema::try_from(&arrow_field).unwrap();
        assert_eq!(original, restored);
    }

    /// The default constraint is written into the field metadata as JSON and
    /// read back when converting the field to a column schema again.
    #[test]
    fn test_column_schema_with_default_constraint() {
        let constraint = ColumnDefaultConstraint::Value(Value::from(99));
        let original = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(constraint))
            .unwrap();
        // The constraint is not stored in the column schema's own metadata.
        assert!(original.metadata().get(DEFAULT_CONSTRAINT_KEY).is_none());

        let arrow_field = Field::try_from(&original).unwrap();
        assert_eq!("test", arrow_field.name());
        assert_eq!(ArrowDataType::Int32, *arrow_field.data_type());
        assert!(arrow_field.is_nullable());
        let field_metadata = arrow_field.metadata().unwrap();
        assert_eq!(
            "{\"Value\":{\"Int32\":99}}",
            field_metadata.get(DEFAULT_CONSTRAINT_KEY).unwrap()
        );

        let restored = ColumnSchema::try_from(&arrow_field).unwrap();
        assert_eq!(original, restored);
    }

    /// User metadata and the default constraint coexist in the field metadata.
    #[test]
    fn test_column_schema_with_metadata() {
        let mut user_metadata = Metadata::new();
        user_metadata.insert("k1".to_string(), "v1".to_string());
        let original = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(user_metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert_eq!("v1", original.metadata().get("k1").unwrap());
        assert!(original.metadata().get(DEFAULT_CONSTRAINT_KEY).is_none());

        let arrow_field = Field::try_from(&original).unwrap();
        let field_metadata = arrow_field.metadata().unwrap();
        assert_eq!("v1", field_metadata.get("k1").unwrap());
        assert!(field_metadata.get(DEFAULT_CONSTRAINT_KEY).is_some());

        let restored = ColumnSchema::try_from(&arrow_field).unwrap();
        assert_eq!(original, restored);
    }

    /// User metadata that already uses the reserved constraint key makes the
    /// conversion to a `Field` fail.
    #[test]
    fn test_column_schema_with_duplicate_metadata() {
        let mut user_metadata = Metadata::new();
        user_metadata.insert(DEFAULT_CONSTRAINT_KEY.to_string(), "v1".to_string());
        let conflicting = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(user_metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();

        Field::try_from(&conflicting).unwrap_err();
    }

    /// A non-nullable column rejects a null default.
    #[test]
    fn test_column_schema_invalid_default_constraint() {
        let not_nullable = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);

        not_nullable
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap_err();
    }

    /// A constraint encoded to bytes decodes back to an equal constraint.
    #[test]
    fn test_column_default_constraint_try_into_from() {
        let constraint = ColumnDefaultConstraint::Value(Value::from(42i64));

        let encoded = Vec::<u8>::try_from(constraint.clone()).unwrap();
        let decoded = ColumnDefaultConstraint::try_from(encoded.as_slice()).unwrap();

        assert_eq!(constraint, decoded);
    }

    /// Nullable columns produce an all-null default vector, whether the null
    /// default is implicit or explicit.
    #[test]
    fn test_column_schema_create_default_null() {
        // Implicit default null.
        let implicit = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        // Explicit default null.
        let explicit = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();

        for column_schema in [implicit, explicit].iter() {
            let defaults = column_schema.create_default_vector(5).unwrap().unwrap();
            assert_eq!(5, defaults.len());
            assert!(defaults.only_null());
        }
    }

    /// A non-nullable column without a constraint has no default vector.
    #[test]
    fn test_column_schema_no_default() {
        let not_nullable = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);

        let defaults = not_nullable.create_default_vector(5).unwrap();
        assert!(defaults.is_none());
    }
}
|
||||
306
src/datatypes2/src/schema/constraint.rs
Normal file
306
src/datatypes2/src/schema/constraint.rs
Normal file
@@ -0,0 +1,306 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::util;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{Int64Vector, TimestampMillisecondVector, VectorRef};
|
||||
|
||||
const CURRENT_TIMESTAMP: &str = "current_timestamp()";
|
||||
|
||||
/// Column's default constraint.
///
/// Describes how the default value of a column is produced: either by
/// evaluating a function expression or by using a constant value.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ColumnDefaultConstraint {
    // A function invocation, kept as the text of the expression
    // (e.g. `current_timestamp()`).
    // TODO(dennis): we save the function expression here, maybe use a struct in future.
    Function(String),
    // A constant value. `Value::Null` means the column defaults to null.
    Value(Value),
}
|
||||
|
||||
impl TryFrom<&[u8]> for ColumnDefaultConstraint {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(bytes: &[u8]) -> Result<Self> {
|
||||
let json = String::from_utf8_lossy(bytes);
|
||||
serde_json::from_str(&json).context(error::DeserializeSnafu { json })
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<ColumnDefaultConstraint> for Vec<u8> {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(value: ColumnDefaultConstraint) -> std::result::Result<Self, Self::Error> {
|
||||
let s = serde_json::to_string(&value).context(error::SerializeSnafu)?;
|
||||
Ok(s.into_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ColumnDefaultConstraint {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ColumnDefaultConstraint::Function(expr) => write!(f, "{}", expr),
|
||||
ColumnDefaultConstraint::Value(v) => write!(f, "{}", v),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ColumnDefaultConstraint {
|
||||
/// Returns a default null constraint.
|
||||
pub fn null_value() -> ColumnDefaultConstraint {
|
||||
ColumnDefaultConstraint::Value(Value::Null)
|
||||
}
|
||||
|
||||
/// Check whether the constraint is valid for columns with given `data_type`
|
||||
/// and `is_nullable` attributes.
|
||||
pub fn validate(&self, data_type: &ConcreteDataType, is_nullable: bool) -> Result<()> {
|
||||
ensure!(is_nullable || !self.maybe_null(), error::NullDefaultSnafu);
|
||||
|
||||
match self {
|
||||
ColumnDefaultConstraint::Function(expr) => {
|
||||
ensure!(
|
||||
expr == CURRENT_TIMESTAMP,
|
||||
error::UnsupportedDefaultExprSnafu { expr }
|
||||
);
|
||||
ensure!(
|
||||
data_type.is_timestamp_compatible(),
|
||||
error::DefaultValueTypeSnafu {
|
||||
reason: "return value of the function must has timestamp type",
|
||||
}
|
||||
);
|
||||
}
|
||||
ColumnDefaultConstraint::Value(v) => {
|
||||
if !v.is_null() {
|
||||
// Whether the value could be nullable has been checked before, only need
|
||||
// to check the type compatibility here.
|
||||
ensure!(
|
||||
data_type.logical_type_id() == v.logical_type_id(),
|
||||
error::DefaultValueTypeSnafu {
|
||||
reason: format!(
|
||||
"column has type {:?} but default value has type {:?}",
|
||||
data_type.logical_type_id(),
|
||||
v.logical_type_id()
|
||||
),
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a vector that contains `num_rows` default values for given `data_type`.
|
||||
///
|
||||
/// If `is_nullable` is `true`, then this method would returns error if the created
|
||||
/// default value is null.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `num_rows == 0`.
|
||||
pub fn create_default_vector(
|
||||
&self,
|
||||
data_type: &ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
num_rows: usize,
|
||||
) -> Result<VectorRef> {
|
||||
assert!(num_rows > 0);
|
||||
|
||||
match self {
|
||||
ColumnDefaultConstraint::Function(expr) => {
|
||||
// Functions should also ensure its return value is not null when
|
||||
// is_nullable is true.
|
||||
match &expr[..] {
|
||||
// TODO(dennis): we only supports current_timestamp right now,
|
||||
// it's better to use a expression framework in future.
|
||||
CURRENT_TIMESTAMP => create_current_timestamp_vector(data_type, num_rows),
|
||||
_ => error::UnsupportedDefaultExprSnafu { expr }.fail(),
|
||||
}
|
||||
}
|
||||
ColumnDefaultConstraint::Value(v) => {
|
||||
ensure!(is_nullable || !v.is_null(), error::NullDefaultSnafu);
|
||||
|
||||
// TODO(yingwen):
|
||||
// 1. For null value, we could use NullVector once it supports custom logical type.
|
||||
// 2. For non null value, we could use ConstantVector, but it would cause all codes
|
||||
// attempt to downcast the vector fail if they don't check whether the vector is const
|
||||
// first.
|
||||
let mut mutable_vector = data_type.create_mutable_vector(1);
|
||||
mutable_vector.push_value_ref(v.as_value_ref())?;
|
||||
let base_vector = mutable_vector.to_vector();
|
||||
Ok(base_vector.replicate(&[num_rows]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this constraint might creates NULL.
|
||||
fn maybe_null(&self) -> bool {
|
||||
// Once we support more functions, we may return true if given function
|
||||
// could return null.
|
||||
matches!(self, ColumnDefaultConstraint::Value(Value::Null))
|
||||
}
|
||||
}
|
||||
|
||||
fn create_current_timestamp_vector(
|
||||
data_type: &ConcreteDataType,
|
||||
num_rows: usize,
|
||||
) -> Result<VectorRef> {
|
||||
// FIXME(yingwen): We should implements cast in VectorOp so we could cast the millisecond vector
|
||||
// to other data type and avoid this match.
|
||||
match data_type {
|
||||
ConcreteDataType::Timestamp(_) => Ok(Arc::new(TimestampMillisecondVector::from_values(
|
||||
std::iter::repeat(util::current_time_millis()).take(num_rows),
|
||||
))),
|
||||
ConcreteDataType::Int64(_) => Ok(Arc::new(Int64Vector::from_values(
|
||||
std::iter::repeat(util::current_time_millis()).take(num_rows),
|
||||
))),
|
||||
_ => error::DefaultValueTypeSnafu {
|
||||
reason: format!(
|
||||
"Not support to assign current timestamp to {:?} type",
|
||||
data_type
|
||||
),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::Error;
    use crate::vectors::Int32Vector;

    /// `maybe_null` is true for the null constraint and false for a concrete value.
    #[test]
    fn test_null_default_constraint() {
        let constraint = ColumnDefaultConstraint::null_value();
        assert!(constraint.maybe_null());
        let constraint = ColumnDefaultConstraint::Value(Value::Int32(10));
        assert!(!constraint.maybe_null());
    }

    /// A null default is only valid for nullable columns.
    #[test]
    fn test_validate_null_constraint() {
        let constraint = ColumnDefaultConstraint::null_value();
        let data_type = ConcreteDataType::int32_datatype();
        constraint.validate(&data_type, false).unwrap_err();
        constraint.validate(&data_type, true).unwrap();
    }

    /// A value default must have the same logical type as the column.
    #[test]
    fn test_validate_value_constraint() {
        let constraint = ColumnDefaultConstraint::Value(Value::Int32(10));
        let data_type = ConcreteDataType::int32_datatype();
        constraint.validate(&data_type, false).unwrap();
        constraint.validate(&data_type, true).unwrap();

        constraint
            .validate(&ConcreteDataType::uint32_datatype(), true)
            .unwrap_err();
    }

    /// Only `current_timestamp()` on a timestamp compatible type is accepted.
    #[test]
    fn test_validate_function_constraint() {
        let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
        constraint
            .validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
            .unwrap();
        constraint
            .validate(&ConcreteDataType::boolean_datatype(), false)
            .unwrap_err();

        let constraint = ColumnDefaultConstraint::Function("hello()".to_string());
        constraint
            .validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
            .unwrap_err();
    }

    /// A null default fails for non-nullable columns and yields all nulls otherwise.
    #[test]
    fn test_create_default_vector_by_null() {
        let constraint = ColumnDefaultConstraint::null_value();
        let data_type = ConcreteDataType::int32_datatype();
        constraint
            .create_default_vector(&data_type, false, 10)
            .unwrap_err();

        let constraint = ColumnDefaultConstraint::null_value();
        let v = constraint
            .create_default_vector(&data_type, true, 3)
            .unwrap();
        assert_eq!(3, v.len());
        for i in 0..v.len() {
            assert_eq!(Value::Null, v.get(i));
        }
    }

    /// A value default is repeated `num_rows` times.
    #[test]
    fn test_create_default_vector_by_value() {
        let constraint = ColumnDefaultConstraint::Value(Value::Int32(10));
        let data_type = ConcreteDataType::int32_datatype();
        let v = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap();
        let expect: VectorRef = Arc::new(Int32Vector::from_values(vec![10; 4]));
        assert_eq!(expect, v);
    }

    /// `current_timestamp()` fills timestamp and int64 columns; unknown
    /// functions are rejected.
    #[test]
    fn test_create_default_vector_by_func() {
        let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
        // Timestamp type.
        let data_type = ConcreteDataType::timestamp_millisecond_datatype();
        let v = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap();
        assert_eq!(4, v.len());
        assert!(
            matches!(v.get(0), Value::Timestamp(_)),
            "v {:?} is not timestamp",
            v.get(0)
        );

        // Int64 type.
        let data_type = ConcreteDataType::int64_datatype();
        let v = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap();
        assert_eq!(4, v.len());
        // The failure message names the type actually checked here; it used to
        // say "timestamp", copied from the assertion above.
        assert!(
            matches!(v.get(0), Value::Int64(_)),
            "v {:?} is not int64",
            v.get(0)
        );

        let constraint = ColumnDefaultConstraint::Function("no".to_string());
        let data_type = ConcreteDataType::timestamp_millisecond_datatype();
        constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap_err();
    }

    /// `current_timestamp()` on an unsupported type reports `DefaultValueType`.
    #[test]
    fn test_create_by_func_and_invalid_type() {
        let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
        let data_type = ConcreteDataType::boolean_datatype();
        let err = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap_err();
        assert!(matches!(err, Error::DefaultValueType { .. }), "{:?}", err);
    }
}
|
||||
77
src/datatypes2/src/schema/raw.rs
Normal file
77
src/datatypes2/src/schema/raw.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::schema::{ColumnSchema, Schema, SchemaBuilder};
|
||||
|
||||
/// Struct used to serialize and deserialize [`Schema`](crate::schema::Schema).
///
/// This struct only contains necessary data to recover the Schema.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RawSchema {
    // Schemas of all columns, copied from the `Schema`.
    pub column_schemas: Vec<ColumnSchema>,
    // Copy of `Schema::timestamp_index`. The `TryFrom<RawSchema> for Schema`
    // conversion in this file does not read it back.
    pub timestamp_index: Option<usize>,
    // Copy of the schema's version.
    pub version: u32,
}
|
||||
|
||||
impl TryFrom<RawSchema> for Schema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(raw: RawSchema) -> Result<Schema> {
|
||||
SchemaBuilder::try_from(raw.column_schemas)?
|
||||
.version(raw.version)
|
||||
.build()
|
||||
}
|
||||
}
|
||||
|
||||
/// Captures the column schemas, timestamp index and version of a [`Schema`].
impl From<&Schema> for RawSchema {
    fn from(schema: &Schema) -> RawSchema {
        let column_schemas = schema.column_schemas.clone();
        let timestamp_index = schema.timestamp_index;
        let version = schema.version;

        Self {
            column_schemas,
            timestamp_index,
            version,
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::ConcreteDataType;

    /// A schema converted to `RawSchema` and back equals the original.
    #[test]
    fn test_raw_convert() {
        let value_column = ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true);
        let time_column = ColumnSchema::new(
            "ts",
            ConcreteDataType::timestamp_millisecond_datatype(),
            false,
        )
        .with_time_index(true);

        let schema = SchemaBuilder::try_from(vec![value_column, time_column])
            .unwrap()
            .version(123)
            .build()
            .unwrap();

        let round_tripped = Schema::try_from(RawSchema::from(&schema)).unwrap();

        assert_eq!(schema, round_tripped);
    }
}
|
||||
20
src/datatypes2/src/serialize.rs
Normal file
20
src/datatypes2/src/serialize.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
/// Implemented by column-like containers that can be rendered as JSON.
pub trait Serializable: Send + Sync {
    /// Serializes the values of this column into a list of JSON values.
    ///
    /// Returns an error if the values cannot be serialized.
    // NOTE(review): presumably one JSON value per element, in order — confirm
    // against the implementors.
    fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>>;
}
|
||||
135
src/datatypes2/src/timestamp.rs
Normal file
135
src/datatypes2/src/timestamp.rs
Normal file
@@ -0,0 +1,135 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::prelude::{Scalar, Value, ValueRef};
|
||||
use crate::scalars::ScalarRef;
|
||||
use crate::types::{
|
||||
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
|
||||
TimestampSecondType, WrapperType,
|
||||
};
|
||||
use crate::vectors::{
|
||||
TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
|
||||
TimestampSecondVector,
|
||||
};
|
||||
|
||||
// Generates a `Timestamp<Unit>` newtype around `common_time::Timestamp` for the
// given `TimeUnit` variant, together with its conversions and the `Scalar`,
// `ScalarRef` and `WrapperType` implementations.
macro_rules! define_timestamp_with_unit {
    ($unit: ident) => {
        paste! {
            // Newtype that holds a timestamp created with `TimeUnit::$unit`.
            #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
            pub struct [<Timestamp $unit>](pub Timestamp);

            impl [<Timestamp $unit>] {
                // Creates a timestamp from `val`, counted in this type's unit.
                pub fn new(val: i64) -> Self {
                    Self(Timestamp::new(val, TimeUnit::$unit))
                }
            }

            // The default timestamp has value 0.
            impl Default for [<Timestamp $unit>] {
                fn default() -> Self {
                    Self::new(0)
                }
            }

            // Converts into the generic `Value::Timestamp` variant.
            impl From<[<Timestamp $unit>]> for Value {
                fn from(t: [<Timestamp $unit>]) -> Value {
                    Value::Timestamp(t.0)
                }
            }

            // Delegates to the JSON conversion of the wrapped `Timestamp`.
            impl From<[<Timestamp $unit>]> for serde_json::Value {
                fn from(t: [<Timestamp $unit>]) -> Self {
                    t.0.into()
                }
            }

            // The wrapper is `Copy`, so the value ref owns its data.
            impl From<[<Timestamp $unit>]> for ValueRef<'static> {
                fn from(t: [<Timestamp $unit>]) -> Self {
                    ValueRef::Timestamp(t.0)
                }
            }

            // The scalar and its reference type are the same `Copy` type.
            impl Scalar for [<Timestamp $unit>] {
                type VectorType = [<Timestamp $unit Vector>];
                type RefType<'a> = [<Timestamp $unit>];

                fn as_scalar_ref(&self) -> Self::RefType<'_> {
                    *self
                }

                fn upcast_gat<'short, 'long: 'short>(
                    long: Self::RefType<'long>,
                ) -> Self::RefType<'short> {
                    long
                }
            }

            impl<'a> ScalarRef<'a> for [<Timestamp $unit>] {
                type ScalarType = [<Timestamp $unit>];

                fn to_owned_scalar(&self) -> Self::ScalarType {
                    *self
                }
            }

            // The native representation is the raw `i64` value.
            impl WrapperType for [<Timestamp $unit>] {
                type LogicalType = [<Timestamp $unit Type>];
                type Native = i64;

                fn from_native(value: Self::Native) -> Self {
                    Self::new(value)
                }

                fn into_native(self) -> Self::Native {
                    self.0.into()
                }
            }

            impl From<i64> for [<Timestamp $unit>] {
                fn from(val: i64) -> Self {
                    [<Timestamp $unit>]::from_native(val)
                }
            }
        }
    };
}

// One wrapper type for each of the four time units used here.
define_timestamp_with_unit!(Second);
define_timestamp_with_unit!(Millisecond);
define_timestamp_with_unit!(Microsecond);
define_timestamp_with_unit!(Nanosecond);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// For every unit, the scalar ref and the owned scalar equal the original.
    #[test]
    fn test_timestamp_scalar() {
        // Runs the same pair of assertions for one wrapper type.
        macro_rules! check_scalar_round_trip {
            ($wrapper:ident) => {{
                let ts = $wrapper::new(123);
                assert_eq!(ts, ts.as_scalar_ref());
                assert_eq!(ts, ts.to_owned_scalar());
            }};
        }

        check_scalar_round_trip!(TimestampSecond);
        check_scalar_round_trip!(TimestampMillisecond);
        check_scalar_round_trip!(TimestampMicrosecond);
        check_scalar_round_trip!(TimestampNanosecond);
    }
}
|
||||
93
src/datatypes2/src/type_id.rs
Normal file
93
src/datatypes2/src/type_id.rs
Normal file
@@ -0,0 +1,93 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/// Unique identifier for logical data type.
///
/// The timestamp family has one id per time unit.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LogicalTypeId {
    Null,

    // Numeric types:
    Boolean,
    Int8,
    Int16,
    Int32,
    Int64,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    Float32,
    Float64,

    // String types:
    String,
    Binary,

    // Date & Time types:
    /// Date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// Datetime representing the elapsed time since UNIX epoch (1970-01-01) in
    /// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
    DateTime,

    // Timestamps, one id per time unit:
    TimestampSecond,
    TimestampMillisecond,
    TimestampMicrosecond,
    TimestampNanosecond,

    // Nested types:
    List,
}
|
||||
|
||||
impl LogicalTypeId {
|
||||
/// Create ConcreteDataType based on this id. This method is for test only as it
|
||||
/// would lost some info.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if data type is not supported.
|
||||
#[cfg(any(test, feature = "test"))]
|
||||
pub fn data_type(&self) -> crate::data_type::ConcreteDataType {
|
||||
use crate::data_type::ConcreteDataType;
|
||||
|
||||
match self {
|
||||
LogicalTypeId::Null => ConcreteDataType::null_datatype(),
|
||||
LogicalTypeId::Boolean => ConcreteDataType::boolean_datatype(),
|
||||
LogicalTypeId::Int8 => ConcreteDataType::int8_datatype(),
|
||||
LogicalTypeId::Int16 => ConcreteDataType::int16_datatype(),
|
||||
LogicalTypeId::Int32 => ConcreteDataType::int32_datatype(),
|
||||
LogicalTypeId::Int64 => ConcreteDataType::int64_datatype(),
|
||||
LogicalTypeId::UInt8 => ConcreteDataType::uint8_datatype(),
|
||||
LogicalTypeId::UInt16 => ConcreteDataType::uint16_datatype(),
|
||||
LogicalTypeId::UInt32 => ConcreteDataType::uint32_datatype(),
|
||||
LogicalTypeId::UInt64 => ConcreteDataType::uint64_datatype(),
|
||||
LogicalTypeId::Float32 => ConcreteDataType::float32_datatype(),
|
||||
LogicalTypeId::Float64 => ConcreteDataType::float64_datatype(),
|
||||
LogicalTypeId::String => ConcreteDataType::string_datatype(),
|
||||
LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
|
||||
LogicalTypeId::Date => ConcreteDataType::date_datatype(),
|
||||
LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
|
||||
LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
|
||||
LogicalTypeId::TimestampMillisecond => {
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
}
|
||||
LogicalTypeId::TimestampMicrosecond => {
|
||||
ConcreteDataType::timestamp_microsecond_datatype()
|
||||
}
|
||||
LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
LogicalTypeId::List => {
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
37
src/datatypes2/src/types.rs
Normal file
37
src/datatypes2/src/types.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod binary_type;
|
||||
mod boolean_type;
|
||||
mod date_type;
|
||||
mod datetime_type;
|
||||
mod list_type;
|
||||
mod null_type;
|
||||
mod primitive_type;
|
||||
mod string_type;
|
||||
|
||||
mod timestamp_type;
|
||||
|
||||
pub use binary_type::BinaryType;
|
||||
pub use boolean_type::BooleanType;
|
||||
pub use date_type::DateType;
|
||||
pub use datetime_type::DateTimeType;
|
||||
pub use list_type::ListType;
|
||||
pub use null_type::NullType;
|
||||
pub use primitive_type::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LogicalPrimitiveType,
|
||||
NativeType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, WrapperType,
|
||||
};
|
||||
pub use string_type::StringType;
|
||||
pub use timestamp_type::*;
|
||||
60
src/datatypes2/src/types/binary_type.rs
Normal file
60
src/datatypes2/src/types/binary_type.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::StringBytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
/// Data type for binary values; maps to Arrow's `LargeBinary`.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BinaryType;
|
||||
|
||||
impl BinaryType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for BinaryType {
|
||||
fn name(&self) -> &str {
|
||||
"Binary"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Binary
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
StringBytes::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::LargeBinary
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
59
src/datatypes2/src/types/boolean_type.rs
Normal file
59
src/datatypes2/src/types/boolean_type.rs
Normal file
@@ -0,0 +1,59 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{BooleanVectorBuilder, MutableVector};
|
||||
|
||||
/// Data type for boolean values; maps to Arrow's `Boolean`.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BooleanType;
|
||||
|
||||
impl BooleanType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for BooleanType {
|
||||
fn name(&self) -> &str {
|
||||
"Boolean"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Boolean
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
bool::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Boolean
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BooleanVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
90
src/datatypes2/src/types/date_type.rs
Normal file
90
src/datatypes2/src/types/date_type.rs
Normal file
@@ -0,0 +1,90 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Date32Type};
|
||||
use common_time::Date;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::LogicalPrimitiveType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{DateVector, DateVectorBuilder, MutableVector, Vector};
|
||||
|
||||
/// Data type for Date (YYYY-MM-DD).
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DateType;
|
||||
|
||||
impl DataType for DateType {
|
||||
fn name(&self) -> &str {
|
||||
"Date"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Date
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::Date(Default::default())
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Date32
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(DateVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl LogicalPrimitiveType for DateType {
|
||||
type ArrowPrimitive = Date32Type;
|
||||
type Native = i32;
|
||||
type Wrapper = Date;
|
||||
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::date_datatype()
|
||||
}
|
||||
|
||||
fn type_name() -> &'static str {
|
||||
"Date"
|
||||
}
|
||||
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&DateVector> {
|
||||
vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!("Failed to cast {} to DateVector", vector.vector_type_name(),),
|
||||
})
|
||||
}
|
||||
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Date>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::Date(v) => Ok(Some(v)),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!("Failed to cast value {:?} to Date", other,),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
91
src/datatypes2/src/types/datetime_type.rs
Normal file
91
src/datatypes2/src/types/datetime_type.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Date64Type};
|
||||
use common_time::DateTime;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::{LogicalTypeId, MutableVector, ScalarVectorBuilder, Value, ValueRef, Vector};
|
||||
use crate::types::LogicalPrimitiveType;
|
||||
use crate::vectors::{DateTimeVector, DateTimeVectorBuilder, PrimitiveVector};
|
||||
|
||||
/// Data type for [`DateTime`].
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DateTimeType;
|
||||
|
||||
impl DataType for DateTimeType {
|
||||
fn name(&self) -> &str {
|
||||
"DateTime"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::DateTime
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::DateTime(DateTime::default())
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Date64
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(DateTimeVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl LogicalPrimitiveType for DateTimeType {
|
||||
type ArrowPrimitive = Date64Type;
|
||||
type Native = i64;
|
||||
type Wrapper = DateTime;
|
||||
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::datetime_datatype()
|
||||
}
|
||||
|
||||
fn type_name() -> &'static str {
|
||||
"DateTime"
|
||||
}
|
||||
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<Self>> {
|
||||
vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateTimeVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast {} to DateTimeVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self::Wrapper>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::DateTime(v) => Ok(Some(v)),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!("Failed to cast value {:?} to DateTime", other,),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
95
src/datatypes2/src/types/list_type.rs
Normal file
95
src/datatypes2/src/types/list_type.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Field};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::{ListValue, Value};
|
||||
use crate::vectors::{ListVectorBuilder, MutableVector};
|
||||
|
||||
/// Used to represent the List datatype.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ListType {
|
||||
/// The type of List's item.
|
||||
// Use Box to avoid recursive dependency, as enum ConcreteDataType depends on ListType.
|
||||
item_type: Box<ConcreteDataType>,
|
||||
}
|
||||
|
||||
impl Default for ListType {
|
||||
fn default() -> Self {
|
||||
ListType::new(ConcreteDataType::null_datatype())
|
||||
}
|
||||
}
|
||||
|
||||
impl ListType {
|
||||
/// Create a new `ListType` whose item's data type is `item_type`.
|
||||
pub fn new(item_type: ConcreteDataType) -> Self {
|
||||
ListType {
|
||||
item_type: Box::new(item_type),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for ListType {
|
||||
fn name(&self) -> &str {
|
||||
"List"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::List
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::List(ListValue::new(None, *self.item_type.clone()))
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
let field = Box::new(Field::new("item", self.item_type.as_arrow_type(), true));
|
||||
ArrowDataType::List(field)
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(ListVectorBuilder::with_type_capacity(
|
||||
*self.item_type.clone(),
|
||||
capacity,
|
||||
))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::ListValue;

    /// Checks name, type id, default value and arrow type of a boolean list type.
    #[test]
    fn test_list_type() {
        let t = ListType::new(ConcreteDataType::boolean_datatype());

        assert_eq!("List", t.name());
        assert_eq!(LogicalTypeId::List, t.logical_type_id());

        let expected_default =
            Value::List(ListValue::new(None, ConcreteDataType::boolean_datatype()));
        assert_eq!(expected_default, t.default_value());

        let expected_field = Field::new("item", ArrowDataType::Boolean, true);
        let expected_arrow_type = ArrowDataType::List(Box::new(expected_field));
        assert_eq!(expected_arrow_type, t.as_arrow_type());
    }
}
|
||||
58
src/datatypes2/src/types/null_type.rs
Normal file
58
src/datatypes2/src/types/null_type.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{MutableVector, NullVectorBuilder};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct NullType;
|
||||
|
||||
impl NullType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(NullType)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for NullType {
|
||||
fn name(&self) -> &str {
|
||||
"Null"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Null
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::Null
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Null
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, _capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(NullVectorBuilder::default())
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
358
src/datatypes2/src/types/primitive_type.rs
Normal file
358
src/datatypes2/src/types/primitive_type.rs
Normal file
@@ -0,0 +1,358 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use arrow::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType as ArrowDataType};
|
||||
use common_time::{Date, DateTime};
|
||||
use num::NumCast;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarRef, ScalarVectorBuilder};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{DateTimeType, DateType};
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{MutableVector, PrimitiveVector, PrimitiveVectorBuilder, Vector};
|
||||
|
||||
/// Data types that can be used as arrow's native type.
|
||||
pub trait NativeType: ArrowNativeType + NumCast {
|
||||
/// Largest numeric type this primitive type can be cast to.
|
||||
type LargestType: NativeType;
|
||||
}
|
||||
|
||||
macro_rules! impl_native_type {
|
||||
($Type: ident, $LargestType: ident) => {
|
||||
impl NativeType for $Type {
|
||||
type LargestType = $LargestType;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_native_type!(u8, u64);
|
||||
impl_native_type!(u16, u64);
|
||||
impl_native_type!(u32, u64);
|
||||
impl_native_type!(u64, u64);
|
||||
impl_native_type!(i8, i64);
|
||||
impl_native_type!(i16, i64);
|
||||
impl_native_type!(i32, i64);
|
||||
impl_native_type!(i64, i64);
|
||||
impl_native_type!(f32, f64);
|
||||
impl_native_type!(f64, f64);
|
||||
|
||||
/// Represents the wrapper type that wraps a native type using the `newtype pattern`,
|
||||
/// such as [Date](`common_time::Date`) is a wrapper type for the underlying native
|
||||
/// type `i32`.
|
||||
pub trait WrapperType:
|
||||
Copy
|
||||
+ Scalar
|
||||
+ PartialEq
|
||||
+ Into<Value>
|
||||
+ Into<ValueRef<'static>>
|
||||
+ Serialize
|
||||
+ Into<serde_json::Value>
|
||||
{
|
||||
/// Logical primitive type that this wrapper type belongs to.
|
||||
type LogicalType: LogicalPrimitiveType<Wrapper = Self, Native = Self::Native>;
|
||||
/// The underlying native type.
|
||||
type Native: NativeType;
|
||||
|
||||
/// Convert native type into this wrapper type.
|
||||
fn from_native(value: Self::Native) -> Self;
|
||||
|
||||
/// Convert this wrapper type into native type.
|
||||
fn into_native(self) -> Self::Native;
|
||||
}
|
||||
|
||||
/// Trait bridging the logical primitive type with [ArrowPrimitiveType].
|
||||
pub trait LogicalPrimitiveType: 'static + Sized {
|
||||
/// Arrow primitive type of this logical type.
|
||||
type ArrowPrimitive: ArrowPrimitiveType<Native = Self::Native>;
|
||||
/// Native (physical) type of this logical type.
|
||||
type Native: NativeType;
|
||||
/// Wrapper type that the vector returns.
|
||||
type Wrapper: WrapperType<LogicalType = Self, Native = Self::Native>
|
||||
+ for<'a> Scalar<VectorType = PrimitiveVector<Self>, RefType<'a> = Self::Wrapper>
|
||||
+ for<'a> ScalarRef<'a, ScalarType = Self::Wrapper>;
|
||||
|
||||
/// Construct the data type struct.
|
||||
fn build_data_type() -> ConcreteDataType;
|
||||
|
||||
/// Return the name of the type.
|
||||
fn type_name() -> &'static str;
|
||||
|
||||
/// Dynamic cast the vector to the concrete vector type.
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<Self>>;
|
||||
|
||||
/// Cast value ref to the primitive type.
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self::Wrapper>>;
|
||||
}
|
||||
|
||||
/// A new type for [WrapperType], complement the `Ord` feature for it. Wrapping non ordered
|
||||
/// primitive types like `f32` and `f64` in `OrdPrimitive` can make them be used in places that
|
||||
/// require `Ord`. For example, in `Median` or `Percentile` UDAFs.
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct OrdPrimitive<T: WrapperType>(pub T);
|
||||
|
||||
impl<T: WrapperType> OrdPrimitive<T> {
|
||||
pub fn as_primitive(&self) -> T {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: WrapperType> Eq for OrdPrimitive<T> {}
|
||||
|
||||
impl<T: WrapperType> PartialOrd for OrdPrimitive<T> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: WrapperType> Ord for OrdPrimitive<T> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
Into::<Value>::into(self.0).cmp(&Into::<Value>::into(other.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: WrapperType> From<OrdPrimitive<T>> for Value {
|
||||
fn from(p: OrdPrimitive<T>) -> Self {
|
||||
p.0.into()
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_wrapper {
|
||||
($Type: ident, $LogicalType: ident) => {
|
||||
impl WrapperType for $Type {
|
||||
type LogicalType = $LogicalType;
|
||||
type Native = $Type;
|
||||
|
||||
fn from_native(value: Self::Native) -> Self {
|
||||
value
|
||||
}
|
||||
|
||||
fn into_native(self) -> Self::Native {
|
||||
self
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_wrapper!(u8, UInt8Type);
|
||||
impl_wrapper!(u16, UInt16Type);
|
||||
impl_wrapper!(u32, UInt32Type);
|
||||
impl_wrapper!(u64, UInt64Type);
|
||||
impl_wrapper!(i8, Int8Type);
|
||||
impl_wrapper!(i16, Int16Type);
|
||||
impl_wrapper!(i32, Int32Type);
|
||||
impl_wrapper!(i64, Int64Type);
|
||||
impl_wrapper!(f32, Float32Type);
|
||||
impl_wrapper!(f64, Float64Type);
|
||||
|
||||
impl WrapperType for Date {
|
||||
type LogicalType = DateType;
|
||||
type Native = i32;
|
||||
|
||||
fn from_native(value: i32) -> Self {
|
||||
Date::new(value)
|
||||
}
|
||||
|
||||
fn into_native(self) -> i32 {
|
||||
self.val()
|
||||
}
|
||||
}
|
||||
|
||||
impl WrapperType for DateTime {
|
||||
type LogicalType = DateTimeType;
|
||||
type Native = i64;
|
||||
|
||||
fn from_native(value: Self::Native) -> Self {
|
||||
DateTime::new(value)
|
||||
}
|
||||
|
||||
fn into_native(self) -> Self::Native {
|
||||
self.val()
|
||||
}
|
||||
}
|
||||
|
||||
// Defines the data type struct `$DataType` for the native type `$Native` and implements
// `LogicalPrimitiveType` for it. `$TypeId` names the matching `ConcreteDataType` and
// `ValueRef` variants, and is also used as the type name.
macro_rules! define_logical_primitive_type {
    ($Native: ident, $TypeId: ident, $DataType: ident) => {
        // We need to define it as an empty struct `struct DataType {}` instead of a struct-unit
        // `struct DataType;` to ensure the serialized JSON string is compatible with previous
        // implementation.
        #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
        pub struct $DataType {}

        impl LogicalPrimitiveType for $DataType {
            type ArrowPrimitive = arrow::datatypes::$DataType;
            type Native = $Native;
            type Wrapper = $Native;

            fn build_data_type() -> ConcreteDataType {
                let data_type = $DataType::default();
                ConcreteDataType::$TypeId(data_type)
            }

            fn type_name() -> &'static str {
                stringify!($TypeId)
            }

            fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<$DataType>> {
                let any = vector.as_any();
                any.downcast_ref::<PrimitiveVector<$DataType>>()
                    .with_context(|| error::CastTypeSnafu {
                        msg: format!(
                            "Failed to cast {} to vector of primitive type {}",
                            vector.vector_type_name(),
                            stringify!($TypeId)
                        ),
                    })
            }

            fn cast_value_ref(value: ValueRef) -> Result<Option<$Native>> {
                match value {
                    ValueRef::$TypeId(v) => Ok(Some(v.into())),
                    ValueRef::Null => Ok(None),
                    other => {
                        let msg = format!(
                            "Failed to cast value {:?} to primitive type {}",
                            other,
                            stringify!($TypeId),
                        );
                        error::CastTypeSnafu { msg }.fail()
                    }
                }
            }
        }
    };
}
|
||||
|
||||
macro_rules! define_non_timestamp_primitive {
|
||||
($Native: ident, $TypeId: ident, $DataType: ident) => {
|
||||
define_logical_primitive_type!($Native, $TypeId, $DataType);
|
||||
|
||||
impl DataType for $DataType {
|
||||
fn name(&self) -> &str {
|
||||
stringify!($TypeId)
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::$TypeId
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
$Native::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::$TypeId
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(PrimitiveVectorBuilder::<$DataType>::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
define_non_timestamp_primitive!(u8, UInt8, UInt8Type);
|
||||
define_non_timestamp_primitive!(u16, UInt16, UInt16Type);
|
||||
define_non_timestamp_primitive!(u32, UInt32, UInt32Type);
|
||||
define_non_timestamp_primitive!(u64, UInt64, UInt64Type);
|
||||
define_non_timestamp_primitive!(i8, Int8, Int8Type);
|
||||
define_non_timestamp_primitive!(i16, Int16, Int16Type);
|
||||
define_non_timestamp_primitive!(i32, Int32, Int32Type);
|
||||
define_non_timestamp_primitive!(f32, Float32, Float32Type);
|
||||
define_non_timestamp_primitive!(f64, Float64, Float64Type);
|
||||
|
||||
// Timestamp primitive:
|
||||
define_logical_primitive_type!(i64, Int64, Int64Type);
|
||||
|
||||
impl DataType for Int64Type {
|
||||
fn name(&self) -> &str {
|
||||
"Int64"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Int64
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::Int64(0)
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Int64
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(PrimitiveVectorBuilder::<Int64Type>::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::collections::BinaryHeap;

    use super::*;

    /// Ensures `OrdPrimitive` of every native type can be stored in a `BinaryHeap`,
    /// which requires `Ord`.
    #[test]
    fn test_ord_primitive() {
        struct Foo<T: WrapperType> {
            heap: BinaryHeap<OrdPrimitive<T>>,
        }

        impl<T: WrapperType> Foo<T> {
            fn push(&mut self, value: T) {
                self.heap.push(OrdPrimitive(value));
            }
        }

        // Pushes the default value of each listed type into a fresh heap.
        macro_rules! test {
            ($($Type:ident),+ $(,)?) => {
                $(
                    {
                        let mut foo = Foo::<$Type> {
                            heap: BinaryHeap::new(),
                        };
                        foo.push(<$Type>::default());
                    }
                )+
            };
        }

        test!(u8, u16, u32, u64, i8, i16, i32, i64, f32, f64);
    }
}
|
||||
60
src/datatypes2/src/types/string_type.rs
Normal file
60
src/datatypes2/src/types/string_type.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::StringBytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::prelude::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{MutableVector, StringVectorBuilder};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct StringType;
|
||||
|
||||
impl StringType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for StringType {
|
||||
fn name(&self) -> &str {
|
||||
"String"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::String
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
StringBytes::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Utf8
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(StringVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
140
src/datatypes2/src/types/timestamp_type.rs
Normal file
140
src/datatypes2/src/types/timestamp_type.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::datatypes::{
|
||||
DataType as ArrowDataType, TimeUnit as ArrowTimeUnit,
|
||||
TimestampMicrosecondType as ArrowTimestampMicrosecondType,
|
||||
TimestampMillisecondType as ArrowTimestampMillisecondType,
|
||||
TimestampNanosecondType as ArrowTimestampNanosecondType,
|
||||
TimestampSecondType as ArrowTimestampSecondType,
|
||||
};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use enum_dispatch::enum_dispatch;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error;
|
||||
use crate::prelude::{
|
||||
DataType, LogicalTypeId, MutableVector, ScalarVectorBuilder, Value, ValueRef, Vector,
|
||||
};
|
||||
use crate::timestamp::{
|
||||
TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
|
||||
};
|
||||
use crate::types::LogicalPrimitiveType;
|
||||
use crate::vectors::{
|
||||
PrimitiveVector, TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder,
|
||||
TimestampMillisecondVector, TimestampMillisecondVectorBuilder, TimestampNanosecondVector,
|
||||
TimestampNanosecondVectorBuilder, TimestampSecondVector, TimestampSecondVectorBuilder,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[enum_dispatch(DataType)]
|
||||
pub enum TimestampType {
|
||||
Second(TimestampSecondType),
|
||||
Millisecond(TimestampMillisecondType),
|
||||
Microsecond(TimestampMicrosecondType),
|
||||
Nanosecond(TimestampNanosecondType),
|
||||
}
|
||||
|
||||
macro_rules! impl_data_type_for_timestamp {
|
||||
($unit: ident) => {
|
||||
paste! {
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct [<Timestamp $unit Type>];
|
||||
|
||||
impl DataType for [<Timestamp $unit Type>] {
|
||||
fn name(&self) -> &str {
|
||||
stringify!([<Timestamp $unit Type>])
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::[<Timestamp $unit>]
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::Timestamp(Timestamp::new(0, TimeUnit::$unit))
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Timestamp(ArrowTimeUnit::$unit, None)
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new([<Timestamp $unit Vector Builder>]::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl LogicalPrimitiveType for [<Timestamp $unit Type>] {
|
||||
type ArrowPrimitive = [<Arrow Timestamp $unit Type>];
|
||||
type Native = i64;
|
||||
type Wrapper = [<Timestamp $unit>];
|
||||
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::Timestamp(TimestampType::$unit(
|
||||
[<Timestamp $unit Type>]::default(),
|
||||
))
|
||||
}
|
||||
|
||||
fn type_name() -> &'static str {
|
||||
stringify!([<Timestamp $unit Type>])
|
||||
}
|
||||
|
||||
fn cast_vector(vector: &dyn Vector) -> crate::Result<&PrimitiveVector<Self>> {
|
||||
vector
|
||||
.as_any()
|
||||
.downcast_ref::<[<Timestamp $unit Vector>]>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast {} to {}",
|
||||
vector.vector_type_name(), stringify!([<Timestamp $unit Vector>])
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
fn cast_value_ref(value: ValueRef) -> crate::Result<Option<Self::Wrapper>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::Timestamp(t) => match t.unit() {
|
||||
TimeUnit::$unit => Ok(Some([<Timestamp $unit>](t))),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast Timestamp value with different unit {:?} to {}",
|
||||
other, stringify!([<Timestamp $unit>])
|
||||
),
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!("Failed to cast value {:?} to {}", other, stringify!([<Timestamp $unit>])),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_data_type_for_timestamp!(Nanosecond);
|
||||
impl_data_type_for_timestamp!(Second);
|
||||
impl_data_type_for_timestamp!(Millisecond);
|
||||
impl_data_type_for_timestamp!(Microsecond);
|
||||
1275
src/datatypes2/src/value.rs
Normal file
1275
src/datatypes2/src/value.rs
Normal file
File diff suppressed because it is too large
Load Diff
309
src/datatypes2/src/vectors.rs
Normal file
309
src/datatypes2/src/vectors.rs
Normal file
@@ -0,0 +1,309 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::operations::VectorOp;
|
||||
|
||||
mod binary;
|
||||
mod boolean;
|
||||
mod constant;
|
||||
mod date;
|
||||
mod datetime;
|
||||
mod eq;
|
||||
mod helper;
|
||||
mod list;
|
||||
mod null;
|
||||
mod operations;
|
||||
mod primitive;
|
||||
mod string;
|
||||
mod timestamp;
|
||||
mod validity;
|
||||
|
||||
pub use binary::{BinaryVector, BinaryVectorBuilder};
|
||||
pub use boolean::{BooleanVector, BooleanVectorBuilder};
|
||||
pub use constant::ConstantVector;
|
||||
pub use date::{DateVector, DateVectorBuilder};
|
||||
pub use datetime::{DateTimeVector, DateTimeVectorBuilder};
|
||||
pub use helper::Helper;
|
||||
pub use list::{ListIter, ListVector, ListVectorBuilder};
|
||||
pub use null::{NullVector, NullVectorBuilder};
|
||||
pub use primitive::{
|
||||
Float32Vector, Float32VectorBuilder, Float64Vector, Float64VectorBuilder, Int16Vector,
|
||||
Int16VectorBuilder, Int32Vector, Int32VectorBuilder, Int64Vector, Int64VectorBuilder,
|
||||
Int8Vector, Int8VectorBuilder, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder,
|
||||
UInt16Vector, UInt16VectorBuilder, UInt32Vector, UInt32VectorBuilder, UInt64Vector,
|
||||
UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder,
|
||||
};
|
||||
pub use string::{StringVector, StringVectorBuilder};
|
||||
pub use timestamp::{
|
||||
TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder, TimestampMillisecondVector,
|
||||
TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampNanosecondVectorBuilder,
|
||||
TimestampSecondVector, TimestampSecondVectorBuilder,
|
||||
};
|
||||
pub use validity::Validity;
|
||||
|
||||
// TODO(yingwen): arrow 28.0 implements Clone for all arrays, so we could upgrade to it and
// simplify some code in methods such as `to_arrow_array()` and `to_boxed_arrow_array()`.
|
||||
/// Vector of data values.
|
||||
pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
|
||||
/// Returns the data type of the vector.
|
||||
///
|
||||
/// This may require heap allocation.
|
||||
fn data_type(&self) -> ConcreteDataType;
|
||||
|
||||
fn vector_type_name(&self) -> String;
|
||||
|
||||
/// Returns the vector as [Any](std::any::Any) so that it can be
|
||||
/// downcast to a specific implementation.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
|
||||
/// Returns number of elements in the vector.
|
||||
fn len(&self) -> usize;
|
||||
|
||||
/// Returns whether the vector is empty.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Convert this vector to a new arrow [ArrayRef].
|
||||
fn to_arrow_array(&self) -> ArrayRef;
|
||||
|
||||
/// Convert this vector to a new boxed arrow [Array].
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array>;
|
||||
|
||||
/// Returns the validity of the Array.
|
||||
fn validity(&self) -> Validity;
|
||||
|
||||
/// Returns the memory size of vector.
|
||||
fn memory_size(&self) -> usize;
|
||||
|
||||
/// The number of null slots on this [`Vector`].
|
||||
/// # Implementation
|
||||
/// This is `O(1)`.
|
||||
fn null_count(&self) -> usize;
|
||||
|
||||
/// Returns true when it's a ConstantColumn
|
||||
fn is_const(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns whether row is null.
|
||||
fn is_null(&self, row: usize) -> bool;
|
||||
|
||||
/// If the only value vector can contain is NULL.
|
||||
fn only_null(&self) -> bool {
|
||||
self.null_count() == self.len()
|
||||
}
|
||||
|
||||
/// Slices the `Vector`, returning a new `VectorRef`.
|
||||
///
|
||||
/// # Panics
|
||||
/// This function panics if `offset + length > self.len()`.
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef;
|
||||
|
||||
/// Returns the clone of value at `index`.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panic if `index` is out of bound.
|
||||
fn get(&self, index: usize) -> Value;
|
||||
|
||||
/// Returns the clone of value at `index` or error if `index`
|
||||
/// is out of bound.
|
||||
fn try_get(&self, index: usize) -> Result<Value> {
|
||||
ensure!(
|
||||
index < self.len(),
|
||||
error::BadArrayAccessSnafu {
|
||||
index,
|
||||
size: self.len()
|
||||
}
|
||||
);
|
||||
Ok(self.get(index))
|
||||
}
|
||||
|
||||
/// Returns the reference of value at `index`.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panic if `index` is out of bound.
|
||||
fn get_ref(&self, index: usize) -> ValueRef;
|
||||
}
|
||||
|
||||
/// Shared, reference-counted pointer to a type-erased [Vector].
pub type VectorRef = Arc<dyn Vector>;
|
||||
|
||||
/// Mutable vector that could be used to build an immutable vector.
|
||||
pub trait MutableVector: Send + Sync {
|
||||
/// Returns the data type of the vector.
|
||||
fn data_type(&self) -> ConcreteDataType;
|
||||
|
||||
/// Returns the length of the vector.
|
||||
fn len(&self) -> usize;
|
||||
|
||||
/// Returns whether the vector is empty.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Convert to Any, to enable dynamic casting.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
|
||||
/// Convert to mutable Any, to enable dynamic casting.
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any;
|
||||
|
||||
/// Convert `self` to an (immutable) [VectorRef] and reset `self`.
|
||||
fn to_vector(&mut self) -> VectorRef;
|
||||
|
||||
/// Push value ref to this mutable vector.
|
||||
///
|
||||
/// Returns error if data type unmatch.
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()>;
|
||||
|
||||
/// Extend this mutable vector by slice of `vector`.
|
||||
///
|
||||
/// Returns error if data type unmatch.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `offset + length > vector.len()`.
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
|
||||
}
|
||||
|
||||
/// Helper to define a `try_from_arrow_array(array)` constructor on `$Vector`.
///
/// The generated function downcasts the given arrow array to `$Array` and wraps a copy of its
/// array data in `$Vector`.
macro_rules! impl_try_from_arrow_array_for_vector {
    ($Array: ident, $Vector: ident) => {
        impl $Vector {
            /// Builds the vector from an arrow array.
            ///
            /// # Errors
            /// Returns a conversion error (carrying the arrow data type) if `array` is not of
            /// the expected concrete array type.
            pub fn try_from_arrow_array(
                array: impl AsRef<dyn arrow::array::Array>,
            ) -> crate::error::Result<$Vector> {
                use snafu::OptionExt;

                let arrow_array = array.as_ref();
                let concrete = arrow_array
                    .as_any()
                    .downcast_ref::<$Array>()
                    .with_context(|| crate::error::ConversionSnafu {
                        from: std::format!("{:?}", arrow_array.data_type()),
                    })?;

                Ok($Vector::from($Array::from(concrete.data().clone())))
            }
        }
    };
}
|
||||
|
||||
/// Expands to an expression that builds a `Validity` from the array data of `$array`.
macro_rules! impl_validity_for_vector {
    ($array: expr) => {{
        let array_data = $array.data();
        Validity::from_array_data(array_data)
    }};
}
|
||||
|
||||
/// Expands to an expression yielding the owned `Value` stored at `$index` of `$array`, or
/// `Value::Null` when that slot is null.
///
/// # Panics
/// The expanded code panics if `$index` is out of bounds.
macro_rules! impl_get_for_vector {
    ($array: expr, $index: ident) => {{
        // `is_valid()` is not a bounds check: arrow consults only the validity bitmap, which is
        // absent for arrays without nulls. Check the index explicitly before using the
        // unchecked accessor, so an out-of-range index panics instead of reading out of bounds.
        assert!(
            $index < $array.len(),
            "index {} out of bounds for vector of length {}",
            $index,
            $array.len()
        );
        if $array.is_valid($index) {
            // SAFETY: `$index` was just asserted to be smaller than the array length.
            unsafe { $array.value_unchecked($index).into() }
        } else {
            Value::Null
        }
    }};
}
|
||||
|
||||
/// Expands to an expression yielding a `ValueRef` to the value stored at `$index` of `$array`,
/// or `ValueRef::Null` when that slot is null.
///
/// # Panics
/// The expanded code panics if `$index` is out of bounds.
macro_rules! impl_get_ref_for_vector {
    ($array: expr, $index: ident) => {{
        // `is_valid()` is not a bounds check: arrow consults only the validity bitmap, which is
        // absent for arrays without nulls. Check the index explicitly before using the
        // unchecked accessor, so an out-of-range index panics instead of reading out of bounds.
        assert!(
            $index < $array.len(),
            "index {} out of bounds for vector of length {}",
            $index,
            $array.len()
        );
        if $array.is_valid($index) {
            // SAFETY: `$index` was just asserted to be smaller than the array length.
            unsafe { $array.value_unchecked($index).into() }
        } else {
            ValueRef::Null
        }
    }};
}
|
||||
|
||||
/// Expands to the body of an `extend_slice_of()` implementation.
///
/// The expansion slices `$vector` by `$offset`/`$length`, downcasts the slice to `$VectorType`
/// (returning a cast error on mismatch) and pushes every element into `$mutable_vector`.
macro_rules! impl_extend_for_builder {
    ($mutable_vector: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
        use snafu::OptionExt;

        let source = $vector.slice($offset, $length);
        let typed_source = source.as_any().downcast_ref::<$VectorType>();
        let typed_source = typed_source.with_context(|| crate::error::CastTypeSnafu {
            msg: format!(
                "Failed to cast vector from {} to {}",
                $vector.vector_type_name(),
                stringify!($VectorType)
            ),
        })?;

        for item in typed_source.iter_data() {
            $mutable_vector.push(item);
        }
        Ok(())
    }};
}
|
||||
|
||||
pub(crate) use {
|
||||
impl_extend_for_builder, impl_get_for_vector, impl_get_ref_for_vector,
|
||||
impl_try_from_arrow_array_for_vector, impl_validity_for_vector,
|
||||
};
|
||||
|
||||
#[cfg(test)]
pub mod tests {
    use arrow::array::{Array, Int32Array, UInt8Array};
    use serde_json;

    use super::*;
    use crate::data_type::DataType;
    use crate::types::{Int32Type, LogicalPrimitiveType};
    use crate::vectors::helper::Helper;

    /// Converts an arrow array into a vector and renders its JSON serialization as a string.
    fn serialize_to_json_string(array: Arc<dyn Array>) -> String {
        let vector = Helper::try_into_vector(array).unwrap();
        let json_value = vector.serialize_to_json().unwrap();
        serde_json::to_string(&json_value).unwrap()
    }

    #[test]
    fn test_df_columns_to_vector() {
        let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let vector = Helper::try_into_vector(df_column).unwrap();
        assert_eq!(
            Int32Type::build_data_type().as_arrow_type(),
            vector.data_type().as_arrow_type()
        );
    }

    #[test]
    fn test_serialize_i32_vector() {
        let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
        assert_eq!("[1,2,3]", serialize_to_json_string(df_column));
    }

    // Previously named `test_serialize_i8_vector`, although it has always exercised an
    // unsigned 8-bit array.
    #[test]
    fn test_serialize_u8_vector() {
        let df_column: Arc<dyn Array> = Arc::new(UInt8Array::from(vec![1, 2, 3]));
        assert_eq!("[1,2,3]", serialize_to_json_string(df_column));
    }
}
|
||||
353
src/datatypes2/src/vectors/binary.rs
Normal file
353
src/datatypes2/src/vectors/binary.rs
Normal file
@@ -0,0 +1,353 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::arrow_array::{BinaryArray, MutableBinaryArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector of binary strings.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct BinaryVector {
|
||||
array: BinaryArray,
|
||||
}
|
||||
|
||||
impl BinaryVector {
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> BinaryVector {
|
||||
BinaryVector {
|
||||
array: BinaryArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BinaryArray> for BinaryVector {
|
||||
fn from(array: BinaryArray) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<Vec<u8>>>> for BinaryVector {
|
||||
fn from(data: Vec<Option<Vec<u8>>>) -> Self {
|
||||
Self {
|
||||
array: BinaryArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for BinaryVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::binary_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"BinaryVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(BinaryArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(BinaryArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
vectors::impl_get_for_vector!(self.array, index)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
vectors::impl_get_ref_for_vector!(self.array, index)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BinaryVector {
|
||||
type OwnedItem = Vec<u8>;
|
||||
type RefItem<'a> = &'a [u8];
|
||||
type Iter<'a> = ArrayIter<&'a BinaryArray>;
|
||||
type Builder = BinaryVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
self.array.iter()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BinaryVectorBuilder {
|
||||
mutable_array: MutableBinaryArray,
|
||||
}
|
||||
|
||||
impl MutableVector for BinaryVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::binary_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.mutable_array.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
match value.as_binary()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self, vector, BinaryVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for BinaryVectorBuilder {
|
||||
type VectorType = BinaryVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableBinaryArray::with_capacity(capacity, 0),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
BinaryVector {
|
||||
array: self.mutable_array.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for BinaryVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
self.iter_data()
|
||||
.map(|v| match v {
|
||||
None => Ok(serde_json::Value::Null), // if binary vector not present, map to NULL
|
||||
Some(vec) => serde_json::to_value(vec),
|
||||
})
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(error::SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
// Generates `BinaryVector::try_from_arrow_array()`, which downcasts an arrow array to
// `BinaryArray` and wraps it.
vectors::impl_try_from_arrow_array_for_vector!(BinaryArray, BinaryVector);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::datatypes::DataType as ArrowDataType;
    use common_base::bytes::Bytes;
    use serde_json;

    use super::*;
    use crate::arrow_array::BinaryArray;
    use crate::data_type::DataType;
    use crate::serialize::Serializable;
    use crate::types::BinaryType;

    /// Builds a two-row vector whose rows both hold the bytes `[1, 2, 3]`.
    fn two_row_vector() -> BinaryVector {
        BinaryVector::from(BinaryArray::from_iter_values(&[
            vec![1, 2, 3],
            vec![1, 2, 3],
        ]))
    }

    #[test]
    fn test_binary_vector_misc() {
        let vector = two_row_vector();

        assert_eq!(2, vector.len());
        assert_eq!("BinaryVector", vector.vector_type_name());
        assert!(!vector.is_const());
        assert!(vector.validity().is_all_valid());
        assert!(!vector.only_null());
        assert_eq!(128, vector.memory_size());

        for row in 0..2 {
            assert!(!vector.is_null(row));
            assert_eq!(Value::Binary(Bytes::from(vec![1, 2, 3])), vector.get(row));
            assert_eq!(ValueRef::Binary(&[1, 2, 3]), vector.get_ref(row));
        }

        let arrow_array = vector.to_arrow_array();
        assert_eq!(2, arrow_array.len());
        assert_eq!(&ArrowDataType::LargeBinary, arrow_array.data_type());
    }

    #[test]
    fn test_serialize_binary_vector_to_json() {
        let json_value = two_row_vector().serialize_to_json().unwrap();
        let rendered = serde_json::to_string(&json_value).unwrap();
        assert_eq!("[[1,2,3],[1,2,3]]", rendered);
    }

    #[test]
    fn test_serialize_binary_vector_with_null_to_json() {
        let mut builder = BinaryVectorBuilder::with_capacity(4);
        builder.push(Some(&[1, 2, 3]));
        builder.push(None);
        builder.push(Some(&[4, 5, 6]));

        let json_value = builder.finish().serialize_to_json().unwrap();
        let rendered = serde_json::to_string(&json_value).unwrap();
        assert_eq!("[[1,2,3],null,[4,5,6]]", rendered);
    }

    #[test]
    fn test_from_arrow_array() {
        let source = BinaryArray::from_iter_values(&[vec![1, 2, 3], vec![1, 2, 3]]);
        let expected = BinaryArray::from(source.data().clone());

        let vector = BinaryVector::from(source);
        assert_eq!(expected, vector.array);
    }

    #[test]
    fn test_binary_vector_build_get() {
        let mut builder = BinaryVectorBuilder::with_capacity(4);
        builder.push(Some(b"hello"));
        builder.push(Some(b"happy"));
        builder.push(Some(b"world"));
        builder.push(None);
        let vector = builder.finish();

        // Typed access.
        assert_eq!(b"hello", vector.get_data(0).unwrap());
        assert_eq!(None, vector.get_data(3));

        // Access through `Value`.
        assert_eq!(Value::Binary(b"hello".as_slice().into()), vector.get(0));
        assert_eq!(Value::Null, vector.get(3));

        // Iteration yields every slot in order and then terminates.
        let mut slots = vector.iter_data();
        assert_eq!(b"hello", slots.next().unwrap().unwrap());
        assert_eq!(b"happy", slots.next().unwrap().unwrap());
        assert_eq!(b"world", slots.next().unwrap().unwrap());
        assert_eq!(None, slots.next().unwrap());
        assert_eq!(None, slots.next());
    }

    #[test]
    fn test_binary_vector_validity() {
        // Without nulls.
        let mut builder = BinaryVectorBuilder::with_capacity(4);
        builder.push(Some(b"hello"));
        builder.push(Some(b"world"));
        let all_valid = builder.finish();
        assert_eq!(0, all_valid.null_count());
        assert!(all_valid.validity().is_all_valid());

        // With a single null in the middle.
        let mut builder = BinaryVectorBuilder::with_capacity(3);
        builder.push(Some(b"hello"));
        builder.push(None);
        builder.push(Some(b"world"));
        let with_null = builder.finish();
        assert_eq!(1, with_null.null_count());

        let validity = with_null.validity();
        assert_eq!(1, validity.null_count());
        assert!(!validity.is_set(1));
    }

    #[test]
    fn test_binary_vector_builder() {
        let source = BinaryVector::from_slice(&[b"world", b"one", b"two"]);
        let mut builder = BinaryType::default().create_mutable_vector(3);

        builder
            .push_value_ref(ValueRef::Binary("hello".as_bytes()))
            .unwrap();
        // A value of another type is rejected.
        assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());

        builder.extend_slice_of(&source, 1, 2).unwrap();
        // A vector of another type is rejected.
        let mismatched = crate::vectors::Int32Vector::from_slice(&[13]);
        assert!(builder.extend_slice_of(&mismatched, 0, 1).is_err());

        let actual = builder.to_vector();
        let expected: VectorRef = Arc::new(BinaryVector::from_slice(&[b"hello", b"one", b"two"]));
        assert_eq!(expected, actual);
    }
}
|
||||
371
src/datatypes2/src/vectors/boolean.rs
Normal file
371
src/datatypes2/src/vectors/boolean.rs
Normal file
@@ -0,0 +1,371 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::borrow::Borrow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, BooleanArray, BooleanBuilder,
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::Result;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector of boolean.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct BooleanVector {
|
||||
array: BooleanArray,
|
||||
}
|
||||
|
||||
impl BooleanVector {
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
pub(crate) fn as_boolean_array(&self) -> &BooleanArray {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> BooleanVector {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from(data),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn false_count(&self) -> usize {
|
||||
self.array.false_count()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<bool>> for BooleanVector {
|
||||
fn from(data: Vec<bool>) -> Self {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BooleanArray> for BooleanVector {
|
||||
fn from(array: BooleanArray) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<bool>>> for BooleanVector {
|
||||
fn from(data: Vec<Option<bool>>) -> Self {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanVector {
|
||||
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from_iter(iter),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for BooleanVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::boolean_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"BooleanVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(BooleanArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(BooleanArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
vectors::impl_get_for_vector!(self.array, index)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
vectors::impl_get_ref_for_vector!(self.array, index)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BooleanVector {
|
||||
type OwnedItem = bool;
|
||||
type RefItem<'a> = bool;
|
||||
type Iter<'a> = ArrayIter<&'a BooleanArray>;
|
||||
type Builder = BooleanVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
self.array.iter()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BooleanVectorBuilder {
|
||||
mutable_array: BooleanBuilder,
|
||||
}
|
||||
|
||||
impl MutableVector for BooleanVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::boolean_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.mutable_array.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
match value.as_boolean()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self, vector, BooleanVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for BooleanVectorBuilder {
|
||||
type VectorType = BooleanVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: BooleanBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
BooleanVector {
|
||||
array: self.mutable_array.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for BooleanVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
self.iter_data()
|
||||
.map(serde_json::to_value)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(crate::error::SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
// Generates `BooleanVector::try_from_arrow_array()`, which downcasts an arrow array to
// `BooleanArray` and wraps it.
vectors::impl_try_from_arrow_array_for_vector!(BooleanArray, BooleanVector);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::datatypes::DataType as ArrowDataType;
    use serde_json;

    use super::*;
    use crate::data_type::DataType;
    use crate::serialize::Serializable;
    use crate::types::BooleanType;

    #[test]
    fn test_boolean_vector_misc() {
        let expected = vec![true, false, true, true, false, false, true, true, false];
        let vector = BooleanVector::from(expected.clone());

        assert_eq!(9, vector.len());
        assert_eq!("BooleanVector", vector.vector_type_name());
        assert!(!vector.is_const());
        assert!(vector.validity().is_all_valid());
        assert!(!vector.only_null());
        assert_eq!(64, vector.memory_size());

        for (row, value) in expected.iter().enumerate() {
            assert!(!vector.is_null(row));
            assert_eq!(Value::Boolean(*value), vector.get(row));
            assert_eq!(ValueRef::Boolean(*value), vector.get_ref(row));
        }

        let arrow_array = vector.to_arrow_array();
        assert_eq!(9, arrow_array.len());
        assert_eq!(&ArrowDataType::Boolean, arrow_array.data_type());
    }

    #[test]
    fn test_serialize_boolean_vector_to_json() {
        let vector = BooleanVector::from(vec![true, false, true, true, false, false]);

        let json_value = vector.serialize_to_json().unwrap();
        let rendered = serde_json::to_string(&json_value).unwrap();
        assert_eq!("[true,false,true,true,false,false]", rendered);
    }

    #[test]
    fn test_serialize_boolean_vector_with_null_to_json() {
        let vector = BooleanVector::from(vec![Some(true), None, Some(false)]);

        let json_value = vector.serialize_to_json().unwrap();
        let rendered = serde_json::to_string(&json_value).unwrap();
        assert_eq!("[true,null,false]", rendered);
    }

    #[test]
    fn test_boolean_vector_from_vec() {
        let source = vec![false, true, false, true];
        let vector = BooleanVector::from(source.clone());

        assert_eq!(4, vector.len());
        for (row, value) in source.into_iter().enumerate() {
            assert_eq!(Some(value), vector.get_data(row), "failed at {}", row)
        }
    }

    #[test]
    fn test_boolean_vector_from_iter() {
        let source = vec![Some(false), Some(true), Some(false), Some(true)];
        let vector = source.iter().collect::<BooleanVector>();

        assert_eq!(4, vector.len());
        for (row, value) in source.into_iter().enumerate() {
            assert_eq!(value, vector.get_data(row), "failed at {}", row)
        }
    }

    #[test]
    fn test_boolean_vector_from_vec_option() {
        let source = vec![Some(false), Some(true), None, Some(true)];
        let vector = BooleanVector::from(source.clone());

        assert_eq!(4, vector.len());
        for (row, value) in source.into_iter().enumerate() {
            assert_eq!(value, vector.get_data(row), "failed at {}", row)
        }
    }

    #[test]
    fn test_boolean_vector_build_get() {
        let source = [Some(true), None, Some(false)];

        let mut builder = BooleanVectorBuilder::with_capacity(3);
        for value in source {
            builder.push(value);
        }
        let vector = builder.finish();
        assert_eq!(source.len(), vector.len());

        let collected: Vec<_> = vector.iter_data().collect();
        assert_eq!(source, &collected[..]);

        for (row, value) in source.into_iter().enumerate() {
            assert_eq!(value, vector.get_data(row));
            assert_eq!(Value::from(value), vector.get(row));
        }
    }

    #[test]
    fn test_boolean_vector_validity() {
        // With a single null in the middle.
        let with_null = BooleanVector::from(vec![Some(true), None, Some(false)]);
        assert_eq!(1, with_null.null_count());
        let validity = with_null.validity();
        assert_eq!(1, validity.null_count());
        assert!(!validity.is_set(1));

        // Without nulls.
        let all_valid = BooleanVector::from(vec![true, false, false]);
        assert_eq!(0, all_valid.null_count());
        assert!(all_valid.validity().is_all_valid());
    }

    #[test]
    fn test_boolean_vector_builder() {
        let source = BooleanVector::from_slice(&[true, false, true]);
        let mut builder = BooleanType::default().create_mutable_vector(3);

        builder.push_value_ref(ValueRef::Boolean(true)).unwrap();
        // A value of another type is rejected.
        assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());

        builder.extend_slice_of(&source, 1, 2).unwrap();
        // A vector of another type is rejected.
        let mismatched = crate::vectors::Int32Vector::from_slice(&[13]);
        assert!(builder.extend_slice_of(&mismatched, 0, 1).is_err());

        let actual = builder.to_vector();
        let expected: VectorRef = Arc::new(BooleanVector::from_slice(&[true, false, true]));
        assert_eq!(expected, actual);
    }
}
|
||||
218
src/datatypes2/src/vectors/constant.rs
Normal file
218
src/datatypes2/src/vectors/constant.rs
Normal file
@@ -0,0 +1,218 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, SerializeSnafu};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{BooleanVector, Helper, Validity, Vector, VectorRef};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ConstantVector {
|
||||
length: usize,
|
||||
vector: VectorRef,
|
||||
}
|
||||
|
||||
impl ConstantVector {
|
||||
/// Create a new [ConstantVector].
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `vector.len() != 1`.
|
||||
pub fn new(vector: VectorRef, length: usize) -> Self {
|
||||
assert_eq!(1, vector.len());
|
||||
|
||||
// Avoid const recursion.
|
||||
if vector.is_const() {
|
||||
let vec: &ConstantVector = unsafe { Helper::static_cast(&vector) };
|
||||
return Self::new(vec.inner().clone(), length);
|
||||
}
|
||||
Self { vector, length }
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &VectorRef {
|
||||
&self.vector
|
||||
}
|
||||
|
||||
/// Returns the constant value.
|
||||
pub fn get_constant_ref(&self) -> ValueRef {
|
||||
self.vector.get_ref(0)
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_vector(&self, offsets: &[usize]) -> VectorRef {
|
||||
assert_eq!(offsets.len(), self.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return self.slice(0, 0);
|
||||
}
|
||||
|
||||
Arc::new(ConstantVector::new(
|
||||
self.vector.clone(),
|
||||
*offsets.last().unwrap(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn filter_vector(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
let length = self.len() - filter.false_count();
|
||||
if length == self.len() {
|
||||
return Ok(Arc::new(self.clone()));
|
||||
}
|
||||
Ok(Arc::new(ConstantVector::new(self.inner().clone(), length)))
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for ConstantVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
self.vector.data_type()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"ConstantVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.length
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let v = self.vector.replicate(&[self.length]);
|
||||
v.to_arrow_array()
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let v = self.vector.replicate(&[self.length]);
|
||||
v.to_boxed_arrow_array()
|
||||
}
|
||||
|
||||
fn is_const(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
if self.vector.is_null(0) {
|
||||
Validity::all_null(self.length)
|
||||
} else {
|
||||
Validity::all_valid(self.length)
|
||||
}
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.vector.memory_size()
|
||||
}
|
||||
|
||||
fn is_null(&self, _row: usize) -> bool {
|
||||
self.vector.is_null(0)
|
||||
}
|
||||
|
||||
fn only_null(&self) -> bool {
|
||||
self.vector.is_null(0)
|
||||
}
|
||||
|
||||
fn slice(&self, _offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self {
|
||||
vector: self.vector.clone(),
|
||||
length,
|
||||
})
|
||||
}
|
||||
|
||||
fn get(&self, _index: usize) -> Value {
|
||||
self.vector.get(0)
|
||||
}
|
||||
|
||||
fn get_ref(&self, _index: usize) -> ValueRef {
|
||||
self.vector.get_ref(0)
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
if self.only_null() {
|
||||
self.len()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ConstantVector {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "ConstantVector([{:?}; {}])", self.get(0), self.len())
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for ConstantVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
std::iter::repeat(self.get(0))
|
||||
.take(self.len())
|
||||
.map(serde_json::Value::try_from)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::datatypes::DataType as ArrowDataType;

    use super::*;
    use crate::vectors::Int32Vector;

    /// Builds the fixture shared by the tests: the value `1i32` repeated ten
    /// times.
    fn new_constant() -> ConstantVector {
        let inner = Int32Vector::from_slice(vec![1]);
        ConstantVector::new(Arc::new(inner), 10)
    }

    #[test]
    fn test_constant_vector_misc() {
        let c = new_constant();

        assert_eq!("ConstantVector", c.vector_type_name());
        assert!(c.is_const());
        assert_eq!(10, c.len());
        assert!(c.validity().is_all_valid());
        assert!(!c.only_null());
        assert_eq!(64, c.memory_size());

        for row in 0..10 {
            assert!(!c.is_null(row));
            assert_eq!(Value::Int32(1), c.get(row));
        }

        let array = c.to_arrow_array();
        assert_eq!(10, array.len());
        assert_eq!(&ArrowDataType::Int32, array.data_type());
    }

    #[test]
    fn test_debug_null_array() {
        let c = new_constant();

        let formatted = format!("{:?}", c);
        assert_eq!(formatted, "ConstantVector([Int32(1); 10])");
    }

    #[test]
    fn test_serialize_json() {
        let c = new_constant();

        let json = c.serialize_to_json().unwrap();
        let s = serde_json::to_string(&json).unwrap();
        assert_eq!(s, "[1,1,1,1,1,1,1,1,1,1]");
    }
}
|
||||
103
src/datatypes2/src/vectors/date.rs
Normal file
103
src/datatypes2/src/vectors/date.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::types::DateType;
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
// Vector for [`Date`](common_time::Date).
|
||||
pub type DateVector = PrimitiveVector<DateType>;
|
||||
// Builder to build DateVector.
|
||||
pub type DateVectorBuilder = PrimitiveVectorBuilder<DateType>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::Array;
    use common_time::date::Date;

    use super::*;
    use crate::data_type::DataType;
    use crate::scalars::{ScalarVector, ScalarVectorBuilder};
    use crate::serialize::Serializable;
    use crate::types::DateType;
    use crate::value::{Value, ValueRef};
    use crate::vectors::{Vector, VectorRef};

    #[test]
    fn test_build_date_vector() {
        let mut builder = DateVectorBuilder::with_capacity(4);
        builder.push(Some(Date::new(1)));
        builder.push(None);
        builder.push(Some(Date::new(-1)));
        let vector = builder.finish();
        assert_eq!(3, vector.len());

        // Row 0: a valid date.
        assert_eq!(Value::Date(Date::new(1)), vector.get(0));
        assert_eq!(ValueRef::Date(Date::new(1)), vector.get_ref(0));
        assert_eq!(Some(Date::new(1)), vector.get_data(0));

        // Row 1: null.
        assert_eq!(None, vector.get_data(1));
        assert_eq!(Value::Null, vector.get(1));
        assert_eq!(ValueRef::Null, vector.get_ref(1));

        // Row 2: a date before the epoch.
        assert_eq!(Some(Date::new(-1)), vector.get_data(2));

        let mut iter = vector.iter_data();
        for expected in [Some(Date::new(1)), None, Some(Date::new(-1))] {
            assert_eq!(expected, iter.next().unwrap());
        }
    }

    #[test]
    fn test_date_scalar() {
        let vector = DateVector::from_slice(&[1, 2]);
        assert_eq!(2, vector.len());

        let first = vector.get_data(0);
        let second = vector.get_data(1);
        assert_eq!(Some(Date::new(1)), first);
        assert_eq!(Some(Date::new(2)), second);
    }

    #[test]
    fn test_date_vector_builder() {
        let input = DateVector::from_slice(&[1, 2, 3]);
        let mut builder = DateType::default().create_mutable_vector(3);

        // Values of the matching type are accepted, other types are rejected.
        let pushed = builder.push_value_ref(ValueRef::Date(Date::new(5)));
        pushed.unwrap();
        assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());

        // The same holds when extending from another vector.
        builder.extend_slice_of(&input, 1, 2).unwrap();
        let mismatched = crate::vectors::Int32Vector::from_slice(&[13]);
        assert!(builder.extend_slice_of(&mismatched, 0, 1).is_err());

        let vector = builder.to_vector();
        let expect: VectorRef = Arc::new(DateVector::from_slice(&[5, 2, 3]));
        assert_eq!(expect, vector);
    }

    #[test]
    fn test_date_from_arrow() {
        let vector = DateVector::from_slice(&[1, 2]);
        let len = vector.len();
        let arrow = vector.as_arrow().slice(0, len);

        let vector2 = DateVector::try_from_arrow_array(&arrow).unwrap();
        assert_eq!(vector, vector2);
    }

    #[test]
    fn test_serialize_date_vector() {
        let vector = DateVector::from_slice(&[-1, 0, 1]);
        let json = vector.serialize_to_json().unwrap();
        let serialized_json = serde_json::to_string(&json).unwrap();

        assert_eq!(
            r#"["1969-12-31","1970-01-01","1970-01-02"]"#,
            serialized_json
        );
    }
}
|
||||
116
src/datatypes2/src/vectors/datetime.rs
Normal file
116
src/datatypes2/src/vectors/datetime.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::types::DateTimeType;
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
/// Vector of [`DateTime`](common_time::Date)
|
||||
pub type DateTimeVector = PrimitiveVector<DateTimeType>;
|
||||
/// Builder for [`DateTimeVector`].
|
||||
pub type DateTimeVectorBuilder = PrimitiveVectorBuilder<DateTimeType>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{Array, PrimitiveArray};
    use common_time::DateTime;
    use datafusion_common::from_slice::FromSlice;

    use super::*;
    use crate::data_type::DataType;
    use crate::prelude::{
        ConcreteDataType, ScalarVector, ScalarVectorBuilder, Value, ValueRef, Vector, VectorRef,
    };
    use crate::serialize::Serializable;

    #[test]
    fn test_datetime_vector() {
        let v = DateTimeVector::new(PrimitiveArray::from_slice(&[1, 2, 3]));

        // Type information.
        assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
        assert_eq!(3, v.len());
        assert_eq!("DateTimeVector", v.vector_type_name());
        let arrow_array = v.to_arrow_array();
        assert_eq!(
            &arrow::datatypes::DataType::Date64,
            arrow_array.data_type()
        );

        // Row access.
        assert_eq!(Some(DateTime::new(1)), v.get_data(0));
        assert_eq!(Value::DateTime(DateTime::new(1)), v.get(0));
        assert_eq!(ValueRef::DateTime(DateTime::new(1)), v.get_ref(0));

        let mut iter = v.iter_data();
        for expected in 1..=3 {
            assert_eq!(Some(DateTime::new(expected)), iter.next().unwrap());
        }
        assert!(!v.is_null(0));
        assert_eq!(64, v.memory_size());

        match v.get(0) {
            Value::DateTime(d) => assert_eq!(1, d.val()),
            _ => unreachable!(),
        }

        let json = v.serialize_to_json().unwrap();
        assert_eq!(
            "[\"1970-01-01 00:00:01\",\"1970-01-01 00:00:02\",\"1970-01-01 00:00:03\"]",
            serde_json::to_string(&json).unwrap()
        );
    }

    #[test]
    fn test_datetime_vector_builder() {
        // Typed builder.
        let mut builder = DateTimeVectorBuilder::with_capacity(3);
        builder.push(Some(DateTime::new(1)));
        builder.push(None);
        builder.push(Some(DateTime::new(-1)));

        let v = builder.finish();
        assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
        assert_eq!(Value::DateTime(DateTime::new(1)), v.get(0));
        assert_eq!(Value::Null, v.get(1));
        assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));

        // Mutable vector created from the data type.
        let input = DateTimeVector::from_wrapper_slice(&[
            DateTime::new(1),
            DateTime::new(2),
            DateTime::new(3),
        ]);

        let mut builder = DateTimeType::default().create_mutable_vector(3);
        let pushed = builder.push_value_ref(ValueRef::DateTime(DateTime::new(5)));
        pushed.unwrap();
        assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());

        builder.extend_slice_of(&input, 1, 2).unwrap();
        let mismatched = crate::vectors::Int32Vector::from_slice(&[13]);
        assert!(builder.extend_slice_of(&mismatched, 0, 1).is_err());

        let vector = builder.to_vector();
        let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice(&[
            DateTime::new(5),
            DateTime::new(2),
            DateTime::new(3),
        ]));
        assert_eq!(expect, vector);
    }

    #[test]
    fn test_datetime_from_arrow() {
        let vector = DateTimeVector::from_wrapper_slice(&[DateTime::new(1), DateTime::new(2)]);
        let len = vector.len();
        let arrow = vector.as_arrow().slice(0, len);

        let vector2 = DateTimeVector::try_from_arrow_array(&arrow).unwrap();
        assert_eq!(vector, vector2);
    }
}
|
||||
228
src/datatypes2/src/vectors/eq.rs
Normal file
228
src/datatypes2/src/vectors/eq.rs
Normal file
@@ -0,0 +1,228 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::TimestampType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, ListVector, PrimitiveVector,
|
||||
StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
};
|
||||
use crate::with_match_primitive_type_id;
|
||||
|
||||
impl Eq for dyn Vector + '_ {}
|
||||
|
||||
impl PartialEq for dyn Vector + '_ {
|
||||
fn eq(&self, other: &dyn Vector) -> bool {
|
||||
equal(self, other)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<dyn Vector> for Arc<dyn Vector + '_> {
|
||||
fn eq(&self, other: &dyn Vector) -> bool {
|
||||
equal(&**self, other)
|
||||
}
|
||||
}
|
||||
|
||||
// Downcasts both operands to `$VectorType` and compares them with that type's
// own `==`. Panics if either operand is not a `$VectorType`.
macro_rules! is_vector_eq {
    ($VectorType: ident, $lhs: ident, $rhs: ident) => {{
        let left = $lhs.as_any().downcast_ref::<$VectorType>().unwrap();
        let right = $rhs.as_any().downcast_ref::<$VectorType>().unwrap();

        left == right
    }};
}
|
||||
|
||||
fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
if lhs.data_type() != rhs.data_type() || lhs.len() != rhs.len() {
|
||||
return false;
|
||||
}
|
||||
|
||||
if lhs.is_const() || rhs.is_const() {
|
||||
// Length has been checked before, so we only need to compare inner
|
||||
// vector here.
|
||||
return equal(
|
||||
&**lhs
|
||||
.as_any()
|
||||
.downcast_ref::<ConstantVector>()
|
||||
.unwrap()
|
||||
.inner(),
|
||||
&**lhs
|
||||
.as_any()
|
||||
.downcast_ref::<ConstantVector>()
|
||||
.unwrap()
|
||||
.inner(),
|
||||
);
|
||||
}
|
||||
|
||||
use crate::data_type::ConcreteDataType::*;
|
||||
|
||||
let lhs_type = lhs.data_type();
|
||||
match lhs.data_type() {
|
||||
Null(_) => true,
|
||||
Boolean(_) => is_vector_eq!(BooleanVector, lhs, rhs),
|
||||
Binary(_) => is_vector_eq!(BinaryVector, lhs, rhs),
|
||||
String(_) => is_vector_eq!(StringVector, lhs, rhs),
|
||||
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
|
||||
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
|
||||
Timestamp(t) => match t {
|
||||
TimestampType::Second(_) => {
|
||||
is_vector_eq!(TimestampSecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Millisecond(_) => {
|
||||
is_vector_eq!(TimestampMillisecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Microsecond(_) => {
|
||||
is_vector_eq!(TimestampMicrosecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Nanosecond(_) => {
|
||||
is_vector_eq!(TimestampNanosecondVector, lhs, rhs)
|
||||
}
|
||||
},
|
||||
List(_) => is_vector_eq!(ListVector, lhs, rhs),
|
||||
UInt8(_) | UInt16(_) | UInt32(_) | UInt64(_) | Int8(_) | Int16(_) | Int32(_) | Int64(_)
|
||||
| Float32(_) | Float64(_) => {
|
||||
with_match_primitive_type_id!(lhs_type.logical_type_id(), |$T| {
|
||||
let lhs = lhs.as_any().downcast_ref::<PrimitiveVector<$T>>().unwrap();
|
||||
let rhs = rhs.as_any().downcast_ref::<PrimitiveVector<$T>>().unwrap();
|
||||
|
||||
lhs == rhs
|
||||
},
|
||||
{
|
||||
unreachable!("should not compare {} with {}", lhs.vector_type_name(), rhs.vector_type_name())
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vectors::{
        list, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector,
        NullVector, UInt16Vector, UInt32Vector, UInt64Vector, UInt8Vector, VectorRef,
    };

    /// Asserts equality through the `dyn Vector` implementation of `PartialEq`.
    fn assert_dyn_vector_eq(lhs: &dyn Vector, rhs: &dyn Vector) {
        assert_eq!(lhs, rhs);
    }

    /// Asserts that a vector equals its own clone, both as `VectorRef` and as
    /// `dyn Vector`.
    fn assert_vector_ref_eq(vector: VectorRef) {
        let rhs = vector.clone();
        assert_eq!(vector, rhs);
        assert_dyn_vector_eq(&*vector, &*rhs);
    }

    fn assert_vector_ref_ne(lhs: VectorRef, rhs: VectorRef) {
        assert_ne!(lhs, rhs);
    }

    /// Wraps `inner` into a constant vector of `length` rows.
    fn constant_of(inner: VectorRef, length: usize) -> VectorRef {
        Arc::new(ConstantVector::new(inner, length))
    }

    #[test]
    fn test_vector_eq() {
        let list_vector = list::tests::new_list_vector(&[
            Some(vec![Some(1), Some(2)]),
            None,
            Some(vec![Some(3), Some(4)]),
        ]);

        let vectors: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from(vec![
                Some(b"hello".to_vec()),
                Some(b"world".to_vec()),
            ])),
            Arc::new(BooleanVector::from(vec![true, false])),
            Arc::new(ConstantVector::new(
                Arc::new(BooleanVector::from(vec![true])),
                5,
            )),
            Arc::new(BooleanVector::from(vec![true, false])),
            Arc::new(DateVector::from(vec![Some(100), Some(120)])),
            Arc::new(DateTimeVector::from(vec![Some(100), Some(120)])),
            Arc::new(TimestampSecondVector::from_values([100, 120])),
            Arc::new(TimestampMillisecondVector::from_values([100, 120])),
            Arc::new(TimestampMicrosecondVector::from_values([100, 120])),
            Arc::new(TimestampNanosecondVector::from_values([100, 120])),
            Arc::new(list_vector),
            Arc::new(NullVector::new(4)),
            Arc::new(StringVector::from(vec![Some("hello"), Some("world")])),
            Arc::new(Int8Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(UInt8Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(Int16Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(UInt16Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(UInt32Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(Int64Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(UInt64Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(Float32Vector::from_slice(&[1.0, 2.0, 3.0, 4.0])),
            Arc::new(Float64Vector::from_slice(&[1.0, 2.0, 3.0, 4.0])),
        ];

        for vector in vectors {
            assert_vector_ref_eq(vector);
        }
    }

    #[test]
    fn test_vector_ne() {
        // Same type, different length.
        assert_vector_ref_ne(
            Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(Int32Vector::from_slice(&[1, 2])),
        );
        // Same length, different type.
        assert_vector_ref_ne(
            Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(Int8Vector::from_slice(&[1, 2, 3, 4])),
        );
        // Different type and length.
        assert_vector_ref_ne(
            Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
            Arc::new(BooleanVector::from(vec![true, true])),
        );

        // Constant vectors.
        assert_vector_ref_ne(
            constant_of(Arc::new(BooleanVector::from(vec![true])), 5),
            constant_of(Arc::new(BooleanVector::from(vec![true])), 4),
        );
        assert_vector_ref_ne(
            constant_of(Arc::new(BooleanVector::from(vec![true])), 5),
            constant_of(Arc::new(BooleanVector::from(vec![false])), 4),
        );
        assert_vector_ref_ne(
            constant_of(Arc::new(BooleanVector::from(vec![true])), 5),
            constant_of(Arc::new(Int32Vector::from_slice(vec![1])), 4),
        );

        // Null vectors of different lengths.
        assert_vector_ref_ne(Arc::new(NullVector::new(5)), Arc::new(NullVector::new(8)));
    }
}
|
||||
431
src/datatypes2/src/vectors/helper.rs
Normal file
431
src/datatypes2/src/vectors/helper.rs
Normal file
@@ -0,0 +1,431 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Vector helper functions, inspired by databend Series mod
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, StringArray};
|
||||
use arrow::compute;
|
||||
use arrow::compute::kernels::comparison;
|
||||
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
|
||||
use datafusion_common::ScalarValue;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarVectorBuilder};
|
||||
use crate::value::{ListValue, ListValueRef};
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, Float32Vector,
|
||||
Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, ListVector,
|
||||
ListVectorBuilder, MutableVector, NullVector, StringVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
|
||||
UInt32Vector, UInt64Vector, UInt8Vector, Vector, VectorRef,
|
||||
};
|
||||
|
||||
/// Namespace type whose associated functions are helpers for working with
/// `Vector`s.
pub struct Helper;
|
||||
|
||||
impl Helper {
|
||||
/// Get a pointer to the underlying data of this vectors.
|
||||
/// Can be useful for fast comparisons.
|
||||
/// # Safety
|
||||
/// Assumes that the `vector` is T.
|
||||
pub unsafe fn static_cast<T: Any>(vector: &VectorRef) -> &T {
|
||||
let object = vector.as_ref();
|
||||
debug_assert!(object.as_any().is::<T>());
|
||||
&*(object as *const dyn Vector as *const T)
|
||||
}
|
||||
|
||||
pub fn check_get_scalar<T: Scalar>(vector: &VectorRef) -> Result<&<T as Scalar>::VectorType> {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<<T as Scalar>::VectorType>()
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
std::any::type_name::<T>(),
|
||||
),
|
||||
});
|
||||
arr
|
||||
}
|
||||
|
||||
pub fn check_get<T: 'static + Vector>(vector: &VectorRef) -> Result<&T> {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<T>()
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
std::any::type_name::<T>(),
|
||||
),
|
||||
});
|
||||
arr
|
||||
}
|
||||
|
||||
pub fn check_get_mutable_vector<T: 'static + MutableVector>(
|
||||
vector: &mut dyn MutableVector,
|
||||
) -> Result<&mut T> {
|
||||
let ty = vector.data_type();
|
||||
let arr = vector
|
||||
.as_mut_any()
|
||||
.downcast_mut()
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
ty,
|
||||
std::any::type_name::<T>(),
|
||||
),
|
||||
});
|
||||
arr
|
||||
}
|
||||
|
||||
pub fn check_get_scalar_vector<T: Scalar>(
|
||||
vector: &VectorRef,
|
||||
) -> Result<&<T as Scalar>::VectorType> {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<<T as Scalar>::VectorType>()
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
std::any::type_name::<T>(),
|
||||
),
|
||||
});
|
||||
arr
|
||||
}
|
||||
|
||||
/// Try to cast an arrow scalar value into vector
|
||||
pub fn try_from_scalar_value(value: ScalarValue, length: usize) -> Result<VectorRef> {
|
||||
let vector = match value {
|
||||
ScalarValue::Null => ConstantVector::new(Arc::new(NullVector::new(1)), length),
|
||||
ScalarValue::Boolean(v) => {
|
||||
ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Float32(v) => {
|
||||
ConstantVector::new(Arc::new(Float32Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Float64(v) => {
|
||||
ConstantVector::new(Arc::new(Float64Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Int8(v) => {
|
||||
ConstantVector::new(Arc::new(Int8Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Int16(v) => {
|
||||
ConstantVector::new(Arc::new(Int16Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Int32(v) => {
|
||||
ConstantVector::new(Arc::new(Int32Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Int64(v) => {
|
||||
ConstantVector::new(Arc::new(Int64Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::UInt8(v) => {
|
||||
ConstantVector::new(Arc::new(UInt8Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::UInt16(v) => {
|
||||
ConstantVector::new(Arc::new(UInt16Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::UInt32(v) => {
|
||||
ConstantVector::new(Arc::new(UInt32Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::UInt64(v) => {
|
||||
ConstantVector::new(Arc::new(UInt64Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => {
|
||||
ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Binary(v)
|
||||
| ScalarValue::LargeBinary(v)
|
||||
| ScalarValue::FixedSizeBinary(_, v) => {
|
||||
ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::List(v, field) => {
|
||||
let item_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
|
||||
if let Some(values) = v {
|
||||
let values = values
|
||||
.into_iter()
|
||||
.map(ScalarValue::try_into)
|
||||
.collect::<Result<_>>()?;
|
||||
let list_value = ListValue::new(Some(Box::new(values)), item_type);
|
||||
builder.push(Some(ListValueRef::Ref { val: &list_value }));
|
||||
} else {
|
||||
builder.push(None);
|
||||
}
|
||||
let list_vector = builder.to_vector();
|
||||
ConstantVector::new(list_vector, length)
|
||||
}
|
||||
ScalarValue::Date32(v) => {
|
||||
ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Date64(v) => {
|
||||
ConstantVector::new(Arc::new(DateTimeVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampSecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampMillisecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampMillisecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampMicrosecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampMicrosecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampNanosecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampNanosecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Decimal128(_, _, _)
|
||||
| ScalarValue::Time64(_)
|
||||
| ScalarValue::IntervalYearMonth(_)
|
||||
| ScalarValue::IntervalDayTime(_)
|
||||
| ScalarValue::IntervalMonthDayNano(_)
|
||||
| ScalarValue::Struct(_, _)
|
||||
| ScalarValue::Dictionary(_, _) => {
|
||||
return error::ConversionSnafu {
|
||||
from: format!("Unsupported scalar value: {}", value),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Arc::new(vector))
|
||||
}
|
||||
|
||||
/// Try to cast an arrow array into vector
|
||||
///
|
||||
/// # Panics
|
||||
/// Panic if given arrow data type is not supported.
|
||||
pub fn try_into_vector(array: impl AsRef<dyn Array>) -> Result<VectorRef> {
|
||||
Ok(match array.as_ref().data_type() {
|
||||
ArrowDataType::Null => Arc::new(NullVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Boolean => Arc::new(BooleanVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::LargeBinary => Arc::new(BinaryVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int8 => Arc::new(Int8Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int16 => Arc::new(Int16Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int32 => Arc::new(Int32Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int64 => Arc::new(Int64Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::UInt8 => Arc::new(UInt8Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::UInt16 => Arc::new(UInt16Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::UInt32 => Arc::new(UInt32Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::UInt64 => Arc::new(UInt64Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Date64 => Arc::new(DateTimeVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Timestamp(unit, _) => match unit {
|
||||
TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
|
||||
TimeUnit::Millisecond => {
|
||||
Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
TimeUnit::Microsecond => {
|
||||
Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
TimeUnit::Nanosecond => {
|
||||
Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
},
|
||||
ArrowDataType::Float16
|
||||
| ArrowDataType::Time32(_)
|
||||
| ArrowDataType::Time64(_)
|
||||
| ArrowDataType::Duration(_)
|
||||
| ArrowDataType::Interval(_)
|
||||
| ArrowDataType::Binary
|
||||
| ArrowDataType::FixedSizeBinary(_)
|
||||
| ArrowDataType::LargeUtf8
|
||||
| ArrowDataType::LargeList(_)
|
||||
| ArrowDataType::FixedSizeList(_, _)
|
||||
| ArrowDataType::Struct(_)
|
||||
| ArrowDataType::Union(_, _, _)
|
||||
| ArrowDataType::Dictionary(_, _)
|
||||
| ArrowDataType::Decimal128(_, _)
|
||||
| ArrowDataType::Decimal256(_, _)
|
||||
| ArrowDataType::Map(_, _) => {
|
||||
unimplemented!("Arrow array datatype: {:?}", array.as_ref().data_type())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Try to cast slice of `arrays` to vectors.
|
||||
pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
|
||||
arrays.iter().map(Self::try_into_vector).collect()
|
||||
}
|
||||
|
||||
/// Perform SQL like operation on `names` and a scalar `s`.
|
||||
pub fn like_utf8(names: Vec<String>, s: &str) -> Result<VectorRef> {
|
||||
let array = StringArray::from(names);
|
||||
|
||||
let filter = comparison::like_utf8_scalar(&array, s).context(error::ArrowComputeSnafu)?;
|
||||
|
||||
let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
|
||||
Helper::try_into_vector(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::array::{
        ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
        Int32Array, Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray,
        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
        TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
    };
    use arrow::datatypes::{Field, Int32Type};
    use common_time::{Date, DateTime};

    use super::*;
    use crate::value::Value;
    use crate::vectors::ConcreteDataType;

    /// Converting a slice of single element arrays keeps order and values.
    #[test]
    fn test_try_into_vectors() {
        let arrays: Vec<ArrayRef> = vec![
            Arc::new(Int32Array::from(vec![1])),
            Arc::new(Int32Array::from(vec![2])),
            Arc::new(Int32Array::from(vec![3])),
        ];

        let converted = Helper::try_into_vectors(&arrays);
        assert!(converted.is_ok());
        let converted = converted.unwrap();

        for vector in &converted {
            assert_eq!(1, vector.len());
        }
        for (idx, expected) in (1..=3).enumerate() {
            assert_eq!(Value::Int32(expected), converted[idx].get(0));
        }
    }

    /// A date vector survives a round trip through its arrow array.
    #[test]
    fn test_try_into_date_vector() {
        let original = DateVector::from(vec![Some(1), Some(2), None]);
        let arrow_array = original.to_arrow_array();
        assert_eq!(&ArrowDataType::Date32, arrow_array.data_type());

        let round_tripped = Helper::try_into_vector(arrow_array).unwrap();
        assert_eq!(original.len(), round_tripped.len());
        (0..round_tripped.len())
            .for_each(|row| assert_eq!(original.get(row), round_tripped.get(row)));
    }

    /// A `Date32` scalar expands into a constant date vector.
    #[test]
    fn test_try_from_scalar_date_value() {
        let scalar = ScalarValue::Date32(Some(42));
        let vector = Helper::try_from_scalar_value(scalar, 3).unwrap();

        assert_eq!(ConcreteDataType::date_datatype(), vector.data_type());
        assert_eq!(3, vector.len());
        (0..vector.len()).for_each(|row| assert_eq!(Value::Date(Date::new(42)), vector.get(row)));
    }

    /// A `Date64` scalar expands into a constant datetime vector.
    #[test]
    fn test_try_from_scalar_datetime_value() {
        let scalar = ScalarValue::Date64(Some(42));
        let vector = Helper::try_from_scalar_value(scalar, 3).unwrap();

        assert_eq!(ConcreteDataType::datetime_datatype(), vector.data_type());
        assert_eq!(3, vector.len());
        (0..vector.len())
            .for_each(|row| assert_eq!(Value::DateTime(DateTime::new(42)), vector.get(row)));
    }

    /// A list scalar expands into a constant list vector.
    #[test]
    fn test_try_from_list_value() {
        let elements = vec![ScalarValue::Int32(Some(1)), ScalarValue::Int32(Some(2))];
        let item_field = Field::new("item", ArrowDataType::Int32, true);
        let scalar = ScalarValue::List(Some(elements), Box::new(item_field));

        let vector = Helper::try_from_scalar_value(scalar, 3).unwrap();
        assert_eq!(
            ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
            vector.data_type()
        );
        assert_eq!(3, vector.len());

        let expected = vec![Value::Int32(1), Value::Int32(2)];
        for row in 0..vector.len() {
            let value = vector.get(row);
            let items = value.as_list().unwrap().unwrap().items().as_ref().unwrap();
            assert_eq!(expected, **items);
        }
    }

    /// `like_utf8` keeps exactly the names matching each pattern.
    #[test]
    fn test_like_utf8() {
        fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
            let actual = actual.as_any().downcast_ref::<StringVector>().unwrap();
            assert_eq!(*actual, StringVector::from(expected));
        }

        let names: Vec<String> = ["greptime", "hello", "public", "world"]
            .iter()
            .map(|name| name.to_string())
            .collect();

        let cases = vec![
            ("%ll%", vec!["hello"]),
            ("%time", vec!["greptime"]),
            ("%ld", vec!["world"]),
            ("%", vec!["greptime", "hello", "public", "world"]),
        ];
        for (pattern, expected) in cases {
            let ret = Helper::like_utf8(names.clone(), pattern).unwrap();
            assert_vector(expected, &ret);
        }
    }

    /// Asserts that `array` converts into a vector which exports an equal arrow array.
    fn check_try_into_vector(array: impl Array + 'static) {
        let expected: ArrayRef = Arc::new(array);
        let vector = Helper::try_into_vector(expected.clone()).unwrap();
        assert_eq!(&expected, &vector.to_arrow_array());
    }

    /// Every supported arrow array type round trips through `try_into_vector`.
    #[test]
    fn test_try_into_vector() {
        check_try_into_vector(NullArray::new(2));
        check_try_into_vector(BooleanArray::from(vec![true, false]));
        check_try_into_vector(LargeBinaryArray::from(vec![
            "hello".as_bytes(),
            "world".as_bytes(),
        ]));

        check_try_into_vector(Int8Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int16Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int32Array::from(vec![1, 2, 3]));
        check_try_into_vector(Int64Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt8Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt16Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt32Array::from(vec![1, 2, 3]));
        check_try_into_vector(UInt64Array::from(vec![1, 2, 3]));
        check_try_into_vector(Float32Array::from(vec![1.0, 2.0, 3.0]));
        check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));

        check_try_into_vector(StringArray::from(vec!["hello", "world"]));
        check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
        check_try_into_vector(Date64Array::from(vec![1, 2, 3]));

        let lists = vec![None, Some(vec![Some(6), Some(7)])];
        check_try_into_vector(ListArray::from_iter_primitive::<Int32Type, _, _>(lists));

        check_try_into_vector(TimestampSecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampMillisecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampMicrosecondArray::from(vec![1, 2, 3]));
        check_try_into_vector(TimestampNanosecondArray::from(vec![1, 2, 3]));
    }
}
|
||||
747
src/datatypes2/src/vectors/list.rs
Normal file
747
src/datatypes2/src/vectors/list.rs
Normal file
@@ -0,0 +1,747 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
Array, ArrayData, ArrayRef, BooleanBufferBuilder, Int32BufferBuilder, ListArray,
|
||||
};
|
||||
use arrow::buffer::Buffer;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde_json::Value as JsonValue;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::Result;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::ListType;
|
||||
use crate::value::{ListValue, ListValueRef, Value, ValueRef};
|
||||
use crate::vectors::{self, Helper, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector of Lists, basically backed by Arrow's `ListArray`.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct ListVector {
|
||||
array: ListArray,
|
||||
/// The datatype of the items in the list.
|
||||
item_type: ConcreteDataType,
|
||||
}
|
||||
|
||||
impl ListVector {
|
||||
/// Iterate elements as [VectorRef].
|
||||
pub fn values_iter(&self) -> impl Iterator<Item = Result<Option<VectorRef>>> + '_ {
|
||||
self.array
|
||||
.iter()
|
||||
.map(|value_opt| value_opt.map(Helper::try_into_vector).transpose())
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data_and_type(data: ArrayData, item_type: ConcreteDataType) -> Self {
|
||||
Self {
|
||||
array: ListArray::from(data),
|
||||
item_type,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for ListVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(self.item_type.clone()))
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"ListVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(ListArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(ListArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data_and_type(data, self.item_type.clone()))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
if !self.array.is_valid(index) {
|
||||
return Value::Null;
|
||||
}
|
||||
|
||||
let array = &self.array.value(index);
|
||||
let vector = Helper::try_into_vector(array).unwrap_or_else(|_| {
|
||||
panic!(
|
||||
"arrow array with datatype {:?} cannot converted to our vector",
|
||||
array.data_type()
|
||||
)
|
||||
});
|
||||
let values = (0..vector.len())
|
||||
.map(|i| vector.get(i))
|
||||
.collect::<Vec<Value>>();
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
self.item_type.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
ValueRef::List(ListValueRef::Indexed {
|
||||
vector: self,
|
||||
idx: index,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for ListVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<JsonValue>> {
|
||||
self.array
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
None => Ok(JsonValue::Null),
|
||||
Some(v) => Helper::try_into_vector(v)
|
||||
.and_then(|v| v.serialize_to_json())
|
||||
.map(JsonValue::Array),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ListArray> for ListVector {
|
||||
fn from(array: ListArray) -> Self {
|
||||
let item_type = ConcreteDataType::from_arrow_type(match array.data_type() {
|
||||
ArrowDataType::List(field) => field.data_type(),
|
||||
other => panic!(
|
||||
"Try to create ListVector from an arrow array with type {:?}",
|
||||
other
|
||||
),
|
||||
});
|
||||
Self { array, item_type }
|
||||
}
|
||||
}
|
||||
|
||||
vectors::impl_try_from_arrow_array_for_vector!(ListArray, ListVector);
|
||||
|
||||
pub struct ListIter<'a> {
|
||||
vector: &'a ListVector,
|
||||
idx: usize,
|
||||
}
|
||||
|
||||
impl<'a> ListIter<'a> {
|
||||
fn new(vector: &'a ListVector) -> ListIter {
|
||||
ListIter { vector, idx: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ListIter<'a> {
|
||||
type Item = Option<ListValueRef<'a>>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.idx >= self.vector.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let idx = self.idx;
|
||||
self.idx += 1;
|
||||
|
||||
if self.vector.is_null(idx) {
|
||||
return Some(None);
|
||||
}
|
||||
|
||||
Some(Some(ListValueRef::Indexed {
|
||||
vector: self.vector,
|
||||
idx,
|
||||
}))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
(self.vector.len(), Some(self.vector.len()))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for ListVector {
|
||||
type OwnedItem = ListValue;
|
||||
type RefItem<'a> = ListValueRef<'a>;
|
||||
type Iter<'a> = ListIter<'a>;
|
||||
type Builder = ListVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(ListValueRef::Indexed { vector: self, idx })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
ListIter::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
// Ports from arrow's GenericListBuilder.
|
||||
// See https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/generic_list_builder.rs
|
||||
/// [ListVector] builder.
|
||||
pub struct ListVectorBuilder {
|
||||
item_type: ConcreteDataType,
|
||||
offsets_builder: Int32BufferBuilder,
|
||||
null_buffer_builder: NullBufferBuilder,
|
||||
values_builder: Box<dyn MutableVector>,
|
||||
}
|
||||
|
||||
impl ListVectorBuilder {
|
||||
/// Creates a new [`ListVectorBuilder`]. `item_type` is the data type of the list item, `capacity`
|
||||
/// is the number of items to pre-allocate space for in this builder.
|
||||
pub fn with_type_capacity(item_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder {
|
||||
let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
|
||||
offsets_builder.append(0);
|
||||
// The actual required capacity might be greater than the capacity of the `ListVector`
|
||||
// if the child vector has more than one element.
|
||||
let values_builder = item_type.create_mutable_vector(capacity);
|
||||
|
||||
ListVectorBuilder {
|
||||
item_type,
|
||||
offsets_builder,
|
||||
null_buffer_builder: NullBufferBuilder::new(capacity),
|
||||
values_builder,
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish the current variable-length list vector slot.
|
||||
fn finish_list(&mut self, is_valid: bool) {
|
||||
self.offsets_builder
|
||||
.append(i32::try_from(self.values_builder.len()).unwrap());
|
||||
self.null_buffer_builder.append(is_valid);
|
||||
}
|
||||
|
||||
fn push_null(&mut self) {
|
||||
self.finish_list(false);
|
||||
}
|
||||
|
||||
fn push_list_value(&mut self, list_value: &ListValue) -> Result<()> {
|
||||
if let Some(items) = list_value.items() {
|
||||
for item in &**items {
|
||||
self.values_builder.push_value_ref(item.as_value_ref())?;
|
||||
}
|
||||
}
|
||||
|
||||
self.finish_list(true);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl MutableVector for ListVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::list_datatype(self.item_type.clone())
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.null_buffer_builder.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
if let Some(list_ref) = value.as_list()? {
|
||||
match list_ref {
|
||||
ListValueRef::Indexed { vector, idx } => match vector.get(idx).as_list()? {
|
||||
Some(list_value) => self.push_list_value(list_value)?,
|
||||
None => self.push_null(),
|
||||
},
|
||||
ListValueRef::Ref { val } => self.push_list_value(val)?,
|
||||
}
|
||||
} else {
|
||||
self.push_null();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
for idx in offset..offset + length {
|
||||
let value = vector.get_ref(idx);
|
||||
self.push_value_ref(value)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for ListVectorBuilder {
|
||||
type VectorType = ListVector;
|
||||
|
||||
fn with_capacity(_capacity: usize) -> Self {
|
||||
panic!("Must use ListVectorBuilder::with_type_capacity()");
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
// We expect the input ListValue has the same inner type as the builder when using
|
||||
// push(), so just panic if `push_value_ref()` returns error, which indicate an
|
||||
// invalid input value type.
|
||||
self.push_value_ref(value.into()).unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"Failed to push value, expect value type {:?}, err:{}",
|
||||
self.item_type, e
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
let len = self.len();
|
||||
let values_vector = self.values_builder.to_vector();
|
||||
let values_arr = values_vector.to_arrow_array();
|
||||
let values_data = values_arr.data();
|
||||
|
||||
let offset_buffer = self.offsets_builder.finish();
|
||||
let null_bit_buffer = self.null_buffer_builder.finish();
|
||||
// Re-initialize the offsets_builder.
|
||||
self.offsets_builder.append(0);
|
||||
let data_type = ConcreteDataType::list_datatype(self.item_type.clone()).as_arrow_type();
|
||||
let array_data_builder = ArrayData::builder(data_type)
|
||||
.len(len)
|
||||
.add_buffer(offset_buffer)
|
||||
.add_child_data(values_data.clone())
|
||||
.null_bit_buffer(null_bit_buffer);
|
||||
|
||||
let array_data = unsafe { array_data_builder.build_unchecked() };
|
||||
let array = ListArray::from(array_data);
|
||||
|
||||
ListVector {
|
||||
array,
|
||||
item_type: self.item_type.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ports from https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/null_buffer_builder.rs
|
||||
/// Builder for creating the null bit buffer.
|
||||
/// This builder only materializes the buffer when we append `false`.
|
||||
/// If you only append `true`s to the builder, what you get will be
|
||||
/// `None` when calling [`finish`](#method.finish).
|
||||
/// This optimization is **very** important for the performance.
|
||||
#[derive(Debug)]
|
||||
struct NullBufferBuilder {
|
||||
bitmap_builder: Option<BooleanBufferBuilder>,
|
||||
/// Store the length of the buffer before materializing.
|
||||
len: usize,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl NullBufferBuilder {
|
||||
/// Creates a new empty builder.
|
||||
/// `capacity` is the number of bits in the null buffer.
|
||||
fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
bitmap_builder: None,
|
||||
len: 0,
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
if let Some(b) = &self.bitmap_builder {
|
||||
b.len()
|
||||
} else {
|
||||
self.len
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends a `true` into the builder
|
||||
/// to indicate that this item is not null.
|
||||
#[inline]
|
||||
fn append_non_null(&mut self) {
|
||||
if let Some(buf) = self.bitmap_builder.as_mut() {
|
||||
buf.append(true)
|
||||
} else {
|
||||
self.len += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends a `false` into the builder
|
||||
/// to indicate that this item is null.
|
||||
#[inline]
|
||||
fn append_null(&mut self) {
|
||||
self.materialize_if_needed();
|
||||
self.bitmap_builder.as_mut().unwrap().append(false);
|
||||
}
|
||||
|
||||
/// Appends a boolean value into the builder.
|
||||
#[inline]
|
||||
fn append(&mut self, not_null: bool) {
|
||||
if not_null {
|
||||
self.append_non_null()
|
||||
} else {
|
||||
self.append_null()
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the null buffer and resets the builder.
|
||||
/// Returns `None` if the builder only contains `true`s.
|
||||
fn finish(&mut self) -> Option<Buffer> {
|
||||
let buf = self.bitmap_builder.as_mut().map(|b| b.finish());
|
||||
self.bitmap_builder = None;
|
||||
self.len = 0;
|
||||
buf
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn materialize_if_needed(&mut self) {
|
||||
if self.bitmap_builder.is_none() {
|
||||
self.materialize()
|
||||
}
|
||||
}
|
||||
|
||||
#[cold]
|
||||
fn materialize(&mut self) {
|
||||
if self.bitmap_builder.is_none() {
|
||||
let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
|
||||
b.append_n(self.len, true);
|
||||
self.bitmap_builder = Some(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
pub mod tests {
    use arrow::array::{Int32Array, Int32Builder, ListBuilder};
    use serde_json::json;

    use super::*;
    use crate::scalars::ScalarRef;
    use crate::types::ListType;
    use crate::vectors::Int32Vector;

    /// Builds a list of int32 vector from `data` with [ListVectorBuilder].
    pub fn new_list_vector(data: &[Option<Vec<Option<i32>>>]) -> ListVector {
        let mut builder =
            ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
        for row in data {
            match row {
                Some(items) => {
                    let values: Vec<Value> = items.iter().map(|v| Value::from(*v)).collect();
                    let list_value = int32_list_value(values);
                    builder.push(Some(ListValueRef::Ref { val: &list_value }));
                }
                None => builder.push(None),
            }
        }

        builder.finish()
    }

    /// Builds the equivalent arrow `ListArray` from `data` with arrow's own builder.
    fn new_list_array(data: &[Option<Vec<Option<i32>>>]) -> ListArray {
        let mut builder = ListBuilder::new(Int32Builder::new());
        for row in data {
            match row {
                Some(items) => {
                    for item in items {
                        builder.values().append_option(*item);
                    }
                    builder.append(true);
                }
                None => builder.append(false),
            }
        }

        builder.finish()
    }

    /// Wraps `items` into a list value of int32 items.
    fn int32_list_value(items: Vec<Value>) -> ListValue {
        ListValue::new(Some(Box::new(items)), ConcreteDataType::int32_datatype())
    }

    /// Three rows: a full list, a null and a list holding a null item.
    fn sample_data() -> Vec<Option<Vec<Option<i32>>>> {
        vec![
            Some(vec![Some(1), Some(2), Some(3)]),
            None,
            Some(vec![Some(4), None, Some(6)]),
        ]
    }

    #[test]
    fn test_list_vector() {
        let data = sample_data();
        let list_vector = new_list_vector(&data);

        assert_eq!(
            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
            list_vector.data_type()
        );
        assert_eq!("ListVector", list_vector.vector_type_name());
        assert_eq!(3, list_vector.len());
        assert!(!list_vector.is_null(0));
        assert!(list_vector.is_null(1));
        assert!(!list_vector.is_null(2));

        let expected_array = new_list_array(&data);
        let exported = list_vector.to_arrow_array();
        assert_eq!(
            expected_array,
            *exported.as_any().downcast_ref::<ListArray>().unwrap()
        );

        let validity = list_vector.validity();
        assert!(!validity.is_all_null());
        assert!(!validity.is_all_valid());
        assert!(validity.is_set(0));
        assert!(!validity.is_set(1));
        assert!(validity.is_set(2));
        assert_eq!(256, list_vector.memory_size());

        let slice = list_vector.slice(0, 2).to_arrow_array();
        let sliced_array = slice.as_any().downcast_ref::<ListArray>().unwrap();
        let first_list = sliced_array.value(0);
        assert_eq!(
            Int32Array::from_iter_values([1, 2, 3]),
            *first_list.as_any().downcast_ref::<Int32Array>().unwrap()
        );
        assert!(sliced_array.is_null(1));

        let first = int32_list_value(vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)]);
        assert_eq!(Value::List(first), list_vector.get(0));
        assert!(matches!(
            list_vector.get_ref(0),
            ValueRef::List(ListValueRef::Indexed { .. })
        ));

        match list_vector.get_ref(1) {
            ValueRef::List(ListValueRef::Indexed { idx, .. }) => assert_eq!(1, idx),
            _ => unreachable!(),
        }
        assert_eq!(Value::Null, list_vector.get(1));

        let third = int32_list_value(vec![Value::Int32(4), Value::Null, Value::Int32(6)]);
        assert_eq!(Value::List(third), list_vector.get(2));
    }

    #[test]
    fn test_from_arrow_array() {
        let data = sample_data();
        let expect = new_list_vector(&data);

        // Test try from ArrayRef
        let array_ref: ArrayRef = Arc::new(new_list_array(&data));
        let list_vector = ListVector::try_from_arrow_array(array_ref).unwrap();
        assert_eq!(expect, list_vector);

        // Test from
        let list_vector = ListVector::from(new_list_array(&data));
        assert_eq!(expect, list_vector);
    }

    #[test]
    fn test_iter_list_vector_values() {
        let list_vector = new_list_vector(&sample_data());

        assert_eq!(
            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
            list_vector.data_type()
        );

        let mut iter = list_vector.values_iter();
        assert_eq!(
            Arc::new(Int32Vector::from_slice(&[1, 2, 3])) as VectorRef,
            *iter.next().unwrap().unwrap().unwrap()
        );
        assert!(iter.next().unwrap().unwrap().is_none());
        assert_eq!(
            Arc::new(Int32Vector::from(vec![Some(4), None, Some(6)])) as VectorRef,
            *iter.next().unwrap().unwrap().unwrap(),
        );
        assert!(iter.next().is_none())
    }

    #[test]
    fn test_serialize_to_json() {
        let list_vector = new_list_vector(&sample_data());
        let expected = vec![json!([1, 2, 3]), json!(null), json!([4, null, 6])];
        assert_eq!(expected, list_vector.serialize_to_json().unwrap());
    }

    #[test]
    fn test_list_vector_builder() {
        let mut builder =
            ListType::new(ConcreteDataType::int32_datatype()).create_mutable_vector(3);

        let first = int32_list_value(vec![Value::Int32(4), Value::Null, Value::Int32(6)]);
        builder
            .push_value_ref(ValueRef::List(ListValueRef::Ref { val: &first }))
            .unwrap();
        // A non-list value must be rejected.
        assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());

        let input = new_list_vector(&[
            Some(vec![Some(1), Some(2), Some(3)]),
            None,
            Some(vec![Some(7), Some(8), None]),
        ]);
        builder.extend_slice_of(&input, 1, 2).unwrap();
        // Extending from a non-list vector must be rejected as well.
        let not_a_list = crate::vectors::Int32Vector::from_slice(&[13]);
        assert!(builder.extend_slice_of(&not_a_list, 0, 1).is_err());

        let vector = builder.to_vector();
        let expect: VectorRef = Arc::new(new_list_vector(&[
            Some(vec![Some(4), None, Some(6)]),
            None,
            Some(vec![Some(7), Some(8), None]),
        ]));
        assert_eq!(expect, vector);
    }

    #[test]
    fn test_list_vector_for_scalar() {
        let mut builder =
            ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 2);
        builder.push(None);
        let second = int32_list_value(vec![Value::Int32(4), Value::Null, Value::Int32(6)]);
        builder.push(Some(ListValueRef::Ref { val: &second }));
        let vector = builder.finish();

        let expect = new_list_vector(&[None, Some(vec![Some(4), None, Some(6)])]);
        assert_eq!(expect, vector);

        let second_ref = ListValueRef::Indexed {
            vector: &vector,
            idx: 1,
        };

        assert!(vector.get_data(0).is_none());
        assert_eq!(second_ref, vector.get_data(1).unwrap());
        assert_eq!(
            *vector.get(1).as_list().unwrap().unwrap(),
            vector.get_data(1).unwrap().to_owned_scalar()
        );

        let mut iter = vector.iter_data();
        assert!(iter.next().unwrap().is_none());
        assert_eq!(second_ref, iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());

        let mut iter = vector.iter_data();
        assert_eq!(2, iter.size_hint().0);
        assert_eq!(second_ref, iter.nth(1).unwrap().unwrap());
    }
}
|
||||
282
src/datatypes2/src/vectors/null.rs
Normal file
282
src/datatypes2/src/vectors/null.rs
Normal file
@@ -0,0 +1,282 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayData, ArrayRef, NullArray};
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::NullType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// A vector where all elements are nulls.
|
||||
#[derive(PartialEq)]
|
||||
pub struct NullVector {
|
||||
array: NullArray,
|
||||
}
|
||||
|
||||
// TODO(yingwen): Support null vector with other logical types.
|
||||
impl NullVector {
|
||||
/// Create a new `NullVector` with `n` elements.
|
||||
pub fn new(n: usize) -> Self {
|
||||
Self {
|
||||
array: NullArray::new(n),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NullArray> for NullVector {
|
||||
fn from(array: NullArray) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for NullVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::Null(NullType::default())
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"NullVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
// TODO(yingwen): Replaced by clone after upgrading to arrow 28.0.
|
||||
let data = self.to_array_data();
|
||||
Arc::new(NullArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(NullArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
Validity::all_null(self.array.len())
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, _row: usize) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn only_null(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn slice(&self, _offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self::new(length))
|
||||
}
|
||||
|
||||
fn get(&self, _index: usize) -> Value {
|
||||
// Skips bound check for null array.
|
||||
Value::Null
|
||||
}
|
||||
|
||||
fn get_ref(&self, _index: usize) -> ValueRef {
|
||||
// Skips bound check for null array.
|
||||
ValueRef::Null
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for NullVector {
    /// Prints only the element count, e.g. `NullVector(3)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rows = self.len();
        write!(f, "NullVector({})", rows)
    }
}
|
||||
|
||||
impl Serializable for NullVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
Ok(std::iter::repeat(serde_json::Value::Null)
|
||||
.take(self.len())
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
|
||||
vectors::impl_try_from_arrow_array_for_vector!(NullArray, NullVector);
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct NullVectorBuilder {
|
||||
length: usize,
|
||||
}
|
||||
|
||||
impl MutableVector for NullVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::null_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.length
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
let vector = Arc::new(NullVector::new(self.length));
|
||||
self.length = 0;
|
||||
vector
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
ensure!(
|
||||
value.is_null(),
|
||||
error::CastTypeSnafu {
|
||||
msg: format!("Failed to cast value ref {:?} to null", value),
|
||||
}
|
||||
);
|
||||
|
||||
self.length += 1;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vector
|
||||
.as_any()
|
||||
.downcast_ref::<NullVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to NullVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
assert!(
|
||||
offset + length <= vector.len(),
|
||||
"offset {} + length {} must less than {}",
|
||||
offset,
|
||||
length,
|
||||
vector.len()
|
||||
);
|
||||
|
||||
self.length += length;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_null(vector: &NullVector, offsets: &[usize]) -> VectorRef {
|
||||
assert_eq!(offsets.len(), vector.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return vector.slice(0, 0);
|
||||
}
|
||||
|
||||
Arc::new(NullVector::new(*offsets.last().unwrap()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
|
||||
#[test]
fn test_null_vector_misc() {
    let v = NullVector::new(32);

    // Size related accessors.
    assert_eq!(v.len(), 32);
    assert_eq!(0, v.memory_size());

    // Conversion to arrow keeps every slot null, also after slicing.
    let arrow_arr = v.to_arrow_array();
    assert_eq!(arrow_arr.null_count(), 32);
    let array2 = arrow_arr.slice(8, 16);
    assert_eq!(array2.len(), 16);
    assert_eq!(array2.null_count(), 16);

    // Type and validity metadata.
    assert_eq!("NullVector", v.vector_type_name());
    assert!(!v.is_const());
    assert!(v.validity().is_all_null());
    assert!(v.only_null());

    // Every row reads back as null.
    for row in 0..32 {
        assert!(v.is_null(row));
        assert_eq!(Value::Null, v.get(row));
        assert_eq!(ValueRef::Null, v.get_ref(row));
    }
}
|
||||
|
||||
#[test]
fn test_debug_null_vector() {
    // The debug output only contains the length, however large the vector is.
    let vector = NullVector::new(1024 * 1024);
    let rendered = format!("{:?}", vector);
    assert_eq!(rendered, "NullVector(1048576)");
}
|
||||
|
||||
#[test]
fn test_serialize_json() {
    // Three nulls serialize to a JSON array of three `null`s.
    let json_value = NullVector::new(3).serialize_to_json().unwrap();
    let rendered = serde_json::to_string(&json_value).unwrap();
    assert_eq!("[null,null,null]", rendered);
}
|
||||
|
||||
#[test]
fn test_null_vector_validity() {
    let vector = NullVector::new(5);
    let validity = vector.validity();
    assert!(validity.is_all_null());
    assert_eq!(5, vector.null_count());
}
|
||||
|
||||
#[test]
fn test_null_vector_builder() {
    let mut builder = NullType::default().create_mutable_vector(3);

    // Pushing single values: only nulls are accepted.
    builder.push_value_ref(ValueRef::Null).unwrap();
    let rejected = builder.push_value_ref(ValueRef::Int32(123));
    assert!(rejected.is_err());

    // Extending from another vector: only null vectors are accepted.
    let input = NullVector::new(3);
    builder.extend_slice_of(&input, 1, 2).unwrap();
    let mismatched = crate::vectors::Int32Vector::from_slice(&[13]);
    assert!(builder.extend_slice_of(&mismatched, 0, 1).is_err());

    // One pushed null plus two extended nulls give a vector of three nulls.
    let vector = builder.to_vector();
    let expect: VectorRef = Arc::new(input);
    assert_eq!(expect, vector);
}
|
||||
}
|
||||
127
src/datatypes2/src/vectors/operations.rs
Normal file
127
src/datatypes2/src/vectors/operations.rs
Normal file
@@ -0,0 +1,127 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod filter;
|
||||
mod find_unique;
|
||||
mod replicate;
|
||||
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::types::LogicalPrimitiveType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, ListVector, NullVector, PrimitiveVector, StringVector, Vector,
|
||||
VectorRef,
|
||||
};
|
||||
|
||||
/// Vector compute operations.
|
||||
pub trait VectorOp {
|
||||
/// Copies each element according to the `offsets` parameter.
|
||||
/// - `i-th` element should be copied `offsets[i] - offsets[i - 1]` times
|
||||
/// - `0-th` element would be copied `offsets[0]` times
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `offsets.len() != self.len()`.
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef;
|
||||
|
||||
/// Mark `i-th` bit of `selected` to `true` if the `i-th` element of `self` is unique, which
|
||||
/// means it differs from the element right before it.
|
||||
///
|
||||
/// The caller should ensure
|
||||
/// 1. the length of `selected` bitmap is equal to `vector.len()`.
|
||||
/// 2. `vector` and `prev_vector` are sorted.
|
||||
///
|
||||
/// If there are multiple duplicate elements, this function retains the **first** element.
|
||||
/// The first element is considered as unique if the first element of `self` is different
|
||||
/// from its previous element, that is the last element of `prev_vector`.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if
|
||||
/// - `selected.len() < self.len()`.
|
||||
/// - `prev_vector` and `self` have different data types.
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>);
|
||||
|
||||
/// Filters the vector, returns elements matching the `filter` (i.e. where the values are true).
|
||||
///
|
||||
/// Note that nulls in `filter` are interpreted as `false`, so the corresponding elements are masked out.
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef>;
|
||||
}
|
||||
|
||||
/// Implements [`VectorOp`] for each listed scalar vector type by delegating to the
/// generic helpers in the `replicate`, `find_unique` and `filter` submodules.
macro_rules! impl_scalar_vector_op {
    ($($Vector: ident),+) => {$(
        impl VectorOp for $Vector {
            fn replicate(&self, offsets: &[usize]) -> VectorRef {
                replicate::replicate_scalar(self, offsets)
            }

            fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
                // Panics (via `unwrap`) if `prev_vector` has a different concrete type.
                let typed_prev =
                    prev_vector.map(|other| other.as_any().downcast_ref::<Self>().unwrap());
                find_unique::find_unique_scalar(self, selected, typed_prev);
            }

            fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
                filter::filter_non_constant!(self, Self, filter)
            }
        }
    )+};
}
|
||||
|
||||
impl_scalar_vector_op!(BinaryVector, BooleanVector, ListVector, StringVector);
|
||||
|
||||
impl<T: LogicalPrimitiveType> VectorOp for PrimitiveVector<T> {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
std::sync::Arc::new(replicate::replicate_primitive(self, offsets))
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector =
|
||||
prev_vector.and_then(|pv| pv.as_any().downcast_ref::<PrimitiveVector<T>>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, PrimitiveVector<T>, filter)
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorOp for NullVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
replicate::replicate_null(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<NullVector>());
|
||||
find_unique::find_unique_null(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, NullVector, filter)
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorOp for ConstantVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
self.replicate_vector(offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<ConstantVector>());
|
||||
find_unique::find_unique_constant(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
self.filter_vector(filter)
|
||||
}
|
||||
}
|
||||
145
src/datatypes2/src/vectors/operations/filter.rs
Normal file
145
src/datatypes2/src/vectors/operations/filter.rs
Normal file
@@ -0,0 +1,145 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/// Filters a non-constant vector with arrow's `filter` kernel.
///
/// Expands to an expression that evaluates to `Ok(Arc<$VectorType>)`. It uses `?`
/// internally, so it must be expanded inside a function returning the crate's `Result`.
///
/// - `$vector`: expression providing `as_arrow()`.
/// - `$VectorType`: concrete vector type rebuilt from the filtered arrow array.
/// - `$filter`: identifier of the boolean vector used as the selection mask.
macro_rules! filter_non_constant {
    ($vector: expr, $VectorType: ty, $filter: ident) => {{
        use snafu::ResultExt;

        let source = $vector.as_arrow();
        let selection = $filter.as_boolean_array();
        let filtered = arrow::compute::filter(source, selection)
            .context(crate::error::ArrowComputeSnafu)?;
        let vector = <$VectorType>::try_from_arrow_array(filtered)?;
        Ok(std::sync::Arc::new(vector))
    }};
}
|
||||
|
||||
pub(crate) use filter_non_constant;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::timestamp::{
|
||||
TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
|
||||
};
|
||||
use crate::types::WrapperType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BooleanVector, Int32Vector, NullVector, StringVector, VectorOp, VectorRef,
|
||||
};
|
||||
|
||||
fn check_filter_primitive(expect: &[i32], input: &[i32], filter: &[bool]) {
|
||||
let v = Int32Vector::from_slice(&input);
|
||||
let filter = BooleanVector::from_slice(filter);
|
||||
let out = v.filter(&filter).unwrap();
|
||||
|
||||
let expect: VectorRef = Arc::new(Int32Vector::from_slice(&expect));
|
||||
assert_eq!(expect, out);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filter_primitive() {
|
||||
check_filter_primitive(&[], &[], &[]);
|
||||
check_filter_primitive(&[5], &[5], &[true]);
|
||||
check_filter_primitive(&[], &[5], &[false]);
|
||||
check_filter_primitive(&[], &[5, 6], &[false, false]);
|
||||
check_filter_primitive(&[5, 6], &[5, 6], &[true, true]);
|
||||
check_filter_primitive(&[], &[5, 6, 7], &[false, false, false]);
|
||||
check_filter_primitive(&[5], &[5, 6, 7], &[true, false, false]);
|
||||
check_filter_primitive(&[6], &[5, 6, 7], &[false, true, false]);
|
||||
check_filter_primitive(&[7], &[5, 6, 7], &[false, false, true]);
|
||||
check_filter_primitive(&[5, 7], &[5, 6, 7], &[true, false, true]);
|
||||
}
|
||||
|
||||
fn check_filter_constant(expect_length: usize, input_length: usize, filter: &[bool]) {
|
||||
let v = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[123])), input_length);
|
||||
let filter = BooleanVector::from_slice(filter);
|
||||
let out = v.filter(&filter).unwrap();
|
||||
|
||||
assert!(out.is_const());
|
||||
assert_eq!(expect_length, out.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filter_constant() {
|
||||
check_filter_constant(0, 0, &[]);
|
||||
check_filter_constant(1, 1, &[true]);
|
||||
check_filter_constant(0, 1, &[false]);
|
||||
check_filter_constant(1, 2, &[false, true]);
|
||||
check_filter_constant(2, 2, &[true, true]);
|
||||
check_filter_constant(1, 4, &[false, false, false, true]);
|
||||
check_filter_constant(2, 4, &[false, true, false, true]);
|
||||
}
|
||||
|
||||
#[test]
fn test_filter_scalar() {
    // Keep the elements at index 1 and 3.
    let strings = StringVector::from_slice(&["0", "1", "2", "3"]);
    let mask = BooleanVector::from_slice(&[false, true, false, true]);

    let filtered = strings.filter(&mask).unwrap();

    let expect: VectorRef = Arc::new(StringVector::from_slice(&["1", "3"]));
    assert_eq!(expect, filtered);
}
|
||||
|
||||
#[test]
fn test_filter_null() {
    // Three of the five mask bits are set, so three nulls remain.
    let nulls = NullVector::new(5);
    let mask = BooleanVector::from_slice(&[false, true, false, true, true]);

    let filtered = nulls.filter(&mask).unwrap();

    let expect: VectorRef = Arc::new(NullVector::new(3));
    assert_eq!(expect, filtered);
}
|
||||
|
||||
macro_rules! impl_filter_date_like_test {
|
||||
($VectorType: ident, $ValueType: ident, $method: ident) => {{
|
||||
use std::sync::Arc;
|
||||
|
||||
use $crate::vectors::{$VectorType, VectorRef};
|
||||
|
||||
let v = $VectorType::from_iterator((0..5).map($ValueType::$method));
|
||||
let filter = BooleanVector::from_slice(&[false, true, false, true, true]);
|
||||
let out = v.filter(&filter).unwrap();
|
||||
|
||||
let expect: VectorRef = Arc::new($VectorType::from_iterator(
|
||||
[1, 3, 4].into_iter().map($ValueType::$method),
|
||||
));
|
||||
assert_eq!(expect, out);
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filter_date_like() {
|
||||
impl_filter_date_like_test!(DateVector, Date, new);
|
||||
impl_filter_date_like_test!(DateTimeVector, DateTime, new);
|
||||
|
||||
impl_filter_date_like_test!(TimestampSecondVector, TimestampSecond, from_native);
|
||||
impl_filter_date_like_test!(
|
||||
TimestampMillisecondVector,
|
||||
TimestampMillisecond,
|
||||
from_native
|
||||
);
|
||||
impl_filter_date_like_test!(
|
||||
TimestampMicrosecondVector,
|
||||
TimestampMicrosecond,
|
||||
from_native
|
||||
);
|
||||
impl_filter_date_like_test!(TimestampNanosecondVector, TimestampNanosecond, from_native);
|
||||
}
|
||||
}
|
||||
367
src/datatypes2/src/vectors/operations/find_unique.rs
Normal file
367
src/datatypes2/src/vectors/operations/find_unique.rs
Normal file
@@ -0,0 +1,367 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{NullVector, Vector};
|
||||
|
||||
// To implement `find_unique()` correctly, we need to keep in mind that always marks an element as
|
||||
// selected when it is different from the previous one, and leaves the `selected` unchanged
|
||||
// in any other case.
|
||||
pub(crate) fn find_unique_scalar<'a, T: ScalarVector>(
|
||||
vector: &'a T,
|
||||
selected: &'a mut BitVec,
|
||||
prev_vector: Option<&'a T>,
|
||||
) where
|
||||
T::RefItem<'a>: PartialEq,
|
||||
{
|
||||
assert!(selected.len() >= vector.len());
|
||||
|
||||
if vector.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
for ((i, current), next) in vector
|
||||
.iter_data()
|
||||
.enumerate()
|
||||
.zip(vector.iter_data().skip(1))
|
||||
{
|
||||
if current != next {
|
||||
// If next element is a different element, we mark it as selected.
|
||||
selected.set(i + 1, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Marks first element as selected if it is different from previous element, otherwise
|
||||
// keep selected bitmap unchanged.
|
||||
let is_first_not_duplicate = prev_vector
|
||||
.map(|pv| {
|
||||
if pv.is_empty() {
|
||||
true
|
||||
} else {
|
||||
let last = pv.get_data(pv.len() - 1);
|
||||
last != vector.get_data(0)
|
||||
}
|
||||
})
|
||||
.unwrap_or(true);
|
||||
if is_first_not_duplicate {
|
||||
selected.set(0, true);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn find_unique_null(
|
||||
vector: &NullVector,
|
||||
selected: &mut BitVec,
|
||||
prev_vector: Option<&NullVector>,
|
||||
) {
|
||||
if vector.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let is_first_not_duplicate = prev_vector.map(NullVector::is_empty).unwrap_or(true);
|
||||
if is_first_not_duplicate {
|
||||
selected.set(0, true);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn find_unique_constant(
|
||||
vector: &ConstantVector,
|
||||
selected: &mut BitVec,
|
||||
prev_vector: Option<&ConstantVector>,
|
||||
) {
|
||||
if vector.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let is_first_not_duplicate = prev_vector
|
||||
.map(|pv| {
|
||||
if pv.is_empty() {
|
||||
true
|
||||
} else {
|
||||
vector.get_constant_ref() != pv.get_constant_ref()
|
||||
}
|
||||
})
|
||||
.unwrap_or(true);
|
||||
|
||||
if is_first_not_duplicate {
|
||||
selected.set(0, true);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use super::*;
|
||||
use crate::timestamp::*;
|
||||
use crate::vectors::{Int32Vector, StringVector, Vector, VectorOp};
|
||||
|
||||
fn check_bitmap(expect: &[bool], selected: &BitVec) {
|
||||
let actual = selected.iter().collect::<Vec<_>>();
|
||||
assert_eq!(expect, actual);
|
||||
}
|
||||
|
||||
fn check_find_unique_scalar(expect: &[bool], input: &[i32], prev: Option<&[i32]>) {
|
||||
check_find_unique_scalar_opt(expect, input.iter().map(|v| Some(*v)), prev);
|
||||
}
|
||||
|
||||
fn check_find_unique_scalar_opt(
|
||||
expect: &[bool],
|
||||
input: impl Iterator<Item = Option<i32>>,
|
||||
prev: Option<&[i32]>,
|
||||
) {
|
||||
let input = Int32Vector::from(input.collect::<Vec<_>>());
|
||||
let prev = prev.map(Int32Vector::from_slice);
|
||||
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
input.find_unique(&mut selected, prev.as_ref().map(|v| v as _));
|
||||
|
||||
check_bitmap(expect, &selected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_scalar() {
|
||||
check_find_unique_scalar(&[], &[], None);
|
||||
check_find_unique_scalar(&[true], &[1], None);
|
||||
check_find_unique_scalar(&[true, false], &[1, 1], None);
|
||||
check_find_unique_scalar(&[true, true], &[1, 2], None);
|
||||
check_find_unique_scalar(&[true, true, true, true], &[1, 2, 3, 4], None);
|
||||
check_find_unique_scalar(&[true, false, true, false], &[1, 1, 3, 3], None);
|
||||
check_find_unique_scalar(&[true, false, false, false, true], &[2, 2, 2, 2, 3], None);
|
||||
|
||||
check_find_unique_scalar(&[true], &[5], Some(&[]));
|
||||
check_find_unique_scalar(&[true], &[5], Some(&[3]));
|
||||
check_find_unique_scalar(&[false], &[5], Some(&[5]));
|
||||
check_find_unique_scalar(&[false], &[5], Some(&[4, 5]));
|
||||
check_find_unique_scalar(&[false, true], &[5, 6], Some(&[4, 5]));
|
||||
check_find_unique_scalar(&[false, true, false], &[5, 6, 6], Some(&[4, 5]));
|
||||
check_find_unique_scalar(
|
||||
&[false, true, false, true, true],
|
||||
&[5, 6, 6, 7, 8],
|
||||
Some(&[4, 5]),
|
||||
);
|
||||
|
||||
check_find_unique_scalar_opt(
|
||||
&[true, true, false, true, false],
|
||||
[Some(1), Some(2), Some(2), None, None].into_iter(),
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_scalar_multi_times_with_prev() {
|
||||
let prev = Int32Vector::from_slice(&[1]);
|
||||
|
||||
let v1 = Int32Vector::from_slice(&[2, 3, 4]);
|
||||
let mut selected = BitVec::repeat(false, v1.len());
|
||||
v1.find_unique(&mut selected, Some(&prev));
|
||||
|
||||
// Though element in v2 are the same as prev, but we should still keep them.
|
||||
let v2 = Int32Vector::from_slice(&[1, 1, 1]);
|
||||
v2.find_unique(&mut selected, Some(&prev));
|
||||
|
||||
check_bitmap(&[true, true, true], &selected);
|
||||
}
|
||||
|
||||
fn new_bitmap(bits: &[bool]) -> BitVec {
|
||||
BitVec::from_iter(bits)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_scalar_with_prev() {
|
||||
let prev = Int32Vector::from_slice(&[1]);
|
||||
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = Int32Vector::from_slice(&[2, 3, 4, 5]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
// All elements are different.
|
||||
check_bitmap(&[true, true, true, true], &selected);
|
||||
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = Int32Vector::from_slice(&[1, 2, 3, 4]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
// Though first element is duplicate, but we keep the flag unchanged.
|
||||
check_bitmap(&[true, true, true, true], &selected);
|
||||
|
||||
// Same case as above, but now `prev` is None.
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = Int32Vector::from_slice(&[1, 2, 3, 4]);
|
||||
v.find_unique(&mut selected, None);
|
||||
check_bitmap(&[true, true, true, true], &selected);
|
||||
|
||||
// Same case as above, but now `prev` is empty.
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = Int32Vector::from_slice(&[1, 2, 3, 4]);
|
||||
v.find_unique(&mut selected, Some(&Int32Vector::from_slice(&[])));
|
||||
check_bitmap(&[true, true, true, true], &selected);
|
||||
|
||||
let mut selected = new_bitmap(&[false, false, false, false]);
|
||||
let v = Int32Vector::from_slice(&[2, 2, 4, 5]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
// only v[1] is duplicate.
|
||||
check_bitmap(&[true, false, true, true], &selected);
|
||||
}
|
||||
|
||||
fn check_find_unique_null(len: usize) {
|
||||
let input = NullVector::new(len);
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
input.find_unique(&mut selected, None);
|
||||
|
||||
let mut expect = vec![false; len];
|
||||
if !expect.is_empty() {
|
||||
expect[0] = true;
|
||||
}
|
||||
check_bitmap(&expect, &selected);
|
||||
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
let prev = Some(NullVector::new(1));
|
||||
input.find_unique(&mut selected, prev.as_ref().map(|v| v as _));
|
||||
let expect = vec![false; len];
|
||||
check_bitmap(&expect, &selected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_null() {
|
||||
for len in 0..5 {
|
||||
check_find_unique_null(len);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_null_with_prev() {
|
||||
let prev = NullVector::new(1);
|
||||
|
||||
// Keep flags unchanged.
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = NullVector::new(4);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Keep flags unchanged.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[false, false, true, false], &selected);
|
||||
|
||||
// Prev is None, select first element.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, None);
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Prev is empty, select first element.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, Some(&NullVector::new(0)));
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
}
|
||||
|
||||
fn check_find_unique_constant(len: usize) {
|
||||
let input = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[8])), len);
|
||||
let mut selected = BitVec::repeat(false, len);
|
||||
input.find_unique(&mut selected, None);
|
||||
|
||||
let mut expect = vec![false; len];
|
||||
if !expect.is_empty() {
|
||||
expect[0] = true;
|
||||
}
|
||||
check_bitmap(&expect, &selected);
|
||||
|
||||
let mut selected = BitVec::repeat(false, len);
|
||||
let prev = Some(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_slice(&[8])),
|
||||
1,
|
||||
));
|
||||
input.find_unique(&mut selected, prev.as_ref().map(|v| v as _));
|
||||
let expect = vec![false; len];
|
||||
check_bitmap(&expect, &selected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_constant() {
|
||||
for len in 0..5 {
|
||||
check_find_unique_constant(len);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_constant_with_prev() {
|
||||
let prev = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[1])), 1);
|
||||
|
||||
// Keep flags unchanged.
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[1])), 4);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Keep flags unchanged.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[false, false, true, false], &selected);
|
||||
|
||||
// Prev is None, select first element.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, None);
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Prev is empty, select first element.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(
|
||||
&mut selected,
|
||||
Some(&ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_slice(&[1])),
|
||||
0,
|
||||
)),
|
||||
);
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Different constant vector.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
let v = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[2])), 4);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
}
|
||||
|
||||
#[test]
fn test_find_unique_string() {
    // The second "a" duplicates the first one; every other element is new.
    let strings = StringVector::from_slice(&["a", "a", "b", "c"]);
    let mut selected = BitVec::repeat(false, 4);

    strings.find_unique(&mut selected, None);

    check_bitmap(&[true, false, true, true], &selected);
}
|
||||
|
||||
// Expands to a `find_unique` check for one date-like vector type.
//
// Arguments: the vector type, its value type, and the name of the value-type
// constructor applied to each of the integers 8, 8, 9, 10.
macro_rules! impl_find_unique_date_like_test {
    ($vector: ident, $value: ident, $ctor: ident) => {{
        use $crate::vectors::$vector;

        let input = $vector::from_iterator([8, 8, 9, 10].into_iter().map($value::$ctor));
        let mut flags = BitVec::repeat(false, 4);

        input.find_unique(&mut flags, None);

        check_bitmap(&[true, false, true, true], &flags);
    }};
}
|
||||
|
||||
/// Runs the shared date-like `find_unique` check for the date, datetime and
/// all four timestamp vector types.
#[test]
fn test_find_unique_date_like() {
    // Types constructed through `new`.
    impl_find_unique_date_like_test!(DateVector, Date, new);
    impl_find_unique_date_like_test!(DateTimeVector, DateTime, new);

    // Timestamp types, constructed through `from`.
    impl_find_unique_date_like_test!(TimestampSecondVector, TimestampSecond, from);
    impl_find_unique_date_like_test!(TimestampMillisecondVector, TimestampMillisecond, from);
    impl_find_unique_date_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from);
    impl_find_unique_date_like_test!(TimestampNanosecondVector, TimestampNanosecond, from);
}
|
||||
}
|
||||
170
src/datatypes2/src/vectors/operations/replicate.rs
Normal file
170
src/datatypes2/src/vectors/operations/replicate.rs
Normal file
@@ -0,0 +1,170 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::prelude::*;
|
||||
pub(crate) use crate::vectors::null::replicate_null;
|
||||
pub(crate) use crate::vectors::primitive::replicate_primitive;
|
||||
|
||||
/// Replicates the elements of the scalar vector `c` according to `offsets`.
///
/// `offsets[i]` is the exclusive end position of element `i` in the output, so
/// element `i` is pushed once for every position in
/// `offsets[i - 1]..offsets[i]` (the offset before the first one is 0). An
/// offset that does not exceed its predecessor contributes no elements.
///
/// # Panics
///
/// Panics if `offsets.len() != c.len()`.
pub(crate) fn replicate_scalar<C: ScalarVector>(c: &C, offsets: &[usize]) -> VectorRef {
    assert_eq!(offsets.len(), c.len());

    // The last offset is the length of the replicated vector; no offsets
    // means there is nothing to replicate.
    let total = match offsets.last() {
        Some(&last) => last,
        None => return c.slice(0, 0),
    };
    // Reserve room for the output length rather than the input length.
    let mut builder = <<C as ScalarVector>::Builder>::with_capacity(total);

    let mut previous_offset = 0;
    for (i, &offset) in offsets.iter().enumerate() {
        let data = c.get_data(i);
        for _ in previous_offset..offset {
            builder.push(data);
        }
        previous_offset = offset;
    }
    builder.to_vector()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_time::timestamp::TimeUnit;
    use common_time::{Date, DateTime, Timestamp};
    use paste::paste;

    use super::*;
    use crate::vectors::constant::ConstantVector;
    use crate::vectors::{Int32Vector, NullVector, StringVector, VectorOp};

    /// Offsets `[0, 1, 2, 3, 4]` give the first element zero copies and every
    /// other element exactly one.
    #[test]
    fn test_replicate_primitive() {
        let replicated = Int32Vector::from_iterator(0..5).replicate(&[0, 1, 2, 3, 4]);
        assert_eq!(4, replicated.len());

        for (i, expected) in (1..=4).enumerate() {
            assert_eq!(Value::Int32(expected), replicated.get(i));
        }
    }

    /// Nulls are replicated just like values: every element appears twice.
    #[test]
    fn test_replicate_nullable_primitive() {
        let replicated =
            Int32Vector::from(vec![None, Some(1), None, Some(2)]).replicate(&[2, 4, 6, 8]);
        assert_eq!(8, replicated.len());

        let doubled = vec![
            None,
            None,
            Some(1),
            Some(1),
            None,
            None,
            Some(2),
            Some(2),
        ];
        let expect: VectorRef = Arc::new(Int32Vector::from(doubled));
        assert_eq!(expect, replicated);
    }

    /// String vectors are replicated with per-element repeat counts 1, 2, 2, 1.
    #[test]
    fn test_replicate_scalar() {
        let replicated = StringVector::from_slice(&["0", "1", "2", "3"]).replicate(&[1, 3, 5, 6]);
        assert_eq!(6, replicated.len());

        let expect: VectorRef = Arc::new(StringVector::from_slice(&["0", "1", "1", "2", "2", "3"]));
        assert_eq!(expect, replicated);
    }

    /// Replicating a constant vector yields a constant vector of the new length.
    #[test]
    fn test_replicate_constant() {
        let inner = Arc::new(StringVector::from_slice(&["hello"]));
        let replicated = ConstantVector::new(inner.clone(), 2).replicate(&[1, 4]);
        assert_eq!(4, replicated.len());

        let expect: VectorRef = Arc::new(ConstantVector::new(inner, 4));
        assert_eq!(expect, replicated);
    }

    /// Covers both an empty null vector and a non-empty one.
    #[test]
    fn test_replicate_null() {
        let empty = NullVector::new(0).replicate(&[]);
        assert!(empty.is_empty());

        let replicated = NullVector::new(3).replicate(&[1, 3, 5]);
        assert_eq!(5, replicated.len());
    }

    // Replication check for a date-like vector whose values are built with
    // `$value::$ctor` from the integers 0..5.
    macro_rules! impl_replicate_date_like_test {
        ($vector: ident, $value: ident, $ctor: ident) => {{
            use $crate::vectors::$vector;

            let replicated =
                $vector::from_iterator((0..5).map($value::$ctor)).replicate(&[0, 1, 2, 3, 4]);
            assert_eq!(4, replicated.len());

            for (i, n) in (1i32..=4).enumerate() {
                assert_eq!(Value::$value($value::$ctor(n.into())), replicated.get(i));
            }
        }};
    }

    // Replication check for the timestamp vector of the given time unit.
    macro_rules! impl_replicate_timestamp_test {
        ($unit: ident) => {{
            paste! {
                use $crate::vectors::[<Timestamp $unit Vector>];
                use $crate::timestamp::[<Timestamp $unit>];

                let replicated = [<Timestamp $unit Vector>]::from_iterator(
                    (0..5).map([<Timestamp $unit>]::from),
                )
                .replicate(&[0, 1, 2, 3, 4]);
                assert_eq!(4, replicated.len());

                for (i, n) in (1i64..=4).enumerate() {
                    assert_eq!(
                        Value::Timestamp(Timestamp::new(n, TimeUnit::$unit)),
                        replicated.get(i)
                    );
                }
            }
        }};
    }

    #[test]
    fn test_replicate_date_like() {
        impl_replicate_date_like_test!(DateVector, Date, new);
        impl_replicate_date_like_test!(DateTimeVector, DateTime, new);

        impl_replicate_timestamp_test!(Second);
        impl_replicate_timestamp_test!(Millisecond);
        impl_replicate_timestamp_test!(Microsecond);
        impl_replicate_timestamp_test!(Nanosecond);
    }
}
|
||||
552
src/datatypes2/src/vectors/primitive.rs
Normal file
552
src/datatypes2/src/vectors/primitive.rs
Normal file
@@ -0,0 +1,552 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, PrimitiveArray, PrimitiveBuilder,
|
||||
};
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LogicalPrimitiveType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type, WrapperType,
|
||||
};
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
pub type UInt8Vector = PrimitiveVector<UInt8Type>;
|
||||
pub type UInt16Vector = PrimitiveVector<UInt16Type>;
|
||||
pub type UInt32Vector = PrimitiveVector<UInt32Type>;
|
||||
pub type UInt64Vector = PrimitiveVector<UInt64Type>;
|
||||
|
||||
pub type Int8Vector = PrimitiveVector<Int8Type>;
|
||||
pub type Int16Vector = PrimitiveVector<Int16Type>;
|
||||
pub type Int32Vector = PrimitiveVector<Int32Type>;
|
||||
pub type Int64Vector = PrimitiveVector<Int64Type>;
|
||||
|
||||
pub type Float32Vector = PrimitiveVector<Float32Type>;
|
||||
pub type Float64Vector = PrimitiveVector<Float64Type>;
|
||||
|
||||
/// Vector for primitive data types.
|
||||
pub struct PrimitiveVector<T: LogicalPrimitiveType> {
|
||||
array: PrimitiveArray<T::ArrowPrimitive>,
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> PrimitiveVector<T> {
|
||||
pub fn new(array: PrimitiveArray<T::ArrowPrimitive>) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
let data = array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<T::ArrowPrimitive>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.data()
|
||||
.clone();
|
||||
let concrete_array = PrimitiveArray::<T::ArrowPrimitive>::from(data);
|
||||
Ok(Self::new(concrete_array))
|
||||
}
|
||||
|
||||
pub fn from_slice<P: AsRef<[T::Native]>>(slice: P) -> Self {
|
||||
let iter = slice.as_ref().iter().copied();
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_wrapper_slice<P: AsRef<[T::Wrapper]>>(slice: P) -> Self {
|
||||
let iter = slice.as_ref().iter().copied().map(WrapperType::into_native);
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_vec(array: Vec<T::Native>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(array),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &PrimitiveArray<T::ArrowPrimitive> {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from(data),
|
||||
}
|
||||
}
|
||||
|
||||
// To distinguish with `Vector::slice()`.
|
||||
fn get_slice(&self, offset: usize, length: usize) -> Self {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Self::from_array_data(data)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> Vector for PrimitiveVector<T> {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
T::build_data_type()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
format!("{}Vector", T::type_name())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(PrimitiveArray::<T::ArrowPrimitive>::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(PrimitiveArray::<T::ArrowPrimitive>::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
if self.array.is_valid(index) {
|
||||
// Safety: The index have been checked by `is_valid()`.
|
||||
let wrapper = unsafe { T::Wrapper::from_native(self.array.value_unchecked(index)) };
|
||||
wrapper.into()
|
||||
} else {
|
||||
Value::Null
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
if self.array.is_valid(index) {
|
||||
// Safety: The index have been checked by `is_valid()`.
|
||||
let wrapper = unsafe { T::Wrapper::from_native(self.array.value_unchecked(index)) };
|
||||
wrapper.into()
|
||||
} else {
|
||||
ValueRef::Null
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> fmt::Debug for PrimitiveVector<T> {
    /// Formats the vector as a `PrimitiveVector` struct with its `array` field.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut repr = f.debug_struct("PrimitiveVector");
        repr.field("array", &self.array);
        repr.finish()
    }
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> From<PrimitiveArray<T::ArrowPrimitive>> for PrimitiveVector<T> {
    /// Wraps the arrow array without copying it.
    fn from(array: PrimitiveArray<T::ArrowPrimitive>) -> Self {
        Self::new(array)
    }
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> From<Vec<Option<T::Native>>> for PrimitiveVector<T> {
|
||||
fn from(v: Vec<Option<T::Native>>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter(v),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PrimitiveIter<'a, T: LogicalPrimitiveType> {
|
||||
iter: ArrayIter<&'a PrimitiveArray<T::ArrowPrimitive>>,
|
||||
}
|
||||
|
||||
impl<'a, T: LogicalPrimitiveType> Iterator for PrimitiveIter<'a, T> {
    type Item = Option<T::Wrapper>;

    /// Yields the next element, with null slots as `None` and values converted
    /// from their native representation.
    fn next(&mut self) -> Option<Option<T::Wrapper>> {
        let native = self.iter.next()?;
        Some(native.map(T::Wrapper::from_native))
    }

    /// Forwards the size hint of the underlying arrow iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> ScalarVector for PrimitiveVector<T> {
|
||||
type OwnedItem = T::Wrapper;
|
||||
type RefItem<'a> = T::Wrapper;
|
||||
type Iter<'a> = PrimitiveIter<'a, T>;
|
||||
type Builder = PrimitiveVectorBuilder<T>;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(T::Wrapper::from_native(self.array.value(idx)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
PrimitiveIter {
|
||||
iter: self.array.iter(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> Serializable for PrimitiveVector<T> {
|
||||
fn serialize_to_json(&self) -> Result<Vec<JsonValue>> {
|
||||
let res = self
|
||||
.iter_data()
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
// use WrapperType's Into<serde_json::Value> bound instead of
|
||||
// serde_json::to_value to facilitate customized serialization
|
||||
// for WrapperType
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> PartialEq for PrimitiveVector<T> {
    /// Two vectors are equal when their underlying arrow arrays are equal.
    fn eq(&self, other: &PrimitiveVector<T>) -> bool {
        PartialEq::eq(&self.array, &other.array)
    }
}
|
||||
|
||||
pub type UInt8VectorBuilder = PrimitiveVectorBuilder<UInt8Type>;
|
||||
pub type UInt16VectorBuilder = PrimitiveVectorBuilder<UInt16Type>;
|
||||
pub type UInt32VectorBuilder = PrimitiveVectorBuilder<UInt32Type>;
|
||||
pub type UInt64VectorBuilder = PrimitiveVectorBuilder<UInt64Type>;
|
||||
|
||||
pub type Int8VectorBuilder = PrimitiveVectorBuilder<Int8Type>;
|
||||
pub type Int16VectorBuilder = PrimitiveVectorBuilder<Int16Type>;
|
||||
pub type Int32VectorBuilder = PrimitiveVectorBuilder<Int32Type>;
|
||||
pub type Int64VectorBuilder = PrimitiveVectorBuilder<Int64Type>;
|
||||
|
||||
pub type Float32VectorBuilder = PrimitiveVectorBuilder<Float32Type>;
|
||||
pub type Float64VectorBuilder = PrimitiveVectorBuilder<Float64Type>;
|
||||
|
||||
/// Builder to build a primitive vector.
|
||||
pub struct PrimitiveVectorBuilder<T: LogicalPrimitiveType> {
|
||||
mutable_array: PrimitiveBuilder<T::ArrowPrimitive>,
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
T::build_data_type()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.mutable_array.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
let primitive = T::cast_value_ref(value)?;
|
||||
match primitive {
|
||||
Some(v) => self.mutable_array.append_value(v.into_native()),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let primitive = T::cast_vector(vector)?;
|
||||
// Slice the underlying array to avoid creating a new Arc.
|
||||
let slice = primitive.get_slice(offset, length);
|
||||
for v in slice.iter_data() {
|
||||
self.push(v);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ScalarVectorBuilder for PrimitiveVectorBuilder<T>
|
||||
where
|
||||
T: LogicalPrimitiveType,
|
||||
T::Wrapper: Scalar<VectorType = PrimitiveVector<T>>,
|
||||
for<'a> T::Wrapper: ScalarRef<'a, ScalarType = T::Wrapper>,
|
||||
for<'a> T::Wrapper: Scalar<RefType<'a> = T::Wrapper>,
|
||||
{
|
||||
type VectorType = PrimitiveVector<T>;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: PrimitiveBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array
|
||||
.append_option(value.map(|v| v.into_native()));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
PrimitiveVector {
|
||||
array: self.mutable_array.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_primitive<T: LogicalPrimitiveType>(
|
||||
vector: &PrimitiveVector<T>,
|
||||
offsets: &[usize],
|
||||
) -> PrimitiveVector<T> {
|
||||
assert_eq!(offsets.len(), vector.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return vector.get_slice(0, 0);
|
||||
}
|
||||
|
||||
let mut builder = PrimitiveVectorBuilder::<T>::with_capacity(*offsets.last().unwrap() as usize);
|
||||
|
||||
let mut previous_offset = 0;
|
||||
|
||||
for (offset, value) in offsets.iter().zip(vector.array.iter()) {
|
||||
let repeat_times = *offset - previous_offset;
|
||||
match value {
|
||||
Some(data) => {
|
||||
unsafe {
|
||||
// Safety: std::iter::Repeat and std::iter::Take implement TrustedLen.
|
||||
builder
|
||||
.mutable_array
|
||||
.append_trusted_len_iter(std::iter::repeat(data).take(repeat_times));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
builder.mutable_array.append_nulls(repeat_times);
|
||||
}
|
||||
}
|
||||
previous_offset = *offset;
|
||||
}
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::array::Int32Array;
    use arrow::datatypes::DataType as ArrowDataType;
    use serde_json;

    use super::*;
    use crate::data_type::DataType;
    use crate::serialize::Serializable;
    use crate::types::Int64Type;

    /// Builds an `Int32Vector` by pushing every item of `input` through the builder.
    fn build_int32(input: &[Option<i32>]) -> Int32Vector {
        let mut builder = Int32VectorBuilder::with_capacity(input.len());
        for item in input.iter().copied() {
            builder.push(item);
        }
        builder.finish()
    }

    /// Asserts that `v` is the non-null Int32 vector `[1, 2, 3, 4]`.
    fn check_vec(v: Int32Vector) {
        assert_eq!(4, v.len());
        assert_eq!("Int32Vector", v.vector_type_name());
        assert!(!v.is_const());
        assert!(v.validity().is_all_valid());
        assert!(!v.only_null());

        for (i, expected) in (1..=4).enumerate() {
            assert!(!v.is_null(i));
            assert_eq!(Value::Int32(expected), v.get(i));
            assert_eq!(ValueRef::Int32(expected), v.get_ref(i));
        }

        let json_value = v.serialize_to_json().unwrap();
        assert_eq!("[1,2,3,4]", serde_json::to_string(&json_value).unwrap(),);

        let arrow_arr = v.to_arrow_array();
        assert_eq!(4, arrow_arr.len());
        assert_eq!(&ArrowDataType::Int32, arrow_arr.data_type());
    }

    #[test]
    fn test_from_values() {
        check_vec(Int32Vector::from_values(vec![1, 2, 3, 4]));
    }

    #[test]
    fn test_from_vec() {
        check_vec(Int32Vector::from_vec(vec![1, 2, 3, 4]));
    }

    #[test]
    fn test_from_slice() {
        check_vec(Int32Vector::from_slice(vec![1, 2, 3, 4]));
    }

    #[test]
    fn test_serialize_primitive_vector_with_null_to_json() {
        let vector = build_int32(&[Some(1i32), Some(2i32), None, Some(4i32), None]);

        let json_value = vector.serialize_to_json().unwrap();
        assert_eq!(
            "[1,2,null,4,null]",
            serde_json::to_string(&json_value).unwrap(),
        );
    }

    #[test]
    fn test_from_arrow_array() {
        let arrow_array = Int32Array::from(vec![1, 2, 3, 4]);
        check_vec(Int32Vector::from(arrow_array));
    }

    #[test]
    fn test_primitive_vector_build_get() {
        let input = [Some(1i32), Some(2i32), None, Some(4i32), None];
        let vector = build_int32(&input);
        assert_eq!(input.len(), vector.len());

        for (i, item) in input.into_iter().enumerate() {
            assert_eq!(item, vector.get_data(i));
            assert_eq!(Value::from(item), vector.get(i));
        }

        let collected: Vec<_> = vector.iter_data().collect();
        assert_eq!(input, &collected[..]);
    }

    #[test]
    fn test_primitive_vector_validity() {
        let vector = build_int32(&[Some(1i32), Some(2i32), None, None]);
        assert_eq!(2, vector.null_count());

        let validity = vector.validity();
        assert_eq!(2, validity.null_count());
        assert!(!validity.is_set(2));
        assert!(!validity.is_set(3));

        let vector = Int32Vector::from_slice(vec![1, 2, 3, 4]);
        assert_eq!(0, vector.null_count());
        assert!(vector.validity().is_all_valid());
    }

    #[test]
    fn test_memory_size() {
        let ints = Int32Vector::from_slice((0..5).collect::<Vec<i32>>());
        assert_eq!(64, ints.memory_size());

        let nullable = Int64Vector::from(vec![Some(0i64), Some(1i64), Some(2i64), None, None]);
        assert_eq!(128, nullable.memory_size());
    }

    #[test]
    fn test_primitive_vector_builder() {
        let mut builder = Int64Type::default().create_mutable_vector(3);
        builder.push_value_ref(ValueRef::Int64(123)).unwrap();
        // A value of a different type is rejected.
        assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());

        let input = Int64Vector::from_slice(&[7, 8, 9]);
        builder.extend_slice_of(&input, 1, 2).unwrap();
        // A vector of a different type is rejected.
        let mismatched = Int32Vector::from_slice(&[13]);
        assert!(builder.extend_slice_of(&mismatched, 0, 1).is_err());

        let vector = builder.to_vector();
        let expect: VectorRef = Arc::new(Int64Vector::from_slice(&[123, 8, 9]));
        assert_eq!(expect, vector);
    }

    #[test]
    fn test_from_wrapper_slice() {
        // Compares `from_wrapper_slice` against `from_slice` on the extreme
        // values of the given native type.
        macro_rules! test_from_wrapper_slice {
            ($vec: ident, $ty: ident) => {
                let via_wrappers =
                    $vec::from_wrapper_slice(&[$ty::from_native($ty::MAX), $ty::from_native($ty::MIN)]);
                let via_natives = $vec::from_slice(&[$ty::MAX, $ty::MIN]);
                assert_eq!(via_wrappers, via_natives);
            };
        }

        test_from_wrapper_slice!(UInt8Vector, u8);
        test_from_wrapper_slice!(Int8Vector, i8);
        test_from_wrapper_slice!(UInt16Vector, u16);
        test_from_wrapper_slice!(Int16Vector, i16);
        test_from_wrapper_slice!(UInt32Vector, u32);
        test_from_wrapper_slice!(Int32Vector, i32);
        test_from_wrapper_slice!(UInt64Vector, u64);
        test_from_wrapper_slice!(Int64Vector, i64);
        test_from_wrapper_slice!(Float32Vector, f32);
        test_from_wrapper_slice!(Float64Vector, f64);
    }
}
|
||||
370
src/datatypes2/src/vectors/string.rs
Normal file
370
src/datatypes2/src/vectors/string.rs
Normal file
@@ -0,0 +1,370 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::arrow_array::{MutableStringArray, StringArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector of strings.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct StringVector {
|
||||
array: StringArray,
|
||||
}
|
||||
|
||||
impl StringVector {
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> Self {
|
||||
Self {
|
||||
array: StringArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringArray> for StringVector {
|
||||
fn from(array: StringArray) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<String>>> for StringVector {
|
||||
fn from(data: Vec<Option<String>>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<&str>>> for StringVector {
|
||||
fn from(data: Vec<Option<&str>>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[Option<String>]> for StringVector {
|
||||
fn from(data: &[Option<String>]) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[Option<&str>]> for StringVector {
|
||||
fn from(data: &[Option<&str>]) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<String>> for StringVector {
|
||||
fn from(data: Vec<String>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data.into_iter().map(Some)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<&str>> for StringVector {
|
||||
fn from(data: Vec<&str>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data.into_iter().map(Some)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for StringVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::string_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"StringVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(StringArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(StringArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
vectors::impl_get_for_vector!(self.array, index)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
vectors::impl_get_ref_for_vector!(self.array, index)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for StringVector {
|
||||
type OwnedItem = String;
|
||||
type RefItem<'a> = &'a str;
|
||||
type Iter<'a> = ArrayIter<&'a StringArray>;
|
||||
type Builder = StringVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
self.array.iter()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StringVectorBuilder {
|
||||
mutable_array: MutableStringArray,
|
||||
}
|
||||
|
||||
impl MutableVector for StringVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::string_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.mutable_array.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
match value.as_string()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self, vector, StringVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for StringVectorBuilder {
|
||||
type VectorType = StringVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableStringArray::with_capacity(capacity, 0),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
StringVector {
|
||||
array: self.mutable_array.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for StringVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
self.iter_data()
|
||||
.map(serde_json::to_value)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(error::SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
// Expands the crate macro `impl_try_from_arrow_array_for_vector!` for the
// (`StringArray`, `StringVector`) pair.
// NOTE(review): judging by its name this generates the fallible
// arrow-array-to-vector conversion; confirm against the macro definition.
vectors::impl_try_from_arrow_array_for_vector!(StringArray, StringVector);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::datatypes::DataType;

    use super::*;

    /// Builds a vector with the typed builder and reads it back through
    /// every accessor.
    #[test]
    fn test_string_vector_build_get() {
        let mut builder = StringVectorBuilder::with_capacity(4);
        for item in [Some("hello"), None, Some("world")] {
            builder.push(item);
        }
        let vector = builder.finish();

        assert_eq!(Some("hello"), vector.get_data(0));
        assert_eq!(None, vector.get_data(1));
        assert_eq!(Some("world"), vector.get_data(2));

        // Index 3 is one past the end.
        assert!(vector.try_get(3).is_err());

        assert_eq!(Value::String("hello".into()), vector.get(0));
        assert_eq!(Value::Null, vector.get(1));
        assert_eq!(Value::String("world".into()), vector.get(2));

        // The iterator yields exactly the three slots and then stops.
        let collected: Vec<Option<&str>> = vector.iter_data().collect();
        assert_eq!(vec![Some("hello"), None, Some("world")], collected);
    }

    /// Exercises the type-erased `MutableVector` entry points, including
    /// their type-mismatch errors.
    #[test]
    fn test_string_vector_builder() {
        let mut builder = StringVectorBuilder::with_capacity(3);
        builder.push_value_ref(ValueRef::String("hello")).unwrap();
        let wrong_type = builder.push_value_ref(ValueRef::Int32(123));
        assert!(wrong_type.is_err());

        let input = StringVector::from_slice(&["world", "one", "two"]);
        builder.extend_slice_of(&input, 1, 2).unwrap();
        let ints = crate::vectors::Int32Vector::from_slice(&[13]);
        assert!(builder.extend_slice_of(&ints, 0, 1).is_err());
        let vector = builder.to_vector();

        let expect: VectorRef = Arc::new(StringVector::from_slice(&["hello", "one", "two"]));
        assert_eq!(expect, vector);
    }

    #[test]
    fn test_string_vector_misc() {
        let strs = vec!["hello", "greptime", "rust"];
        let v = StringVector::from(strs.clone());
        assert_eq!(3, v.len());
        assert_eq!("StringVector", v.vector_type_name());
        assert!(!v.is_const());
        assert!(v.validity().is_all_valid());
        assert!(!v.only_null());
        assert_eq!(128, v.memory_size());

        for (idx, expected) in strs.iter().copied().enumerate() {
            assert_eq!(Value::from(expected), v.get(idx));
            assert_eq!(ValueRef::from(expected), v.get_ref(idx));
            assert_eq!(Value::from(expected), v.try_get(idx).unwrap());
        }

        let arrow_arr = v.to_arrow_array();
        assert_eq!(3, arrow_arr.len());
        assert_eq!(&DataType::Utf8, arrow_arr.data_type());
    }

    #[test]
    fn test_serialize_string_vector() {
        let mut builder = StringVectorBuilder::with_capacity(3);
        for item in [Some("hello"), None, Some("world")] {
            builder.push(item);
        }
        let string_vector = builder.finish();

        let json_values = string_vector.serialize_to_json().unwrap();
        let serialized = serde_json::to_string(&json_values).unwrap();
        assert_eq!(r#"["hello",null,"world"]"#, serialized);
    }

    #[test]
    fn test_from_arrow_array() {
        let mut builder = MutableStringArray::new();
        for item in [Some("A"), Some("B"), None, Some("D")] {
            builder.append_option(item);
        }
        let string_array: StringArray = builder.finish();

        let vector = StringVector::from(string_array);
        let json_values = vector.serialize_to_json().unwrap();
        assert_eq!(
            r#"["A","B",null,"D"]"#,
            serde_json::to_string(&json_values).unwrap(),
        );
    }

    /// Covers construction from plain (non-`Option`) strings, including
    /// multi-byte characters and an embedded NUL.
    #[test]
    fn test_from_non_option_string() {
        let nul = String::from_utf8(vec![0]).unwrap();
        let borrowed = vec!["😅😅😅", "😍😍😍😍", "🥵🥵", nul.as_str()];
        let vector = StringVector::from(borrowed);
        let serialized = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
        assert_eq!(r#"["😅😅😅","😍😍😍😍","🥵🥵","\u0000"]"#, serialized);

        let owned: Vec<String> = ["🀀🀀🀀", "🀁🀁🀁", "🀂🀂🀂", "🀃🀃🀃", "🀆🀆"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let vector = StringVector::from(owned);
        let serialized = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
        assert_eq!(r#"["🀀🀀🀀","🀁🀁🀁","🀂🀂🀂","🀃🀃🀃","🀆🀆"]"#, serialized);
    }
}
|
||||
31
src/datatypes2/src/vectors/timestamp.rs
Normal file
31
src/datatypes2/src/vectors/timestamp.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::types::{
|
||||
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
|
||||
TimestampSecondType,
|
||||
};
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
pub type TimestampSecondVector = PrimitiveVector<TimestampSecondType>;
|
||||
pub type TimestampSecondVectorBuilder = PrimitiveVectorBuilder<TimestampSecondType>;
|
||||
|
||||
pub type TimestampMillisecondVector = PrimitiveVector<TimestampMillisecondType>;
|
||||
pub type TimestampMillisecondVectorBuilder = PrimitiveVectorBuilder<TimestampMillisecondType>;
|
||||
|
||||
pub type TimestampMicrosecondVector = PrimitiveVector<TimestampMicrosecondType>;
|
||||
pub type TimestampMicrosecondVectorBuilder = PrimitiveVectorBuilder<TimestampMicrosecondType>;
|
||||
|
||||
pub type TimestampNanosecondVector = PrimitiveVector<TimestampNanosecondType>;
|
||||
pub type TimestampNanosecondVectorBuilder = PrimitiveVectorBuilder<TimestampNanosecondType>;
|
||||
159
src/datatypes2/src/vectors/validity.rs
Normal file
159
src/datatypes2/src/vectors/validity.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::array::ArrayData;
|
||||
use arrow::bitmap::Bitmap;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum ValidityKind<'a> {
|
||||
/// Whether the array slot is valid or not (null).
|
||||
Slots {
|
||||
bitmap: &'a Bitmap,
|
||||
len: usize,
|
||||
null_count: usize,
|
||||
},
|
||||
/// All slots are valid.
|
||||
AllValid { len: usize },
|
||||
/// All slots are null.
|
||||
AllNull { len: usize },
|
||||
}
|
||||
|
||||
/// Validity of a vector.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Validity<'a> {
|
||||
kind: ValidityKind<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Validity<'a> {
|
||||
/// Creates a `Validity` from [`ArrayData`].
|
||||
pub fn from_array_data(data: &'a ArrayData) -> Validity<'a> {
|
||||
match data.null_bitmap() {
|
||||
Some(bitmap) => Validity {
|
||||
kind: ValidityKind::Slots {
|
||||
bitmap,
|
||||
len: data.len(),
|
||||
null_count: data.null_count(),
|
||||
},
|
||||
},
|
||||
None => Validity::all_valid(data.len()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `Validity` that all elements are valid.
|
||||
pub fn all_valid(len: usize) -> Validity<'a> {
|
||||
Validity {
|
||||
kind: ValidityKind::AllValid { len },
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `Validity` that all elements are null.
|
||||
pub fn all_null(len: usize) -> Validity<'a> {
|
||||
Validity {
|
||||
kind: ValidityKind::AllNull { len },
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether `i-th` bit is set.
|
||||
pub fn is_set(&self, i: usize) -> bool {
|
||||
match self.kind {
|
||||
ValidityKind::Slots { bitmap, .. } => bitmap.is_set(i),
|
||||
ValidityKind::AllValid { len } => i < len,
|
||||
ValidityKind::AllNull { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if all bits are null.
|
||||
pub fn is_all_null(&self) -> bool {
|
||||
match self.kind {
|
||||
ValidityKind::Slots {
|
||||
len, null_count, ..
|
||||
} => len == null_count,
|
||||
ValidityKind::AllValid { .. } => false,
|
||||
ValidityKind::AllNull { .. } => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if all bits are valid.
|
||||
pub fn is_all_valid(&self) -> bool {
|
||||
match self.kind {
|
||||
ValidityKind::Slots { null_count, .. } => null_count == 0,
|
||||
ValidityKind::AllValid { .. } => true,
|
||||
ValidityKind::AllNull { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// The number of null slots on this [`Vector`].
|
||||
pub fn null_count(&self) -> usize {
|
||||
match self.kind {
|
||||
ValidityKind::Slots { null_count, .. } => null_count,
|
||||
ValidityKind::AllValid { .. } => 0,
|
||||
ValidityKind::AllNull { len } => len,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow::array::{Array, Int32Array};

    use super::*;

    #[test]
    fn test_all_valid() {
        let validity = Validity::all_valid(5);
        assert!(validity.is_all_valid());
        assert!(!validity.is_all_null());
        assert_eq!(0, validity.null_count());
        // Every in-range slot is set; the first out-of-range one is not.
        assert!((0..5).all(|i| validity.is_set(i)));
        assert!(!validity.is_set(5));
    }

    #[test]
    fn test_all_null() {
        let validity = Validity::all_null(5);
        assert!(validity.is_all_null());
        assert!(!validity.is_all_valid());
        assert_eq!(5, validity.null_count());
        // No slot is set, in range or out of range.
        assert!((0..=5).all(|i| !validity.is_set(i)));
    }

    #[test]
    fn test_from_array_data() {
        // Mixed nulls and values.
        let mixed = Int32Array::from_iter([None, Some(1), None]);
        let validity = Validity::from_array_data(mixed.data());
        assert_eq!(2, validity.null_count());
        assert!(!validity.is_set(0));
        assert!(validity.is_set(1));
        assert!(!validity.is_set(2));
        assert!(!validity.is_all_null());
        assert!(!validity.is_all_valid());

        // Only nulls.
        let nulls = Int32Array::from_iter([None, None]);
        let validity = Validity::from_array_data(nulls.data());
        assert!(validity.is_all_null());
        assert!(!validity.is_all_valid());
        assert_eq!(2, validity.null_count());

        // Only values.
        let values = Int32Array::from_iter_values([1, 2]);
        let validity = Validity::from_array_data(values.data());
        assert!(!validity.is_all_null());
        assert!(validity.is_all_valid());
        assert_eq!(0, validity.null_count());
    }
}
|
||||
Reference in New Issue
Block a user