feat: migrate orc-rs to datafusion-orc (#3923)

This commit is contained in:
maco
2024-05-13 13:15:06 +08:00
committed by GitHub
parent 6ab3aeb142
commit a0be7198f9
4 changed files with 391 additions and 128 deletions

492
Cargo.lock generated
View File

@@ -1272,7 +1272,7 @@ dependencies = [
"common-time",
"common-version",
"dashmap",
"datafusion",
"datafusion 37.0.0",
"datatypes",
"futures",
"futures-util",
@@ -1744,7 +1744,7 @@ dependencies = [
"common-recordbatch",
"common-runtime",
"common-test-util",
"datafusion",
"datafusion 37.0.0",
"datatypes",
"derive_builder 0.12.0",
"futures",
@@ -1815,7 +1815,7 @@ dependencies = [
"common-telemetry",
"common-time",
"common-version",
"datafusion",
"datafusion 37.0.0",
"datatypes",
"num",
"num-traits",
@@ -1941,7 +1941,7 @@ dependencies = [
"common-telemetry",
"common-time",
"common-wal",
"datafusion-common",
"datafusion-common 37.0.0",
"datatypes",
"derive_builder 0.12.0",
"etcd-client",
@@ -2020,9 +2020,9 @@ dependencies = [
"common-macro",
"common-recordbatch",
"common-time",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datatypes",
"serde",
"snafu 0.8.2",
@@ -2040,8 +2040,8 @@ dependencies = [
"common-error",
"common-macro",
"common-telemetry",
"datafusion",
"datafusion-common",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datatypes",
"futures",
"pin-project",
@@ -2582,17 +2582,68 @@ dependencies = [
"bzip2",
"chrono",
"dashmap",
"datafusion-common",
"datafusion-common-runtime",
"datafusion-execution",
"datafusion-expr",
"datafusion-functions",
"datafusion-common 37.0.0",
"datafusion-common-runtime 37.0.0",
"datafusion-execution 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-functions 37.0.0",
"datafusion-functions-aggregate",
"datafusion-functions-array",
"datafusion-optimizer",
"datafusion-physical-expr",
"datafusion-physical-plan",
"datafusion-sql",
"datafusion-functions-array 37.0.0",
"datafusion-optimizer 37.0.0",
"datafusion-physical-expr 37.0.0",
"datafusion-physical-plan 37.0.0",
"datafusion-sql 37.0.0",
"flate2",
"futures",
"glob",
"half 2.4.1",
"hashbrown 0.14.5",
"indexmap 2.2.6",
"itertools 0.12.1",
"log",
"num_cpus",
"object_store",
"parking_lot 0.12.2",
"parquet",
"pin-project-lite",
"rand",
"sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile",
"tokio",
"tokio-util",
"url",
"uuid",
"xz2",
"zstd 0.13.1",
]
[[package]]
name = "datafusion"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85069782056753459dc47e386219aa1fdac5b731f26c28abb8c0ffd4b7c5ab11"
dependencies = [
"ahash 0.8.11",
"arrow",
"arrow-array",
"arrow-ipc",
"arrow-schema",
"async-compression 0.4.9",
"async-trait",
"bytes",
"bzip2",
"chrono",
"dashmap",
"datafusion-common 37.1.0",
"datafusion-common-runtime 37.1.0",
"datafusion-execution 37.1.0",
"datafusion-expr 37.1.0",
"datafusion-functions 37.1.0",
"datafusion-functions-array 37.1.0",
"datafusion-optimizer 37.1.0",
"datafusion-physical-expr 37.1.0",
"datafusion-physical-plan 37.1.0",
"datafusion-sql 37.1.0",
"flate2",
"futures",
"glob",
@@ -2637,6 +2688,27 @@ dependencies = [
"sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "datafusion-common"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "309d9040751f6dc9e33c85dce6abb55a46ef7ea3644577dd014611c379447ef3"
dependencies = [
"ahash 0.8.11",
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-schema",
"chrono",
"half 2.4.1",
"instant",
"libc",
"num_cpus",
"object_store",
"parquet",
"sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "datafusion-common-runtime"
version = "37.0.0"
@@ -2645,6 +2717,15 @@ dependencies = [
"tokio",
]
[[package]]
name = "datafusion-common-runtime"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e4a44d8ef1b1e85d32234e6012364c411c3787859bb3bba893b0332cb03dfd"
dependencies = [
"tokio",
]
[[package]]
name = "datafusion-execution"
version = "37.0.0"
@@ -2653,8 +2734,29 @@ dependencies = [
"arrow",
"chrono",
"dashmap",
"datafusion-common",
"datafusion-expr",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"futures",
"hashbrown 0.14.5",
"log",
"object_store",
"parking_lot 0.12.2",
"rand",
"tempfile",
"url",
]
[[package]]
name = "datafusion-execution"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06a3a29ae36bcde07d179cc33b45656a8e7e4d023623e320e48dcf1200eeee95"
dependencies = [
"arrow",
"chrono",
"dashmap",
"datafusion-common 37.1.0",
"datafusion-expr 37.1.0",
"futures",
"hashbrown 0.14.5",
"log",
@@ -2674,7 +2776,7 @@ dependencies = [
"arrow",
"arrow-array",
"chrono",
"datafusion-common",
"datafusion-common 37.0.0",
"paste",
"serde_json",
"sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2682,6 +2784,23 @@ dependencies = [
"strum_macros 0.26.2",
]
[[package]]
name = "datafusion-expr"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a3542aa322029c2121a671ce08000d4b274171070df13f697b14169ccf4f628"
dependencies = [
"ahash 0.8.11",
"arrow",
"arrow-array",
"chrono",
"datafusion-common 37.1.0",
"paste",
"sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)",
"strum 0.26.2",
"strum_macros 0.26.2",
]
[[package]]
name = "datafusion-functions"
version = "37.0.0"
@@ -2692,10 +2811,10 @@ dependencies = [
"blake2",
"blake3",
"chrono",
"datafusion-common",
"datafusion-execution",
"datafusion-expr",
"datafusion-physical-expr",
"datafusion-common 37.0.0",
"datafusion-execution 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-physical-expr 37.0.0",
"hashbrown 0.14.5",
"hex",
"itertools 0.12.1",
@@ -2708,15 +2827,40 @@ dependencies = [
"uuid",
]
[[package]]
name = "datafusion-functions"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd221792c666eac174ecc09e606312844772acc12cbec61a420c2fca1ee70959"
dependencies = [
"arrow",
"base64 0.22.1",
"blake2",
"blake3",
"chrono",
"datafusion-common 37.1.0",
"datafusion-execution 37.1.0",
"datafusion-expr 37.1.0",
"datafusion-physical-expr 37.1.0",
"hex",
"itertools 0.12.1",
"log",
"md-5",
"regex",
"sha2",
"unicode-segmentation",
"uuid",
]
[[package]]
name = "datafusion-functions-aggregate"
version = "37.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c"
dependencies = [
"arrow",
"datafusion-common",
"datafusion-execution",
"datafusion-expr",
"datafusion-common 37.0.0",
"datafusion-execution 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-physical-expr-common",
"log",
"paste",
@@ -2732,10 +2876,30 @@ dependencies = [
"arrow-buffer",
"arrow-ord",
"arrow-schema",
"datafusion-common",
"datafusion-execution",
"datafusion-expr",
"datafusion-functions",
"datafusion-common 37.0.0",
"datafusion-execution 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-functions 37.0.0",
"itertools 0.12.1",
"log",
"paste",
]
[[package]]
name = "datafusion-functions-array"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e501801e84d9c6ef54caaebcda1b18a6196a24176c12fb70e969bc0572e03c55"
dependencies = [
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-ord",
"arrow-schema",
"datafusion-common 37.1.0",
"datafusion-execution 37.1.0",
"datafusion-expr 37.1.0",
"datafusion-functions 37.1.0",
"itertools 0.12.1",
"log",
"paste",
@@ -2749,9 +2913,27 @@ dependencies = [
"arrow",
"async-trait",
"chrono",
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-physical-expr 37.0.0",
"hashbrown 0.14.5",
"itertools 0.12.1",
"log",
"regex-syntax 0.8.3",
]
[[package]]
name = "datafusion-optimizer"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76bd7f5087817deb961764e8c973d243b54f8572db414a8f0a8f33a48f991e0a"
dependencies = [
"arrow",
"async-trait",
"chrono",
"datafusion-common 37.1.0",
"datafusion-expr 37.1.0",
"datafusion-physical-expr 37.1.0",
"hashbrown 0.14.5",
"itertools 0.12.1",
"log",
@@ -2772,9 +2954,9 @@ dependencies = [
"arrow-string",
"base64 0.22.1",
"chrono",
"datafusion-common",
"datafusion-execution",
"datafusion-expr",
"datafusion-common 37.0.0",
"datafusion-execution 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-functions-aggregate",
"datafusion-physical-expr-common",
"half 2.4.1",
@@ -2788,14 +2970,49 @@ dependencies = [
"regex",
]
[[package]]
name = "datafusion-physical-expr"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cabc0d9aaa0f5eb1b472112f16223c9ffd2fb04e58cbf65c0a331ee6e993f96"
dependencies = [
"ahash 0.8.11",
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-ord",
"arrow-schema",
"arrow-string",
"base64 0.22.1",
"blake2",
"blake3",
"chrono",
"datafusion-common 37.1.0",
"datafusion-execution 37.1.0",
"datafusion-expr 37.1.0",
"half 2.4.1",
"hashbrown 0.14.5",
"hex",
"indexmap 2.2.6",
"itertools 0.12.1",
"log",
"md-5",
"paste",
"petgraph",
"rand",
"regex",
"sha2",
"unicode-segmentation",
]
[[package]]
name = "datafusion-physical-expr-common"
version = "37.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c"
dependencies = [
"arrow",
"datafusion-common",
"datafusion-expr",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
]
[[package]]
@@ -2811,12 +3028,12 @@ dependencies = [
"arrow-schema",
"async-trait",
"chrono",
"datafusion-common",
"datafusion-common-runtime",
"datafusion-execution",
"datafusion-expr",
"datafusion-common 37.0.0",
"datafusion-common-runtime 37.0.0",
"datafusion-execution 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-functions-aggregate",
"datafusion-physical-expr",
"datafusion-physical-expr 37.0.0",
"datafusion-physical-expr-common",
"futures",
"half 2.4.1",
@@ -2831,6 +3048,37 @@ dependencies = [
"tokio",
]
[[package]]
name = "datafusion-physical-plan"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17c0523e9c8880f2492a88bbd857dde02bed1ed23f3e9211a89d3d7ec3b44af9"
dependencies = [
"ahash 0.8.11",
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-schema",
"async-trait",
"chrono",
"datafusion-common 37.1.0",
"datafusion-common-runtime 37.1.0",
"datafusion-execution 37.1.0",
"datafusion-expr 37.1.0",
"datafusion-physical-expr 37.1.0",
"futures",
"half 2.4.1",
"hashbrown 0.14.5",
"indexmap 2.2.6",
"itertools 0.12.1",
"log",
"once_cell",
"parking_lot 0.12.2",
"pin-project-lite",
"rand",
"tokio",
]
[[package]]
name = "datafusion-sql"
version = "37.0.0"
@@ -2839,8 +3087,24 @@ dependencies = [
"arrow",
"arrow-array",
"arrow-schema",
"datafusion-common",
"datafusion-expr",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"log",
"sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)",
"strum 0.26.2",
]
[[package]]
name = "datafusion-sql"
version = "37.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49eb54b42227136f6287573f2434b1de249fe1b8e6cd6cc73a634e4a3ec29356"
dependencies = [
"arrow",
"arrow-array",
"arrow-schema",
"datafusion-common 37.1.0",
"datafusion-expr 37.1.0",
"log",
"sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)",
"strum 0.26.2",
@@ -2853,7 +3117,7 @@ source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e27
dependencies = [
"async-recursion",
"chrono",
"datafusion",
"datafusion 37.0.0",
"itertools 0.12.1",
"object_store",
"prost 0.12.4",
@@ -2888,9 +3152,9 @@ dependencies = [
"common-version",
"common-wal",
"dashmap",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datatypes",
"file-engine",
"futures",
@@ -2931,7 +3195,7 @@ dependencies = [
"common-macro",
"common-telemetry",
"common-time",
"datafusion-common",
"datafusion-common 37.0.0",
"enum_dispatch",
"num",
"num-traits",
@@ -3434,8 +3698,8 @@ dependencies = [
"common-telemetry",
"common-test-util",
"common-time",
"datafusion",
"datafusion-expr",
"datafusion 37.0.0",
"datafusion-expr 37.0.0",
"datatypes",
"futures",
"object-store",
@@ -3531,8 +3795,8 @@ dependencies = [
"common-macro",
"common-telemetry",
"common-time",
"datafusion-common",
"datafusion-expr",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-substrait",
"datatypes",
"enum_dispatch",
@@ -5115,6 +5379,16 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "lzokay-native"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "792ba667add2798c6c3e988e630f4eb921b5cbc735044825b7111ef1582c8730"
dependencies = [
"byteorder",
"thiserror",
]
[[package]]
name = "mac_address"
version = "1.1.6"
@@ -5371,7 +5645,7 @@ dependencies = [
"common-telemetry",
"common-test-util",
"common-time",
"datafusion",
"datafusion 37.0.0",
"datatypes",
"itertools 0.10.5",
"lazy_static",
@@ -5455,9 +5729,9 @@ dependencies = [
"crc32fast",
"criterion",
"crossbeam-utils",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datatypes",
"futures",
"humantime-serde",
@@ -6312,9 +6586,9 @@ dependencies = [
"common-telemetry",
"common-test-util",
"common-time",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datatypes",
"file-engine",
"futures",
@@ -6349,22 +6623,29 @@ checksum = "978aa494585d3ca4ad74929863093e87cac9790d81fe7aba2b3dc2890643a0fc"
[[package]]
name = "orc-rust"
version = "0.2.43"
source = "git+https://github.com/MichaelScofield/orc-rs.git?rev=17347f5f084ac937863317df882218055c4ea8c1#17347f5f084ac937863317df882218055c4ea8c1"
version = "0.3.0"
source = "git+https://github.com/datafusion-contrib/datafusion-orc.git?rev=502217315726314c4008808fe169764529640599#502217315726314c4008808fe169764529640599"
dependencies = [
"arrow",
"async-trait",
"bytes",
"chrono",
"chrono-tz",
"datafusion 37.1.0",
"datafusion-expr 37.1.0",
"datafusion-physical-expr 37.1.0",
"fallible-streaming-iterator",
"flate2",
"futures",
"futures-util",
"lazy_static",
"paste",
"lz4_flex 0.11.3",
"lzokay-native",
"num",
"object_store",
"prost 0.11.9",
"snafu 0.7.5",
"snap",
"tokio",
"zigzag",
"zstd 0.12.4",
]
@@ -6580,8 +6861,8 @@ dependencies = [
"common-macro",
"common-meta",
"common-query",
"datafusion-common",
"datafusion-expr",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datatypes",
"itertools 0.10.5",
"serde",
@@ -7199,9 +7480,9 @@ dependencies = [
"common-query",
"common-recordbatch",
"common-telemetry",
"datafusion",
"datafusion-expr",
"datafusion-functions",
"datafusion 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-functions 37.0.0",
"datatypes",
"futures",
"greptime-proto",
@@ -7535,12 +7816,12 @@ dependencies = [
"common-recordbatch",
"common-telemetry",
"common-time",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion-optimizer",
"datafusion-physical-expr",
"datafusion-sql",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-optimizer 37.0.0",
"datafusion-physical-expr 37.0.0",
"datafusion-sql 37.0.0",
"datatypes",
"format_num",
"futures",
@@ -8894,11 +9175,11 @@ dependencies = [
"console",
"criterion",
"crossbeam-utils",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion-functions",
"datafusion-physical-expr",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-functions 37.0.0",
"datafusion-physical-expr 37.0.0",
"datatypes",
"futures",
"lazy_static",
@@ -9179,8 +9460,8 @@ dependencies = [
"common-version",
"criterion",
"dashmap",
"datafusion",
"datafusion-common",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datatypes",
"derive_builder 0.12.0",
"futures",
@@ -9538,11 +9819,11 @@ dependencies = [
"common-macro",
"common-query",
"common-time",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
"datafusion-sql",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-physical-expr 37.0.0",
"datafusion-sql 37.0.0",
"datatypes",
"hex",
"itertools 0.10.5",
@@ -9975,9 +10256,9 @@ dependencies = [
"common-function",
"common-macro",
"common-telemetry",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-substrait",
"datatypes",
"promql",
@@ -10171,10 +10452,10 @@ dependencies = [
"common-telemetry",
"common-test-util",
"common-time",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
"datafusion 37.0.0",
"datafusion-common 37.0.0",
"datafusion-expr 37.0.0",
"datafusion-physical-expr 37.0.0",
"datatypes",
"derive_builder 0.12.0",
"futures",
@@ -10325,8 +10606,8 @@ dependencies = [
"common-telemetry",
"common-test-util",
"common-wal",
"datafusion",
"datafusion-expr",
"datafusion 37.0.0",
"datafusion-expr 37.0.0",
"datanode",
"datatypes",
"dotenv",
@@ -12319,15 +12600,6 @@ dependencies = [
"syn 2.0.61",
]
[[package]]
name = "zigzag"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70b40401a28d86ce16a330b863b86fd7dbee4d7c940587ab09ab8c019f9e3fdf"
dependencies = [
"num-traits",
]
[[package]]
name = "zstd"
version = "0.11.2+zstd.1.5.2"

View File

@@ -30,7 +30,7 @@ derive_builder.workspace = true
futures.workspace = true
lazy_static.workspace = true
object-store.workspace = true
orc-rust = { git = "https://github.com/MichaelScofield/orc-rs.git", rev = "17347f5f084ac937863317df882218055c4ea8c1" }
orc-rust = { git = "https://github.com/datafusion-contrib/datafusion-orc.git", rev = "502217315726314c4008808fe169764529640599" }
parquet.workspace = true
paste = "1.0"
regex = "1.7"

View File

@@ -77,7 +77,7 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: orc_rust::error::Error,
error: orc_rust::error::OrcError,
},
#[snafu(display("Failed to read object from path: {}", path))]

View File

@@ -21,9 +21,8 @@ use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener
use datafusion::error::{DataFusionError, Result as DfResult};
use futures::{StreamExt, TryStreamExt};
use object_store::ObjectStore;
use orc_rust::arrow_reader::{create_arrow_schema, Cursor};
use orc_rust::arrow_reader::ArrowReaderBuilder;
use orc_rust::async_arrow_reader::ArrowStreamReader;
use orc_rust::reader::Reader;
use snafu::ResultExt;
use tokio::io::{AsyncRead, AsyncSeek};
@@ -33,28 +32,20 @@ use crate::file_format::FileFormat;
/// Unit struct naming the ORC file format; it carries no data of its own.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct OrcFormat;
/// Builds the root ORC `Cursor` over an async, seekable `reader`.
///
/// The reader is first opened with `Reader::new_async`, and the result is then
/// handed to `Cursor::root`.
///
/// # Errors
///
/// A failure in either step is wrapped with `error::OrcReaderSnafu` context.
pub async fn new_orc_cursor<R: AsyncRead + AsyncSeek + Unpin + Send + 'static>(
reader: R,
) -> Result<Cursor<R>> {
// Step 1: open the ORC reader (async).
let reader = Reader::new_async(reader)
.await
.context(error::OrcReaderSnafu)?;
// Step 2: position a cursor at the root of the opened reader.
let cursor = Cursor::root(reader).context(error::OrcReaderSnafu)?;
Ok(cursor)
}
/// Creates an `ArrowStreamReader` over an async, seekable ORC `reader`.
///
/// NOTE(review): the body below contains two construction paths back to back,
/// which looks like the removed and added sides of a diff rendered without
/// `-`/`+` markers — confirm which lines are live before relying on it.
///
/// # Errors
///
/// On the builder path, a failure of `ArrowReaderBuilder::try_new_async` is
/// wrapped with `error::OrcReaderSnafu` context. On the cursor path, errors
/// from `new_orc_cursor` are propagated unchanged.
pub async fn new_orc_stream_reader<R: AsyncRead + AsyncSeek + Unpin + Send + 'static>(
reader: R,
) -> Result<ArrowStreamReader<R>> {
// Cursor-based path: build a root cursor, then wrap it in a stream reader.
let cursor = new_orc_cursor(reader).await?;
Ok(ArrowStreamReader::new(cursor, None))
// Builder-based path: construct the builder asynchronously, then build the
// async stream reader from it.
let reader_build = ArrowReaderBuilder::try_new_async(reader)
.await
.context(error::OrcReaderSnafu)?;
Ok(reader_build.build_async())
}
/// Returns the Arrow `Schema` of the ORC data behind an async, seekable `reader`.
///
/// NOTE(review): the body below contains two ways of obtaining the schema back
/// to back, which looks like the removed and added sides of a diff rendered
/// without `-`/`+` markers — confirm which lines are live.
///
/// # Errors
///
/// Errors from `new_orc_cursor` / `new_orc_stream_reader` are propagated
/// unchanged via `?`.
pub async fn infer_orc_schema<R: AsyncRead + AsyncSeek + Unpin + Send + 'static>(
reader: R,
) -> Result<Schema> {
// Cursor-based path: derive the schema from the root cursor.
let cursor = new_orc_cursor(reader).await?;
Ok(create_arrow_schema(&cursor))
// Stream-reader path: take the reader's schema and clone it out of the
// shared reference to return an owned `Schema`.
let reader = new_orc_stream_reader(reader).await?;
Ok(reader.schema().as_ref().clone())
}
#[async_trait]