From a0be7198f92543ab97c6edc59ac62562d60b7c23 Mon Sep 17 00:00:00 2001 From: maco Date: Mon, 13 May 2024 13:15:06 +0800 Subject: [PATCH] feat: migrate orc-rs to datafusion-orc (#3923) --- Cargo.lock | 492 ++++++++++++++----- src/common/datasource/Cargo.toml | 2 +- src/common/datasource/src/error.rs | 2 +- src/common/datasource/src/file_format/orc.rs | 23 +- 4 files changed, 391 insertions(+), 128 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d5eba131e7..028c1a2f2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1272,7 +1272,7 @@ dependencies = [ "common-time", "common-version", "dashmap", - "datafusion", + "datafusion 37.0.0", "datatypes", "futures", "futures-util", @@ -1744,7 +1744,7 @@ dependencies = [ "common-recordbatch", "common-runtime", "common-test-util", - "datafusion", + "datafusion 37.0.0", "datatypes", "derive_builder 0.12.0", "futures", @@ -1815,7 +1815,7 @@ dependencies = [ "common-telemetry", "common-time", "common-version", - "datafusion", + "datafusion 37.0.0", "datatypes", "num", "num-traits", @@ -1941,7 +1941,7 @@ dependencies = [ "common-telemetry", "common-time", "common-wal", - "datafusion-common", + "datafusion-common 37.0.0", "datatypes", "derive_builder 0.12.0", "etcd-client", @@ -2020,9 +2020,9 @@ dependencies = [ "common-macro", "common-recordbatch", "common-time", - "datafusion", - "datafusion-common", - "datafusion-expr", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", "datatypes", "serde", "snafu 0.8.2", @@ -2040,8 +2040,8 @@ dependencies = [ "common-error", "common-macro", "common-telemetry", - "datafusion", - "datafusion-common", + "datafusion 37.0.0", + "datafusion-common 37.0.0", "datatypes", "futures", "pin-project", @@ -2582,17 +2582,68 @@ dependencies = [ "bzip2", "chrono", "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", + "datafusion-common 37.0.0", + "datafusion-common-runtime 37.0.0", + "datafusion-execution 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-functions 37.0.0", "datafusion-functions-aggregate", - "datafusion-functions-array", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", + "datafusion-functions-array 37.0.0", + "datafusion-optimizer 37.0.0", + "datafusion-physical-expr 37.0.0", + "datafusion-physical-plan 37.0.0", + "datafusion-sql 37.0.0", + "flate2", + "futures", + "glob", + "half 2.4.1", + "hashbrown 0.14.5", + "indexmap 2.2.6", + "itertools 0.12.1", + "log", + "num_cpus", + "object_store", + "parking_lot 0.12.2", + "parquet", + "pin-project-lite", + "rand", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd 0.13.1", +] + +[[package]] +name = "datafusion" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85069782056753459dc47e386219aa1fdac5b731f26c28abb8c0ffd4b7c5ab11" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression 0.4.9", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-common 37.1.0", + "datafusion-common-runtime 37.1.0", + "datafusion-execution 37.1.0", + "datafusion-expr 37.1.0", + "datafusion-functions 37.1.0", + "datafusion-functions-array 37.1.0", + "datafusion-optimizer 37.1.0", + "datafusion-physical-expr 37.1.0", + "datafusion-physical-plan 37.1.0", + "datafusion-sql 37.1.0", "flate2", "futures", "glob", @@ -2637,6 +2688,27 @@ dependencies = [ "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "datafusion-common" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "309d9040751f6dc9e33c85dce6abb55a46ef7ea3644577dd014611c379447ef3" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half 2.4.1", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "datafusion-common-runtime" version = "37.0.0" @@ -2645,6 +2717,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-common-runtime" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e4a44d8ef1b1e85d32234e6012364c411c3787859bb3bba893b0332cb03dfd" +dependencies = [ + "tokio", +] + [[package]] name = "datafusion-execution" version = "37.0.0" @@ -2653,8 +2734,29 @@ dependencies = [ "arrow", "chrono", "dashmap", - "datafusion-common", - "datafusion-expr", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot 0.12.2", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-execution" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06a3a29ae36bcde07d179cc33b45656a8e7e4d023623e320e48dcf1200eeee95" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common 37.1.0", + "datafusion-expr 37.1.0", "futures", "hashbrown 0.14.5", "log", @@ -2674,7 +2776,7 @@ dependencies = [ "arrow", "arrow-array", "chrono", - "datafusion-common", + "datafusion-common 37.0.0", "paste", "serde_json", "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2682,6 +2784,23 @@ dependencies = [ "strum_macros 0.26.2", ] +[[package]] +name = "datafusion-expr" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3542aa322029c2121a671ce08000d4b274171070df13f697b14169ccf4f628" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-array", + "chrono", + "datafusion-common 37.1.0", + "paste", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", + "strum 0.26.2", + "strum_macros 0.26.2", +] + [[package]] name = "datafusion-functions" version = "37.0.0" @@ -2692,10 +2811,10 @@ dependencies = [ "blake2", "blake3", "chrono", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-common 37.0.0", + "datafusion-execution 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-physical-expr 37.0.0", "hashbrown 0.14.5", "hex", "itertools 0.12.1", @@ -2708,15 +2827,40 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-functions" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd221792c666eac174ecc09e606312844772acc12cbec61a420c2fca1ee70959" +dependencies = [ + "arrow", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common 37.1.0", + "datafusion-execution 37.1.0", + "datafusion-expr 37.1.0", + "datafusion-physical-expr 37.1.0", + "hex", + "itertools 0.12.1", + "log", + "md-5", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + [[package]] name = "datafusion-functions-aggregate" version = "37.0.0" source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", + "datafusion-common 37.0.0", + "datafusion-execution 37.0.0", + "datafusion-expr 37.0.0", "datafusion-physical-expr-common", "log", "paste", @@ -2732,10 +2876,30 @@ dependencies = [ "arrow-buffer", "arrow-ord", "arrow-schema", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", + "datafusion-common 37.0.0", + "datafusion-execution 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-functions 37.0.0", + "itertools 0.12.1", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-array" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e501801e84d9c6ef54caaebcda1b18a6196a24176c12fb70e969bc0572e03c55" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common 37.1.0", + "datafusion-execution 37.1.0", + "datafusion-expr 37.1.0", + "datafusion-functions 37.1.0", "itertools 0.12.1", "log", "paste", @@ -2749,9 +2913,27 @@ dependencies = [ "arrow", "async-trait", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-physical-expr 37.0.0", + "hashbrown 0.14.5", + "itertools 0.12.1", + "log", + "regex-syntax 0.8.3", +] + +[[package]] +name = "datafusion-optimizer" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76bd7f5087817deb961764e8c973d243b54f8572db414a8f0a8f33a48f991e0a" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common 37.1.0", + "datafusion-expr 37.1.0", + "datafusion-physical-expr 37.1.0", "hashbrown 0.14.5", "itertools 0.12.1", "log", @@ -2772,9 +2954,9 @@ dependencies = [ "arrow-string", "base64 0.22.1", "chrono", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", + "datafusion-common 37.0.0", + "datafusion-execution 37.0.0", + "datafusion-expr 37.0.0", "datafusion-functions-aggregate", "datafusion-physical-expr-common", "half 2.4.1", @@ -2788,14 +2970,49 @@ dependencies = [ "regex", ] +[[package]] +name = "datafusion-physical-expr" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cabc0d9aaa0f5eb1b472112f16223c9ffd2fb04e58cbf65c0a331ee6e993f96" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common 37.1.0", + "datafusion-execution 37.1.0", + "datafusion-expr 37.1.0", + "half 2.4.1", + "hashbrown 0.14.5", + "hex", + "indexmap 2.2.6", + "itertools 0.12.1", + "log", + "md-5", + "paste", + "petgraph", + "rand", + "regex", + "sha2", + "unicode-segmentation", +] + [[package]] name = "datafusion-physical-expr-common" version = "37.0.0" source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "arrow", - "datafusion-common", - "datafusion-expr", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", ] [[package]] @@ -2811,12 +3028,12 @@ dependencies = [ "arrow-schema", "async-trait", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", + "datafusion-common 37.0.0", + "datafusion-common-runtime 37.0.0", + "datafusion-execution 37.0.0", + "datafusion-expr 37.0.0", "datafusion-functions-aggregate", - "datafusion-physical-expr", + "datafusion-physical-expr 37.0.0", "datafusion-physical-expr-common", "futures", "half 2.4.1", @@ -2831,6 +3048,37 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-physical-plan" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c0523e9c8880f2492a88bbd857dde02bed1ed23f3e9211a89d3d7ec3b44af9" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common 37.1.0", + "datafusion-common-runtime 37.1.0", + "datafusion-execution 37.1.0", + "datafusion-expr 37.1.0", + "datafusion-physical-expr 37.1.0", + "futures", + "half 2.4.1", + "hashbrown 0.14.5", + "indexmap 2.2.6", + "itertools 0.12.1", + "log", + "once_cell", + "parking_lot 0.12.2", + "pin-project-lite", + "rand", + "tokio", +] + [[package]] name = "datafusion-sql" version = "37.0.0" @@ -2839,8 +3087,24 @@ dependencies = [ "arrow", "arrow-array", "arrow-schema", - "datafusion-common", - "datafusion-expr", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", + "log", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", + "strum 0.26.2", +] + +[[package]] +name = "datafusion-sql" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49eb54b42227136f6287573f2434b1de249fe1b8e6cd6cc73a634e4a3ec29356" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common 37.1.0", + "datafusion-expr 37.1.0", "log", "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", "strum 0.26.2", @@ -2853,7 +3117,7 @@ source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e27 dependencies = [ "async-recursion", "chrono", - "datafusion", + "datafusion 37.0.0", "itertools 0.12.1", "object_store", "prost 0.12.4", @@ -2888,9 +3152,9 @@ dependencies = [ "common-version", "common-wal", "dashmap", - "datafusion", - "datafusion-common", - "datafusion-expr", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", "datatypes", "file-engine", "futures", @@ -2931,7 +3195,7 @@ dependencies = [ "common-macro", "common-telemetry", "common-time", - "datafusion-common", + "datafusion-common 37.0.0", "enum_dispatch", "num", "num-traits", @@ -3434,8 +3698,8 @@ dependencies = [ "common-telemetry", "common-test-util", "common-time", - "datafusion", - "datafusion-expr", + "datafusion 37.0.0", + "datafusion-expr 37.0.0", "datatypes", "futures", "object-store", @@ -3531,8 +3795,8 @@ dependencies = [ "common-macro", "common-telemetry", "common-time", - "datafusion-common", - "datafusion-expr", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", "datafusion-substrait", "datatypes", "enum_dispatch", @@ -5115,6 +5379,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "lzokay-native" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "792ba667add2798c6c3e988e630f4eb921b5cbc735044825b7111ef1582c8730" +dependencies = [ + "byteorder", + "thiserror", +] + [[package]] name = "mac_address" version = "1.1.6" @@ -5371,7 +5645,7 @@ dependencies = [ "common-telemetry", "common-test-util", "common-time", - "datafusion", + "datafusion 37.0.0", "datatypes", "itertools 0.10.5", "lazy_static", @@ -5455,9 +5729,9 @@ dependencies = [ "crc32fast", "criterion", "crossbeam-utils", - "datafusion", - "datafusion-common", - "datafusion-expr", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", "datatypes", "futures", "humantime-serde", @@ -6312,9 +6586,9 @@ dependencies = [ "common-telemetry", "common-test-util", "common-time", - "datafusion", - "datafusion-common", - "datafusion-expr", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", "datatypes", "file-engine", "futures", @@ -6349,22 +6623,29 @@ checksum = "978aa494585d3ca4ad74929863093e87cac9790d81fe7aba2b3dc2890643a0fc" [[package]] name = "orc-rust" -version = "0.2.43" -source = "git+https://github.com/MichaelScofield/orc-rs.git?rev=17347f5f084ac937863317df882218055c4ea8c1#17347f5f084ac937863317df882218055c4ea8c1" +version = "0.3.0" +source = "git+https://github.com/datafusion-contrib/datafusion-orc.git?rev=502217315726314c4008808fe169764529640599#502217315726314c4008808fe169764529640599" dependencies = [ "arrow", + "async-trait", "bytes", "chrono", + "chrono-tz", + "datafusion 37.1.0", + "datafusion-expr 37.1.0", + "datafusion-physical-expr 37.1.0", "fallible-streaming-iterator", "flate2", "futures", "futures-util", - "lazy_static", - "paste", + "lz4_flex 0.11.3", + "lzokay-native", + "num", + "object_store", "prost 0.11.9", "snafu 0.7.5", + "snap", "tokio", - "zigzag", "zstd 0.12.4", ] @@ -6580,8 +6861,8 @@ dependencies = [ "common-macro", "common-meta", "common-query", - "datafusion-common", - "datafusion-expr", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", "datatypes", "itertools 0.10.5", "serde", @@ -7199,9 +7480,9 @@ dependencies = [ "common-query", "common-recordbatch", "common-telemetry", - "datafusion", - "datafusion-expr", - "datafusion-functions", + "datafusion 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-functions 37.0.0", "datatypes", "futures", "greptime-proto", @@ -7535,12 +7816,12 @@ dependencies = [ "common-recordbatch", "common-telemetry", "common-time", - "datafusion", - "datafusion-common", - "datafusion-expr", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-sql", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-optimizer 37.0.0", + "datafusion-physical-expr 37.0.0", + "datafusion-sql 37.0.0", "datatypes", "format_num", "futures", @@ -8894,11 +9175,11 @@ dependencies = [ "console", "criterion", "crossbeam-utils", - "datafusion", - "datafusion-common", - "datafusion-expr", - "datafusion-functions", - "datafusion-physical-expr", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-functions 37.0.0", + "datafusion-physical-expr 37.0.0", "datatypes", "futures", "lazy_static", @@ -9179,8 +9460,8 @@ dependencies = [ "common-version", "criterion", "dashmap", - "datafusion", - "datafusion-common", + "datafusion 37.0.0", + "datafusion-common 37.0.0", "datatypes", "derive_builder 0.12.0", "futures", @@ -9538,11 +9819,11 @@ dependencies = [ "common-macro", "common-query", "common-time", - "datafusion", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-sql", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-physical-expr 37.0.0", + "datafusion-sql 37.0.0", "datatypes", "hex", "itertools 0.10.5", @@ -9975,9 +10256,9 @@ dependencies = [ "common-function", "common-macro", "common-telemetry", - "datafusion", - "datafusion-common", - "datafusion-expr", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", "datafusion-substrait", "datatypes", "promql", @@ -10171,10 +10452,10 @@ dependencies = [ "common-telemetry", "common-test-util", "common-time", - "datafusion", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion 37.0.0", + "datafusion-common 37.0.0", + "datafusion-expr 37.0.0", + "datafusion-physical-expr 37.0.0", "datatypes", "derive_builder 0.12.0", "futures", @@ -10325,8 +10606,8 @@ dependencies = [ "common-telemetry", "common-test-util", "common-wal", - "datafusion", - "datafusion-expr", + "datafusion 37.0.0", + "datafusion-expr 37.0.0", "datanode", "datatypes", "dotenv", @@ -12319,15 +12600,6 @@ dependencies = [ "syn 2.0.61", ] -[[package]] -name = "zigzag" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70b40401a28d86ce16a330b863b86fd7dbee4d7c940587ab09ab8c019f9e3fdf" -dependencies = [ - "num-traits", -] - [[package]] name = "zstd" version = "0.11.2+zstd.1.5.2" diff --git a/src/common/datasource/Cargo.toml b/src/common/datasource/Cargo.toml index 1a4792dcb0..ece0edd9fe 100644 --- a/src/common/datasource/Cargo.toml +++ b/src/common/datasource/Cargo.toml @@ -30,7 +30,7 @@ derive_builder.workspace = true futures.workspace = true lazy_static.workspace = true object-store.workspace = true -orc-rust = { git = "https://github.com/MichaelScofield/orc-rs.git", rev = "17347f5f084ac937863317df882218055c4ea8c1" } +orc-rust = { git = "https://github.com/datafusion-contrib/datafusion-orc.git", rev = "502217315726314c4008808fe169764529640599" } parquet.workspace = true paste = "1.0" regex = "1.7" diff --git a/src/common/datasource/src/error.rs b/src/common/datasource/src/error.rs index 527c20ff86..8f062868a9 100644 --- a/src/common/datasource/src/error.rs +++ b/src/common/datasource/src/error.rs @@ -77,7 +77,7 @@ pub enum Error { #[snafu(implicit)] location: Location, #[snafu(source)] - error: orc_rust::error::Error, + error: orc_rust::error::OrcError, }, #[snafu(display("Failed to read object from path: {}", path))] diff --git a/src/common/datasource/src/file_format/orc.rs b/src/common/datasource/src/file_format/orc.rs index f11798c330..23e0589c99 100644 --- a/src/common/datasource/src/file_format/orc.rs +++ b/src/common/datasource/src/file_format/orc.rs @@ -21,9 +21,8 @@ use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener use datafusion::error::{DataFusionError, Result as DfResult}; use futures::{StreamExt, TryStreamExt}; use object_store::ObjectStore; -use orc_rust::arrow_reader::{create_arrow_schema, Cursor}; +use orc_rust::arrow_reader::ArrowReaderBuilder; use orc_rust::async_arrow_reader::ArrowStreamReader; -use orc_rust::reader::Reader; use snafu::ResultExt; use tokio::io::{AsyncRead, AsyncSeek}; @@ -33,28 +32,20 @@ use crate::file_format::FileFormat; #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub struct OrcFormat; -pub async fn new_orc_cursor( - reader: R, -) -> Result> { - let reader = Reader::new_async(reader) - .await - .context(error::OrcReaderSnafu)?; - let cursor = Cursor::root(reader).context(error::OrcReaderSnafu)?; - Ok(cursor) -} - pub async fn new_orc_stream_reader( reader: R, ) -> Result> { - let cursor = new_orc_cursor(reader).await?; - Ok(ArrowStreamReader::new(cursor, None)) + let reader_build = ArrowReaderBuilder::try_new_async(reader) + .await + .context(error::OrcReaderSnafu)?; + Ok(reader_build.build_async()) } pub async fn infer_orc_schema( reader: R, ) -> Result { - let cursor = new_orc_cursor(reader).await?; - Ok(create_arrow_schema(&cursor)) + let reader = new_orc_stream_reader(reader).await?; + Ok(reader.schema().as_ref().clone()) } #[async_trait]