Compare commits

..

1 Commits

Author SHA1 Message Date
lancedb automation
b9249881bb chore: update lance dependency to v8.0.0-rc.3 2026-06-30 13:42:40 +00:00
8 changed files with 124 additions and 214 deletions

105
Cargo.lock generated
View File

@@ -1297,6 +1297,15 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
[[package]]
name = "bitpacking"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019"
dependencies = [
"crunchy",
]
[[package]]
name = "bitvec"
version = "1.0.1"
@@ -3423,8 +3432,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-array",
"rand 0.9.4",
@@ -4726,8 +4735,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a"
[[package]]
name = "lance"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arc-swap",
"arrow",
@@ -4745,6 +4754,7 @@ dependencies = [
"async_cell",
"aws-credential-types",
"aws-sdk-dynamodb",
"bitpacking",
"byteorder",
"bytes",
"chrono",
@@ -4761,9 +4771,8 @@ dependencies = [
"futures",
"half",
"humantime",
"itertools 0.14.0",
"itertools 0.13.0",
"lance-arrow",
"lance-bitpacking",
"lance-core",
"lance-datafusion",
"lance-encoding",
@@ -4801,8 +4810,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4823,7 +4832,7 @@ dependencies = [
[[package]]
name = "lance-arrow-scalar"
version = "58.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4837,7 +4846,7 @@ dependencies = [
[[package]]
name = "lance-arrow-stats"
version = "58.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4846,19 +4855,18 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrayref",
"crunchy",
"paste",
"seq-macro",
]
[[package]]
name = "lance-core"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4870,7 +4878,7 @@ dependencies = [
"datafusion-common",
"datafusion-sql",
"futures",
"itertools 0.14.0",
"itertools 0.13.0",
"lance-arrow",
"lance-derive",
"libc",
@@ -4896,8 +4904,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow",
"arrow-array",
@@ -4927,8 +4935,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow",
"arrow-array",
@@ -4945,8 +4953,8 @@ dependencies = [
[[package]]
name = "lance-derive"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"proc-macro2",
"quote",
@@ -4955,8 +4963,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4972,7 +4980,7 @@ dependencies = [
"futures",
"hex",
"hyperloglogplus",
"itertools 0.14.0",
"itertools 0.13.0",
"lance-arrow",
"lance-bitpacking",
"lance-core",
@@ -4991,8 +4999,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -5022,8 +5030,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arc-swap",
"arrow",
@@ -5035,6 +5043,7 @@ dependencies = [
"async-channel",
"async-recursion",
"async-trait",
"bitpacking",
"bitvec",
"bytes",
"chrono",
@@ -5047,12 +5056,11 @@ dependencies = [
"fst",
"futures",
"half",
"itertools 0.14.0",
"itertools 0.13.0",
"jieba-rs",
"jsonb",
"lance-arrow",
"lance-arrow-stats",
"lance-bitpacking",
"lance-core",
"lance-datafusion",
"lance-datagen",
@@ -5088,8 +5096,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow",
"arrow-arith",
@@ -5130,8 +5138,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5142,13 +5150,12 @@ dependencies = [
"lance-core",
"num-traits",
"rand 0.9.4",
"rayon",
]
[[package]]
name = "lance-namespace"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow",
"async-trait",
@@ -5160,8 +5167,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow",
"arrow-ipc",
@@ -5215,15 +5222,15 @@ dependencies = [
[[package]]
name = "lance-select"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-schema",
"byteorder",
"bytes",
"itertools 0.14.0",
"itertools 0.13.0",
"lance-core",
"roaring",
"tracing",
@@ -5231,8 +5238,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow",
"arrow-array",
@@ -5271,8 +5278,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -5285,8 +5292,8 @@ dependencies = [
[[package]]
name = "lance-tokenizer"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "8.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-rc.3#2b4775ff3973c7f945f5bc104f2cbba5825f1beb"
dependencies = [
"icu_segmenter",
"jieba-rs",

View File

@@ -13,20 +13,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=9.0.0-beta.10", default-features = false, "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=9.0.0-beta.10", default-features = false, "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=9.0.0-beta.10", default-features = false, "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=8.0.0-rc.3", default-features = false, "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=8.0.0-rc.3", default-features = false, "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=8.0.0-rc.3", default-features = false, "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=8.0.0-rc.3", "tag" = "v8.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "58.0.0", optional = false }

View File

@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>9.0.0-beta.10</lance-core.version>
<lance-core.version>8.0.0-rc.3</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -3,7 +3,7 @@
use std::time::Duration;
use lancedb::{ipc::ipc_file_to_batches, table::merge::MergeInsertBuilder};
use lancedb::{arrow::IntoArrow, ipc::ipc_file_to_batches, table::merge::MergeInsertBuilder};
use napi::bindgen_prelude::*;
use napi_derive::napi;
@@ -66,9 +66,11 @@ impl NativeMergeInsertBuilder {
#[napi(catch_unwind)]
pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeResult> {
let data = ipc_file_to_batches(buf.to_vec()).map_err(|e| {
napi::Error::from_reason(format!("Failed to read IPC file: {}", convert_error(&e)))
})?;
let data = ipc_file_to_batches(buf.to_vec())
.and_then(IntoArrow::into_arrow)
.map_err(|e| {
napi::Error::from_reason(format!("Failed to read IPC file: {}", convert_error(&e)))
})?;
let this = self.clone();

View File

@@ -166,10 +166,6 @@ required-features = ["bedrock"]
[[example]]
name = "simple"
[[example]]
name = "polars"
required-features = ["polars"]
[[example]]
name = "full_text_search"

View File

@@ -1,47 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
//! This example demonstrates ingesting a Polars DataFrame into LanceDB and
//! reading it back out as a Polars DataFrame.
use lancedb::arrow::IntoPolars;
use lancedb::query::ExecutableQuery;
use lancedb::{Result, connect};
use polars::prelude::{DataFrame, NamedFrom, Series};
fn make_dataframe() -> DataFrame {
let ids = Series::new("id", &[1i32, 2, 3, 4, 5]);
let names = Series::new("name", &["Alice", "Bob", "Carol", "Dave", "Eve"]);
let scores = Series::new("score", &[9.5f64, 8.1, 7.3, 9.0, 6.5]);
DataFrame::new(vec![ids, names, scores]).unwrap()
}
#[tokio::main]
async fn main() -> Result<()> {
let tmp = tempfile::tempdir().unwrap();
let db = connect(tmp.path().to_str().unwrap()).execute().await?;
// Ingest a Polars DataFrame directly — DataFrame now implements Scannable.
let df = make_dataframe();
println!("Input DataFrame:\n{df}");
let table = db.create_table("people", df).execute().await?;
// Append more rows.
let more = DataFrame::new(vec![
Series::new("id", &[6i32, 7]),
Series::new("name", &["Frank", "Grace"]),
Series::new("score", &[7.8f64, 8.9]),
])
.unwrap();
table.add(more).execute().await?;
// Read back as a Polars DataFrame.
let result_df = table.query().execute().await?.into_polars().await?;
println!(
"\nRound-tripped DataFrame ({} rows):\n{result_df}",
result_df.height()
);
Ok(())
}

View File

@@ -112,14 +112,54 @@ impl<S: Stream<Item = Result<arrow_array::RecordBatch>>> RecordBatchStream
/// A trait for converting incoming data to Arrow
///
/// Integrations should implement this trait to allow data to be
/// imported directly from the integration. For example, implementing
/// this trait for `Vec<Vec<...>>` would allow the `Vec` to be directly
/// used in methods like [`crate::connection::Connection::create_table`]
/// or [`crate::table::Table::add`]
pub trait IntoArrow {
/// Convert the data into an iterator of Arrow batches
fn into_arrow(self) -> Result<Box<dyn arrow_array::RecordBatchReader + Send>>;
}
pub type BoxedRecordBatchReader = Box<dyn arrow_array::RecordBatchReader + Send>;
impl<T: arrow_array::RecordBatchReader + Send + 'static> IntoArrow for T {
fn into_arrow(self) -> Result<Box<dyn arrow_array::RecordBatchReader + Send>> {
Ok(Box::new(self))
}
}
/// A trait for converting incoming data to Arrow asynchronously
///
/// Serves the same purpose as [`IntoArrow`], but for asynchronous data.
///
/// Note: Arrow has no async equivalent to RecordBatchReader and so
pub trait IntoArrowStream {
/// Convert the data into a stream of Arrow batches
fn into_arrow(self) -> Result<SendableRecordBatchStream>;
}
impl<S: Stream<Item = Result<arrow_array::RecordBatch>>> SimpleRecordBatchStream<S> {
pub fn new(stream: S, schema: Arc<arrow_schema::Schema>) -> Self {
Self { schema, stream }
}
}
impl IntoArrowStream for SendableRecordBatchStream {
fn into_arrow(self) -> Result<SendableRecordBatchStream> {
Ok(self)
}
}
impl IntoArrowStream for datafusion_physical_plan::SendableRecordBatchStream {
fn into_arrow(self) -> Result<SendableRecordBatchStream> {
let schema = self.schema();
let stream = self.map_err(|df_err| df_err.into());
Ok(Box::pin(SimpleRecordBatchStream::new(stream, schema)))
}
}
pub trait LanceDbDatagenExt {
fn into_ldb_stream(
self,
@@ -224,7 +264,9 @@ impl IntoPolars for SendableRecordBatchStream {
#[cfg(all(test, feature = "polars"))]
mod tests {
use super::SendableRecordBatchStream;
use crate::arrow::{IntoPolars, PolarsDataFrameRecordBatchReader, SimpleRecordBatchStream};
use crate::arrow::{
IntoArrow, IntoPolars, PolarsDataFrameRecordBatchReader, SimpleRecordBatchStream,
};
use polars::prelude::{DataFrame, NamedFrom, Series};
fn get_record_batch_reader_from_polars() -> Box<dyn arrow_array::RecordBatchReader + Send> {
@@ -238,7 +280,10 @@ mod tests {
float_series = Series::new("float", &[2.0]);
let df2 = DataFrame::new(vec![string_series, int_series, float_series]).unwrap();
Box::new(PolarsDataFrameRecordBatchReader::new(df1.vstack(&df2).unwrap()).unwrap())
PolarsDataFrameRecordBatchReader::new(df1.vstack(&df2).unwrap())
.unwrap()
.into_arrow()
.unwrap()
}
#[test]

View File

@@ -185,43 +185,6 @@ impl Scannable for SendableRecordBatchStream {
}
}
#[cfg(feature = "polars")]
impl Scannable for polars::frame::DataFrame {
fn schema(&self) -> SchemaRef {
crate::polars_arrow_convertors::convert_polars_df_schema_to_arrow_rb_schema(
self.schema().clone(),
)
.expect("failed to convert Polars DataFrame schema to Arrow schema")
}
fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
let schema = Scannable::schema(self);
let batches: crate::Result<Vec<RecordBatch>> =
match crate::arrow::PolarsDataFrameRecordBatchReader::new(self.clone()) {
Err(e) => Err(e),
Ok(reader) => reader.map(|b| b.map_err(Into::into)).collect(),
};
match batches {
Err(e) => Box::pin(SimpleRecordBatchStream {
schema,
stream: once(async move { Err(e) }),
}),
Ok(batches) => {
let stream = futures::stream::iter(batches.into_iter().map(Ok));
Box::pin(SimpleRecordBatchStream { schema, stream })
}
}
}
fn num_rows(&self) -> Option<usize> {
Some(self.height())
}
fn rescannable(&self) -> bool {
true
}
}
#[async_trait]
impl StreamingWriteSource for Box<dyn Scannable> {
fn arrow_schema(&self) -> SchemaRef {
@@ -1126,60 +1089,4 @@ mod tests {
);
}
}
#[cfg(feature = "polars")]
mod polars_tests {
use super::*;
use crate::arrow::IntoPolars;
use crate::query::ExecutableQuery;
use polars::prelude::{DataFrame, NamedFrom, Series};
fn make_df() -> DataFrame {
DataFrame::new(vec![
Series::new("id", &[1i32, 2, 3]),
Series::new("val", &[1.1f64, 2.2, 3.3]),
])
.unwrap()
}
#[tokio::test]
async fn test_dataframe_scannable_round_trip() {
let tmp = tempfile::tempdir().unwrap();
let db = crate::connect(tmp.path().to_str().unwrap())
.execute()
.await
.unwrap();
let df = make_df();
let table = db.create_table("t", df.clone()).execute().await.unwrap();
// Append the same rows again.
table.add(df.clone()).execute().await.unwrap();
let result = table
.query()
.execute()
.await
.unwrap()
.into_polars()
.await
.unwrap();
assert_eq!(result.height(), df.height() * 2);
assert_eq!(result.schema(), df.schema());
}
#[tokio::test]
async fn test_dataframe_scannable_rescannable() {
let mut df = make_df();
assert!(df.rescannable());
let batches1: Vec<RecordBatch> = df.scan_as_stream().try_collect().await.unwrap();
assert_eq!(batches1.iter().map(|b| b.num_rows()).sum::<usize>(), 3);
// Can be scanned again.
let batches2: Vec<RecordBatch> = df.scan_as_stream().try_collect().await.unwrap();
assert_eq!(batches2.iter().map(|b| b.num_rows()).sum::<usize>(), 3);
}
}
}