feat: add output_schema method to queries (#2717)

This is a helper utility I need for some of my data loader work. It
makes it easy to see the output schema even when a `select` has been
applied.
This commit is contained in:
Weston Pace
2025-10-14 05:13:28 -07:00
committed by GitHub
parent 03eab0f091
commit 8f8e06a2da
17 changed files with 563 additions and 12 deletions

View File

@@ -22,7 +22,7 @@ use crate::error::NapiErrorExt;
use crate::iterator::RecordBatchIterator;
use crate::rerankers::Reranker;
use crate::rerankers::RerankerCallbacks;
use crate::util::parse_distance_type;
use crate::util::{parse_distance_type, schema_to_buffer};
#[napi]
pub struct Query {
@@ -88,6 +88,12 @@ impl Query {
self.inner = self.inner.clone().with_row_id();
}
/// Resolve the schema this query will produce and return it as a buffer.
///
/// The schema is requested from the wrapped query and then encoded with
/// `schema_to_buffer`.
///
/// # Errors
///
/// A failure while resolving the schema is converted with `default_error`;
/// an encoding failure is returned as produced by `schema_to_buffer`.
#[napi(catch_unwind)]
pub async fn output_schema(&self) -> napi::Result<Buffer> {
    self.inner
        .output_schema()
        .await
        .default_error()
        .and_then(|resolved| schema_to_buffer(&resolved))
}
#[napi(catch_unwind)]
pub async fn execute(
&self,
@@ -273,6 +279,12 @@ impl VectorQuery {
.rerank(Arc::new(Reranker::new(callbacks)));
}
/// Resolve the schema this vector query will produce and return it as a buffer.
///
/// The schema is requested from the wrapped vector query and then encoded
/// with `schema_to_buffer`.
///
/// # Errors
///
/// A failure while resolving the schema is converted with `default_error`;
/// an encoding failure is returned as produced by `schema_to_buffer`.
#[napi(catch_unwind)]
pub async fn output_schema(&self) -> napi::Result<Buffer> {
    self.inner
        .output_schema()
        .await
        .default_error()
        .and_then(|resolved| schema_to_buffer(&resolved))
}
#[napi(catch_unwind)]
pub async fn execute(
&self,
@@ -346,6 +358,12 @@ impl TakeQuery {
self.inner = self.inner.clone().with_row_id();
}
/// Resolve the schema this take query will produce and return it as a buffer.
///
/// The schema is requested from the wrapped take query and then encoded
/// with `schema_to_buffer`.
///
/// # Errors
///
/// A failure while resolving the schema is converted with `default_error`;
/// an encoding failure is returned as produced by `schema_to_buffer`.
#[napi(catch_unwind)]
pub async fn output_schema(&self) -> napi::Result<Buffer> {
    self.inner
        .output_schema()
        .await
        .default_error()
        .and_then(|resolved| schema_to_buffer(&resolved))
}
#[napi(catch_unwind)]
pub async fn execute(
&self,

View File

@@ -3,7 +3,6 @@
use std::collections::HashMap;
use arrow_ipc::writer::FileWriter;
use lancedb::ipc::ipc_file_to_batches;
use lancedb::table::{
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
@@ -16,6 +15,7 @@ use crate::error::NapiErrorExt;
use crate::index::Index;
use crate::merge::NativeMergeInsertBuilder;
use crate::query::{Query, TakeQuery, VectorQuery};
use crate::util::schema_to_buffer;
#[napi]
pub struct Table {
@@ -64,14 +64,7 @@ impl Table {
/// Fetch the table schema via `inner_ref()?.schema()` and return it
/// serialized as an Arrow IPC file buffer.
///
/// Errors from `inner_ref()` are propagated as-is; errors from fetching the
/// schema are converted with `default_error`.
#[napi(catch_unwind)]
pub async fn schema(&self) -> napi::Result<Buffer> {
let schema = self.inner_ref()?.schema().await.default_error()?;
// NOTE(review): this view carries no +/- diff markers. The `FileWriter`
// statements below look like the removed inline serialization that the final
// `schema_to_buffer(&schema)` call replaces — confirm against the real diff.
let mut writer = FileWriter::try_new(vec![], &schema)
.map_err(|e| napi::Error::from_reason(format!("Failed to create IPC file: {}", e)))?;
writer
.finish()
.map_err(|e| napi::Error::from_reason(format!("Failed to finish IPC file: {}", e)))?;
Ok(Buffer::from(writer.into_inner().map_err(|e| {
napi::Error::from_reason(format!("Failed to get IPC file: {}", e))
})?))
schema_to_buffer(&schema)
}
#[napi(catch_unwind)]

View File

@@ -1,7 +1,10 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use arrow_ipc::writer::FileWriter;
use arrow_schema::Schema;
use lancedb::DistanceType;
use napi::bindgen_prelude::Buffer;
pub fn parse_distance_type(distance_type: impl AsRef<str>) -> napi::Result<DistanceType> {
match distance_type.as_ref().to_lowercase().as_str() {
@@ -15,3 +18,15 @@ pub fn parse_distance_type(distance_type: impl AsRef<str>) -> napi::Result<Dista
))),
}
}
/// Convert an Arrow Schema to an Arrow IPC file buffer
pub fn schema_to_buffer(schema: &Schema) -> napi::Result<Buffer> {
let mut writer = FileWriter::try_new(vec![], schema)
.map_err(|e| napi::Error::from_reason(format!("Failed to create IPC file: {}", e)))?;
writer
.finish()
.map_err(|e| napi::Error::from_reason(format!("Failed to finish IPC file: {}", e)))?;
Ok(Buffer::from(writer.into_inner().map_err(|e| {
napi::Error::from_reason(format!("Failed to get IPC file: {}", e))
})?))
}