mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-30 10:20:40 +00:00
feat(napi): Issue queries as node SDK (#868)
* Query as a fluent API and `AsyncIterator<RecordBatch>` * Much more docs * Add tests for auto infer vector search columns with different dimensions.
This commit is contained in:
@@ -16,10 +16,10 @@
|
||||
|
||||
use std::io::Cursor;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_ipc::reader::StreamReader;
|
||||
use arrow_array::{RecordBatch, RecordBatchReader};
|
||||
use arrow_ipc::{reader::StreamReader, writer::FileWriter};
|
||||
|
||||
use crate::Result;
|
||||
use crate::{Error, Result};
|
||||
|
||||
/// Convert a Arrow IPC file to a batch reader
|
||||
pub fn ipc_file_to_batches(buf: Vec<u8>) -> Result<impl RecordBatchReader> {
|
||||
@@ -28,6 +28,22 @@ pub fn ipc_file_to_batches(buf: Vec<u8>) -> Result<impl RecordBatchReader> {
|
||||
Ok(reader)
|
||||
}
|
||||
|
||||
/// Convert record batches to Arrow IPC file
|
||||
pub fn batches_to_ipc_file(batches: &[RecordBatch]) -> Result<Vec<u8>> {
|
||||
if batches.is_empty() {
|
||||
return Err(Error::Store {
|
||||
message: "No batches to write".to_string(),
|
||||
});
|
||||
}
|
||||
let schema = batches[0].schema();
|
||||
let mut writer = FileWriter::try_new(vec![], &schema)?;
|
||||
for batch in batches {
|
||||
writer.write(batch)?;
|
||||
}
|
||||
writer.finish()?;
|
||||
Ok(writer.into_inner()?)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ use lance_linalg::distance::MetricType;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::utils::default_vector_column;
|
||||
use crate::Error;
|
||||
|
||||
const DEFAULT_TOP_K: usize = 10;
|
||||
|
||||
@@ -93,6 +94,19 @@ impl Query {
|
||||
let arrow_schema = Schema::from(self.dataset.schema());
|
||||
default_vector_column(&arrow_schema, Some(query.len() as i32))?
|
||||
};
|
||||
let field = self.dataset.schema().field(&column).ok_or(Error::Store {
|
||||
message: format!("Column {} not found in dataset schema", column),
|
||||
})?;
|
||||
if !matches!(field.data_type(), arrow_schema::DataType::FixedSizeList(f, dim) if f.data_type().is_floating() && dim == query.len() as i32)
|
||||
{
|
||||
return Err(Error::Store {
|
||||
message: format!(
|
||||
"Vector column '{}' does not match the dimension of the query vector: dim={}",
|
||||
column,
|
||||
query.len(),
|
||||
),
|
||||
});
|
||||
}
|
||||
scanner.nearest(&column, query, self.limit.unwrap_or(DEFAULT_TOP_K))?;
|
||||
} else {
|
||||
// If there is no vector query, it's ok to not have a limit
|
||||
|
||||
Reference in New Issue
Block a user