feat(napi): Issue queries as node SDK (#868)

* Query as a fluent API and `AsyncIterator<RecordBatch>`
* Much more docs
* Add tests for auto-inferring vector search columns with different
dimensions.
This commit is contained in:
Lei Xu
2024-01-25 22:14:14 -08:00
committed by GitHub
parent 9a07c9aad8
commit a6cf24b359
12 changed files with 370 additions and 56 deletions

View File

@@ -91,7 +91,6 @@ impl IndexBuilder {
#[napi]
pub async fn build(&self) -> napi::Result<()> {
println!("nodejs::index.rs : build");
self.inner
.build()
.await

47
nodejs/src/iterator.rs Normal file
View File

@@ -0,0 +1,47 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use futures::StreamExt;
use lance::io::RecordBatchStream;
use napi::bindgen_prelude::*;
use napi_derive::napi;
use vectordb::ipc::batches_to_ipc_file;
/**
 * TypeScript-style async iterator over record batches.
 *
 * Wraps a boxed `RecordBatchStream`; each call to `next` pulls one item from
 * that stream and hands it back as a `Buffer` produced by
 * `batches_to_ipc_file`, or yields nothing once the stream is exhausted.
 */
#[napi]
pub struct RecordBatchIterator {
// The underlying stream; `Unpin` so it can be polled through `&mut self`.
inner: Box<dyn RecordBatchStream + Unpin>,
}
#[napi]
impl RecordBatchIterator {
pub(crate) fn new(inner: Box<dyn RecordBatchStream + Unpin>) -> Self {
Self { inner }
}
#[napi]
pub async unsafe fn next(&mut self) -> napi::Result<Option<Buffer>> {
if let Some(rst) = self.inner.next().await {
let batch = rst.map_err(|e| {
napi::Error::from_reason(format!("Failed to get next batch from stream: {}", e))
})?;
batches_to_ipc_file(&[batch])
.map_err(|e| napi::Error::from_reason(format!("Failed to write IPC file: {}", e)))
.map(|buf| Some(Buffer::from(buf)))
} else {
// We are done with the stream.
Ok(None)
}
}
}

View File

@@ -17,6 +17,7 @@ use napi_derive::*;
mod connection;
mod index;
mod iterator;
mod query;
mod table;

View File

@@ -16,7 +16,7 @@ use napi::bindgen_prelude::*;
use napi_derive::napi;
use vectordb::query::Query as LanceDBQuery;
use crate::table::Table;
use crate::{iterator::RecordBatchIterator, table::Table};
#[napi]
pub struct Query {
@@ -32,17 +32,50 @@ impl Query {
}
#[napi]
pub fn vector(&mut self, vector: Float32Array) {
let inn = self.inner.clone().nearest_to(&vector);
self.inner = inn;
pub fn column(&mut self, column: String) {
self.inner = self.inner.clone().column(&column);
}
#[napi]
pub fn to_arrow(&self) -> napi::Result<()> {
// let buf = self.inner.to_arrow().map_err(|e| {
// napi::Error::from_reason(format!("Failed to convert query to arrow: {}", e))
// })?;
// Ok(buf)
todo!()
pub fn filter(&mut self, filter: String) {
self.inner = self.inner.clone().filter(filter);
}
/// Replaces the wrapped query with a clone of itself on which `select` has
/// been called with `columns`.
///
/// NOTE(review): presumably restricts the columns returned by the query —
/// confirm against the underlying `select` builder.
#[napi]
pub fn select(&mut self, columns: Vec<String>) {
    let updated = self.inner.clone().select(&columns);
    self.inner = updated;
}
/// Replaces the wrapped query with a clone of itself on which `limit` has
/// been called with `limit` widened to `usize`.
#[napi]
pub fn limit(&mut self, limit: u32) {
    let max_rows = limit as usize;
    self.inner = self.inner.clone().limit(max_rows);
}
/// Replaces the wrapped query with a clone of itself on which `prefilter`
/// has been called with the given flag.
///
/// NOTE(review): presumably controls whether the filter is applied before
/// the vector search — confirm against the underlying `prefilter` builder.
#[napi]
pub fn prefilter(&mut self, prefilter: bool) {
    let updated = self.inner.clone().prefilter(prefilter);
    self.inner = updated;
}
/// Replaces the wrapped query with a clone of itself on which `nearest_to`
/// has been called with a borrow of `vector`.
#[napi]
pub fn nearest_to(&mut self, vector: Float32Array) {
    let updated = self.inner.clone().nearest_to(&vector);
    self.inner = updated;
}
/// Replaces the wrapped query with a clone of itself on which
/// `refine_factor` has been called with the given value.
#[napi]
pub fn refine_factor(&mut self, refine_factor: u32) {
    let updated = self.inner.clone().refine_factor(refine_factor);
    self.inner = updated;
}
/// Replaces the wrapped query with a clone of itself on which `nprobes` has
/// been called with `nprobe` widened to `usize`.
#[napi]
pub fn nprobes(&mut self, nprobe: u32) {
    let probe_count = nprobe as usize;
    self.inner = self.inner.clone().nprobes(probe_count);
}
#[napi]
pub async fn execute_stream(&self) -> napi::Result<RecordBatchIterator> {
let inner_stream = self.inner.execute_stream().await.map_err(|e| {
napi::Error::from_reason(format!("Failed to execute query stream: {}", e))
})?;
Ok(RecordBatchIterator::new(Box::new(inner_stream)))
}
}