mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-05 19:32:56 +00:00
feat: add support for `add` to the async Python API (#1037)
In order to add support for `add`, we needed to migrate the Rust `Table`
trait to a `Table` struct and a `TableInternal` trait (similar to the way
the connection is designed).
While doing this we also cleaned up some inconsistencies between the
SDKs:
* Python and Node are garbage-collected languages, and it can be
difficult to trigger something to be freed. The convention for these
languages is to have some kind of close method. I added a close method
to both the table and the connection, which will drop the underlying Rust
object.
* We made significant improvements to table creation in
cc5f2136a6
for the `node` SDK. I copied these changes to the `nodejs` SDK.
* The nodejs tables were using fs to create tmp directories and these
were not getting cleaned up. This is mostly harmless but annoying, so
I changed it up a bit to ensure we clean up tmp directories.
* ~~countRows in the node SDK was returning `bigint`. I changed it to
return `number`~~ (this actually happened in a previous PR)
* Tables and connections now implement `std::fmt::Display` which is
hooked into python's `__repr__`. Node has no concept of a regular "to
string" function and so I added a `display` method.
* Python method signatures are changing so that optional parameters are
always `Optional[foo] = None` instead of something like `foo = False`.
This is because we want those defaults to be in rust whenever possible
(though we still need to mention the default in documentation).
* I changed the python `AsyncConnection/AsyncTable` classes from
abstract classes with a single implementation to just classes because we
no longer have the remote implementation in python.
Note: this does NOT add the `add` function to the remote table. This PR
was already large enough, and the remote implementation is unique
enough, that I am going to do all the remote stuff at a later date (we
should have the structure in place and correct, so there shouldn't be any
refactoring concerns).
---------
Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
@@ -18,11 +18,23 @@ use napi_derive::*;
|
||||
use crate::table::Table;
|
||||
use crate::ConnectionOptions;
|
||||
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection, CreateTableMode};
|
||||
use lancedb::ipc::ipc_file_to_batches;
|
||||
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||
|
||||
#[napi]
|
||||
pub struct Connection {
|
||||
conn: LanceDBConnection,
|
||||
inner: Option<LanceDBConnection>,
|
||||
}
|
||||
|
||||
impl Connection {
|
||||
pub(crate) fn inner_new(inner: LanceDBConnection) -> Self {
|
||||
Self { inner: Some(inner) }
|
||||
}
|
||||
|
||||
fn get_inner(&self) -> napi::Result<&LanceDBConnection> {
|
||||
self.inner
|
||||
.as_ref()
|
||||
.ok_or_else(|| napi::Error::from_reason("Connection is closed"))
|
||||
}
|
||||
}
|
||||
|
||||
impl Connection {
|
||||
@@ -40,8 +52,8 @@ impl Connection {
|
||||
impl Connection {
|
||||
/// Create a new Connection instance from the given URI.
|
||||
#[napi(factory)]
|
||||
pub async fn new(options: ConnectionOptions) -> napi::Result<Self> {
|
||||
let mut builder = ConnectBuilder::new(&options.uri);
|
||||
pub async fn new(uri: String, options: ConnectionOptions) -> napi::Result<Self> {
|
||||
let mut builder = ConnectBuilder::new(&uri);
|
||||
if let Some(api_key) = options.api_key {
|
||||
builder = builder.api_key(&api_key);
|
||||
}
|
||||
@@ -52,18 +64,33 @@ impl Connection {
|
||||
builder =
|
||||
builder.read_consistency_interval(std::time::Duration::from_secs_f64(interval));
|
||||
}
|
||||
Ok(Self {
|
||||
conn: builder
|
||||
Ok(Self::inner_new(
|
||||
builder
|
||||
.execute()
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?,
|
||||
})
|
||||
))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn display(&self) -> napi::Result<String> {
|
||||
Ok(self.get_inner()?.to_string())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn is_open(&self) -> bool {
|
||||
self.inner.is_some()
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn close(&mut self) {
|
||||
self.inner.take();
|
||||
}
|
||||
|
||||
/// List all tables in the dataset.
|
||||
#[napi]
|
||||
pub async fn table_names(&self) -> napi::Result<Vec<String>> {
|
||||
self.conn
|
||||
self.get_inner()?
|
||||
.table_names()
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))
|
||||
@@ -86,7 +113,7 @@ impl Connection {
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||
let mode = Self::parse_create_mode_str(&mode)?;
|
||||
let tbl = self
|
||||
.conn
|
||||
.get_inner()?
|
||||
.create_table(&name, Box::new(batches))
|
||||
.mode(mode)
|
||||
.execute()
|
||||
@@ -95,10 +122,31 @@ impl Connection {
|
||||
Ok(Table::new(tbl))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn create_empty_table(
|
||||
&self,
|
||||
name: String,
|
||||
schema_buf: Buffer,
|
||||
mode: String,
|
||||
) -> napi::Result<Table> {
|
||||
let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
|
||||
napi::Error::from_reason(format!("Failed to marshal schema from JS to Rust: {}", e))
|
||||
})?;
|
||||
let mode = Self::parse_create_mode_str(&mode)?;
|
||||
let tbl = self
|
||||
.get_inner()?
|
||||
.create_empty_table(&name, schema)
|
||||
.mode(mode)
|
||||
.execute()
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?;
|
||||
Ok(Table::new(tbl))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn open_table(&self, name: String) -> napi::Result<Table> {
|
||||
let tbl = self
|
||||
.conn
|
||||
.get_inner()?
|
||||
.open_table(&name)
|
||||
.execute()
|
||||
.await
|
||||
@@ -109,7 +157,7 @@ impl Connection {
|
||||
/// Drop table with the name. Or raise an error if the table does not exist.
|
||||
#[napi]
|
||||
pub async fn drop_table(&self, name: String) -> napi::Result<()> {
|
||||
self.conn
|
||||
self.get_inner()?
|
||||
.drop_table(&name)
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))
|
||||
|
||||
@@ -12,7 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Mutex;
|
||||
|
||||
use lance_linalg::distance::MetricType as LanceMetricType;
|
||||
use lancedb::index::IndexBuilder as LanceDbIndexBuilder;
|
||||
use lancedb::Table as LanceDbTable;
|
||||
use napi_derive::napi;
|
||||
|
||||
#[napi]
|
||||
@@ -40,58 +44,93 @@ impl From<MetricType> for LanceMetricType {
|
||||
|
||||
#[napi]
|
||||
pub struct IndexBuilder {
|
||||
inner: lancedb::index::IndexBuilder,
|
||||
inner: Mutex<Option<LanceDbIndexBuilder>>,
|
||||
}
|
||||
|
||||
impl IndexBuilder {
|
||||
fn modify(
|
||||
&self,
|
||||
mod_fn: impl Fn(LanceDbIndexBuilder) -> LanceDbIndexBuilder,
|
||||
) -> napi::Result<()> {
|
||||
let mut inner = self.inner.lock().unwrap();
|
||||
let inner_builder = inner.take().ok_or_else(|| {
|
||||
napi::Error::from_reason("IndexBuilder has already been consumed".to_string())
|
||||
})?;
|
||||
let inner_builder = mod_fn(inner_builder);
|
||||
inner.replace(inner_builder);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl IndexBuilder {
|
||||
pub fn new(tbl: &dyn lancedb::Table) -> Self {
|
||||
pub fn new(tbl: &LanceDbTable) -> Self {
|
||||
let inner = tbl.create_index(&[]);
|
||||
Self { inner }
|
||||
Self {
|
||||
inner: Mutex::new(Some(inner)),
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn replace(&mut self, v: bool) {
|
||||
self.inner.replace(v);
|
||||
pub fn replace(&self, v: bool) -> napi::Result<()> {
|
||||
self.modify(|b| b.replace(v))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn column(&mut self, c: String) {
|
||||
self.inner.columns(&[c.as_str()]);
|
||||
pub fn column(&self, c: String) -> napi::Result<()> {
|
||||
self.modify(|b| b.columns(&[c.as_str()]))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn name(&mut self, name: String) {
|
||||
self.inner.name(name.as_str());
|
||||
pub fn name(&self, name: String) -> napi::Result<()> {
|
||||
self.modify(|b| b.name(name.as_str()))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn ivf_pq(
|
||||
&mut self,
|
||||
pub fn ivf_pq(
|
||||
&self,
|
||||
metric_type: Option<MetricType>,
|
||||
num_partitions: Option<u32>,
|
||||
num_sub_vectors: Option<u32>,
|
||||
num_bits: Option<u32>,
|
||||
max_iterations: Option<u32>,
|
||||
sample_rate: Option<u32>,
|
||||
) {
|
||||
self.inner.ivf_pq();
|
||||
metric_type.map(|m| self.inner.metric_type(m.into()));
|
||||
num_partitions.map(|p| self.inner.num_partitions(p));
|
||||
num_sub_vectors.map(|s| self.inner.num_sub_vectors(s));
|
||||
num_bits.map(|b| self.inner.num_bits(b));
|
||||
max_iterations.map(|i| self.inner.max_iterations(i));
|
||||
sample_rate.map(|s| self.inner.sample_rate(s));
|
||||
) -> napi::Result<()> {
|
||||
self.modify(|b| {
|
||||
let mut b = b.ivf_pq();
|
||||
if let Some(metric_type) = metric_type {
|
||||
b = b.metric_type(metric_type.into());
|
||||
}
|
||||
if let Some(num_partitions) = num_partitions {
|
||||
b = b.num_partitions(num_partitions);
|
||||
}
|
||||
if let Some(num_sub_vectors) = num_sub_vectors {
|
||||
b = b.num_sub_vectors(num_sub_vectors);
|
||||
}
|
||||
if let Some(num_bits) = num_bits {
|
||||
b = b.num_bits(num_bits);
|
||||
}
|
||||
if let Some(max_iterations) = max_iterations {
|
||||
b = b.max_iterations(max_iterations);
|
||||
}
|
||||
if let Some(sample_rate) = sample_rate {
|
||||
b = b.sample_rate(sample_rate);
|
||||
}
|
||||
b
|
||||
})
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn scalar(&mut self) {
|
||||
self.inner.scalar();
|
||||
pub fn scalar(&self) -> napi::Result<()> {
|
||||
self.modify(|b| b.scalar())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn build(&self) -> napi::Result<()> {
|
||||
self.inner
|
||||
let inner = self.inner.lock().unwrap().take().ok_or_else(|| {
|
||||
napi::Error::from_reason("IndexBuilder has already been consumed".to_string())
|
||||
})?;
|
||||
inner
|
||||
.build()
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to build index: {}", e)))?;
|
||||
|
||||
@@ -24,7 +24,6 @@ mod table;
|
||||
#[napi(object)]
|
||||
#[derive(Debug)]
|
||||
pub struct ConnectionOptions {
|
||||
pub uri: String,
|
||||
pub api_key: Option<String>,
|
||||
pub host_override: Option<String>,
|
||||
/// (For LanceDB OSS only): The interval, in seconds, at which to check for
|
||||
@@ -54,6 +53,6 @@ pub struct WriteOptions {
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn connect(options: ConnectionOptions) -> napi::Result<Connection> {
|
||||
Connection::new(options).await
|
||||
pub async fn connect(uri: String, options: ConnectionOptions) -> napi::Result<Connection> {
|
||||
Connection::new(uri, options).await
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use lancedb::query::Query as LanceDBQuery;
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
|
||||
use crate::{iterator::RecordBatchIterator, table::Table};
|
||||
use crate::iterator::RecordBatchIterator;
|
||||
|
||||
#[napi]
|
||||
pub struct Query {
|
||||
@@ -25,10 +25,8 @@ pub struct Query {
|
||||
|
||||
#[napi]
|
||||
impl Query {
|
||||
pub fn new(table: &Table) -> Self {
|
||||
Self {
|
||||
inner: table.table.query(),
|
||||
}
|
||||
pub fn new(query: LanceDBQuery) -> Self {
|
||||
Self { inner: query }
|
||||
}
|
||||
|
||||
#[napi]
|
||||
|
||||
@@ -14,10 +14,8 @@
|
||||
|
||||
use arrow_ipc::writer::FileWriter;
|
||||
use lance::dataset::ColumnAlteration as LanceColumnAlteration;
|
||||
use lancedb::{
|
||||
ipc::ipc_file_to_batches,
|
||||
table::{AddDataOptions, TableRef},
|
||||
};
|
||||
use lancedb::ipc::ipc_file_to_batches;
|
||||
use lancedb::table::{AddDataMode, Table as LanceDbTable};
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
|
||||
@@ -26,20 +24,52 @@ use crate::query::Query;
|
||||
|
||||
#[napi]
|
||||
pub struct Table {
|
||||
pub(crate) table: TableRef,
|
||||
// We keep a duplicate of the table name so we can use it for error
|
||||
// messages even if the table has been closed
|
||||
name: String,
|
||||
pub(crate) inner: Option<LanceDbTable>,
|
||||
}
|
||||
|
||||
impl Table {
|
||||
fn inner_ref(&self) -> napi::Result<&LanceDbTable> {
|
||||
self.inner
|
||||
.as_ref()
|
||||
.ok_or_else(|| napi::Error::from_reason(format!("Table {} is closed", self.name)))
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl Table {
|
||||
pub(crate) fn new(table: TableRef) -> Self {
|
||||
Self { table }
|
||||
pub(crate) fn new(table: LanceDbTable) -> Self {
|
||||
Self {
|
||||
name: table.name().to_string(),
|
||||
inner: Some(table),
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn display(&self) -> String {
|
||||
match &self.inner {
|
||||
None => format!("ClosedTable({})", self.name),
|
||||
Some(inner) => inner.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn is_open(&self) -> bool {
|
||||
self.inner.is_some()
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn close(&mut self) {
|
||||
self.inner.take();
|
||||
}
|
||||
|
||||
/// Return Schema as empty Arrow IPC file.
|
||||
#[napi]
|
||||
pub async fn schema(&self) -> napi::Result<Buffer> {
|
||||
let schema =
|
||||
self.table.schema().await.map_err(|e| {
|
||||
self.inner_ref()?.schema().await.map_err(|e| {
|
||||
napi::Error::from_reason(format!("Failed to create IPC file: {}", e))
|
||||
})?;
|
||||
let mut writer = FileWriter::try_new(vec![], &schema)
|
||||
@@ -53,52 +83,59 @@ impl Table {
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn add(&self, buf: Buffer) -> napi::Result<()> {
|
||||
pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<()> {
|
||||
let batches = ipc_file_to_batches(buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||
self.table
|
||||
.add(Box::new(batches), AddDataOptions::default())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to add batches to table {}: {}",
|
||||
self.table, e
|
||||
))
|
||||
})
|
||||
let mut op = self.inner_ref()?.add(Box::new(batches));
|
||||
|
||||
op = if mode == "append" {
|
||||
op.mode(AddDataMode::Append)
|
||||
} else if mode == "overwrite" {
|
||||
op.mode(AddDataMode::Overwrite)
|
||||
} else {
|
||||
return Err(napi::Error::from_reason(format!("Invalid mode: {}", mode)));
|
||||
};
|
||||
|
||||
op.execute().await.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to add batches to table {}: {}",
|
||||
self.name, e
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn count_rows(&self, filter: Option<String>) -> napi::Result<i64> {
|
||||
self.table
|
||||
self.inner_ref()?
|
||||
.count_rows(filter)
|
||||
.await
|
||||
.map(|val| val as i64)
|
||||
.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to count rows in table {}: {}",
|
||||
self.table, e
|
||||
self.name, e
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn delete(&self, predicate: String) -> napi::Result<()> {
|
||||
self.table.delete(&predicate).await.map_err(|e| {
|
||||
self.inner_ref()?.delete(&predicate).await.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to delete rows in table {}: predicate={}",
|
||||
self.table, e
|
||||
self.name, e
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn create_index(&self) -> IndexBuilder {
|
||||
IndexBuilder::new(self.table.as_ref())
|
||||
pub fn create_index(&self) -> napi::Result<IndexBuilder> {
|
||||
Ok(IndexBuilder::new(self.inner_ref()?))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn query(&self) -> Query {
|
||||
Query::new(self)
|
||||
pub fn query(&self) -> napi::Result<Query> {
|
||||
Ok(Query::new(self.inner_ref()?.query()))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
@@ -108,13 +145,13 @@ impl Table {
|
||||
.map(|sql| (sql.name, sql.value_sql))
|
||||
.collect::<Vec<_>>();
|
||||
let transforms = lance::dataset::NewColumnTransform::SqlExpressions(transforms);
|
||||
self.table
|
||||
self.inner_ref()?
|
||||
.add_columns(transforms, None)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to add columns to table {}: {}",
|
||||
self.table, err
|
||||
self.name, err
|
||||
))
|
||||
})?;
|
||||
Ok(())
|
||||
@@ -134,13 +171,13 @@ impl Table {
|
||||
.map(LanceColumnAlteration::from)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
self.table
|
||||
self.inner_ref()?
|
||||
.alter_columns(&alterations)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to alter columns in table {}: {}",
|
||||
self.table, err
|
||||
self.name, err
|
||||
))
|
||||
})?;
|
||||
Ok(())
|
||||
@@ -149,12 +186,15 @@ impl Table {
|
||||
#[napi]
|
||||
pub async fn drop_columns(&self, columns: Vec<String>) -> napi::Result<()> {
|
||||
let col_refs = columns.iter().map(String::as_str).collect::<Vec<_>>();
|
||||
self.table.drop_columns(&col_refs).await.map_err(|err| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to drop columns from table {}: {}",
|
||||
self.table, err
|
||||
))
|
||||
})?;
|
||||
self.inner_ref()?
|
||||
.drop_columns(&col_refs)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to drop columns from table {}: {}",
|
||||
self.name, err
|
||||
))
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user