feat: port create_table to the async python API and the remote rust API (#1031)

I've also started `ASYNC_MIGRATION.MD` to keep track of the breaking
changes from sync to async python.
This commit is contained in:
Weston Pace
2024-02-29 13:29:29 -08:00
parent accf31fa92
commit 4299f719ec
29 changed files with 1406 additions and 53 deletions

View File

@@ -0,0 +1,34 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import * as os from "os";
import * as path from "path";
import * as fs from "fs";
import { connect } from "../dist/index.js";
describe("when working with a connection", () => {
  // Every test gets its own scratch directory. Creating it in beforeEach
  // (rather than once while the suite is being collected) keeps tests
  // isolated from one another, and removing it in afterEach avoids leaving
  // a stray database directory in the OS temp folder after each run.
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "test-connection"));
  });

  afterEach(() => {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  it("should fail if creating table twice, unless overwrite is true", async () => {
    const db = await connect(tmpDir);

    // First creation succeeds and holds both rows.
    let tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
    await expect(tbl.countRows()).resolves.toBe(2);

    // A second creation with the default mode must be rejected.
    await expect(db.createTable("test", [{ id: 1 }, { id: 2 }])).rejects.toThrow();

    // With mode "overwrite" the table is replaced by the new single-row data.
    tbl = await db.createTable("test", [{ id: 3 }], { mode: "overwrite" });
    await expect(tbl.countRows()).resolves.toBe(1);
  });
});

View File

@@ -201,17 +201,17 @@ describe("Read consistency interval", () => {
await table.add([{ id: 2 }]);
if (interval === undefined) {
expect(await table2.countRows()).toEqual(1n);
expect(await table2.countRows()).toEqual(1);
// TODO: once we implement time travel we can uncomment this part of the test.
// await table2.checkout_latest();
// expect(await table2.countRows()).toEqual(2);
} else if (interval === 0) {
expect(await table2.countRows()).toEqual(2n);
expect(await table2.countRows()).toEqual(2);
} else {
// interval == 0.1
expect(await table2.countRows()).toEqual(1n);
expect(await table2.countRows()).toEqual(1);
await new Promise(r => setTimeout(r, 100));
expect(await table2.countRows()).toEqual(2n);
expect(await table2.countRows()).toEqual(2);
}
});
});

View File

@@ -0,0 +1,15 @@
{
"extends": "../tsconfig.json",
"compilerOptions": {
"outDir": "./dist/spec",
"module": "commonjs",
"target": "es2022",
"types": [
"jest",
"node"
]
},
"include": [
"**/*"
]
}

View File

@@ -17,6 +17,24 @@ import { Connection as _NativeConnection } from "./native";
import { Table } from "./table";
import { Table as ArrowTable } from "apache-arrow";
/** Options that control how `createTable` behaves when the table already exists. */
export interface CreateTableOptions {
/**
* The mode to use when creating the table.
*
* If this is set to "create" and the table already exists then either:
*  - an error will be thrown (existOk is false), or
*  - nothing will happen and any provided data will be ignored
*    (existOk is true).
*
* If this is set to "overwrite" then any existing table will be replaced.
*/
mode: "create" | "overwrite";
/**
* If this is true and the table already exists and the mode is "create"
* then no error will be raised.
*
* This flag is only consulted when the mode is "create".
*/
existOk: boolean;
}
/**
* A LanceDB Connection that allows you to open tables and create new ones.
*
@@ -53,10 +71,18 @@ export class Connection {
*/
async createTable(
name: string,
data: Record<string, unknown>[] | ArrowTable
data: Record<string, unknown>[] | ArrowTable,
options?: Partial<CreateTableOptions>
): Promise<Table> {
// Fall back to "create" / false for any option the caller left unset.
let mode: string = options?.mode ?? "create";
const existOk = options?.existOk ?? false;
// The native createTable takes a single mode string, so "create" combined
// with existOk is passed down as the separate "exist_ok" mode.
if (mode === "create" && existOk) {
mode = "exist_ok";
}
// Convert the rows / Arrow table into the buffer handed to the native layer.
const buf = toBuffer(data);
const innerTable = await this.inner.createTable(name, buf);
const innerTable = await this.inner.createTable(name, buf, mode);
// Wrap the native table handle in the public Table class.
return new Table(innerTable);
}

View File

@@ -85,7 +85,7 @@ export class Connection {
* - buf: The buffer containing the IPC file.
*
*/
createTable(name: string, buf: Buffer): Promise<Table>
createTable(name: string, buf: Buffer, mode: string): Promise<Table>
openTable(name: string): Promise<Table>
/** Drop table with the name. Or raise an error if the table does not exist. */
dropTable(name: string): Promise<void>
@@ -117,7 +117,7 @@ export class Table {
/** Return Schema as empty Arrow IPC file. */
schema(): Promise<Buffer>
add(buf: Buffer): Promise<void>
countRows(filter?: string | undefined | null): Promise<bigint>
countRows(filter?: string | undefined | null): Promise<number>
delete(predicate: string): Promise<void>
createIndex(): IndexBuilder
query(): Query

View File

@@ -50,7 +50,7 @@ export class Table {
}
/**
* Count the total number of rows in the dataset.
*
* @param filter - Optional filter string, forwarded unchanged to the native
*   table's countRows. NOTE(review): presumably a predicate restricting
*   which rows are counted - confirm against the native implementation.
* @returns A promise resolving to the count reported by the native table.
*/
async countRows(filter?: string): Promise<bigint> {
async countRows(filter?: string): Promise<number> {
return await this.inner.countRows(filter);
}

View File

@@ -51,8 +51,7 @@
"docs": "typedoc --plugin typedoc-plugin-markdown lancedb/index.ts",
"lint": "eslint lancedb --ext .js,.ts",
"prepublishOnly": "napi prepublish -t npm",
"//": "maxWorkers=1 is workaround for bigint issue in jest: https://github.com/jestjs/jest/issues/11617#issuecomment-1068732414",
"test": "npm run build && jest --maxWorkers=1",
"test": "npm run build && jest --verbose",
"universal": "napi universal",
"version": "napi version"
},

View File

@@ -17,7 +17,7 @@ use napi_derive::*;
use crate::table::Table;
use crate::ConnectionOptions;
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection, CreateTableMode};
use lancedb::ipc::ipc_file_to_batches;
#[napi]
@@ -25,6 +25,17 @@ pub struct Connection {
conn: LanceDBConnection,
}
impl Connection {
fn parse_create_mode_str(mode: &str) -> napi::Result<CreateTableMode> {
match mode {
"create" => Ok(CreateTableMode::Create),
"overwrite" => Ok(CreateTableMode::Overwrite),
"exist_ok" => Ok(CreateTableMode::exist_ok(|builder| builder)),
_ => Err(napi::Error::from_reason(format!("Invalid mode {}", mode))),
}
}
}
#[napi]
impl Connection {
/// Create a new Connection instance from the given URI.
@@ -65,12 +76,19 @@ impl Connection {
/// - buf: The buffer containing the IPC file.
///
#[napi]
pub async fn create_table(&self, name: String, buf: Buffer) -> napi::Result<Table> {
pub async fn create_table(
&self,
name: String,
buf: Buffer,
mode: String,
) -> napi::Result<Table> {
let batches = ipc_file_to_batches(buf.to_vec())
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
let mode = Self::parse_create_mode_str(&mode)?;
let tbl = self
.conn
.create_table(&name, Box::new(batches))
.mode(mode)
.execute()
.await
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?;

View File

@@ -68,13 +68,17 @@ impl Table {
}
#[napi]
pub async fn count_rows(&self, filter: Option<String>) -> napi::Result<usize> {
self.table.count_rows(filter).await.map_err(|e| {
napi::Error::from_reason(format!(
"Failed to count rows in table {}: {}",
self.table, e
))
})
pub async fn count_rows(&self, filter: Option<String>) -> napi::Result<i64> {
self.table
.count_rows(filter)
.await
.map(|val| val as i64)
.map_err(|e| {
napi::Error::from_reason(format!(
"Failed to count rows in table {}: {}",
self.table, e
))
})
}
#[napi]