mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 13:29:57 +00:00
Compare commits
2 Commits
python-v0.
...
docs/quick
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e278fc5a6 | ||
|
|
09fed1f286 |
@@ -105,7 +105,8 @@ markdown_extensions:
|
||||
nav:
|
||||
- Home:
|
||||
- LanceDB: index.md
|
||||
- 🏃🏼♂️ Quick start: basic.md
|
||||
- 👉 Quickstart: quickstart.md
|
||||
- 🏃🏼♂️ Basic Usage: basic.md
|
||||
- 📚 Concepts:
|
||||
- Vector search: concepts/vector_search.md
|
||||
- Indexing:
|
||||
@@ -237,7 +238,9 @@ nav:
|
||||
- 👾 JavaScript (lancedb): js/globals.md
|
||||
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
|
||||
|
||||
- Quick start: basic.md
|
||||
- Getting Started:
|
||||
- Quickstart: quickstart.md
|
||||
- Basic Usage: basic.md
|
||||
- Concepts:
|
||||
- Vector search: concepts/vector_search.md
|
||||
- Indexing:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Quick start
|
||||
# Basic Usage
|
||||
|
||||
!!! info "LanceDB can be run in a number of ways:"
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ Construct a MergeInsertBuilder. __Internal use only.__
|
||||
### execute()
|
||||
|
||||
```ts
|
||||
execute(data): Promise<MergeResult>
|
||||
execute(data): Promise<MergeStats>
|
||||
```
|
||||
|
||||
Executes the merge insert operation
|
||||
@@ -44,9 +44,9 @@ Executes the merge insert operation
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`MergeResult`](../interfaces/MergeResult.md)>
|
||||
`Promise`<[`MergeStats`](../interfaces/MergeStats.md)>
|
||||
|
||||
the merge result
|
||||
Statistics about the merge operation: counts of inserted, updated, and deleted rows
|
||||
|
||||
***
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ Returns the name of the table
|
||||
### add()
|
||||
|
||||
```ts
|
||||
abstract add(data, options?): Promise<AddResult>
|
||||
abstract add(data, options?): Promise<void>
|
||||
```
|
||||
|
||||
Insert records into this Table.
|
||||
@@ -54,17 +54,14 @@ Insert records into this Table.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`AddResult`](../interfaces/AddResult.md)>
|
||||
|
||||
A promise that resolves to an object
|
||||
containing the new version number of the table
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### addColumns()
|
||||
|
||||
```ts
|
||||
abstract addColumns(newColumnTransforms): Promise<AddColumnsResult>
|
||||
abstract addColumns(newColumnTransforms): Promise<void>
|
||||
```
|
||||
|
||||
Add new columns with defined values.
|
||||
@@ -79,17 +76,14 @@ Add new columns with defined values.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`AddColumnsResult`](../interfaces/AddColumnsResult.md)>
|
||||
|
||||
A promise that resolves to an object
|
||||
containing the new version number of the table after adding the columns.
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### alterColumns()
|
||||
|
||||
```ts
|
||||
abstract alterColumns(columnAlterations): Promise<AlterColumnsResult>
|
||||
abstract alterColumns(columnAlterations): Promise<void>
|
||||
```
|
||||
|
||||
Alter the name or nullability of columns.
|
||||
@@ -102,10 +96,7 @@ Alter the name or nullability of columns.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`AlterColumnsResult`](../interfaces/AlterColumnsResult.md)>
|
||||
|
||||
A promise that resolves to an object
|
||||
containing the new version number of the table after altering the columns.
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
@@ -261,7 +252,7 @@ await table.createIndex("my_float_col");
|
||||
### delete()
|
||||
|
||||
```ts
|
||||
abstract delete(predicate): Promise<DeleteResult>
|
||||
abstract delete(predicate): Promise<void>
|
||||
```
|
||||
|
||||
Delete the rows that satisfy the predicate.
|
||||
@@ -272,10 +263,7 @@ Delete the rows that satisfy the predicate.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`DeleteResult`](../interfaces/DeleteResult.md)>
|
||||
|
||||
A promise that resolves to an object
|
||||
containing the new version number of the table
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
@@ -296,7 +284,7 @@ Return a brief description of the table
|
||||
### dropColumns()
|
||||
|
||||
```ts
|
||||
abstract dropColumns(columnNames): Promise<DropColumnsResult>
|
||||
abstract dropColumns(columnNames): Promise<void>
|
||||
```
|
||||
|
||||
Drop one or more columns from the dataset
|
||||
@@ -315,10 +303,7 @@ then call ``cleanup_files`` to remove the old files.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`DropColumnsResult`](../interfaces/DropColumnsResult.md)>
|
||||
|
||||
A promise that resolves to an object
|
||||
containing the new version number of the table after dropping the columns.
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
@@ -693,7 +678,7 @@ Return the table as an arrow table
|
||||
#### update(opts)
|
||||
|
||||
```ts
|
||||
abstract update(opts): Promise<UpdateResult>
|
||||
abstract update(opts): Promise<void>
|
||||
```
|
||||
|
||||
Update existing records in the Table
|
||||
@@ -704,10 +689,7 @@ Update existing records in the Table
|
||||
|
||||
##### Returns
|
||||
|
||||
`Promise`<[`UpdateResult`](../interfaces/UpdateResult.md)>
|
||||
|
||||
A promise that resolves to an object containing
|
||||
the number of rows updated and the new version number
|
||||
`Promise`<`void`>
|
||||
|
||||
##### Example
|
||||
|
||||
@@ -718,7 +700,7 @@ table.update({where:"x = 2", values:{"vector": [10, 10]}})
|
||||
#### update(opts)
|
||||
|
||||
```ts
|
||||
abstract update(opts): Promise<UpdateResult>
|
||||
abstract update(opts): Promise<void>
|
||||
```
|
||||
|
||||
Update existing records in the Table
|
||||
@@ -729,10 +711,7 @@ Update existing records in the Table
|
||||
|
||||
##### Returns
|
||||
|
||||
`Promise`<[`UpdateResult`](../interfaces/UpdateResult.md)>
|
||||
|
||||
A promise that resolves to an object containing
|
||||
the number of rows updated and the new version number
|
||||
`Promise`<`void`>
|
||||
|
||||
##### Example
|
||||
|
||||
@@ -743,7 +722,7 @@ table.update({where:"x = 2", valuesSql:{"x": "x + 1"}})
|
||||
#### update(updates, options)
|
||||
|
||||
```ts
|
||||
abstract update(updates, options?): Promise<UpdateResult>
|
||||
abstract update(updates, options?): Promise<void>
|
||||
```
|
||||
|
||||
Update existing records in the Table
|
||||
@@ -766,6 +745,10 @@ repeatedly calilng this method.
|
||||
* **updates**: `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
||||
the
|
||||
columns to update
|
||||
Keys in the map should specify the name of the column to update.
|
||||
Values in the map provide the new value of the column. These can
|
||||
be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
|
||||
based on the row being updated (e.g. "my_col + 1")
|
||||
|
||||
* **options?**: `Partial`<[`UpdateOptions`](../interfaces/UpdateOptions.md)>
|
||||
additional options to control
|
||||
@@ -773,15 +756,7 @@ repeatedly calilng this method.
|
||||
|
||||
##### Returns
|
||||
|
||||
`Promise`<[`UpdateResult`](../interfaces/UpdateResult.md)>
|
||||
|
||||
A promise that resolves to an object
|
||||
containing the number of rows updated and the new version number
|
||||
|
||||
Keys in the map should specify the name of the column to update.
|
||||
Values in the map provide the new value of the column. These can
|
||||
be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
|
||||
based on the row being updated (e.g. "my_col + 1")
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
|
||||
@@ -34,18 +34,13 @@
|
||||
|
||||
## Interfaces
|
||||
|
||||
- [AddColumnsResult](interfaces/AddColumnsResult.md)
|
||||
- [AddColumnsSql](interfaces/AddColumnsSql.md)
|
||||
- [AddDataOptions](interfaces/AddDataOptions.md)
|
||||
- [AddResult](interfaces/AddResult.md)
|
||||
- [AlterColumnsResult](interfaces/AlterColumnsResult.md)
|
||||
- [ClientConfig](interfaces/ClientConfig.md)
|
||||
- [ColumnAlteration](interfaces/ColumnAlteration.md)
|
||||
- [CompactionStats](interfaces/CompactionStats.md)
|
||||
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
||||
- [DeleteResult](interfaces/DeleteResult.md)
|
||||
- [DropColumnsResult](interfaces/DropColumnsResult.md)
|
||||
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
||||
- [FragmentStatistics](interfaces/FragmentStatistics.md)
|
||||
- [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
|
||||
@@ -59,7 +54,7 @@
|
||||
- [IndexStatistics](interfaces/IndexStatistics.md)
|
||||
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
|
||||
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
||||
- [MergeResult](interfaces/MergeResult.md)
|
||||
- [MergeStats](interfaces/MergeStats.md)
|
||||
- [OpenTableOptions](interfaces/OpenTableOptions.md)
|
||||
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
||||
- [OptimizeStats](interfaces/OptimizeStats.md)
|
||||
@@ -70,7 +65,6 @@
|
||||
- [TableStatistics](interfaces/TableStatistics.md)
|
||||
- [TimeoutConfig](interfaces/TimeoutConfig.md)
|
||||
- [UpdateOptions](interfaces/UpdateOptions.md)
|
||||
- [UpdateResult](interfaces/UpdateResult.md)
|
||||
- [Version](interfaces/Version.md)
|
||||
|
||||
## Type Aliases
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / AddColumnsResult
|
||||
|
||||
# Interface: AddColumnsResult
|
||||
|
||||
## Properties
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
version: number;
|
||||
```
|
||||
@@ -1,15 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / AddResult
|
||||
|
||||
# Interface: AddResult
|
||||
|
||||
## Properties
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
version: number;
|
||||
```
|
||||
@@ -1,15 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / AlterColumnsResult
|
||||
|
||||
# Interface: AlterColumnsResult
|
||||
|
||||
## Properties
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
version: number;
|
||||
```
|
||||
@@ -1,15 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DeleteResult
|
||||
|
||||
# Interface: DeleteResult
|
||||
|
||||
## Properties
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
version: number;
|
||||
```
|
||||
@@ -1,15 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DropColumnsResult
|
||||
|
||||
# Interface: DropColumnsResult
|
||||
|
||||
## Properties
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
version: number;
|
||||
```
|
||||
@@ -1,39 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / MergeResult
|
||||
|
||||
# Interface: MergeResult
|
||||
|
||||
## Properties
|
||||
|
||||
### numDeletedRows
|
||||
|
||||
```ts
|
||||
numDeletedRows: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### numInsertedRows
|
||||
|
||||
```ts
|
||||
numInsertedRows: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### numUpdatedRows
|
||||
|
||||
```ts
|
||||
numUpdatedRows: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
version: number;
|
||||
```
|
||||
31
docs/src/js/interfaces/MergeStats.md
Normal file
31
docs/src/js/interfaces/MergeStats.md
Normal file
@@ -0,0 +1,31 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / MergeStats
|
||||
|
||||
# Interface: MergeStats
|
||||
|
||||
## Properties
|
||||
|
||||
### numDeletedRows
|
||||
|
||||
```ts
|
||||
numDeletedRows: bigint;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### numInsertedRows
|
||||
|
||||
```ts
|
||||
numInsertedRows: bigint;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### numUpdatedRows
|
||||
|
||||
```ts
|
||||
numUpdatedRows: bigint;
|
||||
```
|
||||
@@ -1,23 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / UpdateResult
|
||||
|
||||
# Interface: UpdateResult
|
||||
|
||||
## Properties
|
||||
|
||||
### rowsUpdated
|
||||
|
||||
```ts
|
||||
rowsUpdated: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
version: number;
|
||||
```
|
||||
101
docs/src/quickstart.md
Normal file
101
docs/src/quickstart.md
Normal file
@@ -0,0 +1,101 @@
|
||||
|
||||
# Getting Started with LanceDB: A Minimal Vector Search Tutorial
|
||||
|
||||
Let's set up a LanceDB database, insert vector data, and perform a simple vector search. We'll use simple character classes like "knight" and "rogue" to illustrate semantic relevance.
|
||||
|
||||
## 1. Install Dependencies
|
||||
|
||||
Before starting, make sure you have the necessary packages:
|
||||
|
||||
```bash
|
||||
pip install lancedb pandas numpy
|
||||
```
|
||||
|
||||
## 2. Import Required Libraries
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
```
|
||||
|
||||
## 3. Connect to LanceDB
|
||||
|
||||
You can use a local directory to store your database:
|
||||
|
||||
```python
|
||||
db = lancedb.connect("./lancedb")
|
||||
```
|
||||
|
||||
## 4. Create Sample Data
|
||||
|
||||
Add sample text data and corresponding 4D vectors:
|
||||
|
||||
```python
|
||||
data = pd.DataFrame([
|
||||
{"id": "1", "vector": [1.0, 0.0, 0.0, 0.0], "text": "knight"},
|
||||
{"id": "2", "vector": [0.9, 0.1, 0.0, 0.0], "text": "warrior"},
|
||||
{"id": "3", "vector": [0.0, 1.0, 0.0, 0.0], "text": "rogue"},
|
||||
{"id": "4", "vector": [0.0, 0.9, 0.1, 0.0], "text": "thief"},
|
||||
{"id": "5", "vector": [0.5, 0.5, 0.0, 0.0], "text": "ranger"},
|
||||
])
|
||||
```
|
||||
|
||||
## 5. Create a Table in LanceDB
|
||||
|
||||
```python
|
||||
table = db.create_table("rpg_classes", data=data, mode="overwrite")
|
||||
```
|
||||
|
||||
Let's take a look at the data we just loaded into the table:
|
||||
```python
|
||||
print(data)
|
||||
```
|
||||
|
||||
| id | vector | text |
|
||||
|----|--------|------|
|
||||
| 1 | [1.0, 0.0, 0.0, 0.0] | knight |
|
||||
| 2 | [0.9, 0.1, 0.0, 0.0] | warrior |
|
||||
| 3 | [0.0, 1.0, 0.0, 0.0] | rogue |
|
||||
| 4 | [0.0, 0.9, 0.1, 0.0] | thief |
|
||||
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger |
|
||||
|
||||
|
||||
|
||||
## 6. Perform a Vector Search
|
||||
|
||||
Search for the most similar character classes to our query vector:
|
||||
|
||||
```python
|
||||
# Query as if we are searching for "rogue"
|
||||
results = table.search([0.0, 1.0, 0.0, 0.0]).limit(3).to_pandas()
|
||||
print(results)
|
||||
```
|
||||
|
||||
This will return the top 3 closest classes to the vector, effectively showing how LanceDB can be used for semantic search.
|
||||
|
||||
| id | vector | text | _distance |
|
||||
|------|------------------------|----------|-----------|
|
||||
| 3 | [0.0, 1.0, 0.0, 0.0] | rogue | 0.00 |
|
||||
| 4 | [0.0, 0.9, 0.1, 0.0] | thief | 0.02 |
|
||||
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger | 0.50 |
|
||||
|
||||
Let's try searching for "knight"
|
||||
|
||||
```python
|
||||
query_vector = [1.0, 0.0, 0.0, 0.0]
|
||||
results = table.search(query_vector).limit(3).to_pandas()
|
||||
print(results)
|
||||
```
|
||||
|
||||
| id | vector | text | _distance |
|
||||
|------|------------------------|----------|-----------|
|
||||
| 1 | [1.0, 0.0, 0.0, 0.0] | knight | 0.00 |
|
||||
| 2 | [0.9, 0.1, 0.0, 0.0] | warrior | 0.02 |
|
||||
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger | 0.50 |
|
||||
|
||||
## Next Steps
|
||||
|
||||
That's it - you just ran your first vector search!
|
||||
|
||||
For more beginner tips, check out the [Basic Usage](basic.md) guide.
|
||||
@@ -34,7 +34,6 @@ import {
|
||||
} from "../lancedb/embedding";
|
||||
import { Index } from "../lancedb/indices";
|
||||
import { instanceOfFullTextQuery } from "../lancedb/query";
|
||||
import exp = require("constants");
|
||||
|
||||
describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
"Given a table",
|
||||
@@ -96,9 +95,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
});
|
||||
|
||||
it("should overwrite data if asked", async () => {
|
||||
const addRes = await table.add([{ id: 1 }, { id: 2 }]);
|
||||
expect(addRes).toHaveProperty("version");
|
||||
expect(addRes.version).toBe(2);
|
||||
await table.add([{ id: 1 }, { id: 2 }]);
|
||||
await table.add([{ id: 1 }], { mode: "overwrite" });
|
||||
await expect(table.countRows()).resolves.toBe(1);
|
||||
});
|
||||
@@ -114,11 +111,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
await table.add([{ id: 1 }]);
|
||||
expect(await table.countRows("id == 1")).toBe(1);
|
||||
expect(await table.countRows("id == 7")).toBe(0);
|
||||
const updateRes = await table.update({ id: "7" });
|
||||
expect(updateRes).toHaveProperty("version");
|
||||
expect(updateRes.version).toBe(3);
|
||||
expect(updateRes).toHaveProperty("rowsUpdated");
|
||||
expect(updateRes.rowsUpdated).toBe(1);
|
||||
await table.update({ id: "7" });
|
||||
expect(await table.countRows("id == 1")).toBe(0);
|
||||
expect(await table.countRows("id == 7")).toBe(1);
|
||||
await table.add([{ id: 2 }]);
|
||||
@@ -345,16 +338,15 @@ describe("merge insert", () => {
|
||||
{ a: 3, b: "y" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
const mergeInsertRes = await table
|
||||
const stats = await table
|
||||
.mergeInsert("a")
|
||||
.whenMatchedUpdateAll()
|
||||
.whenNotMatchedInsertAll()
|
||||
.execute(newData);
|
||||
expect(mergeInsertRes).toHaveProperty("version");
|
||||
expect(mergeInsertRes.version).toBe(2);
|
||||
expect(mergeInsertRes.numInsertedRows).toBe(1);
|
||||
expect(mergeInsertRes.numUpdatedRows).toBe(2);
|
||||
expect(mergeInsertRes.numDeletedRows).toBe(0);
|
||||
|
||||
expect(stats.numInsertedRows).toBe(1n);
|
||||
expect(stats.numUpdatedRows).toBe(2n);
|
||||
expect(stats.numDeletedRows).toBe(0n);
|
||||
|
||||
const expected = [
|
||||
{ a: 1, b: "a" },
|
||||
@@ -373,12 +365,10 @@ describe("merge insert", () => {
|
||||
{ a: 3, b: "y" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
const mergeInsertRes = await table
|
||||
await table
|
||||
.mergeInsert("a")
|
||||
.whenMatchedUpdateAll({ where: "target.b = 'b'" })
|
||||
.execute(newData);
|
||||
expect(mergeInsertRes).toHaveProperty("version");
|
||||
expect(mergeInsertRes.version).toBe(2);
|
||||
|
||||
const expected = [
|
||||
{ a: 1, b: "a" },
|
||||
@@ -1038,19 +1028,15 @@ describe("schema evolution", function () {
|
||||
{ id: 1n, vector: [0.1, 0.2] },
|
||||
]);
|
||||
// Can create a non-nullable column only through addColumns at the moment.
|
||||
const addColumnsRes = await table.addColumns([
|
||||
await table.addColumns([
|
||||
{ name: "price", valueSql: "cast(10.0 as double)" },
|
||||
]);
|
||||
expect(addColumnsRes).toHaveProperty("version");
|
||||
expect(addColumnsRes.version).toBe(2);
|
||||
expect(await table.schema()).toEqual(schema);
|
||||
|
||||
const alterColumnsRes = await table.alterColumns([
|
||||
await table.alterColumns([
|
||||
{ path: "id", rename: "new_id" },
|
||||
{ path: "price", nullable: true },
|
||||
]);
|
||||
expect(alterColumnsRes).toHaveProperty("version");
|
||||
expect(alterColumnsRes.version).toBe(3);
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field("new_id", new Int64(), true),
|
||||
@@ -1168,9 +1154,7 @@ describe("schema evolution", function () {
|
||||
const table = await con.createTable("vectors", [
|
||||
{ id: 1n, vector: [0.1, 0.2] },
|
||||
]);
|
||||
const dropColumnsRes = await table.dropColumns(["vector"]);
|
||||
expect(dropColumnsRes).toHaveProperty("version");
|
||||
expect(dropColumnsRes.version).toBe(2);
|
||||
await table.dropColumns(["vector"]);
|
||||
|
||||
const expectedSchema = new Schema([new Field("id", new Int64(), true)]);
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
|
||||
@@ -28,13 +28,7 @@ export {
|
||||
FragmentSummaryStats,
|
||||
Tags,
|
||||
TagContents,
|
||||
MergeResult,
|
||||
AddResult,
|
||||
AddColumnsResult,
|
||||
AlterColumnsResult,
|
||||
DeleteResult,
|
||||
DropColumnsResult,
|
||||
UpdateResult,
|
||||
MergeStats,
|
||||
} from "./native.js";
|
||||
|
||||
export {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { Data, Schema, fromDataToBuffer } from "./arrow";
|
||||
import { MergeResult, NativeMergeInsertBuilder } from "./native";
|
||||
import { MergeStats, NativeMergeInsertBuilder } from "./native";
|
||||
|
||||
/** A builder used to create and run a merge insert operation */
|
||||
export class MergeInsertBuilder {
|
||||
@@ -73,9 +73,9 @@ export class MergeInsertBuilder {
|
||||
/**
|
||||
* Executes the merge insert operation
|
||||
*
|
||||
* @returns {Promise<MergeResult>} the merge result
|
||||
* @returns Statistics about the merge operation: counts of inserted, updated, and deleted rows
|
||||
*/
|
||||
async execute(data: Data): Promise<MergeResult> {
|
||||
async execute(data: Data): Promise<MergeStats> {
|
||||
let schema: Schema;
|
||||
if (this.#schema instanceof Promise) {
|
||||
schema = await this.#schema;
|
||||
|
||||
@@ -16,18 +16,12 @@ import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
||||
import { IndexOptions } from "./indices";
|
||||
import { MergeInsertBuilder } from "./merge";
|
||||
import {
|
||||
AddColumnsResult,
|
||||
AddColumnsSql,
|
||||
AddResult,
|
||||
AlterColumnsResult,
|
||||
DeleteResult,
|
||||
DropColumnsResult,
|
||||
IndexConfig,
|
||||
IndexStatistics,
|
||||
OptimizeStats,
|
||||
TableStatistics,
|
||||
Tags,
|
||||
UpdateResult,
|
||||
Table as _NativeTable,
|
||||
} from "./native";
|
||||
import {
|
||||
@@ -132,19 +126,12 @@ export abstract class Table {
|
||||
/**
|
||||
* Insert records into this Table.
|
||||
* @param {Data} data Records to be inserted into the Table
|
||||
* @returns {Promise<AddResult>} A promise that resolves to an object
|
||||
* containing the new version number of the table
|
||||
*/
|
||||
abstract add(
|
||||
data: Data,
|
||||
options?: Partial<AddDataOptions>,
|
||||
): Promise<AddResult>;
|
||||
abstract add(data: Data, options?: Partial<AddDataOptions>): Promise<void>;
|
||||
/**
|
||||
* Update existing records in the Table
|
||||
* @param opts.values The values to update. The keys are the column names and the values
|
||||
* are the values to set.
|
||||
* @returns {Promise<UpdateResult>} A promise that resolves to an object containing
|
||||
* the number of rows updated and the new version number
|
||||
* @example
|
||||
* ```ts
|
||||
* table.update({where:"x = 2", values:{"vector": [10, 10]}})
|
||||
@@ -154,13 +141,11 @@ export abstract class Table {
|
||||
opts: {
|
||||
values: Map<string, IntoSql> | Record<string, IntoSql>;
|
||||
} & Partial<UpdateOptions>,
|
||||
): Promise<UpdateResult>;
|
||||
): Promise<void>;
|
||||
/**
|
||||
* Update existing records in the Table
|
||||
* @param opts.valuesSql The values to update. The keys are the column names and the values
|
||||
* are the values to set. The values are SQL expressions.
|
||||
* @returns {Promise<UpdateResult>} A promise that resolves to an object containing
|
||||
* the number of rows updated and the new version number
|
||||
* @example
|
||||
* ```ts
|
||||
* table.update({where:"x = 2", valuesSql:{"x": "x + 1"}})
|
||||
@@ -170,7 +155,7 @@ export abstract class Table {
|
||||
opts: {
|
||||
valuesSql: Map<string, string> | Record<string, string>;
|
||||
} & Partial<UpdateOptions>,
|
||||
): Promise<UpdateResult>;
|
||||
): Promise<void>;
|
||||
/**
|
||||
* Update existing records in the Table
|
||||
*
|
||||
@@ -188,8 +173,6 @@ export abstract class Table {
|
||||
* repeatedly calling this method.
|
||||
* @param {Map<string, string> | Record<string, string>} updates - the
|
||||
* columns to update
|
||||
* @returns {Promise<UpdateResult>} A promise that resolves to an object
|
||||
* containing the number of rows updated and the new version number
|
||||
*
|
||||
* Keys in the map should specify the name of the column to update.
|
||||
* Values in the map provide the new value of the column. These can
|
||||
@@ -201,16 +184,12 @@ export abstract class Table {
|
||||
abstract update(
|
||||
updates: Map<string, string> | Record<string, string>,
|
||||
options?: Partial<UpdateOptions>,
|
||||
): Promise<UpdateResult>;
|
||||
): Promise<void>;
|
||||
|
||||
/** Count the total number of rows in the dataset. */
|
||||
abstract countRows(filter?: string): Promise<number>;
|
||||
/**
|
||||
* Delete the rows that satisfy the predicate.
|
||||
* @returns {Promise<DeleteResult>} A promise that resolves to an object
|
||||
* containing the new version number of the table
|
||||
*/
|
||||
abstract delete(predicate: string): Promise<DeleteResult>;
|
||||
/** Delete the rows that satisfy the predicate. */
|
||||
abstract delete(predicate: string): Promise<void>;
|
||||
/**
|
||||
* Create an index to speed up queries.
|
||||
*
|
||||
@@ -364,23 +343,15 @@ export abstract class Table {
|
||||
* the SQL expression to use to calculate the value of the new column. These
|
||||
* expressions will be evaluated for each row in the table, and can
|
||||
* reference existing columns in the table.
|
||||
* @returns {Promise<AddColumnsResult>} A promise that resolves to an object
|
||||
* containing the new version number of the table after adding the columns.
|
||||
*/
|
||||
abstract addColumns(
|
||||
newColumnTransforms: AddColumnsSql[],
|
||||
): Promise<AddColumnsResult>;
|
||||
abstract addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void>;
|
||||
|
||||
/**
|
||||
* Alter the name or nullability of columns.
|
||||
* @param {ColumnAlteration[]} columnAlterations One or more alterations to
|
||||
* apply to columns.
|
||||
* @returns {Promise<AlterColumnsResult>} A promise that resolves to an object
|
||||
* containing the new version number of the table after altering the columns.
|
||||
*/
|
||||
abstract alterColumns(
|
||||
columnAlterations: ColumnAlteration[],
|
||||
): Promise<AlterColumnsResult>;
|
||||
abstract alterColumns(columnAlterations: ColumnAlteration[]): Promise<void>;
|
||||
/**
|
||||
* Drop one or more columns from the dataset
|
||||
*
|
||||
@@ -391,10 +362,8 @@ export abstract class Table {
|
||||
* @param {string[]} columnNames The names of the columns to drop. These can
|
||||
* be nested column references (e.g. "a.b.c") or top-level column names
|
||||
* (e.g. "a").
|
||||
* @returns {Promise<DropColumnsResult>} A promise that resolves to an object
|
||||
* containing the new version number of the table after dropping the columns.
|
||||
*/
|
||||
abstract dropColumns(columnNames: string[]): Promise<DropColumnsResult>;
|
||||
abstract dropColumns(columnNames: string[]): Promise<void>;
|
||||
/** Retrieve the version of the table */
|
||||
|
||||
abstract version(): Promise<number>;
|
||||
@@ -560,12 +529,12 @@ export class LocalTable extends Table {
|
||||
return tbl.schema;
|
||||
}
|
||||
|
||||
async add(data: Data, options?: Partial<AddDataOptions>): Promise<AddResult> {
|
||||
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
|
||||
const mode = options?.mode ?? "append";
|
||||
const schema = await this.schema();
|
||||
|
||||
const buffer = await fromDataToBuffer(data, undefined, schema);
|
||||
return await this.inner.add(buffer, mode);
|
||||
await this.inner.add(buffer, mode);
|
||||
}
|
||||
|
||||
async update(
|
||||
@@ -578,7 +547,7 @@ export class LocalTable extends Table {
|
||||
valuesSql: Map<string, string> | Record<string, string>;
|
||||
} & Partial<UpdateOptions>),
|
||||
options?: Partial<UpdateOptions>,
|
||||
): Promise<UpdateResult> {
|
||||
) {
|
||||
const isValues =
|
||||
"values" in optsOrUpdates && typeof optsOrUpdates.values !== "string";
|
||||
const isValuesSql =
|
||||
@@ -625,15 +594,15 @@ export class LocalTable extends Table {
|
||||
columns = Object.entries(optsOrUpdates as Record<string, string>);
|
||||
predicate = options?.where;
|
||||
}
|
||||
return await this.inner.update(predicate, columns);
|
||||
await this.inner.update(predicate, columns);
|
||||
}
|
||||
|
||||
async countRows(filter?: string): Promise<number> {
|
||||
return await this.inner.countRows(filter);
|
||||
}
|
||||
|
||||
async delete(predicate: string): Promise<DeleteResult> {
|
||||
return await this.inner.delete(predicate);
|
||||
async delete(predicate: string): Promise<void> {
|
||||
await this.inner.delete(predicate);
|
||||
}
|
||||
|
||||
async createIndex(column: string, options?: Partial<IndexOptions>) {
|
||||
@@ -721,15 +690,11 @@ export class LocalTable extends Table {
|
||||
|
||||
// TODO: Support BatchUDF
|
||||
|
||||
async addColumns(
|
||||
newColumnTransforms: AddColumnsSql[],
|
||||
): Promise<AddColumnsResult> {
|
||||
return await this.inner.addColumns(newColumnTransforms);
|
||||
async addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void> {
|
||||
await this.inner.addColumns(newColumnTransforms);
|
||||
}
|
||||
|
||||
async alterColumns(
|
||||
columnAlterations: ColumnAlteration[],
|
||||
): Promise<AlterColumnsResult> {
|
||||
async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
|
||||
const processedAlterations = columnAlterations.map((alteration) => {
|
||||
if (typeof alteration.dataType === "string") {
|
||||
return {
|
||||
@@ -750,11 +715,11 @@ export class LocalTable extends Table {
|
||||
}
|
||||
});
|
||||
|
||||
return await this.inner.alterColumns(processedAlterations);
|
||||
await this.inner.alterColumns(processedAlterations);
|
||||
}
|
||||
|
||||
async dropColumns(columnNames: string[]): Promise<DropColumnsResult> {
|
||||
return await this.inner.dropColumns(columnNames);
|
||||
async dropColumns(columnNames: string[]): Promise<void> {
|
||||
await this.inner.dropColumns(columnNames);
|
||||
}
|
||||
|
||||
async version(): Promise<number> {
|
||||
|
||||
@@ -5,7 +5,7 @@ use lancedb::{arrow::IntoArrow, ipc::ipc_file_to_batches, table::merge::MergeIns
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
|
||||
use crate::{error::convert_error, table::MergeResult};
|
||||
use crate::error::convert_error;
|
||||
|
||||
#[napi]
|
||||
#[derive(Clone)]
|
||||
@@ -37,7 +37,7 @@ impl NativeMergeInsertBuilder {
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeResult> {
|
||||
pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeStats> {
|
||||
let data = ipc_file_to_batches(buf.to_vec())
|
||||
.and_then(IntoArrow::into_arrow)
|
||||
.map_err(|e| {
|
||||
@@ -46,13 +46,14 @@ impl NativeMergeInsertBuilder {
|
||||
|
||||
let this = self.clone();
|
||||
|
||||
let res = this.inner.execute(data).await.map_err(|e| {
|
||||
let stats = this.inner.execute(data).await.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to execute merge insert: {}",
|
||||
convert_error(&e)
|
||||
))
|
||||
})?;
|
||||
Ok(res.into())
|
||||
|
||||
Ok(stats.into())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,3 +62,20 @@ impl From<MergeInsertBuilder> for NativeMergeInsertBuilder {
|
||||
Self { inner }
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct MergeStats {
|
||||
pub num_inserted_rows: BigInt,
|
||||
pub num_updated_rows: BigInt,
|
||||
pub num_deleted_rows: BigInt,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::MergeStats> for MergeStats {
|
||||
fn from(stats: lancedb::table::MergeStats) -> Self {
|
||||
Self {
|
||||
num_inserted_rows: stats.num_inserted_rows.into(),
|
||||
num_updated_rows: stats.num_updated_rows.into(),
|
||||
num_deleted_rows: stats.num_deleted_rows.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ impl Table {
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<AddResult> {
|
||||
pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<()> {
|
||||
let batches = ipc_file_to_batches(buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||
let mut op = self.inner_ref()?.add(batches);
|
||||
@@ -88,8 +88,7 @@ impl Table {
|
||||
return Err(napi::Error::from_reason(format!("Invalid mode: {}", mode)));
|
||||
};
|
||||
|
||||
let res = op.execute().await.default_error()?;
|
||||
Ok(res.into())
|
||||
op.execute().await.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
@@ -102,9 +101,8 @@ impl Table {
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn delete(&self, predicate: String) -> napi::Result<DeleteResult> {
|
||||
let res = self.inner_ref()?.delete(&predicate).await.default_error()?;
|
||||
Ok(res.into())
|
||||
pub async fn delete(&self, predicate: String) -> napi::Result<()> {
|
||||
self.inner_ref()?.delete(&predicate).await.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
@@ -170,7 +168,7 @@ impl Table {
|
||||
&self,
|
||||
only_if: Option<String>,
|
||||
columns: Vec<(String, String)>,
|
||||
) -> napi::Result<UpdateResult> {
|
||||
) -> napi::Result<u64> {
|
||||
let mut op = self.inner_ref()?.update();
|
||||
if let Some(only_if) = only_if {
|
||||
op = op.only_if(only_if);
|
||||
@@ -178,8 +176,7 @@ impl Table {
|
||||
for (column_name, value) in columns {
|
||||
op = op.column(column_name, value);
|
||||
}
|
||||
let res = op.execute().await.default_error()?;
|
||||
Ok(res.into())
|
||||
op.execute().await.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
@@ -193,28 +190,21 @@ impl Table {
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn add_columns(
|
||||
&self,
|
||||
transforms: Vec<AddColumnsSql>,
|
||||
) -> napi::Result<AddColumnsResult> {
|
||||
pub async fn add_columns(&self, transforms: Vec<AddColumnsSql>) -> napi::Result<()> {
|
||||
let transforms = transforms
|
||||
.into_iter()
|
||||
.map(|sql| (sql.name, sql.value_sql))
|
||||
.collect::<Vec<_>>();
|
||||
let transforms = NewColumnTransform::SqlExpressions(transforms);
|
||||
let res = self
|
||||
.inner_ref()?
|
||||
self.inner_ref()?
|
||||
.add_columns(transforms, None)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(res.into())
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn alter_columns(
|
||||
&self,
|
||||
alterations: Vec<ColumnAlteration>,
|
||||
) -> napi::Result<AlterColumnsResult> {
|
||||
pub async fn alter_columns(&self, alterations: Vec<ColumnAlteration>) -> napi::Result<()> {
|
||||
for alteration in &alterations {
|
||||
if alteration.rename.is_none()
|
||||
&& alteration.nullable.is_none()
|
||||
@@ -231,23 +221,21 @@ impl Table {
|
||||
.collect::<std::result::Result<Vec<_>, String>>()
|
||||
.map_err(napi::Error::from_reason)?;
|
||||
|
||||
let res = self
|
||||
.inner_ref()?
|
||||
self.inner_ref()?
|
||||
.alter_columns(&alterations)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(res.into())
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn drop_columns(&self, columns: Vec<String>) -> napi::Result<DropColumnsResult> {
|
||||
pub async fn drop_columns(&self, columns: Vec<String>) -> napi::Result<()> {
|
||||
let col_refs = columns.iter().map(String::as_str).collect::<Vec<_>>();
|
||||
let res = self
|
||||
.inner_ref()?
|
||||
self.inner_ref()?
|
||||
.drop_columns(&col_refs)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(res.into())
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
@@ -654,105 +642,6 @@ pub struct Version {
|
||||
pub metadata: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct UpdateResult {
|
||||
pub rows_updated: i64,
|
||||
pub version: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::UpdateResult> for UpdateResult {
|
||||
fn from(value: lancedb::table::UpdateResult) -> Self {
|
||||
Self {
|
||||
rows_updated: value.rows_updated as i64,
|
||||
version: value.version as i64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct AddResult {
|
||||
pub version: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::AddResult> for AddResult {
|
||||
fn from(value: lancedb::table::AddResult) -> Self {
|
||||
Self {
|
||||
version: value.version as i64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct DeleteResult {
|
||||
pub version: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||
fn from(value: lancedb::table::DeleteResult) -> Self {
|
||||
Self {
|
||||
version: value.version as i64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct MergeResult {
|
||||
pub version: i64,
|
||||
pub num_inserted_rows: i64,
|
||||
pub num_updated_rows: i64,
|
||||
pub num_deleted_rows: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::MergeResult> for MergeResult {
|
||||
fn from(value: lancedb::table::MergeResult) -> Self {
|
||||
Self {
|
||||
version: value.version as i64,
|
||||
num_inserted_rows: value.num_inserted_rows as i64,
|
||||
num_updated_rows: value.num_updated_rows as i64,
|
||||
num_deleted_rows: value.num_deleted_rows as i64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct AddColumnsResult {
|
||||
pub version: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::AddColumnsResult> for AddColumnsResult {
|
||||
fn from(value: lancedb::table::AddColumnsResult) -> Self {
|
||||
Self {
|
||||
version: value.version as i64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct AlterColumnsResult {
|
||||
pub version: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::AlterColumnsResult> for AlterColumnsResult {
|
||||
fn from(value: lancedb::table::AlterColumnsResult) -> Self {
|
||||
Self {
|
||||
version: value.version as i64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct DropColumnsResult {
|
||||
pub version: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::DropColumnsResult> for DropColumnsResult {
|
||||
fn from(value: lancedb::table::DropColumnsResult) -> Self {
|
||||
Self {
|
||||
version: value.version as i64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub struct TagContents {
|
||||
pub version: i64,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.22.1-beta.2"
|
||||
current_version = "0.22.1-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.22.1-beta.2"
|
||||
version = "0.22.1-beta.1"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -36,10 +36,8 @@ class Table:
|
||||
async def schema(self) -> pa.Schema: ...
|
||||
async def add(
|
||||
self, data: pa.RecordBatchReader, mode: Literal["append", "overwrite"]
|
||||
) -> AddResult: ...
|
||||
async def update(
|
||||
self, updates: Dict[str, str], where: Optional[str]
|
||||
) -> UpdateResult: ...
|
||||
) -> None: ...
|
||||
async def update(self, updates: Dict[str, str], where: Optional[str]) -> None: ...
|
||||
async def count_rows(self, filter: Optional[str]) -> int: ...
|
||||
async def create_index(
|
||||
self,
|
||||
@@ -53,12 +51,10 @@ class Table:
|
||||
async def checkout_latest(self): ...
|
||||
async def restore(self, version: Optional[int] = None): ...
|
||||
async def list_indices(self) -> list[IndexConfig]: ...
|
||||
async def delete(self, filter: str) -> DeleteResult: ...
|
||||
async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
|
||||
async def add_columns_with_schema(self, schema: pa.Schema) -> AddColumnsResult: ...
|
||||
async def alter_columns(
|
||||
self, columns: list[dict[str, Any]]
|
||||
) -> AlterColumnsResult: ...
|
||||
async def delete(self, filter: str): ...
|
||||
async def add_columns(self, columns: list[tuple[str, str]]) -> None: ...
|
||||
async def add_columns_with_schema(self, schema: pa.Schema) -> None: ...
|
||||
async def alter_columns(self, columns: list[dict[str, Any]]) -> None: ...
|
||||
async def optimize(
|
||||
self,
|
||||
*,
|
||||
@@ -212,28 +208,3 @@ class OptimizeStats:
|
||||
class Tag(TypedDict):
|
||||
version: int
|
||||
manifest_size: int
|
||||
|
||||
class AddResult:
|
||||
version: int
|
||||
|
||||
class DeleteResult:
|
||||
version: int
|
||||
|
||||
class UpdateResult:
|
||||
rows_updated: int
|
||||
version: int
|
||||
|
||||
class MergeResult:
|
||||
version: int
|
||||
num_updated_rows: int
|
||||
num_inserted_rows: int
|
||||
num_deleted_rows: int
|
||||
|
||||
class AddColumnsResult:
|
||||
version: int
|
||||
|
||||
class AlterColumnsResult:
|
||||
version: int
|
||||
|
||||
class DropColumnsResult:
|
||||
version: int
|
||||
|
||||
@@ -8,9 +8,6 @@ from typing import TYPE_CHECKING, List, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .common import DATA
|
||||
from ._lancedb import (
|
||||
MergeInsertResult,
|
||||
)
|
||||
|
||||
|
||||
class LanceMergeInsertBuilder(object):
|
||||
@@ -81,7 +78,7 @@ class LanceMergeInsertBuilder(object):
|
||||
new_data: DATA,
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> MergeInsertResult:
|
||||
):
|
||||
"""
|
||||
Executes the merge insert operation
|
||||
|
||||
@@ -98,10 +95,5 @@ class LanceMergeInsertBuilder(object):
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
|
||||
Returns
|
||||
-------
|
||||
MergeInsertResult
|
||||
version: the new version number of the table after doing merge insert.
|
||||
"""
|
||||
return self._table._do_merge(self, new_data, on_bad_vectors, fill_value)
|
||||
|
||||
@@ -415,7 +415,6 @@ class LanceModel(pydantic.BaseModel):
|
||||
>>> table.add([
|
||||
... TestModel(name="test", vector=[1.0, 2.0])
|
||||
... ])
|
||||
AddResult(version=2)
|
||||
>>> table.search([0., 0.]).limit(1).to_pydantic(TestModel)
|
||||
[TestModel(name='test', vector=FixedSizeList(dim=2))]
|
||||
"""
|
||||
|
||||
@@ -7,16 +7,7 @@ from functools import cached_property
|
||||
from typing import Dict, Iterable, List, Optional, Union, Literal
|
||||
import warnings
|
||||
|
||||
from lancedb._lancedb import (
|
||||
AddColumnsResult,
|
||||
AddResult,
|
||||
AlterColumnsResult,
|
||||
DeleteResult,
|
||||
DropColumnsResult,
|
||||
IndexConfig,
|
||||
MergeResult,
|
||||
UpdateResult,
|
||||
)
|
||||
from lancedb._lancedb import IndexConfig
|
||||
from lancedb.embeddings.base import EmbeddingFunctionConfig
|
||||
from lancedb.index import FTS, BTree, Bitmap, HnswPq, HnswSq, IvfFlat, IvfPq, LabelList
|
||||
from lancedb.remote.db import LOOP
|
||||
@@ -272,7 +263,7 @@ class RemoteTable(Table):
|
||||
mode: str = "append",
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> AddResult:
|
||||
) -> int:
|
||||
"""Add more data to the [Table](Table). It has the same API signature as
|
||||
the OSS version.
|
||||
|
||||
@@ -295,12 +286,8 @@ class RemoteTable(Table):
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
|
||||
Returns
|
||||
-------
|
||||
AddResult
|
||||
An object containing the new version number of the table after adding data.
|
||||
"""
|
||||
return LOOP.run(
|
||||
LOOP.run(
|
||||
self._table.add(
|
||||
data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
@@ -426,12 +413,10 @@ class RemoteTable(Table):
|
||||
new_data: DATA,
|
||||
on_bad_vectors: str,
|
||||
fill_value: float,
|
||||
) -> MergeResult:
|
||||
return LOOP.run(
|
||||
self._table._do_merge(merge, new_data, on_bad_vectors, fill_value)
|
||||
)
|
||||
):
|
||||
LOOP.run(self._table._do_merge(merge, new_data, on_bad_vectors, fill_value))
|
||||
|
||||
def delete(self, predicate: str) -> DeleteResult:
|
||||
def delete(self, predicate: str):
|
||||
"""Delete rows from the table.
|
||||
|
||||
This can be used to delete a single row, many rows, all rows, or
|
||||
@@ -446,11 +431,6 @@ class RemoteTable(Table):
|
||||
|
||||
The filter must not be empty, or it will error.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DeleteResult
|
||||
An object containing the new version number of the table after deletion.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
@@ -483,7 +463,7 @@ class RemoteTable(Table):
|
||||
x vector _distance # doctest: +SKIP
|
||||
0 2 [3.0, 4.0] 85.0 # doctest: +SKIP
|
||||
"""
|
||||
return LOOP.run(self._table.delete(predicate))
|
||||
LOOP.run(self._table.delete(predicate))
|
||||
|
||||
def update(
|
||||
self,
|
||||
@@ -491,7 +471,7 @@ class RemoteTable(Table):
|
||||
values: Optional[dict] = None,
|
||||
*,
|
||||
values_sql: Optional[Dict[str, str]] = None,
|
||||
) -> UpdateResult:
|
||||
):
|
||||
"""
|
||||
This can be used to update zero to all rows depending on how many
|
||||
rows match the where clause.
|
||||
@@ -509,12 +489,6 @@ class RemoteTable(Table):
|
||||
reference existing columns. For example, {"x": "x + 1"} will increment
|
||||
the x column by 1.
|
||||
|
||||
Returns
|
||||
-------
|
||||
UpdateResult
|
||||
- rows_updated: The number of rows that were updated
|
||||
- version: The new version number of the table after the update
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
@@ -539,7 +513,7 @@ class RemoteTable(Table):
|
||||
2 2 [10.0, 10.0] # doctest: +SKIP
|
||||
|
||||
"""
|
||||
return LOOP.run(
|
||||
LOOP.run(
|
||||
self._table.update(where=where, updates=values, updates_sql=values_sql)
|
||||
)
|
||||
|
||||
@@ -587,15 +561,13 @@ class RemoteTable(Table):
|
||||
def count_rows(self, filter: Optional[str] = None) -> int:
|
||||
return LOOP.run(self._table.count_rows(filter))
|
||||
|
||||
def add_columns(self, transforms: Dict[str, str]) -> AddColumnsResult:
|
||||
def add_columns(self, transforms: Dict[str, str]):
|
||||
return LOOP.run(self._table.add_columns(transforms))
|
||||
|
||||
def alter_columns(
|
||||
self, *alterations: Iterable[Dict[str, str]]
|
||||
) -> AlterColumnsResult:
|
||||
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||
return LOOP.run(self._table.alter_columns(*alterations))
|
||||
|
||||
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
|
||||
def drop_columns(self, columns: Iterable[str]):
|
||||
return LOOP.run(self._table.drop_columns(columns))
|
||||
|
||||
def drop_index(self, index_name: str):
|
||||
|
||||
@@ -78,13 +78,6 @@ if TYPE_CHECKING:
|
||||
CleanupStats,
|
||||
CompactionStats,
|
||||
Tag,
|
||||
AddColumnsResult,
|
||||
AddResult,
|
||||
AlterColumnsResult,
|
||||
DeleteResult,
|
||||
DropColumnsResult,
|
||||
MergeResult,
|
||||
UpdateResult,
|
||||
)
|
||||
from .db import LanceDBConnection
|
||||
from .index import IndexConfig
|
||||
@@ -557,7 +550,6 @@ class Table(ABC):
|
||||
Can append new data with [Table.add()][lancedb.table.Table.add].
|
||||
|
||||
>>> table.add([{"vector": [0.5, 1.3], "b": 4}])
|
||||
AddResult(version=2)
|
||||
|
||||
Can query the table with [Table.search][lancedb.table.Table.search].
|
||||
|
||||
@@ -902,7 +894,7 @@ class Table(ABC):
|
||||
mode: AddMode = "append",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> AddResult:
|
||||
):
|
||||
"""Add more data to the [Table](Table).
|
||||
|
||||
Parameters
|
||||
@@ -924,10 +916,6 @@ class Table(ABC):
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
|
||||
Returns
|
||||
-------
|
||||
AddResult
|
||||
An object containing the new version number of the table after adding data.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -974,12 +962,12 @@ class Table(ABC):
|
||||
>>> table = db.create_table("my_table", data)
|
||||
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
||||
>>> # Perform a "upsert" operation
|
||||
>>> res = table.merge_insert("a") \\
|
||||
>>> stats = table.merge_insert("a") \\
|
||||
... .when_matched_update_all() \\
|
||||
... .when_not_matched_insert_all() \\
|
||||
... .execute(new_data)
|
||||
>>> res
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
|
||||
>>> stats
|
||||
{'num_inserted_rows': 1, 'num_updated_rows': 2, 'num_deleted_rows': 0}
|
||||
>>> # The order of new rows is non-deterministic since we use
|
||||
>>> # a hash-join as part of this operation and so we sort here
|
||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||
@@ -988,7 +976,7 @@ class Table(ABC):
|
||||
1 2 x
|
||||
2 3 y
|
||||
3 4 z
|
||||
""" # noqa: E501
|
||||
"""
|
||||
on = [on] if isinstance(on, str) else list(iter(on))
|
||||
|
||||
return LanceMergeInsertBuilder(self, on)
|
||||
@@ -1103,10 +1091,10 @@ class Table(ABC):
|
||||
new_data: DATA,
|
||||
on_bad_vectors: OnBadVectorsType,
|
||||
fill_value: float,
|
||||
) -> MergeResult: ...
|
||||
): ...
|
||||
|
||||
@abstractmethod
|
||||
def delete(self, where: str) -> DeleteResult:
|
||||
def delete(self, where: str):
|
||||
"""Delete rows from the table.
|
||||
|
||||
This can be used to delete a single row, many rows, all rows, or
|
||||
@@ -1121,11 +1109,6 @@ class Table(ABC):
|
||||
|
||||
The filter must not be empty, or it will error.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DeleteResult
|
||||
An object containing the new version number of the table after deletion.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
@@ -1142,7 +1125,6 @@ class Table(ABC):
|
||||
1 2 [3.0, 4.0]
|
||||
2 3 [5.0, 6.0]
|
||||
>>> table.delete("x = 2")
|
||||
DeleteResult(version=2)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
@@ -1156,7 +1138,6 @@ class Table(ABC):
|
||||
>>> to_remove
|
||||
'1, 5'
|
||||
>>> table.delete(f"x IN ({to_remove})")
|
||||
DeleteResult(version=3)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 3 [5.0, 6.0]
|
||||
@@ -1170,7 +1151,7 @@ class Table(ABC):
|
||||
values: Optional[dict] = None,
|
||||
*,
|
||||
values_sql: Optional[Dict[str, str]] = None,
|
||||
) -> UpdateResult:
|
||||
):
|
||||
"""
|
||||
This can be used to update zero to all rows depending on how many
|
||||
rows match the where clause. If no where clause is provided, then
|
||||
@@ -1192,12 +1173,6 @@ class Table(ABC):
|
||||
reference existing columns. For example, {"x": "x + 1"} will increment
|
||||
the x column by 1.
|
||||
|
||||
Returns
|
||||
-------
|
||||
UpdateResult
|
||||
- rows_updated: The number of rows that were updated
|
||||
- version: The new version number of the table after the update
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
@@ -1211,14 +1186,12 @@ class Table(ABC):
|
||||
1 2 [3.0, 4.0]
|
||||
2 3 [5.0, 6.0]
|
||||
>>> table.update(where="x = 2", values={"vector": [10.0, 10]})
|
||||
UpdateResult(rows_updated=1, version=2)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
1 3 [5.0, 6.0]
|
||||
2 2 [10.0, 10.0]
|
||||
>>> table.update(values_sql={"x": "x + 1"})
|
||||
UpdateResult(rows_updated=3, version=3)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 2 [1.0, 2.0]
|
||||
@@ -1381,11 +1354,6 @@ class Table(ABC):
|
||||
Alternatively, a pyarrow Field or Schema can be provided to add
|
||||
new columns with the specified data types. The new columns will
|
||||
be initialized with null values.
|
||||
|
||||
Returns
|
||||
-------
|
||||
AddColumnsResult
|
||||
version: the new version number of the table after adding columns.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
@@ -1411,15 +1379,10 @@ class Table(ABC):
|
||||
nullability is not changed. Only non-nullable columns can be changed
|
||||
to nullable. Currently, you cannot change a nullable column to
|
||||
non-nullable.
|
||||
|
||||
Returns
|
||||
-------
|
||||
AlterColumnsResult
|
||||
version: the new version number of the table after the alteration.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
|
||||
def drop_columns(self, columns: Iterable[str]):
|
||||
"""
|
||||
Drop columns from the table.
|
||||
|
||||
@@ -1427,11 +1390,6 @@ class Table(ABC):
|
||||
----------
|
||||
columns : Iterable[str]
|
||||
The names of the columns to drop.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropColumnsResult
|
||||
version: the new version number of the table dropping the columns.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
@@ -1653,7 +1611,6 @@ class LanceTable(Table):
|
||||
... [{"vector": [1.1, 0.9], "type": "vector"}])
|
||||
>>> table.tags.create("v1", table.version)
|
||||
>>> table.add([{"vector": [0.5, 0.2], "type": "vector"}])
|
||||
AddResult(version=2)
|
||||
>>> tags = table.tags.list()
|
||||
>>> print(tags["v1"]["version"])
|
||||
1
|
||||
@@ -1692,7 +1649,6 @@ class LanceTable(Table):
|
||||
vector type
|
||||
0 [1.1, 0.9] vector
|
||||
>>> table.add([{"vector": [0.5, 0.2], "type": "vector"}])
|
||||
AddResult(version=2)
|
||||
>>> table.version
|
||||
2
|
||||
>>> table.checkout(1)
|
||||
@@ -1735,7 +1691,6 @@ class LanceTable(Table):
|
||||
vector type
|
||||
0 [1.1, 0.9] vector
|
||||
>>> table.add([{"vector": [0.5, 0.2], "type": "vector"}])
|
||||
AddResult(version=2)
|
||||
>>> table.version
|
||||
2
|
||||
>>> table.restore(1)
|
||||
@@ -2100,7 +2055,7 @@ class LanceTable(Table):
|
||||
mode: AddMode = "append",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> AddResult:
|
||||
):
|
||||
"""Add data to the table.
|
||||
If vector columns are missing and the table
|
||||
has embedding functions, then the vector columns
|
||||
@@ -2124,7 +2079,7 @@ class LanceTable(Table):
|
||||
int
|
||||
The number of vectors in the table.
|
||||
"""
|
||||
return LOOP.run(
|
||||
LOOP.run(
|
||||
self._table.add(
|
||||
data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
@@ -2454,8 +2409,8 @@ class LanceTable(Table):
|
||||
)
|
||||
return self
|
||||
|
||||
def delete(self, where: str) -> DeleteResult:
|
||||
return LOOP.run(self._table.delete(where))
|
||||
def delete(self, where: str):
|
||||
LOOP.run(self._table.delete(where))
|
||||
|
||||
def update(
|
||||
self,
|
||||
@@ -2463,7 +2418,7 @@ class LanceTable(Table):
|
||||
values: Optional[dict] = None,
|
||||
*,
|
||||
values_sql: Optional[Dict[str, str]] = None,
|
||||
) -> UpdateResult:
|
||||
):
|
||||
"""
|
||||
This can be used to update zero to all rows depending on how many
|
||||
rows match the where clause.
|
||||
@@ -2481,12 +2436,6 @@ class LanceTable(Table):
|
||||
reference existing columns. For example, {"x": "x + 1"} will increment
|
||||
the x column by 1.
|
||||
|
||||
Returns
|
||||
-------
|
||||
UpdateResult
|
||||
- rows_updated: The number of rows that were updated
|
||||
- version: The new version number of the table after the update
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
@@ -2500,7 +2449,6 @@ class LanceTable(Table):
|
||||
1 2 [3.0, 4.0]
|
||||
2 3 [5.0, 6.0]
|
||||
>>> table.update(where="x = 2", values={"vector": [10.0, 10]})
|
||||
UpdateResult(rows_updated=1, version=2)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
@@ -2508,7 +2456,7 @@ class LanceTable(Table):
|
||||
2 2 [10.0, 10.0]
|
||||
|
||||
"""
|
||||
return LOOP.run(self._table.update(values, where=where, updates_sql=values_sql))
|
||||
LOOP.run(self._table.update(values, where=where, updates_sql=values_sql))
|
||||
|
||||
def _execute_query(
|
||||
self,
|
||||
@@ -2542,7 +2490,7 @@ class LanceTable(Table):
|
||||
new_data: DATA,
|
||||
on_bad_vectors: OnBadVectorsType,
|
||||
fill_value: float,
|
||||
) -> MergeResult:
|
||||
):
|
||||
return LOOP.run(
|
||||
self._table._do_merge(merge, new_data, on_bad_vectors, fill_value)
|
||||
)
|
||||
@@ -2687,16 +2635,14 @@ class LanceTable(Table):
|
||||
|
||||
def add_columns(
|
||||
self, transforms: Dict[str, str] | pa.field | List[pa.field] | pa.Schema
|
||||
) -> AddColumnsResult:
|
||||
return LOOP.run(self._table.add_columns(transforms))
|
||||
):
|
||||
LOOP.run(self._table.add_columns(transforms))
|
||||
|
||||
def alter_columns(
|
||||
self, *alterations: Iterable[Dict[str, str]]
|
||||
) -> AlterColumnsResult:
|
||||
return LOOP.run(self._table.alter_columns(*alterations))
|
||||
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||
LOOP.run(self._table.alter_columns(*alterations))
|
||||
|
||||
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
|
||||
return LOOP.run(self._table.drop_columns(columns))
|
||||
def drop_columns(self, columns: Iterable[str]):
|
||||
LOOP.run(self._table.drop_columns(columns))
|
||||
|
||||
def uses_v2_manifest_paths(self) -> bool:
|
||||
"""
|
||||
@@ -3251,7 +3197,7 @@ class AsyncTable:
|
||||
mode: Optional[Literal["append", "overwrite"]] = "append",
|
||||
on_bad_vectors: Optional[OnBadVectorsType] = None,
|
||||
fill_value: Optional[float] = None,
|
||||
) -> AddResult:
|
||||
):
|
||||
"""Add more data to the [Table](Table).
|
||||
|
||||
Parameters
|
||||
@@ -3290,7 +3236,7 @@ class AsyncTable:
|
||||
if isinstance(data, pa.Table):
|
||||
data = data.to_reader()
|
||||
|
||||
return await self._inner.add(data, mode or "append")
|
||||
await self._inner.add(data, mode or "append")
|
||||
|
||||
def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
|
||||
"""
|
||||
@@ -3335,12 +3281,12 @@ class AsyncTable:
|
||||
>>> table = db.create_table("my_table", data)
|
||||
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
||||
>>> # Perform a "upsert" operation
|
||||
>>> res = table.merge_insert("a") \\
|
||||
>>> stats = table.merge_insert("a") \\
|
||||
... .when_matched_update_all() \\
|
||||
... .when_not_matched_insert_all() \\
|
||||
... .execute(new_data)
|
||||
>>> res
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
|
||||
>>> stats
|
||||
{'num_inserted_rows': 1, 'num_updated_rows': 2, 'num_deleted_rows': 0}
|
||||
>>> # The order of new rows is non-deterministic since we use
|
||||
>>> # a hash-join as part of this operation and so we sort here
|
||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||
@@ -3349,7 +3295,7 @@ class AsyncTable:
|
||||
1 2 x
|
||||
2 3 y
|
||||
3 4 z
|
||||
""" # noqa: E501
|
||||
"""
|
||||
on = [on] if isinstance(on, str) else list(iter(on))
|
||||
|
||||
return LanceMergeInsertBuilder(self, on)
|
||||
@@ -3680,7 +3626,7 @@ class AsyncTable:
|
||||
new_data: DATA,
|
||||
on_bad_vectors: OnBadVectorsType,
|
||||
fill_value: float,
|
||||
) -> MergeResult:
|
||||
):
|
||||
schema = await self.schema()
|
||||
if on_bad_vectors is None:
|
||||
on_bad_vectors = "error"
|
||||
@@ -3708,7 +3654,7 @@ class AsyncTable:
|
||||
),
|
||||
)
|
||||
|
||||
async def delete(self, where: str) -> DeleteResult:
|
||||
async def delete(self, where: str):
|
||||
"""Delete rows from the table.
|
||||
|
||||
This can be used to delete a single row, many rows, all rows, or
|
||||
@@ -3739,7 +3685,6 @@ class AsyncTable:
|
||||
1 2 [3.0, 4.0]
|
||||
2 3 [5.0, 6.0]
|
||||
>>> table.delete("x = 2")
|
||||
DeleteResult(version=2)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
@@ -3753,7 +3698,6 @@ class AsyncTable:
|
||||
>>> to_remove
|
||||
'1, 5'
|
||||
>>> table.delete(f"x IN ({to_remove})")
|
||||
DeleteResult(version=3)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 3 [5.0, 6.0]
|
||||
@@ -3766,7 +3710,7 @@ class AsyncTable:
|
||||
*,
|
||||
where: Optional[str] = None,
|
||||
updates_sql: Optional[Dict[str, str]] = None,
|
||||
) -> UpdateResult:
|
||||
):
|
||||
"""
|
||||
This can be used to update zero to all rows in the table.
|
||||
|
||||
@@ -3788,13 +3732,6 @@ class AsyncTable:
|
||||
literals (e.g. "7" or "'foo'") or they can be expressions based on the
|
||||
previous value of the row (e.g. "x + 1" to increment the x column by 1)
|
||||
|
||||
Returns
|
||||
-------
|
||||
UpdateResult
|
||||
An object containing:
|
||||
- rows_updated: The number of rows that were updated
|
||||
- version: The new version number of the table after the update
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import asyncio
|
||||
@@ -3823,7 +3760,7 @@ class AsyncTable:
|
||||
|
||||
async def add_columns(
|
||||
self, transforms: dict[str, str] | pa.field | List[pa.field] | pa.Schema
|
||||
) -> AddColumnsResult:
|
||||
):
|
||||
"""
|
||||
Add new columns with defined values.
|
||||
|
||||
@@ -3835,12 +3772,6 @@ class AsyncTable:
|
||||
each row in the table, and can reference existing columns.
|
||||
Alternatively, you can pass a pyarrow field or schema to add
|
||||
new columns with NULLs.
|
||||
|
||||
Returns
|
||||
-------
|
||||
AddColumnsResult
|
||||
version: the new version number of the table after adding columns.
|
||||
|
||||
"""
|
||||
if isinstance(transforms, pa.Field):
|
||||
transforms = [transforms]
|
||||
@@ -3849,13 +3780,11 @@ class AsyncTable:
|
||||
):
|
||||
transforms = pa.schema(transforms)
|
||||
if isinstance(transforms, pa.Schema):
|
||||
return await self._inner.add_columns_with_schema(transforms)
|
||||
await self._inner.add_columns_with_schema(transforms)
|
||||
else:
|
||||
return await self._inner.add_columns(list(transforms.items()))
|
||||
await self._inner.add_columns(list(transforms.items()))
|
||||
|
||||
async def alter_columns(
|
||||
self, *alterations: Iterable[dict[str, Any]]
|
||||
) -> AlterColumnsResult:
|
||||
async def alter_columns(self, *alterations: Iterable[dict[str, Any]]):
|
||||
"""
|
||||
Alter column names and nullability.
|
||||
|
||||
@@ -3875,13 +3804,8 @@ class AsyncTable:
|
||||
nullability is not changed. Only non-nullable columns can be changed
|
||||
to nullable. Currently, you cannot change a nullable column to
|
||||
non-nullable.
|
||||
|
||||
Returns
|
||||
-------
|
||||
AlterColumnsResult
|
||||
version: the new version number of the table after the alteration.
|
||||
"""
|
||||
return await self._inner.alter_columns(alterations)
|
||||
await self._inner.alter_columns(alterations)
|
||||
|
||||
async def drop_columns(self, columns: Iterable[str]):
|
||||
"""
|
||||
@@ -3892,7 +3816,7 @@ class AsyncTable:
|
||||
columns : Iterable[str]
|
||||
The names of the columns to drop.
|
||||
"""
|
||||
return await self._inner.drop_columns(columns)
|
||||
await self._inner.drop_columns(columns)
|
||||
|
||||
async def version(self) -> int:
|
||||
"""
|
||||
|
||||
@@ -18,19 +18,19 @@ def test_upsert(mem_db):
|
||||
{"id": 1, "name": "Bobby"},
|
||||
{"id": 2, "name": "Charlie"},
|
||||
]
|
||||
res = (
|
||||
stats = (
|
||||
table.merge_insert("id")
|
||||
.when_matched_update_all()
|
||||
.when_not_matched_insert_all()
|
||||
.execute(new_users)
|
||||
)
|
||||
table.count_rows() # 3
|
||||
res # {'num_inserted_rows': 1, 'num_updated_rows': 1, 'num_deleted_rows': 0}
|
||||
stats # {'num_inserted_rows': 1, 'num_updated_rows': 1, 'num_deleted_rows': 0}
|
||||
# --8<-- [end:upsert_basic]
|
||||
assert table.count_rows() == 3
|
||||
assert res.num_inserted_rows == 1
|
||||
assert res.num_deleted_rows == 0
|
||||
assert res.num_updated_rows == 1
|
||||
assert stats["num_inserted_rows"] == 1
|
||||
assert stats["num_updated_rows"] == 1
|
||||
assert stats["num_deleted_rows"] == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -48,22 +48,19 @@ async def test_upsert_async(mem_db_async):
|
||||
{"id": 1, "name": "Bobby"},
|
||||
{"id": 2, "name": "Charlie"},
|
||||
]
|
||||
res = await (
|
||||
stats = await (
|
||||
table.merge_insert("id")
|
||||
.when_matched_update_all()
|
||||
.when_not_matched_insert_all()
|
||||
.execute(new_users)
|
||||
)
|
||||
await table.count_rows() # 3
|
||||
res
|
||||
# MergeResult(version=2, num_updated_rows=1,
|
||||
# num_inserted_rows=1, num_deleted_rows=0)
|
||||
stats # {'num_inserted_rows': 1, 'num_updated_rows': 1, 'num_deleted_rows': 0}
|
||||
# --8<-- [end:upsert_basic_async]
|
||||
assert await table.count_rows() == 3
|
||||
assert res.version == 2
|
||||
assert res.num_inserted_rows == 1
|
||||
assert res.num_deleted_rows == 0
|
||||
assert res.num_updated_rows == 1
|
||||
assert stats["num_inserted_rows"] == 1
|
||||
assert stats["num_updated_rows"] == 1
|
||||
assert stats["num_deleted_rows"] == 0
|
||||
|
||||
|
||||
def test_insert_if_not_exists(mem_db):
|
||||
@@ -80,19 +77,16 @@ def test_insert_if_not_exists(mem_db):
|
||||
{"domain": "google.com", "name": "Google"},
|
||||
{"domain": "facebook.com", "name": "Facebook"},
|
||||
]
|
||||
res = (
|
||||
stats = (
|
||||
table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
|
||||
)
|
||||
table.count_rows() # 3
|
||||
res
|
||||
# MergeResult(version=2, num_updated_rows=0,
|
||||
# num_inserted_rows=1, num_deleted_rows=0)
|
||||
stats # {'num_inserted_rows': 1, 'num_updated_rows': 0, 'num_deleted_rows': 0}
|
||||
# --8<-- [end:insert_if_not_exists]
|
||||
assert table.count_rows() == 3
|
||||
assert res.version == 2
|
||||
assert res.num_inserted_rows == 1
|
||||
assert res.num_deleted_rows == 0
|
||||
assert res.num_updated_rows == 0
|
||||
assert stats["num_inserted_rows"] == 1
|
||||
assert stats["num_updated_rows"] == 0
|
||||
assert stats["num_deleted_rows"] == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -110,19 +104,16 @@ async def test_insert_if_not_exists_async(mem_db_async):
|
||||
{"domain": "google.com", "name": "Google"},
|
||||
{"domain": "facebook.com", "name": "Facebook"},
|
||||
]
|
||||
res = await (
|
||||
stats = await (
|
||||
table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
|
||||
)
|
||||
await table.count_rows() # 3
|
||||
res
|
||||
# MergeResult(version=2, num_updated_rows=0,
|
||||
# num_inserted_rows=1, num_deleted_rows=0)
|
||||
# --8<-- [end:insert_if_not_exists]
|
||||
stats # {'num_inserted_rows': 1, 'num_updated_rows': 0, 'num_deleted_rows': 0}
|
||||
# --8<-- [end:insert_if_not_exists_async]
|
||||
assert await table.count_rows() == 3
|
||||
assert res.version == 2
|
||||
assert res.num_inserted_rows == 1
|
||||
assert res.num_deleted_rows == 0
|
||||
assert res.num_updated_rows == 0
|
||||
assert stats["num_inserted_rows"] == 1
|
||||
assert stats["num_updated_rows"] == 0
|
||||
assert stats["num_deleted_rows"] == 0
|
||||
|
||||
|
||||
def test_replace_range(mem_db):
|
||||
@@ -140,7 +131,7 @@ def test_replace_range(mem_db):
|
||||
new_chunks = [
|
||||
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
||||
]
|
||||
res = (
|
||||
stats = (
|
||||
table.merge_insert(["doc_id", "chunk_id"])
|
||||
.when_matched_update_all()
|
||||
.when_not_matched_insert_all()
|
||||
@@ -148,15 +139,12 @@ def test_replace_range(mem_db):
|
||||
.execute(new_chunks)
|
||||
)
|
||||
table.count_rows("doc_id = 1") # 1
|
||||
res
|
||||
# MergeResult(version=2, num_updated_rows=1,
|
||||
# num_inserted_rows=0, num_deleted_rows=1)
|
||||
# --8<-- [end:insert_if_not_exists]
|
||||
stats # {'num_inserted_rows': 0, 'num_updated_rows': 1, 'num_deleted_rows': 1}
|
||||
# --8<-- [end:replace_range]
|
||||
assert table.count_rows("doc_id = 1") == 1
|
||||
assert res.version == 2
|
||||
assert res.num_inserted_rows == 0
|
||||
assert res.num_deleted_rows == 1
|
||||
assert res.num_updated_rows == 1
|
||||
assert stats["num_inserted_rows"] == 0
|
||||
assert stats["num_updated_rows"] == 1
|
||||
assert stats["num_deleted_rows"] == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -175,7 +163,7 @@ async def test_replace_range_async(mem_db_async):
|
||||
new_chunks = [
|
||||
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
||||
]
|
||||
res = await (
|
||||
stats = await (
|
||||
table.merge_insert(["doc_id", "chunk_id"])
|
||||
.when_matched_update_all()
|
||||
.when_not_matched_insert_all()
|
||||
@@ -183,12 +171,9 @@ async def test_replace_range_async(mem_db_async):
|
||||
.execute(new_chunks)
|
||||
)
|
||||
await table.count_rows("doc_id = 1") # 1
|
||||
res
|
||||
# MergeResult(version=2, num_updated_rows=1,
|
||||
# num_inserted_rows=0, num_deleted_rows=1)
|
||||
# --8<-- [end:insert_if_not_exists]
|
||||
stats # {'num_inserted_rows': 0, 'num_updated_rows': 1, 'num_deleted_rows': 1}
|
||||
# --8<-- [end:replace_range_async]
|
||||
assert await table.count_rows("doc_id = 1") == 1
|
||||
assert res.version == 2
|
||||
assert res.num_inserted_rows == 0
|
||||
assert res.num_deleted_rows == 1
|
||||
assert res.num_updated_rows == 1
|
||||
assert stats["num_inserted_rows"] == 0
|
||||
assert stats["num_updated_rows"] == 1
|
||||
assert stats["num_deleted_rows"] == 1
|
||||
|
||||
@@ -106,22 +106,15 @@ async def test_update_async(mem_db_async: AsyncConnection):
|
||||
table = await mem_db_async.create_table("some_table", data=[{"id": 0}])
|
||||
assert await table.count_rows("id == 0") == 1
|
||||
assert await table.count_rows("id == 7") == 0
|
||||
update_res = await table.update({"id": 7})
|
||||
assert update_res.rows_updated == 1
|
||||
assert update_res.version == 2
|
||||
await table.update({"id": 7})
|
||||
assert await table.count_rows("id == 7") == 1
|
||||
assert await table.count_rows("id == 0") == 0
|
||||
add_res = await table.add([{"id": 2}])
|
||||
assert add_res.version == 3
|
||||
update_res = await table.update(where="id % 2 == 0", updates_sql={"id": "5"})
|
||||
assert update_res.rows_updated == 1
|
||||
assert update_res.version == 4
|
||||
await table.add([{"id": 2}])
|
||||
await table.update(where="id % 2 == 0", updates_sql={"id": "5"})
|
||||
assert await table.count_rows("id == 7") == 1
|
||||
assert await table.count_rows("id == 2") == 0
|
||||
assert await table.count_rows("id == 5") == 1
|
||||
update_res = await table.update({"id": 10}, where="id == 5")
|
||||
assert update_res.rows_updated == 1
|
||||
assert update_res.version == 5
|
||||
await table.update({"id": 10}, where="id == 5")
|
||||
assert await table.count_rows("id == 10") == 1
|
||||
|
||||
|
||||
@@ -444,8 +437,7 @@ def test_add_pydantic_model(mem_db: DBConnection):
|
||||
content="foo", meta=Metadata(source="bar", timestamp=datetime.now())
|
||||
),
|
||||
)
|
||||
add_res = tbl.add([expected])
|
||||
assert add_res.version == 2
|
||||
tbl.add([expected])
|
||||
|
||||
result = tbl.search([0.0, 0.0]).limit(1).to_pydantic(LanceSchema)[0]
|
||||
assert result == expected
|
||||
@@ -467,12 +459,11 @@ async def test_add_async(mem_db_async: AsyncConnection):
|
||||
],
|
||||
)
|
||||
assert await table.count_rows() == 2
|
||||
add_res = await table.add(
|
||||
await table.add(
|
||||
data=[
|
||||
{"vector": [10.0, 11.0], "item": "baz", "price": 30.0},
|
||||
],
|
||||
)
|
||||
assert add_res.version == 2
|
||||
assert await table.count_rows() == 3
|
||||
|
||||
|
||||
@@ -804,8 +795,7 @@ def test_delete(mem_db: DBConnection):
|
||||
)
|
||||
assert len(table) == 2
|
||||
assert len(table.list_versions()) == 1
|
||||
delete_res = table.delete("id=0")
|
||||
assert delete_res.version == 2
|
||||
table.delete("id=0")
|
||||
assert len(table.list_versions()) == 2
|
||||
assert table.version == 2
|
||||
assert len(table) == 1
|
||||
@@ -819,9 +809,7 @@ def test_update(mem_db: DBConnection):
|
||||
)
|
||||
assert len(table) == 2
|
||||
assert len(table.list_versions()) == 1
|
||||
update_res = table.update(where="id=0", values={"vector": [1.1, 1.1]})
|
||||
assert update_res.version == 2
|
||||
assert update_res.rows_updated == 1
|
||||
table.update(where="id=0", values={"vector": [1.1, 1.1]})
|
||||
assert len(table.list_versions()) == 2
|
||||
assert table.version == 2
|
||||
assert len(table) == 2
|
||||
@@ -910,16 +898,9 @@ def test_merge_insert(mem_db: DBConnection):
|
||||
new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
||||
|
||||
# upsert
|
||||
merge_insert_res = (
|
||||
table.merge_insert("a")
|
||||
.when_matched_update_all()
|
||||
.when_not_matched_insert_all()
|
||||
.execute(new_data)
|
||||
)
|
||||
assert merge_insert_res.version == 2
|
||||
assert merge_insert_res.num_inserted_rows == 1
|
||||
assert merge_insert_res.num_updated_rows == 2
|
||||
assert merge_insert_res.num_deleted_rows == 0
|
||||
table.merge_insert(
|
||||
"a"
|
||||
).when_matched_update_all().when_not_matched_insert_all().execute(new_data)
|
||||
|
||||
expected = pa.table({"a": [1, 2, 3, 4], "b": ["a", "x", "y", "z"]})
|
||||
assert table.to_arrow().sort_by("a") == expected
|
||||
@@ -927,28 +908,17 @@ def test_merge_insert(mem_db: DBConnection):
|
||||
table.restore(version)
|
||||
|
||||
# conditional update
|
||||
merge_insert_res = (
|
||||
table.merge_insert("a")
|
||||
.when_matched_update_all(where="target.b = 'b'")
|
||||
.execute(new_data)
|
||||
table.merge_insert("a").when_matched_update_all(where="target.b = 'b'").execute(
|
||||
new_data
|
||||
)
|
||||
assert merge_insert_res.version == 4
|
||||
assert merge_insert_res.num_inserted_rows == 0
|
||||
assert merge_insert_res.num_updated_rows == 1
|
||||
assert merge_insert_res.num_deleted_rows == 0
|
||||
expected = pa.table({"a": [1, 2, 3], "b": ["a", "x", "c"]})
|
||||
assert table.to_arrow().sort_by("a") == expected
|
||||
|
||||
table.restore(version)
|
||||
|
||||
# insert-if-not-exists
|
||||
merge_insert_res = (
|
||||
table.merge_insert("a").when_not_matched_insert_all().execute(new_data)
|
||||
)
|
||||
assert merge_insert_res.version == 6
|
||||
assert merge_insert_res.num_inserted_rows == 1
|
||||
assert merge_insert_res.num_updated_rows == 0
|
||||
assert merge_insert_res.num_deleted_rows == 0
|
||||
table.merge_insert("a").when_not_matched_insert_all().execute(new_data)
|
||||
|
||||
expected = pa.table({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "z"]})
|
||||
assert table.to_arrow().sort_by("a") == expected
|
||||
|
||||
@@ -957,17 +927,13 @@ def test_merge_insert(mem_db: DBConnection):
|
||||
new_data = pa.table({"a": [2, 4], "b": ["x", "z"]})
|
||||
|
||||
# replace-range
|
||||
merge_insert_res = (
|
||||
(
|
||||
table.merge_insert("a")
|
||||
.when_matched_update_all()
|
||||
.when_not_matched_insert_all()
|
||||
.when_not_matched_by_source_delete("a > 2")
|
||||
.execute(new_data)
|
||||
)
|
||||
assert merge_insert_res.version == 8
|
||||
assert merge_insert_res.num_inserted_rows == 1
|
||||
assert merge_insert_res.num_updated_rows == 1
|
||||
assert merge_insert_res.num_deleted_rows == 1
|
||||
|
||||
expected = pa.table({"a": [1, 2, 4], "b": ["a", "x", "z"]})
|
||||
assert table.to_arrow().sort_by("a") == expected
|
||||
@@ -975,17 +941,11 @@ def test_merge_insert(mem_db: DBConnection):
|
||||
table.restore(version)
|
||||
|
||||
# replace-range no condition
|
||||
merge_insert_res = (
|
||||
table.merge_insert("a")
|
||||
.when_matched_update_all()
|
||||
.when_not_matched_insert_all()
|
||||
.when_not_matched_by_source_delete()
|
||||
.execute(new_data)
|
||||
table.merge_insert(
|
||||
"a"
|
||||
).when_matched_update_all().when_not_matched_insert_all().when_not_matched_by_source_delete().execute(
|
||||
new_data
|
||||
)
|
||||
assert merge_insert_res.version == 10
|
||||
assert merge_insert_res.num_inserted_rows == 1
|
||||
assert merge_insert_res.num_updated_rows == 1
|
||||
assert merge_insert_res.num_deleted_rows == 2
|
||||
|
||||
expected = pa.table({"a": [2, 4], "b": ["x", "z"]})
|
||||
assert table.to_arrow().sort_by("a") == expected
|
||||
@@ -1518,13 +1478,11 @@ def test_restore_consistency(tmp_path):
|
||||
def test_add_columns(mem_db: DBConnection):
|
||||
data = pa.table({"id": [0, 1]})
|
||||
table = LanceTable.create(mem_db, "my_table", data=data)
|
||||
add_columns_res = table.add_columns({"new_col": "id + 2"})
|
||||
assert add_columns_res.version == 2
|
||||
table.add_columns({"new_col": "id + 2"})
|
||||
assert table.to_arrow().column_names == ["id", "new_col"]
|
||||
assert table.to_arrow()["new_col"].to_pylist() == [2, 3]
|
||||
|
||||
add_columns_res = table.add_columns({"null_int": "cast(null as bigint)"})
|
||||
assert add_columns_res.version == 3
|
||||
table.add_columns({"null_int": "cast(null as bigint)"})
|
||||
assert table.schema.field("null_int").type == pa.int64()
|
||||
|
||||
|
||||
@@ -1532,8 +1490,7 @@ def test_add_columns(mem_db: DBConnection):
|
||||
async def test_add_columns_async(mem_db_async: AsyncConnection):
|
||||
data = pa.table({"id": [0, 1]})
|
||||
table = await mem_db_async.create_table("my_table", data=data)
|
||||
add_columns_res = await table.add_columns({"new_col": "id + 2"})
|
||||
assert add_columns_res.version == 2
|
||||
await table.add_columns({"new_col": "id + 2"})
|
||||
data = await table.to_arrow()
|
||||
assert data.column_names == ["id", "new_col"]
|
||||
assert data["new_col"].to_pylist() == [2, 3]
|
||||
@@ -1543,10 +1500,9 @@ async def test_add_columns_async(mem_db_async: AsyncConnection):
|
||||
async def test_add_columns_with_schema(mem_db_async: AsyncConnection):
|
||||
data = pa.table({"id": [0, 1]})
|
||||
table = await mem_db_async.create_table("my_table", data=data)
|
||||
add_columns_res = await table.add_columns(
|
||||
await table.add_columns(
|
||||
[pa.field("x", pa.int64()), pa.field("vector", pa.list_(pa.float32(), 8))]
|
||||
)
|
||||
assert add_columns_res.version == 2
|
||||
|
||||
assert await table.schema() == pa.schema(
|
||||
[
|
||||
@@ -1557,12 +1513,11 @@ async def test_add_columns_with_schema(mem_db_async: AsyncConnection):
|
||||
)
|
||||
|
||||
table = await mem_db_async.create_table("table2", data=data)
|
||||
add_columns_res = await table.add_columns(
|
||||
await table.add_columns(
|
||||
pa.schema(
|
||||
[pa.field("y", pa.int64()), pa.field("emb", pa.list_(pa.float32(), 8))]
|
||||
)
|
||||
)
|
||||
assert add_columns_res.version == 2
|
||||
assert await table.schema() == pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
@@ -1575,8 +1530,7 @@ async def test_add_columns_with_schema(mem_db_async: AsyncConnection):
|
||||
def test_alter_columns(mem_db: DBConnection):
|
||||
data = pa.table({"id": [0, 1]})
|
||||
table = mem_db.create_table("my_table", data=data)
|
||||
alter_columns_res = table.alter_columns({"path": "id", "rename": "new_id"})
|
||||
assert alter_columns_res.version == 2
|
||||
table.alter_columns({"path": "id", "rename": "new_id"})
|
||||
assert table.to_arrow().column_names == ["new_id"]
|
||||
|
||||
|
||||
@@ -1584,13 +1538,9 @@ def test_alter_columns(mem_db: DBConnection):
|
||||
async def test_alter_columns_async(mem_db_async: AsyncConnection):
|
||||
data = pa.table({"id": [0, 1]})
|
||||
table = await mem_db_async.create_table("my_table", data=data)
|
||||
alter_columns_res = await table.alter_columns({"path": "id", "rename": "new_id"})
|
||||
assert alter_columns_res.version == 2
|
||||
await table.alter_columns({"path": "id", "rename": "new_id"})
|
||||
assert (await table.to_arrow()).column_names == ["new_id"]
|
||||
alter_columns_res = await table.alter_columns(
|
||||
dict(path="new_id", data_type=pa.int16(), nullable=True)
|
||||
)
|
||||
assert alter_columns_res.version == 3
|
||||
await table.alter_columns(dict(path="new_id", data_type=pa.int16(), nullable=True))
|
||||
data = await table.to_arrow()
|
||||
assert data.column(0).type == pa.int16()
|
||||
assert data.schema.field(0).nullable
|
||||
@@ -1599,8 +1549,7 @@ async def test_alter_columns_async(mem_db_async: AsyncConnection):
|
||||
def test_drop_columns(mem_db: DBConnection):
|
||||
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
|
||||
table = mem_db.create_table("my_table", data=data)
|
||||
drop_columns_res = table.drop_columns(["category"])
|
||||
assert drop_columns_res.version == 2
|
||||
table.drop_columns(["category"])
|
||||
assert table.to_arrow().column_names == ["id"]
|
||||
|
||||
|
||||
@@ -1608,8 +1557,7 @@ def test_drop_columns(mem_db: DBConnection):
|
||||
async def test_drop_columns_async(mem_db_async: AsyncConnection):
|
||||
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
|
||||
table = await mem_db_async.create_table("my_table", data=data)
|
||||
drop_columns_res = await table.drop_columns(["category"])
|
||||
assert drop_columns_res.version == 2
|
||||
await table.drop_columns(["category"])
|
||||
assert (await table.to_arrow()).column_names == ["id"]
|
||||
|
||||
|
||||
|
||||
@@ -11,10 +11,7 @@ use pyo3::{
|
||||
wrap_pyfunction, Bound, PyResult, Python,
|
||||
};
|
||||
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
||||
use table::{
|
||||
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
|
||||
Table, UpdateResult,
|
||||
};
|
||||
use table::Table;
|
||||
|
||||
pub mod arrow;
|
||||
pub mod connection;
|
||||
@@ -38,13 +35,6 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_class::<HybridQuery>()?;
|
||||
m.add_class::<VectorQuery>()?;
|
||||
m.add_class::<RecordBatchStream>()?;
|
||||
m.add_class::<AddColumnsResult>()?;
|
||||
m.add_class::<AlterColumnsResult>()?;
|
||||
m.add_class::<AddResult>()?;
|
||||
m.add_class::<MergeResult>()?;
|
||||
m.add_class::<DeleteResult>()?;
|
||||
m.add_class::<DropColumnsResult>()?;
|
||||
m.add_class::<UpdateResult>()?;
|
||||
m.add_function(wrap_pyfunction!(connect, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
|
||||
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
|
||||
|
||||
@@ -58,170 +58,6 @@ pub struct OptimizeStats {
|
||||
pub prune: RemovalStats,
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UpdateResult {
|
||||
pub rows_updated: u64,
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl UpdateResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!(
|
||||
"UpdateResult(rows_updated={}, version={})",
|
||||
self.rows_updated, self.version
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::table::UpdateResult> for UpdateResult {
|
||||
fn from(result: lancedb::table::UpdateResult) -> Self {
|
||||
Self {
|
||||
rows_updated: result.rows_updated,
|
||||
version: result.version,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AddResult {
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl AddResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!("AddResult(version={})", self.version)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::table::AddResult> for AddResult {
|
||||
fn from(result: lancedb::table::AddResult) -> Self {
|
||||
Self {
|
||||
version: result.version,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DeleteResult {
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl DeleteResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!("DeleteResult(version={})", self.version)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||
fn from(result: lancedb::table::DeleteResult) -> Self {
|
||||
Self {
|
||||
version: result.version,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MergeResult {
|
||||
pub version: u64,
|
||||
pub num_updated_rows: u64,
|
||||
pub num_inserted_rows: u64,
|
||||
pub num_deleted_rows: u64,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl MergeResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!(
|
||||
"MergeResult(version={}, num_updated_rows={}, num_inserted_rows={}, num_deleted_rows={})",
|
||||
self.version,
|
||||
self.num_updated_rows,
|
||||
self.num_inserted_rows,
|
||||
self.num_deleted_rows
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::table::MergeResult> for MergeResult {
|
||||
fn from(result: lancedb::table::MergeResult) -> Self {
|
||||
Self {
|
||||
version: result.version,
|
||||
num_updated_rows: result.num_updated_rows,
|
||||
num_inserted_rows: result.num_inserted_rows,
|
||||
num_deleted_rows: result.num_deleted_rows,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AddColumnsResult {
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl AddColumnsResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!("AddColumnsResult(version={})", self.version)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::table::AddColumnsResult> for AddColumnsResult {
|
||||
fn from(result: lancedb::table::AddColumnsResult) -> Self {
|
||||
Self {
|
||||
version: result.version,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AlterColumnsResult {
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl AlterColumnsResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!("AlterColumnsResult(version={})", self.version)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::table::AlterColumnsResult> for AlterColumnsResult {
|
||||
fn from(result: lancedb::table::AlterColumnsResult) -> Self {
|
||||
Self {
|
||||
version: result.version,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DropColumnsResult {
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl DropColumnsResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!("DropColumnsResult(version={})", self.version)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::table::DropColumnsResult> for DropColumnsResult {
|
||||
fn from(result: lancedb::table::DropColumnsResult) -> Self {
|
||||
Self {
|
||||
version: result.version,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
pub struct Table {
|
||||
// We keep a copy of the name to use if the inner table is dropped
|
||||
@@ -296,16 +132,15 @@ impl Table {
|
||||
}
|
||||
|
||||
future_into_py(self_.py(), async move {
|
||||
let result = op.execute().await.infer_error()?;
|
||||
Ok(AddResult::from(result))
|
||||
op.execute().await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn delete(self_: PyRef<'_, Self>, condition: String) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let result = inner.delete(&condition).await.infer_error()?;
|
||||
Ok(DeleteResult::from(result))
|
||||
inner.delete(&condition).await.infer_error()
|
||||
})
|
||||
}
|
||||
|
||||
@@ -325,8 +160,8 @@ impl Table {
|
||||
op = op.column(column_name, value);
|
||||
}
|
||||
future_into_py(self_.py(), async move {
|
||||
let result = op.execute().await.infer_error()?;
|
||||
Ok(UpdateResult::from(result))
|
||||
op.execute().await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
@@ -654,8 +489,14 @@ impl Table {
|
||||
}
|
||||
|
||||
future_into_py(self_.py(), async move {
|
||||
let res = builder.execute(Box::new(batches)).await.infer_error()?;
|
||||
Ok(MergeResult::from(res))
|
||||
let stats = builder.execute(Box::new(batches)).await.infer_error()?;
|
||||
Python::with_gil(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("num_inserted_rows", stats.num_inserted_rows)?;
|
||||
dict.set_item("num_updated_rows", stats.num_updated_rows)?;
|
||||
dict.set_item("num_deleted_rows", stats.num_deleted_rows)?;
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -691,8 +532,8 @@ impl Table {
|
||||
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let result = inner.add_columns(definitions, None).await.infer_error()?;
|
||||
Ok(AddColumnsResult::from(result))
|
||||
inner.add_columns(definitions, None).await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
@@ -705,8 +546,8 @@ impl Table {
|
||||
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let result = inner.add_columns(transform, None).await.infer_error()?;
|
||||
Ok(AddColumnsResult::from(result))
|
||||
inner.add_columns(transform, None).await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
@@ -749,8 +590,8 @@ impl Table {
|
||||
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let result = inner.alter_columns(&alterations).await.infer_error()?;
|
||||
Ok(AlterColumnsResult::from(result))
|
||||
inner.alter_columns(&alterations).await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
@@ -758,8 +599,8 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let column_refs = columns.iter().map(String::as_str).collect::<Vec<&str>>();
|
||||
let result = inner.drop_columns(&column_refs).await.infer_error()?;
|
||||
Ok(DropColumnsResult::from(result))
|
||||
inner.drop_columns(&column_refs).await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -4,14 +4,7 @@
|
||||
use crate::index::Index;
|
||||
use crate::index::IndexStatistics;
|
||||
use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
|
||||
use crate::table::AddColumnsResult;
|
||||
use crate::table::AddResult;
|
||||
use crate::table::AlterColumnsResult;
|
||||
use crate::table::DeleteResult;
|
||||
use crate::table::DropColumnsResult;
|
||||
use crate::table::MergeResult;
|
||||
use crate::table::Tags;
|
||||
use crate::table::UpdateResult;
|
||||
use crate::table::{AddDataMode, AnyQuery, Filter, TableStatistics};
|
||||
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
|
||||
use crate::{DistanceType, Error, Table};
|
||||
@@ -54,6 +47,7 @@ use crate::{
|
||||
TableDefinition, UpdateBuilder,
|
||||
},
|
||||
};
|
||||
use lance::dataset::MergeStats;
|
||||
|
||||
const REQUEST_TIMEOUT_HEADER: HeaderName = HeaderName::from_static("x-request-timeout-ms");
|
||||
|
||||
@@ -741,7 +735,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
&self,
|
||||
add: AddDataBuilder<NoData>,
|
||||
data: Box<dyn RecordBatchReader + Send>,
|
||||
) -> Result<AddResult> {
|
||||
) -> Result<()> {
|
||||
self.check_mutable().await?;
|
||||
let mut request = self
|
||||
.client
|
||||
@@ -756,20 +750,9 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
}
|
||||
|
||||
let (request_id, response) = self.send_streaming(request, data, true).await?;
|
||||
let response = self.check_table_response(&request_id, response).await?;
|
||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
|
||||
if body.trim().is_empty() || body == "{}" {
|
||||
// Backward compatible with old servers
|
||||
return Ok(AddResult { version: 0 });
|
||||
}
|
||||
|
||||
let add_response: AddResult = serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||
source: format!("Failed to parse add response: {}", e).into(),
|
||||
request_id,
|
||||
status_code: None,
|
||||
})?;
|
||||
Ok(add_response)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_plan(
|
||||
@@ -902,7 +885,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
Ok(final_analyze)
|
||||
}
|
||||
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<u64> {
|
||||
self.check_mutable().await?;
|
||||
let request = self
|
||||
.client
|
||||
@@ -919,28 +902,13 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
}));
|
||||
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
let response = self.check_table_response(&request_id, response).await?;
|
||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||
|
||||
if body.trim().is_empty() || body == "{}" {
|
||||
// Backward compatible with old servers
|
||||
return Ok(UpdateResult {
|
||||
rows_updated: 0,
|
||||
version: 0,
|
||||
});
|
||||
}
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
|
||||
let update_response: UpdateResult =
|
||||
serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||
source: format!("Failed to parse update response: {}", e).into(),
|
||||
request_id,
|
||||
status_code: None,
|
||||
})?;
|
||||
|
||||
Ok(update_response)
|
||||
Ok(0) // TODO: support returning number of modified rows once supported in SaaS.
|
||||
}
|
||||
|
||||
async fn delete(&self, predicate: &str) -> Result<DeleteResult> {
|
||||
async fn delete(&self, predicate: &str) -> Result<()> {
|
||||
self.check_mutable().await?;
|
||||
let body = serde_json::json!({ "predicate": predicate });
|
||||
let request = self
|
||||
@@ -948,21 +916,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
.post(&format!("/v1/table/{}/delete/", self.name))
|
||||
.json(&body);
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
let response = self.check_table_response(&request_id, response).await?;
|
||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||
|
||||
if body == "{}" {
|
||||
// Backward compatible with old servers
|
||||
return Ok(DeleteResult { version: 0 });
|
||||
}
|
||||
|
||||
let delete_response: DeleteResult =
|
||||
serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||
source: format!("Failed to parse delete response: {}", e).into(),
|
||||
request_id,
|
||||
status_code: None,
|
||||
})?;
|
||||
Ok(delete_response)
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_index(&self, mut index: IndexBuilder) -> Result<()> {
|
||||
@@ -1068,7 +1023,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
&self,
|
||||
params: MergeInsertBuilder,
|
||||
new_data: Box<dyn RecordBatchReader + Send>,
|
||||
) -> Result<MergeResult> {
|
||||
) -> Result<MergeStats> {
|
||||
self.check_mutable().await?;
|
||||
|
||||
let query = MergeInsertRequest::try_from(params)?;
|
||||
@@ -1080,27 +1035,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
|
||||
let (request_id, response) = self.send_streaming(request, new_data, true).await?;
|
||||
|
||||
let response = self.check_table_response(&request_id, response).await?;
|
||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||
|
||||
if body.trim().is_empty() || body == "{}" {
|
||||
// Backward compatible with old servers
|
||||
return Ok(MergeResult {
|
||||
version: 0,
|
||||
num_deleted_rows: 0,
|
||||
num_inserted_rows: 0,
|
||||
num_updated_rows: 0,
|
||||
});
|
||||
}
|
||||
|
||||
let merge_insert_response: MergeResult =
|
||||
serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||
source: format!("Failed to parse merge_insert response: {}", e).into(),
|
||||
request_id,
|
||||
status_code: None,
|
||||
})?;
|
||||
|
||||
Ok(merge_insert_response)
|
||||
// TODO: server can response with these stats in response body.
|
||||
// We should test that we can handle both empty response from old server
|
||||
// and response with stats from new server.
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(MergeStats::default())
|
||||
}
|
||||
|
||||
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
|
||||
@@ -1123,7 +1062,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
&self,
|
||||
transforms: NewColumnTransform,
|
||||
_read_columns: Option<Vec<String>>,
|
||||
) -> Result<AddColumnsResult> {
|
||||
) -> Result<()> {
|
||||
self.check_mutable().await?;
|
||||
match transforms {
|
||||
NewColumnTransform::SqlExpressions(expressions) => {
|
||||
@@ -1141,23 +1080,9 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
.client
|
||||
.post(&format!("/v1/table/{}/add_columns/", self.name))
|
||||
.json(&body);
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
let response = self.check_table_response(&request_id, response).await?;
|
||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||
|
||||
if body.trim().is_empty() || body == "{}" {
|
||||
// Backward compatible with old servers
|
||||
return Ok(AddColumnsResult { version: 0 });
|
||||
}
|
||||
|
||||
let result: AddColumnsResult =
|
||||
serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||
source: format!("Failed to parse add_columns response: {}", e).into(),
|
||||
request_id,
|
||||
status_code: None,
|
||||
})?;
|
||||
|
||||
Ok(result)
|
||||
let (request_id, response) = self.send(request, true).await?; // todo:
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(())
|
||||
}
|
||||
_ => {
|
||||
return Err(Error::NotSupported {
|
||||
@@ -1167,7 +1092,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
}
|
||||
}
|
||||
|
||||
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<AlterColumnsResult> {
|
||||
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()> {
|
||||
self.check_mutable().await?;
|
||||
let body = alterations
|
||||
.iter()
|
||||
@@ -1195,24 +1120,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
.post(&format!("/v1/table/{}/alter_columns/", self.name))
|
||||
.json(&body);
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
let response = self.check_table_response(&request_id, response).await?;
|
||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||
|
||||
if body.trim().is_empty() || body == "{}" {
|
||||
// Backward compatible with old servers
|
||||
return Ok(AlterColumnsResult { version: 0 });
|
||||
}
|
||||
|
||||
let result: AlterColumnsResult = serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||
source: format!("Failed to parse alter_columns response: {}", e).into(),
|
||||
request_id,
|
||||
status_code: None,
|
||||
})?;
|
||||
|
||||
Ok(result)
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
|
||||
async fn drop_columns(&self, columns: &[&str]) -> Result<()> {
|
||||
self.check_mutable().await?;
|
||||
let body = serde_json::json!({ "columns": columns });
|
||||
let request = self
|
||||
@@ -1220,21 +1132,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
.post(&format!("/v1/table/{}/drop_columns/", self.name))
|
||||
.json(&body);
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
let response = self.check_table_response(&request_id, response).await?;
|
||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||
|
||||
if body.trim().is_empty() || body == "{}" {
|
||||
// Backward compatible with old servers
|
||||
return Ok(DropColumnsResult { version: 0 });
|
||||
}
|
||||
|
||||
let result: DropColumnsResult = serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||
source: format!("Failed to parse drop_columns response: {}", e).into(),
|
||||
request_id,
|
||||
status_code: None,
|
||||
})?;
|
||||
|
||||
Ok(result)
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
|
||||
@@ -1458,20 +1357,16 @@ mod tests {
|
||||
.execute(example_data())
|
||||
.map_ok(|_| ()),
|
||||
),
|
||||
Box::pin(table.delete("false").map_ok(|_| ())),
|
||||
Box::pin(
|
||||
table
|
||||
.add_columns(
|
||||
NewColumnTransform::SqlExpressions(vec![("x".into(), "y".into())]),
|
||||
None,
|
||||
)
|
||||
.map_ok(|_| ()),
|
||||
),
|
||||
Box::pin(table.delete("false")),
|
||||
Box::pin(table.add_columns(
|
||||
NewColumnTransform::SqlExpressions(vec![("x".into(), "y".into())]),
|
||||
None,
|
||||
)),
|
||||
Box::pin(async {
|
||||
let alterations = vec![ColumnAlteration::new("x".into()).rename("y".into())];
|
||||
table.alter_columns(&alterations).await.map(|_| ())
|
||||
table.alter_columns(&alterations).await
|
||||
}),
|
||||
Box::pin(table.drop_columns(&["a"]).map_ok(|_| ())),
|
||||
Box::pin(table.drop_columns(&["a"])),
|
||||
// TODO: other endpoints.
|
||||
];
|
||||
|
||||
@@ -1602,11 +1497,8 @@ mod tests {
|
||||
body
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
#[tokio::test]
|
||||
async fn test_add_append(#[case] old_server: bool) {
|
||||
async fn test_add_append() {
|
||||
let data = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
@@ -1615,55 +1507,42 @@ mod tests {
|
||||
|
||||
let (sender, receiver) = std::sync::mpsc::channel();
|
||||
let table = Table::new_with_handler("my_table", move |mut request| {
|
||||
if request.url().path() == "/v1/table/my_table/insert/" {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert!(request
|
||||
.url()
|
||||
.query_pairs()
|
||||
.filter(|(k, _)| k == "mode")
|
||||
.all(|(_, v)| v == "append"));
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/insert/");
|
||||
// If mode is specified, it should be "append". Append is default
|
||||
// so it's not required.
|
||||
assert!(request
|
||||
.url()
|
||||
.query_pairs()
|
||||
.filter(|(k, _)| k == "mode")
|
||||
.all(|(_, v)| v == "append"));
|
||||
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
ARROW_STREAM_CONTENT_TYPE
|
||||
);
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
ARROW_STREAM_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let mut body_out = reqwest::Body::from(Vec::new());
|
||||
std::mem::swap(request.body_mut().as_mut().unwrap(), &mut body_out);
|
||||
sender.send(body_out).unwrap();
|
||||
let mut body_out = reqwest::Body::from(Vec::new());
|
||||
std::mem::swap(request.body_mut().as_mut().unwrap(), &mut body_out);
|
||||
sender.send(body_out).unwrap();
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
} else {
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 43}"#)
|
||||
.unwrap()
|
||||
}
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
}
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
});
|
||||
|
||||
let result = table
|
||||
table
|
||||
.add(RecordBatchIterator::new([Ok(data.clone())], data.schema()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
|
||||
let body = receiver.recv().unwrap();
|
||||
let body = collect_body(body).await;
|
||||
let expected_body = write_ipc_stream(&data);
|
||||
assert_eq!(&body, &expected_body);
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
#[tokio::test]
|
||||
async fn test_add_overwrite(#[case] old_server: bool) {
|
||||
async fn test_add_overwrite() {
|
||||
let data = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
@@ -1694,78 +1573,56 @@ mod tests {
|
||||
std::mem::swap(request.body_mut().as_mut().unwrap(), &mut body_out);
|
||||
sender.send(body_out).unwrap();
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
} else {
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 43}"#)
|
||||
.unwrap()
|
||||
}
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
});
|
||||
|
||||
let result = table
|
||||
table
|
||||
.add(RecordBatchIterator::new([Ok(data.clone())], data.schema()))
|
||||
.mode(AddDataMode::Overwrite)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
|
||||
let body = receiver.recv().unwrap();
|
||||
let body = collect_body(body).await;
|
||||
let expected_body = write_ipc_stream(&data);
|
||||
assert_eq!(&body, &expected_body);
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
#[tokio::test]
|
||||
async fn test_update(#[case] old_server: bool) {
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
if request.url().path() == "/v1/table/my_table/update/" {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
async fn test_update() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/update/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
if let Some(body) = request.body().unwrap().as_bytes() {
|
||||
let body = std::str::from_utf8(body).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||
let updates = value.get("updates").unwrap().as_array().unwrap();
|
||||
assert!(updates.len() == 2);
|
||||
if let Some(body) = request.body().unwrap().as_bytes() {
|
||||
let body = std::str::from_utf8(body).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||
let updates = value.get("updates").unwrap().as_array().unwrap();
|
||||
assert!(updates.len() == 2);
|
||||
|
||||
let col_name = updates[0][0].as_str().unwrap();
|
||||
let expression = updates[0][1].as_str().unwrap();
|
||||
assert_eq!(col_name, "a");
|
||||
assert_eq!(expression, "a + 1");
|
||||
let col_name = updates[0][0].as_str().unwrap();
|
||||
let expression = updates[0][1].as_str().unwrap();
|
||||
assert_eq!(col_name, "a");
|
||||
assert_eq!(expression, "a + 1");
|
||||
|
||||
let col_name = updates[1][0].as_str().unwrap();
|
||||
let expression = updates[1][1].as_str().unwrap();
|
||||
assert_eq!(col_name, "b");
|
||||
assert_eq!(expression, "b - 1");
|
||||
let col_name = updates[1][0].as_str().unwrap();
|
||||
let expression = updates[1][1].as_str().unwrap();
|
||||
assert_eq!(col_name, "b");
|
||||
assert_eq!(expression, "b - 1");
|
||||
|
||||
let only_if = value.get("predicate").unwrap().as_str().unwrap();
|
||||
assert_eq!(only_if, "b > 10");
|
||||
}
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
} else {
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"rows_updated": 5, "version": 43}"#)
|
||||
.unwrap()
|
||||
}
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
let only_if = value.get("predicate").unwrap().as_str().unwrap();
|
||||
assert_eq!(only_if, "b > 10");
|
||||
}
|
||||
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
|
||||
let result = table
|
||||
table
|
||||
.update()
|
||||
.column("a", "a + 1")
|
||||
.column("b", "b - 1")
|
||||
@@ -1773,73 +1630,10 @@ mod tests {
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
assert_eq!(result.rows_updated, if old_server { 0 } else { 5 });
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
#[tokio::test]
|
||||
async fn test_alter_columns(#[case] old_server: bool) {
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
if request.url().path() == "/v1/table/my_table/alter_columns/" {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body = std::str::from_utf8(body).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||
let alterations = value.get("alterations").unwrap().as_array().unwrap();
|
||||
assert!(alterations.len() == 2);
|
||||
|
||||
let path = alterations[0]["path"].as_str().unwrap();
|
||||
let data_type = alterations[0]["data_type"]["type"].as_str().unwrap();
|
||||
assert_eq!(path, "b.c");
|
||||
assert_eq!(data_type, "int32");
|
||||
|
||||
let path = alterations[1]["path"].as_str().unwrap();
|
||||
let nullable = alterations[1]["nullable"].as_bool().unwrap();
|
||||
let rename = alterations[1]["rename"].as_str().unwrap();
|
||||
assert_eq!(path, "x");
|
||||
assert!(nullable);
|
||||
assert_eq!(rename, "y");
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
} else {
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 43}"#)
|
||||
.unwrap()
|
||||
}
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
}
|
||||
});
|
||||
|
||||
let result = table
|
||||
.alter_columns(&[
|
||||
ColumnAlteration::new("b.c".into()).cast_to(DataType::Int32),
|
||||
ColumnAlteration::new("x".into())
|
||||
.rename("y".into())
|
||||
.set_nullable(true),
|
||||
])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
#[tokio::test]
|
||||
async fn test_merge_insert(#[case] old_server: bool) {
|
||||
async fn test_merge_insert() {
|
||||
let batch = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
@@ -1850,43 +1644,66 @@ mod tests {
|
||||
batch.schema(),
|
||||
));
|
||||
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
if request.url().path() == "/v1/table/my_table/merge_insert/" {
|
||||
assert_eq!(request.method(), "POST");
|
||||
// Default parameters
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/merge_insert/");
|
||||
|
||||
let params = request.url().query_pairs().collect::<HashMap<_, _>>();
|
||||
assert_eq!(params["on"], "some_col");
|
||||
assert_eq!(params["when_matched_update_all"], "false");
|
||||
assert_eq!(params["when_not_matched_insert_all"], "false");
|
||||
assert_eq!(params["when_not_matched_by_source_delete"], "false");
|
||||
assert!(!params.contains_key("when_matched_update_all_filt"));
|
||||
assert!(!params.contains_key("when_not_matched_by_source_delete_filt"));
|
||||
let params = request.url().query_pairs().collect::<HashMap<_, _>>();
|
||||
assert_eq!(params["on"], "some_col");
|
||||
assert_eq!(params["when_matched_update_all"], "false");
|
||||
assert_eq!(params["when_not_matched_insert_all"], "false");
|
||||
assert_eq!(params["when_not_matched_by_source_delete"], "false");
|
||||
assert!(!params.contains_key("when_matched_update_all_filt"));
|
||||
assert!(!params.contains_key("when_not_matched_by_source_delete_filt"));
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
} else {
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 43, "num_deleted_rows": 0, "num_inserted_rows": 3, "num_updated_rows": 0}"#)
|
||||
.unwrap()
|
||||
}
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
}
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
});
|
||||
|
||||
let result = table
|
||||
table
|
||||
.merge_insert(&["some_col"])
|
||||
.execute(data)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
if !old_server {
|
||||
assert_eq!(result.num_deleted_rows, 0);
|
||||
assert_eq!(result.num_inserted_rows, 3);
|
||||
assert_eq!(result.num_updated_rows, 0);
|
||||
}
|
||||
// All parameters specified
|
||||
let (sender, receiver) = std::sync::mpsc::channel();
|
||||
let table = Table::new_with_handler("my_table", move |mut request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/merge_insert/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
ARROW_STREAM_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let params = request.url().query_pairs().collect::<HashMap<_, _>>();
|
||||
assert_eq!(params["on"], "some_col");
|
||||
assert_eq!(params["when_matched_update_all"], "true");
|
||||
assert_eq!(params["when_not_matched_insert_all"], "false");
|
||||
assert_eq!(params["when_not_matched_by_source_delete"], "true");
|
||||
assert_eq!(params["when_matched_update_all_filt"], "a = 1");
|
||||
assert_eq!(params["when_not_matched_by_source_delete_filt"], "b = 2");
|
||||
|
||||
let mut body_out = reqwest::Body::from(Vec::new());
|
||||
std::mem::swap(request.body_mut().as_mut().unwrap(), &mut body_out);
|
||||
sender.send(body_out).unwrap();
|
||||
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
});
|
||||
let mut builder = table.merge_insert(&["some_col"]);
|
||||
builder
|
||||
.when_matched_update_all(Some("a = 1".into()))
|
||||
.when_not_matched_by_source_delete(Some("b = 2".into()));
|
||||
let data = Box::new(RecordBatchIterator::new(
|
||||
[Ok(batch.clone())],
|
||||
batch.schema(),
|
||||
));
|
||||
builder.execute(data).await.unwrap();
|
||||
|
||||
let body = receiver.recv().unwrap();
|
||||
let body = collect_body(body).await;
|
||||
let expected_body = write_ipc_stream(&batch);
|
||||
assert_eq!(&body, &expected_body);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1925,80 +1742,25 @@ mod tests {
|
||||
assert!(e.to_string().contains("Hit retry limit"));
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
#[tokio::test]
|
||||
async fn test_delete(#[case] old_server: bool) {
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
if request.url().path() == "/v1/table/my_table/delete/" {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
async fn test_delete() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/delete/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let predicate = body.get("predicate").unwrap().as_str().unwrap();
|
||||
assert_eq!(predicate, "id in (1, 2, 3)");
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let predicate = body.get("predicate").unwrap().as_str().unwrap();
|
||||
assert_eq!(predicate, "id in (1, 2, 3)");
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
} else {
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 43}"#)
|
||||
.unwrap()
|
||||
}
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
}
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
});
|
||||
|
||||
let result = table.delete("id in (1, 2, 3)").await.unwrap();
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
#[tokio::test]
|
||||
async fn test_drop_columns(#[case] old_server: bool) {
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
if request.url().path() == "/v1/table/my_table/drop_columns/" {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body = std::str::from_utf8(body).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||
let columns = value.get("columns").unwrap().as_array().unwrap();
|
||||
assert!(columns.len() == 2);
|
||||
|
||||
let col1 = columns[0].as_str().unwrap();
|
||||
let col2 = columns[1].as_str().unwrap();
|
||||
assert_eq!(col1, "a");
|
||||
assert_eq!(col2, "b");
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
} else {
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 43}"#)
|
||||
.unwrap()
|
||||
}
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
}
|
||||
});
|
||||
|
||||
let result = table.drop_columns(&["a", "b"]).await.unwrap();
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
table.delete("id in (1, 2, 3)").await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -2815,49 +2577,36 @@ mod tests {
|
||||
assert!(matches!(res, Err(Error::NotSupported { .. })));
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
#[tokio::test]
|
||||
async fn test_add_columns(#[case] old_server: bool) {
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
if request.url().path() == "/v1/table/my_table/add_columns/" {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
async fn test_add_columns() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/add_columns/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body = std::str::from_utf8(body).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||
let new_columns = value.get("new_columns").unwrap().as_array().unwrap();
|
||||
assert!(new_columns.len() == 2);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body = std::str::from_utf8(body).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||
let new_columns = value.get("new_columns").unwrap().as_array().unwrap();
|
||||
assert!(new_columns.len() == 2);
|
||||
|
||||
let col_name = new_columns[0]["name"].as_str().unwrap();
|
||||
let expression = new_columns[0]["expression"].as_str().unwrap();
|
||||
assert_eq!(col_name, "b");
|
||||
assert_eq!(expression, "a + 1");
|
||||
let col_name = new_columns[0]["name"].as_str().unwrap();
|
||||
let expression = new_columns[0]["expression"].as_str().unwrap();
|
||||
assert_eq!(col_name, "b");
|
||||
assert_eq!(expression, "a + 1");
|
||||
|
||||
let col_name = new_columns[1]["name"].as_str().unwrap();
|
||||
let expression = new_columns[1]["expression"].as_str().unwrap();
|
||||
assert_eq!(col_name, "x");
|
||||
assert_eq!(expression, "cast(NULL as int32)");
|
||||
let col_name = new_columns[1]["name"].as_str().unwrap();
|
||||
let expression = new_columns[1]["expression"].as_str().unwrap();
|
||||
assert_eq!(col_name, "x");
|
||||
assert_eq!(expression, "cast(NULL as int32)");
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
} else {
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 43}"#)
|
||||
.unwrap()
|
||||
}
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
}
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
|
||||
let result = table
|
||||
table
|
||||
.add_columns(
|
||||
NewColumnTransform::SqlExpressions(vec![
|
||||
("b".into(), "a + 1".into()),
|
||||
@@ -2867,8 +2616,75 @@ mod tests {
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
#[tokio::test]
|
||||
async fn test_alter_columns() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/alter_columns/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body = std::str::from_utf8(body).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||
let alterations = value.get("alterations").unwrap().as_array().unwrap();
|
||||
assert!(alterations.len() == 2);
|
||||
|
||||
let path = alterations[0]["path"].as_str().unwrap();
|
||||
let data_type = alterations[0]["data_type"]["type"].as_str().unwrap();
|
||||
assert_eq!(path, "b.c");
|
||||
assert_eq!(data_type, "int32");
|
||||
|
||||
let path = alterations[1]["path"].as_str().unwrap();
|
||||
let nullable = alterations[1]["nullable"].as_bool().unwrap();
|
||||
let rename = alterations[1]["rename"].as_str().unwrap();
|
||||
assert_eq!(path, "x");
|
||||
assert!(nullable);
|
||||
assert_eq!(rename, "y");
|
||||
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
|
||||
table
|
||||
.alter_columns(&[
|
||||
ColumnAlteration::new("b.c".into()).cast_to(DataType::Int32),
|
||||
ColumnAlteration::new("x".into())
|
||||
.rename("y".into())
|
||||
.set_nullable(true),
|
||||
])
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_drop_columns() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/drop_columns/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body = std::str::from_utf8(body).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||
let columns = value.get("columns").unwrap().as_array().unwrap();
|
||||
assert!(columns.len() == 2);
|
||||
|
||||
let col1 = columns[0].as_str().unwrap();
|
||||
let col2 = columns[1].as_str().unwrap();
|
||||
assert_eq!(col1, "a");
|
||||
assert_eq!(col2, "b");
|
||||
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
|
||||
table.drop_columns(&["a", "b"]).await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -20,6 +20,7 @@ use lance::dataset::cleanup::RemovalStats;
|
||||
use lance::dataset::optimize::{compact_files, CompactionMetrics, IndexRemapperOptions};
|
||||
use lance::dataset::scanner::Scanner;
|
||||
pub use lance::dataset::ColumnAlteration;
|
||||
pub use lance::dataset::MergeStats;
|
||||
pub use lance::dataset::NewColumnTransform;
|
||||
pub use lance::dataset::ReadParams;
|
||||
pub use lance::dataset::Version;
|
||||
@@ -311,7 +312,7 @@ impl<T: IntoArrow> AddDataBuilder<T> {
|
||||
self
|
||||
}
|
||||
|
||||
pub async fn execute(self) -> Result<AddResult> {
|
||||
pub async fn execute(self) -> Result<()> {
|
||||
let parent = self.parent.clone();
|
||||
let data = self.data.into_arrow()?;
|
||||
let without_data = AddDataBuilder::<NoData> {
|
||||
@@ -379,8 +380,8 @@ impl UpdateBuilder {
|
||||
}
|
||||
|
||||
/// Executes the update operation.
|
||||
/// Returns the update result
|
||||
pub async fn execute(self) -> Result<UpdateResult> {
|
||||
/// Returns the number of rows that were updated.
|
||||
pub async fn execute(self) -> Result<u64> {
|
||||
if self.columns.is_empty() {
|
||||
Err(Error::InvalidInput {
|
||||
message: "at least one column must be specified in an update operation".to_string(),
|
||||
@@ -423,71 +424,6 @@ pub trait Tags: Send + Sync {
|
||||
async fn update(&mut self, tag: &str, version: u64) -> Result<()>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct UpdateResult {
|
||||
pub rows_updated: u64,
|
||||
// The commit version associated with the operation.
|
||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||
/// a commit version.
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct AddResult {
|
||||
// The commit version associated with the operation.
|
||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||
/// a commit version.
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DeleteResult {
|
||||
// The commit version associated with the operation.
|
||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||
/// a commit version.
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct MergeResult {
|
||||
// The commit version associated with the operation.
|
||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||
/// a commit version.
|
||||
pub version: u64,
|
||||
/// Number of inserted rows (for user statistics)
|
||||
pub num_inserted_rows: u64,
|
||||
/// Number of updated rows (for user statistics)
|
||||
pub num_updated_rows: u64,
|
||||
/// Number of deleted rows (for user statistics)
|
||||
/// Note: This is different from internal references to 'deleted_rows', since we technically "delete" updated rows during processing.
|
||||
/// However those rows are not shared with the user.
|
||||
pub num_deleted_rows: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct AddColumnsResult {
|
||||
// The commit version associated with the operation.
|
||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||
/// a commit version.
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct AlterColumnsResult {
|
||||
// The commit version associated with the operation.
|
||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||
/// a commit version.
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DropColumnsResult {
|
||||
// The commit version associated with the operation.
|
||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||
/// a commit version.
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
/// A trait for anything "table-like". This is used for both native tables (which target
|
||||
/// Lance datasets) and remote tables (which target LanceDB cloud)
|
||||
///
|
||||
@@ -532,11 +468,11 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
||||
&self,
|
||||
add: AddDataBuilder<NoData>,
|
||||
data: Box<dyn arrow_array::RecordBatchReader + Send>,
|
||||
) -> Result<AddResult>;
|
||||
) -> Result<()>;
|
||||
/// Delete rows from the table.
|
||||
async fn delete(&self, predicate: &str) -> Result<DeleteResult>;
|
||||
async fn delete(&self, predicate: &str) -> Result<()>;
|
||||
/// Update rows in the table.
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult>;
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<u64>;
|
||||
/// Create an index on the provided column(s).
|
||||
async fn create_index(&self, index: IndexBuilder) -> Result<()>;
|
||||
/// List the indices on the table.
|
||||
@@ -552,7 +488,7 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
||||
&self,
|
||||
params: MergeInsertBuilder,
|
||||
new_data: Box<dyn RecordBatchReader + Send>,
|
||||
) -> Result<MergeResult>;
|
||||
) -> Result<MergeStats>;
|
||||
/// Gets the table tag manager.
|
||||
async fn tags(&self) -> Result<Box<dyn Tags + '_>>;
|
||||
/// Optimize the dataset.
|
||||
@@ -562,11 +498,11 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
||||
&self,
|
||||
transforms: NewColumnTransform,
|
||||
read_columns: Option<Vec<String>>,
|
||||
) -> Result<AddColumnsResult>;
|
||||
) -> Result<()>;
|
||||
/// Alter columns in the table.
|
||||
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<AlterColumnsResult>;
|
||||
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()>;
|
||||
/// Drop columns from the table.
|
||||
async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult>;
|
||||
async fn drop_columns(&self, columns: &[&str]) -> Result<()>;
|
||||
/// Get the version of the table.
|
||||
async fn version(&self) -> Result<u64>;
|
||||
/// Checkout a specific version of the table.
|
||||
@@ -795,7 +731,7 @@ impl Table {
|
||||
/// tbl.delete("id > 5").await.unwrap();
|
||||
/// # });
|
||||
/// ```
|
||||
pub async fn delete(&self, predicate: &str) -> Result<DeleteResult> {
|
||||
pub async fn delete(&self, predicate: &str) -> Result<()> {
|
||||
self.inner.delete(predicate).await
|
||||
}
|
||||
|
||||
@@ -1110,20 +1046,17 @@ impl Table {
|
||||
&self,
|
||||
transforms: NewColumnTransform,
|
||||
read_columns: Option<Vec<String>>,
|
||||
) -> Result<AddColumnsResult> {
|
||||
) -> Result<()> {
|
||||
self.inner.add_columns(transforms, read_columns).await
|
||||
}
|
||||
|
||||
/// Change a column's name or nullability.
|
||||
pub async fn alter_columns(
|
||||
&self,
|
||||
alterations: &[ColumnAlteration],
|
||||
) -> Result<AlterColumnsResult> {
|
||||
pub async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()> {
|
||||
self.inner.alter_columns(alterations).await
|
||||
}
|
||||
|
||||
/// Remove columns from the table.
|
||||
pub async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
|
||||
pub async fn drop_columns(&self, columns: &[&str]) -> Result<()> {
|
||||
self.inner.drop_columns(columns).await
|
||||
}
|
||||
|
||||
@@ -2156,7 +2089,7 @@ impl BaseTable for NativeTable {
|
||||
&self,
|
||||
add: AddDataBuilder<NoData>,
|
||||
data: Box<dyn RecordBatchReader + Send>,
|
||||
) -> Result<AddResult> {
|
||||
) -> Result<()> {
|
||||
let data = Box::new(MaybeEmbedded::try_new(
|
||||
data,
|
||||
self.table_definition().await?,
|
||||
@@ -2179,9 +2112,9 @@ impl BaseTable for NativeTable {
|
||||
.execute_stream(data)
|
||||
.await?
|
||||
};
|
||||
let version = dataset.manifest().version;
|
||||
|
||||
self.dataset.set_latest(dataset).await;
|
||||
Ok(AddResult { version })
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_index(&self, opts: IndexBuilder) -> Result<()> {
|
||||
@@ -2227,7 +2160,7 @@ impl BaseTable for NativeTable {
|
||||
Ok(dataset.prewarm_index(index_name).await?)
|
||||
}
|
||||
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<u64> {
|
||||
let dataset = self.dataset.get().await?.clone();
|
||||
let mut builder = LanceUpdateBuilder::new(Arc::new(dataset));
|
||||
if let Some(predicate) = update.filter {
|
||||
@@ -2243,10 +2176,7 @@ impl BaseTable for NativeTable {
|
||||
self.dataset
|
||||
.set_latest(res.new_dataset.as_ref().clone())
|
||||
.await;
|
||||
Ok(UpdateResult {
|
||||
rows_updated: res.rows_updated,
|
||||
version: res.new_dataset.version().version,
|
||||
})
|
||||
Ok(res.rows_updated)
|
||||
}
|
||||
|
||||
async fn create_plan(
|
||||
@@ -2438,7 +2368,7 @@ impl BaseTable for NativeTable {
|
||||
&self,
|
||||
params: MergeInsertBuilder,
|
||||
new_data: Box<dyn RecordBatchReader + Send>,
|
||||
) -> Result<MergeResult> {
|
||||
) -> Result<MergeStats> {
|
||||
let dataset = Arc::new(self.dataset.get().await?.clone());
|
||||
let mut builder = LanceMergeInsertBuilder::try_new(dataset.clone(), params.on)?;
|
||||
match (
|
||||
@@ -2466,23 +2396,14 @@ impl BaseTable for NativeTable {
|
||||
}
|
||||
let job = builder.try_build()?;
|
||||
let (new_dataset, stats) = job.execute_reader(new_data).await?;
|
||||
let version = new_dataset.manifest().version;
|
||||
self.dataset.set_latest(new_dataset.as_ref().clone()).await;
|
||||
Ok(MergeResult {
|
||||
version,
|
||||
num_updated_rows: stats.num_updated_rows,
|
||||
num_inserted_rows: stats.num_inserted_rows,
|
||||
num_deleted_rows: stats.num_deleted_rows,
|
||||
})
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
/// Delete rows from the table
|
||||
async fn delete(&self, predicate: &str) -> Result<DeleteResult> {
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
dataset.delete(predicate).await?;
|
||||
Ok(DeleteResult {
|
||||
version: dataset.version().version,
|
||||
})
|
||||
async fn delete(&self, predicate: &str) -> Result<()> {
|
||||
self.dataset.get_mut().await?.delete(predicate).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
|
||||
@@ -2549,28 +2470,27 @@ impl BaseTable for NativeTable {
|
||||
&self,
|
||||
transforms: NewColumnTransform,
|
||||
read_columns: Option<Vec<String>>,
|
||||
) -> Result<AddColumnsResult> {
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
dataset.add_columns(transforms, read_columns, None).await?;
|
||||
Ok(AddColumnsResult {
|
||||
version: dataset.version().version,
|
||||
})
|
||||
) -> Result<()> {
|
||||
self.dataset
|
||||
.get_mut()
|
||||
.await?
|
||||
.add_columns(transforms, read_columns, None)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<AlterColumnsResult> {
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
dataset.alter_columns(alterations).await?;
|
||||
Ok(AlterColumnsResult {
|
||||
version: dataset.version().version,
|
||||
})
|
||||
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()> {
|
||||
self.dataset
|
||||
.get_mut()
|
||||
.await?
|
||||
.alter_columns(alterations)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
dataset.drop_columns(columns).await?;
|
||||
Ok(DropColumnsResult {
|
||||
version: dataset.version().version,
|
||||
})
|
||||
async fn drop_columns(&self, columns: &[&str]) -> Result<()> {
|
||||
self.dataset.get_mut().await?.drop_columns(columns).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
|
||||
|
||||
@@ -4,10 +4,11 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use lance::dataset::MergeStats;
|
||||
|
||||
use crate::Result;
|
||||
|
||||
use super::{BaseTable, MergeResult};
|
||||
use super::BaseTable;
|
||||
|
||||
/// A builder used to create and run a merge insert operation
|
||||
///
|
||||
@@ -86,9 +87,9 @@ impl MergeInsertBuilder {
|
||||
|
||||
/// Executes the merge insert operation
|
||||
///
|
||||
/// Returns version and statistics about the merge operation including the number of rows
|
||||
/// Returns statistics about the merge operation including the number of rows
|
||||
/// inserted, updated, and deleted.
|
||||
pub async fn execute(self, new_data: Box<dyn RecordBatchReader + Send>) -> Result<MergeResult> {
|
||||
pub async fn execute(self, new_data: Box<dyn RecordBatchReader + Send>) -> Result<MergeStats> {
|
||||
self.table.clone().merge_insert(self, new_data).await
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user