mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 12:52:58 +00:00
doc: update quick start for full rust example (#872)
This commit is contained in:
@@ -11,43 +11,77 @@
|
||||
## Installation
|
||||
|
||||
=== "Python"
|
||||
|
||||
```shell
|
||||
pip install lancedb
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
|
||||
```shell
|
||||
npm install vectordb
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```shell
|
||||
cargo add vectordb
|
||||
```
|
||||
|
||||
!!! info "Rust crate is installed as source. You need to install protobuf."
|
||||
|
||||
=== "macOS"
|
||||
|
||||
```shell
|
||||
brew install protobuf
|
||||
```
|
||||
|
||||
=== "Ubuntu/Debian"
|
||||
|
||||
```shell
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
```
|
||||
|
||||
|
||||
## How to connect to a database
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
uri = "data/sample-lancedb"
|
||||
db = lancedb.connect(uri)
|
||||
```
|
||||
|
||||
LanceDB will create the directory if it doesn't exist (including parent directories).
|
||||
|
||||
If you need a reminder of the uri, use the `db.uri` property.
|
||||
|
||||
=== "Javascript"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
|
||||
const uri = "data/sample-lancedb";
|
||||
const db = await lancedb.connect(uri);
|
||||
```
|
||||
|
||||
LanceDB will create the directory if it doesn't exist (including parent directories).
|
||||
|
||||
If you need a reminder of the uri, you can call `db.uri()`.
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
use vectordb::connect;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let uri = "data/sample-lancedb";
|
||||
let db = connect(&uri).await?;
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
LanceDB will create the directory if it doesn't exist (including parent directories).
|
||||
|
||||
If you need a reminder of the uri, you can call `db.uri()`.
|
||||
|
||||
## How to create a table
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
tbl = db.create_table("my_table",
|
||||
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
@@ -59,6 +93,7 @@
|
||||
to the `create_table` method.
|
||||
|
||||
You can also pass in a pandas DataFrame directly:
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
df = pd.DataFrame([{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
@@ -67,8 +102,9 @@
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
|
||||
```javascript
|
||||
const tb = await db.createTable(
|
||||
const tbl = await db.createTable(
|
||||
"myTable",
|
||||
[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
|
||||
@@ -79,6 +115,31 @@
|
||||
If you want to overwrite the table, you can pass in `mode="overwrite"`
|
||||
to the `createTable` function.
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
use arrow_schema::{DataType, Schema, Field};
|
||||
use arrow_array::{RecordBatch, RecordBatchIterator};
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("vector", DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)), 128), true),
|
||||
]));
|
||||
// Create a RecordBatch stream.
|
||||
let batches = RecordBatchIterator::new(vec![
|
||||
RecordBatch::try_new(schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..10)),
|
||||
Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
(0..10).map(|_| Some(vec![Some(1.0); 128])), 128)),
|
||||
]).unwrap()
|
||||
].into_iter().map(Ok),
|
||||
schema.clone());
|
||||
db.create_table("my_table", Box::new(batches), None).await.unwrap();
|
||||
```
|
||||
|
||||
If the table already exists, LanceDB will raise an error by default.
|
||||
|
||||
!!! info "Under the hood, LanceDB is converting the input data into an Apache Arrow table and persisting it to disk in [Lance format](https://www.github.com/lancedb/lance)."
|
||||
|
||||
@@ -88,76 +149,164 @@ Sometimes you may not have the data to insert into the table at creation time.
|
||||
In this case, you can create an empty table and specify the schema.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
import pyarrow as pa
|
||||
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))])
|
||||
tbl = db.create_table("empty_table", schema=schema)
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
|
||||
```typescript
|
||||
import { Schema, Field, FixedSizeList, DataType } from "apache-arrow";
|
||||
|
||||
schema = new Schema([new Field("vec", new FixedSizeList(2, new Field("item", new Float32())))])
|
||||
tbl = await db.createTable({ name: "empty_table", schema: schema });
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
use arrow_schema::{Schema, Field, DataType};
|
||||
use arrow_array::{RecordBatch, RecordBatchIterator};
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("vector", DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)), 2), true),
|
||||
]));
|
||||
let batches = RecordBatchIterator::new(vec![].into_iter().map(Ok), schema);
|
||||
db.create_table("empty_table", Box::new(batches), None).await.unwrap();
|
||||
```
|
||||
|
||||
## How to open an existing table
|
||||
|
||||
Once created, you can open a table using the following code:
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
tbl = db.open_table("my_table")
|
||||
```
|
||||
|
||||
If you forget the name of your table, you can always get a listing of all table names:
|
||||
|
||||
```python
|
||||
print(db.table_names())
|
||||
```
|
||||
```python
|
||||
tbl = db.open_table("my_table")
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
const tbl = await db.openTable("myTable");
|
||||
```
|
||||
|
||||
If you forget the name of your table, you can always get a listing of all table names:
|
||||
```typescript
|
||||
const tbl = await db.openTable("myTable");
|
||||
```
|
||||
|
||||
```javascript
|
||||
console.log(await db.tableNames());
|
||||
```
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
let tbl = db.open_table_with_params("my_table", None).await.unwrap();
|
||||
```
|
||||
|
||||
If you forget the name of your table, you can always get a listing of all table names:
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
print(db.table_names())
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
|
||||
```javascript
|
||||
console.log(await db.tableNames());
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
println!("{:?}", db.table_names().await.unwrap());
|
||||
```
|
||||
|
||||
## How to add data to a table
|
||||
|
||||
After a table has been created, you can always add more data to it using the `add` method:
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
|
||||
# Option 1: Add a list of dicts to a table
|
||||
data = [{"vector": [1.3, 1.4], "item": "fizz", "price": 100.0},
|
||||
{"vector": [9.5, 56.2], "item": "buzz", "price": 200.0}]
|
||||
tbl.add(data)
|
||||
```python
|
||||
|
||||
# Option 2: Add a pandas DataFrame to a table
|
||||
df = pd.DataFrame(data)
|
||||
tbl.add(df)
|
||||
```
|
||||
# Option 1: Add a list of dicts to a table
|
||||
data = [{"vector": [1.3, 1.4], "item": "fizz", "price": 100.0},
|
||||
{"vector": [9.5, 56.2], "item": "buzz", "price": 200.0}]
|
||||
tbl.add(data)
|
||||
|
||||
# Option 2: Add a pandas DataFrame to a table
|
||||
df = pd.DataFrame(data)
|
||||
tbl.add(df)
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
await tbl.add([{vector: [1.3, 1.4], item: "fizz", price: 100.0},
|
||||
{vector: [9.5, 56.2], item: "buzz", price: 200.0}])
|
||||
```
|
||||
|
||||
```javascript
|
||||
await tbl.add([{vector: [1.3, 1.4], item: "fizz", price: 100.0},
|
||||
{vector: [9.5, 56.2], item: "buzz", price: 200.0}])
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
let batches = RecordBatchIterator::new(...);
|
||||
tbl.add(Box::new(batches), None).await.unwrap();
|
||||
```
|
||||
|
||||
## How to search for (approximate) nearest neighbors
|
||||
|
||||
Once you've embedded the query, you can find its nearest neighbors using the following code:
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
tbl.search([100, 100]).limit(2).to_pandas()
|
||||
```
|
||||
|
||||
This returns a pandas DataFrame with the results.
|
||||
```python
|
||||
tbl.search([100, 100]).limit(2).to_pandas()
|
||||
```
|
||||
|
||||
This returns a pandas DataFrame with the results.
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
const query = await tbl.search([100, 100]).limit(2).execute();
|
||||
```
|
||||
|
||||
```javascript
|
||||
const query = await tbl.search([100, 100]).limit(2).execute();
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
use arrow_array::RecordBatch;
|
||||
use futures::TryStreamExt;
|
||||
|
||||
let results: Vec<RecordBatch> = tbl
|
||||
.search(&[100.0, 100.0])
|
||||
.execute_stream()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect().await.unwrap();
|
||||
```
|
||||
|
||||
By default, LanceDB runs a brute-force scan over the dataset to find the K nearest neighbours (KNN).
|
||||
Users can speed up the query by creating vector indices over the vector columns.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
tbl.create_index()
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
|
||||
```javascript
|
||||
await tbl.createIndex({})
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
tbl.create_index(&["vector"]).build().await.unwrap()
|
||||
```
|
||||
|
||||
Check [Approximate Nearest Neighbor (ANN) Indexes](/ann_indices.md) section for more details.
|
||||
|
||||
## How to delete rows from a table
|
||||
|
||||
@@ -166,20 +315,27 @@ which rows to delete, provide a filter that matches on the metadata columns.
|
||||
This can delete any number of rows that match the filter.
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
tbl.delete('item = "fizz"')
|
||||
```
|
||||
|
||||
```python
|
||||
tbl.delete('item = "fizz"')
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
await tbl.delete('item = "fizz"')
|
||||
```
|
||||
|
||||
```javascript
|
||||
await tbl.delete('item = "fizz"')
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
tbl.delete("item = \"fizz\"").await.unwrap();
|
||||
```
|
||||
|
||||
The deletion predicate is a SQL expression that supports the same expressions
|
||||
as the `where()` clause on a search. They can be as simple or complex as needed.
|
||||
To see what expressions are supported, see the [SQL filters](sql.md) section.
|
||||
|
||||
|
||||
=== "Python"
|
||||
|
||||
Read more: [lancedb.table.Table.delete][]
|
||||
@@ -193,6 +349,7 @@ To see what expressions are supported, see the [SQL filters](sql.md) section.
|
||||
Use the `drop_table()` method on the database to remove a table.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
db.drop_table("my_table")
|
||||
```
|
||||
@@ -202,12 +359,19 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
you can pass in `ignore_missing=True`.
|
||||
|
||||
=== "JavaScript"
|
||||
|
||||
```javascript
|
||||
await db.dropTable('myTable')
|
||||
```
|
||||
|
||||
This permanently removes the table and is not recoverable, unlike deleting rows.
|
||||
If the table does not exist an exception is raised.
|
||||
If the table does not exist an exception is raised.
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
db.drop_table("my_table").await.unwrap()
|
||||
```
|
||||
|
||||
!!! note "Bundling `vectordb` apps with Webpack"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user