chore: add one rust SDK e2e example (#876)

Co-authored-by: Chang She <759245+changhiskhan@users.noreply.github.com>
This commit is contained in:
Lei Xu
2024-01-26 22:41:20 -08:00
committed by Weston Pace
parent fd2fd94862
commit 36dbf47d60
3 changed files with 94 additions and 10 deletions

View File

@@ -42,7 +42,6 @@
sudo apt install -y protobuf-compiler libssl-dev
```
## How to connect to a database
=== "Python"
@@ -70,10 +69,12 @@
#[tokio::main]
async fn main() -> Result<()> {
let uri = "data/sample-lancedb";
let db = connect(&uri).await?;
let db = connect(uri).await?;
}
```
!!! info "See [examples/simple.rs](https://github.com/lancedb/lancedb/tree/main/rust/vectordb/src/examples/simple.rs) for a full working example."
LanceDB will create the directory if it doesn't exist (including parent directories).
If you need a reminder of the URI, you can call `db.uri()`.
@@ -286,7 +287,7 @@ Once you've embedded the query, you can find its nearest neighbors using the fol
```
By default, LanceDB runs a brute-force scan over the dataset to find the K nearest neighbours (KNN).
users can speed up the query by creating vector indices over the vector columns.
For tables with more than 50K vectors, creating an ANN index is recommended to speed up search performance.
=== "Python"

View File

@@ -0,0 +1,86 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use arrow_array::types::Float32Type;
use arrow_array::{FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator};
use arrow_schema::{DataType, Field, Schema};
use futures::TryStreamExt;
use vectordb::Connection;
use vectordb::{connect, Result, Table, TableRef};
#[tokio::main]
async fn main() -> Result<()> {
let uri = "data/sample-lancedb";
let db = connect(uri).await?;
let tbl = create_table(db).await?;
create_index(tbl.as_ref()).await?;
let batches = search(tbl.as_ref()).await?;
println!("{:?}", batches);
Ok(())
}
/// Creates `my_table` in `db` and fills it with one batch of sample rows.
///
/// The table has two columns: a non-nullable `Int32` column `id` and a
/// nullable column `vector` holding fixed-size lists of `DIM` `Float32`
/// values. `TOTAL` rows are written: ids run from `0` to `TOTAL - 1` and every
/// vector is filled with `1.0`.
///
/// # Errors
///
/// Returns the error reported by `Connection::create_table`, unchanged.
///
/// # Panics
///
/// Panics if the record batch cannot be assembled from the generated columns.
async fn create_table(db: Arc<dyn Connection>) -> Result<TableRef> {
    // Declared before the schema so that the schema's list width and the
    // generated vectors share a single definition of the dimension.
    const TOTAL: usize = 1000;
    const DIM: usize = 128;

    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new(
            "vector",
            DataType::FixedSizeList(
                Arc::new(Field::new("item", DataType::Float32, true)),
                DIM as i32,
            ),
            true,
        ),
    ]));

    // A single batch holding every row.
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![
            Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
            Arc::new(
                FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
                    (0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
                    DIM as i32,
                ),
            ),
        ],
    )
    .expect("generated columns are built from the same TOTAL and DIM as the schema");

    // Wrap the batch in a RecordBatch reader that `create_table` can consume.
    let batches = RecordBatchIterator::new(vec![batch].into_iter().map(Ok), schema.clone());

    db.create_table("my_table", Box::new(batches), None).await
}
/// Builds an index over the `vector` column of `table`.
///
/// The index builder is configured with `ivf_pq()` and `num_partitions(2)`
/// before `build()` is awaited; any error from the build is returned as-is.
async fn create_index(table: &dyn Table) -> Result<()> {
    table.create_index(&["vector"]).ivf_pq().num_partitions(2).build().await?;
    Ok(())
}
/// Searches `table` with a 128-element query vector of all `1.0` values,
/// using a limit of 2, and gathers the result stream into a `Vec`.
///
/// Errors from starting the query or from reading the stream are propagated.
async fn search(table: &dyn Table) -> Result<Vec<RecordBatch>> {
    // Start the query and obtain the stream of result batches.
    let stream = table.search(&[1.0; 128]).limit(2).execute_stream().await?;

    // Drain the stream; the first failing batch aborts the collection.
    let collected: Vec<RecordBatch> = stream.try_collect().await?;
    Ok(collected)
}

View File

@@ -33,10 +33,7 @@
//! LanceDB runs in process. To use it in your Rust project, add it as a dependency in your `Cargo.toml`:
//!
//! ```ignore
//! [dependencies]
//! vectordb = "0.4"
//! arrow-schema = "50"
//! arrow-array = "50"
//! cargo add vectordb
//! ```
//!
//! ### Quick Start
@@ -100,9 +97,9 @@
//! let batches = RecordBatchIterator::new(vec![
//! RecordBatch::try_new(schema.clone(),
//! vec![
//! Arc::new(Int32Array::from_iter_values(0..10)),
//! Arc::new(Int32Array::from_iter_values(0..1000)),
//! Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
//! (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)),
//! (0..1000).map(|_| Some(vec![Some(1.0); 128])), 128)),
//! ]).unwrap()
//! ].into_iter().map(Ok),
//! schema.clone());
@@ -158,7 +155,7 @@
//! # ].into_iter().map(Ok),
//! # schema.clone());
//! # db.create_table("my_table", Box::new(batches), None).await.unwrap();
//! let table = db.open_table("my_table").await.unwrap();
//! # let table = db.open_table("my_table").await.unwrap();
//! let results = table
//! .search(&[1.0; 128])
//! .execute_stream()