mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-03 18:32:55 +00:00
chore: add one rust SDK e2e example (#876)
Co-authored-by: Chang She <759245+changhiskhan@users.noreply.github.com>
This commit is contained in:
@@ -42,7 +42,6 @@
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
```
|
||||
|
||||
|
||||
## How to connect to a database
|
||||
|
||||
=== "Python"
|
||||
@@ -70,10 +69,12 @@
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let uri = "data/sample-lancedb";
|
||||
let db = connect(&uri).await?;
|
||||
let db = connect(uri).await?;
|
||||
}
|
||||
```
|
||||
|
||||
!!! info "See [examples/simple.rs](https://github.com/lancedb/lancedb/tree/main/rust/vectordb/src/examples/simple.rs) for a full working example."
|
||||
|
||||
LanceDB will create the directory if it doesn't exist (including parent directories).
|
||||
|
||||
If you need a reminder of the URI, you can call `db.uri()`.
|
||||
@@ -286,7 +287,7 @@ Once you've embedded the query, you can find its nearest neighbors using the fol
|
||||
```
|
||||
|
||||
By default, LanceDB runs a brute-force scan over the dataset to find the K nearest neighbours (KNN).
|
||||
users can speed up the query by creating vector indices over the vector columns.
|
||||
For tables with more than 50K vectors, creating an ANN index is recommended to speed up search performance.
|
||||
|
||||
=== "Python"
|
||||
|
||||
|
||||
86
rust/vectordb/examples/simple.rs
Normal file
86
rust/vectordb/examples/simple.rs
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::types::Float32Type;
|
||||
use arrow_array::{FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
|
||||
use vectordb::Connection;
|
||||
use vectordb::{connect, Result, Table, TableRef};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let uri = "data/sample-lancedb";
|
||||
let db = connect(uri).await?;
|
||||
let tbl = create_table(db).await?;
|
||||
create_index(tbl.as_ref()).await?;
|
||||
let batches = search(tbl.as_ref()).await?;
|
||||
println!("{:?}", batches);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_table(db: Arc<dyn Connection>) -> Result<TableRef> {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new(
|
||||
"vector",
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 128),
|
||||
true,
|
||||
),
|
||||
]));
|
||||
const TOTAL: usize = 1000;
|
||||
const DIM: usize = 128;
|
||||
// Create a RecordBatch stream.
|
||||
let batches = RecordBatchIterator::new(
|
||||
vec![RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(
|
||||
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
||||
DIM as i32,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
.unwrap()]
|
||||
.into_iter()
|
||||
.map(Ok),
|
||||
schema.clone(),
|
||||
);
|
||||
db.create_table("my_table", Box::new(batches), None).await
|
||||
}
|
||||
|
||||
async fn create_index(table: &dyn Table) -> Result<()> {
|
||||
table
|
||||
.create_index(&["vector"])
|
||||
.ivf_pq()
|
||||
.num_partitions(2)
|
||||
.build()
|
||||
.await
|
||||
}
|
||||
|
||||
/// Searches `table` with a 128-element query vector of `1.0`s, limited to
/// two results, and collects the resulting stream into a `Vec` of batches.
///
/// Errors from starting the stream or from any item in it are propagated.
async fn search(table: &dyn Table) -> Result<Vec<RecordBatch>> {
    // The whole chain stays in one statement so every intermediate value
    // outlives both awaits.
    let collected = table
        .search(&[1.0; 128])
        .limit(2)
        .execute_stream()
        .await?
        .try_collect::<Vec<_>>()
        .await?;

    Ok(collected)
}
|
||||
@@ -33,10 +33,7 @@
|
||||
//! LanceDB runs in process. To use it in your Rust project, add it as a dependency in your `Cargo.toml`:
|
||||
//!
|
||||
//! ```ignore
|
||||
//! [dependencies]
|
||||
//! vectordb = "0.4"
|
||||
//! arrow-schema = "50"
|
||||
//! arrow-array = "50"
|
||||
//! cargo add vectordb
|
||||
//! ```
|
||||
//!
|
||||
//! ### Quick Start
|
||||
@@ -100,9 +97,9 @@
|
||||
//! let batches = RecordBatchIterator::new(vec![
|
||||
//! RecordBatch::try_new(schema.clone(),
|
||||
//! vec![
|
||||
//! Arc::new(Int32Array::from_iter_values(0..10)),
|
||||
//! Arc::new(Int32Array::from_iter_values(0..1000)),
|
||||
//! Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
//! (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)),
|
||||
//! (0..1000).map(|_| Some(vec![Some(1.0); 128])), 128)),
|
||||
//! ]).unwrap()
|
||||
//! ].into_iter().map(Ok),
|
||||
//! schema.clone());
|
||||
@@ -158,7 +155,7 @@
|
||||
//! # ].into_iter().map(Ok),
|
||||
//! # schema.clone());
|
||||
//! # db.create_table("my_table", Box::new(batches), None).await.unwrap();
|
||||
//! let table = db.open_table("my_table").await.unwrap();
|
||||
//! # let table = db.open_table("my_table").await.unwrap();
|
||||
//! let results = table
|
||||
//! .search(&[1.0; 128])
|
||||
//! .execute_stream()
|
||||
|
||||
Reference in New Issue
Block a user