diff --git a/docs/src/basic.md b/docs/src/basic.md
index 44ac128f..8e108fb8 100644
--- a/docs/src/basic.md
+++ b/docs/src/basic.md
@@ -42,7 +42,6 @@ sudo apt install -y protobuf-compiler libssl-dev
 ```
 
-
 ## How to connect to a database
 
 === "Python"
 
@@ -70,10 +69,12 @@
     #[tokio::main]
    async fn main() -> Result<()> {
         let uri = "data/sample-lancedb";
-        let db = connect(&uri).await?;
+        let db = connect(uri).await?;
     }
     ```
 
+    !!! info "See [examples/simple.rs](https://github.com/lancedb/lancedb/tree/main/rust/vectordb/examples/simple.rs) for a full working example."
+
 LanceDB will create the directory if it doesn't exist (including parent directories).
 If you need a reminder of the uri, you can call `db.uri()`.
 
@@ -286,7 +287,7 @@ Once you've embedded the query, you can find its nearest neighbors using the fol
 ```
 
 By default, LanceDB runs a brute-force scan over dataset to find the K nearest neighbours (KNN).
-users can speed up the query by creating vector indices over the vector columns.
+For tables with more than 50K vectors, creating an ANN index is recommended to speed up search performance.
 
 === "Python"
diff --git a/rust/vectordb/examples/simple.rs b/rust/vectordb/examples/simple.rs
new file mode 100644
index 00000000..31ca4b59
--- /dev/null
+++ b/rust/vectordb/examples/simple.rs
@@ -0,0 +1,86 @@
+// Copyright 2024 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use arrow_array::types::Float32Type;
+use arrow_array::{FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator};
+use arrow_schema::{DataType, Field, Schema};
+use futures::TryStreamExt;
+
+use vectordb::Connection;
+use vectordb::{connect, Result, Table, TableRef};
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    let uri = "data/sample-lancedb";
+    let db = connect(uri).await?;
+    let tbl = create_table(db).await?;
+    create_index(tbl.as_ref()).await?;
+    let batches = search(tbl.as_ref()).await?;
+    println!("{:?}", batches);
+    Ok(())
+}
+
+async fn create_table(db: Arc<dyn Connection>) -> Result<TableRef> {
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int32, false),
+        Field::new(
+            "vector",
+            DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 128),
+            true,
+        ),
+    ]));
+    const TOTAL: usize = 1000;
+    const DIM: usize = 128;
+    // Create a RecordBatch stream.
+    let batches = RecordBatchIterator::new(
+        vec![RecordBatch::try_new(
+            schema.clone(),
+            vec![
+                Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
+                Arc::new(
+                    FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
+                        (0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
+                        DIM as i32,
+                    ),
+                ),
+            ],
+        )
+        .unwrap()]
+        .into_iter()
+        .map(Ok),
+        schema.clone(),
+    );
+    db.create_table("my_table", Box::new(batches), None).await
+}
+
+async fn create_index(table: &dyn Table) -> Result<()> {
+    table
+        .create_index(&["vector"])
+        .ivf_pq()
+        .num_partitions(2)
+        .build()
+        .await
+}
+
+async fn search(table: &dyn Table) -> Result<Vec<RecordBatch>> {
+    Ok(table
+        .search(&[1.0; 128])
+        .limit(2)
+        .execute_stream()
+        .await?
+        .try_collect::<Vec<_>>()
+        .await?)
+}
diff --git a/rust/vectordb/src/lib.rs b/rust/vectordb/src/lib.rs
index 631aafc8..fc4ac149 100644
--- a/rust/vectordb/src/lib.rs
+++ b/rust/vectordb/src/lib.rs
@@ -33,10 +33,7 @@
 //! LanceDB runs in process, to use it in your Rust project, put the following in your `Cargo.toml`:
 //!
 //! ```ignore
-//! [dependencies]
-//! vectordb = "0.4"
-//! arrow-schema = "50"
-//! arrow-array = "50"
+//! cargo add vectordb
 //! ```
 //!
 //! ### Quick Start
@@ -100,9 +97,9 @@
 //! let batches = RecordBatchIterator::new(vec![
 //!     RecordBatch::try_new(schema.clone(),
 //!         vec![
-//!             Arc::new(Int32Array::from_iter_values(0..10)),
+//!             Arc::new(Int32Array::from_iter_values(0..1000)),
 //!             Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
-//!                 (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)),
+//!                 (0..1000).map(|_| Some(vec![Some(1.0); 128])), 128)),
 //!         ]).unwrap()
 //!     ].into_iter().map(Ok),
 //!     schema.clone());
@@ -158,7 +155,7 @@
 //! # ].into_iter().map(Ok),
 //! # schema.clone());
 //! # db.create_table("my_table", Box::new(batches), None).await.unwrap();
-//! let table = db.open_table("my_table").await.unwrap();
+//! # let table = db.open_table("my_table").await.unwrap();
 //! let results = table
 //!     .search(&[1.0; 128])
 //!     .execute_stream()
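The new `examples/simple.rs` creates the table and index from scratch on every run. As a complement, below is a minimal sketch (not part of the change) of reconnecting on a later run and querying the existing `my_table`. It reuses only calls that appear in the diff above (`connect`, `open_table`, `search`, `limit`, `execute_stream`), so treat it as illustrative rather than an official snippet.

```rust
use futures::TryStreamExt;
use vectordb::{connect, Result};

#[tokio::main]
async fn main() -> Result<()> {
    // Reconnect to the directory created by examples/simple.rs.
    let db = connect("data/sample-lancedb").await?;
    // Open the existing table instead of recreating it.
    let tbl = db.open_table("my_table").await?;
    // Run the same nearest-neighbour query as the example.
    let results = tbl
        .search(&[1.0; 128])
        .limit(2)
        .execute_stream()
        .await?
        .try_collect::<Vec<_>>()
        .await?;
    println!("{:?}", results);
    Ok(())
}
```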