mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-19 21:10:41 +00:00
docs: add links to rust SDK docs, remove references to rust SDK being unstable / experimental (#1131)
This commit is contained in:
165
rust/lancedb/examples/ivf_pq.rs
Normal file
165
rust/lancedb/examples/ivf_pq.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! This example demonstrates setting advanced parameters when building an IVF PQ index
|
||||
//!
|
||||
//! Snippets from this example are used in the documentation on ANN indices.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::types::Float32Type;
|
||||
use arrow_array::{
|
||||
FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator, RecordBatchReader,
|
||||
};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
use futures::TryStreamExt;
|
||||
use lancedb::connection::Connection;
|
||||
use lancedb::index::vector::IvfPqIndexBuilder;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||
use lancedb::{connect, DistanceType, Result, Table};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
if std::path::Path::new("data").exists() {
|
||||
std::fs::remove_dir_all("data").unwrap();
|
||||
}
|
||||
let uri = "data/sample-lancedb";
|
||||
let db = connect(uri).execute().await?;
|
||||
let tbl = create_table(&db).await?;
|
||||
|
||||
create_index(&tbl).await?;
|
||||
search_index(&tbl).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_some_records() -> Result<Box<dyn RecordBatchReader + Send>> {
|
||||
const TOTAL: usize = 1000;
|
||||
const DIM: usize = 128;
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new(
|
||||
"vector",
|
||||
DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||
DIM as i32,
|
||||
),
|
||||
true,
|
||||
),
|
||||
]));
|
||||
|
||||
// Create a RecordBatch stream.
|
||||
let batches = RecordBatchIterator::new(
|
||||
vec![RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(
|
||||
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
||||
DIM as i32,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
.unwrap()]
|
||||
.into_iter()
|
||||
.map(Ok),
|
||||
schema.clone(),
|
||||
);
|
||||
Ok(Box::new(batches))
|
||||
}
|
||||
|
||||
async fn create_table(db: &Connection) -> Result<Table> {
|
||||
let initial_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
|
||||
let tbl = db
|
||||
.create_table("my_table", Box::new(initial_data))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
Ok(tbl)
|
||||
}
|
||||
|
||||
async fn create_index(table: &Table) -> Result<()> {
|
||||
// --8<-- [start:create_index]
|
||||
// For this example, `table` is a lancedb::Table with a column named
|
||||
// "vector" that is a vector column with dimension 128.
|
||||
|
||||
// By default, if the column "vector" appears to be a vector column,
|
||||
// then an IVF_PQ index with reasonable defaults is created.
|
||||
table
|
||||
.create_index(&["vector"], Index::Auto)
|
||||
.execute()
|
||||
.await?;
|
||||
// For advanced cases, it is also possible to specifically request an
|
||||
// IVF_PQ index and provide custom parameters.
|
||||
table
|
||||
.create_index(
|
||||
&["vector"],
|
||||
Index::IvfPq(
|
||||
// Here we specify advanced indexing parameters. In this case
|
||||
// we are creating an index that my have better recall than the
|
||||
// default but is also larger and slower.
|
||||
IvfPqIndexBuilder::default()
|
||||
// This overrides the default distance type of L2
|
||||
.distance_type(DistanceType::Cosine)
|
||||
// With 1000 rows this have been ~31 by default
|
||||
.num_partitions(50)
|
||||
// With dimension 128 this would have been 8 by default
|
||||
.num_sub_vectors(16),
|
||||
),
|
||||
)
|
||||
.execute()
|
||||
.await?;
|
||||
// --8<-- [end:create_index]
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn search_index(table: &Table) -> Result<()> {
|
||||
// --8<-- [start:search1]
|
||||
let query_vector = [1.0; 128];
|
||||
// By default the index will find the 10 closest results using default
|
||||
// search parameters that give a reasonable tradeoff between accuracy
|
||||
// and search latency
|
||||
let mut results = table
|
||||
.vector_search(&query_vector)?
|
||||
// Note: you should always set the distance_type to match the value used
|
||||
// to train the index
|
||||
.distance_type(DistanceType::Cosine)
|
||||
.execute()
|
||||
.await?;
|
||||
while let Some(batch) = results.try_next().await? {
|
||||
println!("{:?}", batch);
|
||||
}
|
||||
// We can also provide custom search parameters. Here we perform a
|
||||
// slower but more accurate search
|
||||
let mut results = table
|
||||
.vector_search(&query_vector)?
|
||||
.distance_type(DistanceType::Cosine)
|
||||
// Override the default of 10 to get more rows
|
||||
.limit(15)
|
||||
// Override the default of 20 to search more partitions
|
||||
.nprobes(30)
|
||||
// Override the default of None to apply a refine step
|
||||
.refine_factor(1)
|
||||
.execute()
|
||||
.await?;
|
||||
while let Some(batch) = results.try_next().await? {
|
||||
println!("{:?}", batch);
|
||||
}
|
||||
Ok(())
|
||||
// --8<-- [end:search1]
|
||||
}
|
||||
@@ -12,10 +12,16 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! This example demonstrates basic usage of LanceDB.
|
||||
//!
|
||||
//! Snippets from this example are used in the quickstart documentation.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::types::Float32Type;
|
||||
use arrow_array::{FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator};
|
||||
use arrow_array::{
|
||||
FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator, RecordBatchReader,
|
||||
};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
|
||||
@@ -58,14 +64,14 @@ async fn main() -> Result<()> {
|
||||
async fn open_with_existing_tbl() -> Result<()> {
|
||||
let uri = "data/sample-lancedb";
|
||||
let db = connect(uri).execute().await?;
|
||||
// --8<-- [start:open_with_existing_file]
|
||||
let _ = db.open_table("my_table").execute().await.unwrap();
|
||||
// --8<-- [end:open_with_existing_file]
|
||||
#[allow(unused_variables)]
|
||||
// --8<-- [start:open_existing_tbl]
|
||||
let table = db.open_table("my_table").execute().await.unwrap();
|
||||
// --8<-- [end:open_existing_tbl]
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_table(db: &Connection) -> Result<LanceDbTable> {
|
||||
// --8<-- [start:create_table]
|
||||
fn create_some_records() -> Result<Box<dyn RecordBatchReader + Send>> {
|
||||
const TOTAL: usize = 1000;
|
||||
const DIM: usize = 128;
|
||||
|
||||
@@ -100,33 +106,22 @@ async fn create_table(db: &Connection) -> Result<LanceDbTable> {
|
||||
.map(Ok),
|
||||
schema.clone(),
|
||||
);
|
||||
Ok(Box::new(batches))
|
||||
}
|
||||
|
||||
async fn create_table(db: &Connection) -> Result<LanceDbTable> {
|
||||
// --8<-- [start:create_table]
|
||||
let initial_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
|
||||
let tbl = db
|
||||
.create_table("my_table", Box::new(batches))
|
||||
.create_table("my_table", Box::new(initial_data))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
// --8<-- [end:create_table]
|
||||
|
||||
let new_batches = RecordBatchIterator::new(
|
||||
vec![RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(
|
||||
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
||||
DIM as i32,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
.unwrap()]
|
||||
.into_iter()
|
||||
.map(Ok),
|
||||
schema.clone(),
|
||||
);
|
||||
// --8<-- [start:add]
|
||||
tbl.add(Box::new(new_batches)).execute().await.unwrap();
|
||||
let new_data = create_some_records()?;
|
||||
tbl.add(new_data).execute().await.unwrap();
|
||||
// --8<-- [end:add]
|
||||
|
||||
Ok(tbl)
|
||||
|
||||
@@ -36,8 +36,6 @@
|
||||
//!
|
||||
//! ### Quick Start
|
||||
//!
|
||||
//! <div class="warning">Rust API is not stable yet, please expect breaking changes.</div>
|
||||
//!
|
||||
//! #### Connect to a database.
|
||||
//!
|
||||
//! ```rust
|
||||
|
||||
Reference in New Issue
Block a user