doc: add index page for rust crate (#839)

Rust API doc for the braves
This commit is contained in:
Lei Xu
2024-01-22 09:15:55 -08:00
committed by Weston Pace
parent 97d033dfd6
commit 49de13c65a
5 changed files with 130 additions and 10 deletions

View File

@@ -1,7 +1,6 @@
use std::convert::TryFrom;
use std::ops::Deref;
use arrow_array::Float32Array;
use futures::{TryFutureExt, TryStreamExt};
use lance_linalg::distance::MetricType;
use neon::context::FunctionContext;
@@ -72,7 +71,7 @@ impl JsQuery {
rt.spawn(async move {
let mut builder = table
.search(query.map(Float32Array::from))
.search(query)
.refine_factor(refine_factor)
.nprobes(nprobes)
.filter(filter)

View File

@@ -51,8 +51,8 @@ where
/// Parameters
/// ----------
/// - reader: RecordBatchReader
/// - strict: if set true, only fixed_size_list<float> is considered as vector column. If set to false,
/// a list<float> column with same length is also considered as vector column.
/// - strict: if set true, only `fixed_size_list<float>` is considered as vector column. If set to false,
/// a `list<float>` column with same length is also considered as vector column.
pub fn infer_vector_columns(
reader: impl RecordBatchReader + Send,
strict: bool,

View File

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! LanceDB Database
//!
use std::fs::create_dir_all;
use std::path::Path;
use std::sync::Arc;
@@ -159,7 +162,7 @@ impl Database {
///
/// # Returns
///
/// * A [Vec<String>] with all table names.
/// * A [`Vec<String>`] with all table names.
pub async fn table_names(&self) -> Result<Vec<String>> {
let mut f = self
.object_store

View File

@@ -1,4 +1,4 @@
// Copyright 2023 Lance Developers.
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,6 +12,125 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! # VectorDB ([LanceDB](https://github.com/lancedb/lancedb)) -- Developer-friendly, serverless vector database for AI applications
//!
//! [LanceDB](https://github.com/lancedb/lancedb) is an open-source database for vector-search built with persistent storage,
//! which greatly simplifies retrieval, filtering and management of embeddings.
//!
//! The key features of LanceDB include:
//! - Production-scale vector search with no servers to manage.
//! - Store, query and filter vectors, metadata and multi-modal data (text, images, videos, point clouds, and more).
//! - Support for vector similarity search, full-text search and SQL.
//! - Native Rust, Python, Javascript/Typescript support.
//! - Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.
//! - GPU support in building vector indices[^note].
//! - Ecosystem integrations with LangChain 🦜️🔗, LlamaIndex 🦙, Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
//!
//! [^note]: Only in Python SDK.
//!
//! ## Getting Started
//!
//! LanceDB runs in process. To use it in your Rust project, put the following in your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! vectordb = "0.4"
//! arrow-schema = "50"
//! arrow-array = "50"
//! ```
//!
//! ### Quick Start
//!
//! <div class="warning">Rust API is not stable yet.</div>
//!
//! #### Connect to a database
//!
//! ```rust
//! use vectordb::{Database, Table, WriteMode};
//! use arrow_schema::{Field, Schema};
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
//! let db = Database::connect("data/sample-lancedb").await.unwrap();
//! # });
//! ```
//!
//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schemas, data types and the arrays themselves.
//! It treats [`FixedSizeList<Float16/Float32>`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html)
//! columns as vectors.
//!
//! #### Create a table
//!
//! To create a Table, you need to provide an [`arrow_schema::Schema`] and an [`arrow_array::RecordBatch`] stream.
//!
//! ```rust
//! # use std::sync::Arc;
//! use arrow_schema::{DataType, Schema, Field};
//! use arrow_array::{RecordBatch, RecordBatchIterator};
//! # use arrow_array::{FixedSizeListArray, Float32Array, Int32Array, types::Float32Type};
//! # use vectordb::Database;
//!
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
//! # let tmpdir = tempfile::tempdir().unwrap();
//! # let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap();
//! let schema = Arc::new(Schema::new(vec![
//! Field::new("id", DataType::Int32, false),
//! Field::new("vector", DataType::FixedSizeList(
//! Arc::new(Field::new("item", DataType::Float32, true)), 128), true),
//! ]));
//! // Create a RecordBatch stream.
//! let batches = RecordBatchIterator::new(vec![
//! RecordBatch::try_new(schema.clone(),
//! vec![
//! Arc::new(Int32Array::from_iter_values(0..10)),
//! Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
//! (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)),
//! ]).unwrap()
//! ].into_iter().map(Ok),
//! schema.clone());
//! db.create_table("my_table", batches, None).await.unwrap();
//! # });
//! ```
//!
//! #### Open table and run search
//!
//! ```rust
//! # use std::sync::Arc;
//! # use futures::TryStreamExt;
//! # use arrow_schema::{DataType, Schema, Field};
//! # use arrow_array::{RecordBatch, RecordBatchIterator};
//! # use arrow_array::{FixedSizeListArray, Float32Array, Int32Array, types::Float32Type};
//! # use vectordb::Database;
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
//! # let tmpdir = tempfile::tempdir().unwrap();
//! # let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap();
//! # let schema = Arc::new(Schema::new(vec![
//! # Field::new("id", DataType::Int32, false),
//! # Field::new("vector", DataType::FixedSizeList(
//! # Arc::new(Field::new("item", DataType::Float32, true)), 128), true),
//! # ]));
//! # let batches = RecordBatchIterator::new(vec![
//! # RecordBatch::try_new(schema.clone(),
//! # vec![
//! # Arc::new(Int32Array::from_iter_values(0..10)),
//! # Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
//! # (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)),
//! # ]).unwrap()
//! # ].into_iter().map(Ok),
//! # schema.clone());
//! # db.create_table("my_table", batches, None).await.unwrap();
//! let table = db.open_table("my_table").await.unwrap();
//! let results = table
//! .search(Some(vec![1.0; 128]))
//! .execute()
//! .await
//! .unwrap()
//! .try_collect::<Vec<_>>()
//! .await
//! .unwrap();
//! # });
//!
//!
//! ```
pub mod data;
pub mod database;
pub mod error;

View File

@@ -314,13 +314,12 @@ impl Table {
///
/// # Arguments
///
/// * `vector` The vector used for this query.
/// * `query_vector` The vector used for this query.
///
/// # Returns
///
/// * A [Query] object.
pub fn search(&self, query_vector: Option<Float32Array>) -> Query {
Query::new(self.dataset.clone(), query_vector)
pub fn search<T: Into<Float32Array>>(&self, query_vector: Option<T>) -> Query {
Query::new(self.dataset.clone(), query_vector.map(|q| q.into()))
}
pub fn filter(&self, expr: String) -> Query {