From 49de13c65a3afb46dbc594a66c1f8d30f9ca43b8 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Mon, 22 Jan 2024 09:15:55 -0800 Subject: [PATCH] doc: add index page for rust crate (#839) Rust API doc for the braves --- rust/ffi/node/src/query.rs | 3 +- rust/vectordb/src/data/inspect.rs | 4 +- rust/vectordb/src/database.rs | 5 +- rust/vectordb/src/lib.rs | 121 +++++++++++++++++++++++++++++- rust/vectordb/src/table.rs | 7 +- 5 files changed, 130 insertions(+), 10 deletions(-) diff --git a/rust/ffi/node/src/query.rs b/rust/ffi/node/src/query.rs index 6250c6f8..dc3a3438 100644 --- a/rust/ffi/node/src/query.rs +++ b/rust/ffi/node/src/query.rs @@ -1,7 +1,6 @@ use std::convert::TryFrom; use std::ops::Deref; -use arrow_array::Float32Array; use futures::{TryFutureExt, TryStreamExt}; use lance_linalg::distance::MetricType; use neon::context::FunctionContext; @@ -72,7 +71,7 @@ impl JsQuery { rt.spawn(async move { let mut builder = table - .search(query.map(Float32Array::from)) + .search(query) .refine_factor(refine_factor) .nprobes(nprobes) .filter(filter) diff --git a/rust/vectordb/src/data/inspect.rs b/rust/vectordb/src/data/inspect.rs index 9563f308..29d4291a 100644 --- a/rust/vectordb/src/data/inspect.rs +++ b/rust/vectordb/src/data/inspect.rs @@ -51,8 +51,8 @@ where /// Parameters /// ---------- /// - reader: RecordBatchReader -/// - strict: if set true, only fixed_size_list is considered as vector column. If set to false, -/// a list column with same length is also considered as vector column. +/// - strict: if set true, only `fixed_size_list` is considered as vector column. If set to false, +/// a `list` column with same length is also considered as vector column. 
pub fn infer_vector_columns( reader: impl RecordBatchReader + Send, strict: bool, diff --git a/rust/vectordb/src/database.rs b/rust/vectordb/src/database.rs index f8c97af4..7d1d2c35 100644 --- a/rust/vectordb/src/database.rs +++ b/rust/vectordb/src/database.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! LanceDB Database +//! + use std::fs::create_dir_all; use std::path::Path; use std::sync::Arc; @@ -159,7 +162,7 @@ impl Database { /// /// # Returns /// - /// * A [Vec] with all table names. + /// * A [`Vec`] with all table names. pub async fn table_names(&self) -> Result<Vec<String>> { let mut f = self .object_store diff --git a/rust/vectordb/src/lib.rs b/rust/vectordb/src/lib.rs index 3cb4c934..6aa4dd91 100644 --- a/rust/vectordb/src/lib.rs +++ b/rust/vectordb/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2023 Lance Developers. +// Copyright 2024 Lance Developers. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,125 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! # VectorDB ([LanceDB](https://github.com/lancedb/lancedb)) -- Developer-friendly, serverless vector database for AI applications +//! +//! [LanceDB](https://github.com/lancedb/lancedb) is an open-source database for vector-search built with persistent storage, +//! which greatly simplifies retrieval, filtering and management of embeddings. +//! +//! The key features of LanceDB include: +//! - Production-scale vector search with no servers to manage. +//! - Store, query and filter vectors, metadata and multi-modal data (text, images, videos, point clouds, and more). +//! - Support for vector similarity search, full-text search and SQL. +//! - Native Rust, Python, JavaScript/TypeScript support. +//!
- Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure. +//! - GPU support in building vector indices[^note]. +//! - Ecosystem integrations with LangChain 🦜️🔗, LlamaIndex 🦙, Apache-Arrow, Pandas, Polars, DuckDB and more on the way. +//! +//! [^note]: Only in Python SDK. +//! +//! ## Getting Started +//! +//! LanceDB runs in process, to use it in your Rust project, put the following in your `Cargo.toml`: +//! +//! ```ignore +//! [dependencies] +//! vectordb = "0.4" +//! arrow-schema = "50" +//! arrow-array = "50" +//! ``` +//! +//! ### Quick Start +//! +//!
<div class="warning">Rust API is not stable yet.</div>
+//! +//! #### Connect to a database. +//! +//! ```rust +//! use vectordb::{Database, Table, WriteMode}; +//! use arrow_schema::{Field, Schema}; +//! # tokio::runtime::Runtime::new().unwrap().block_on(async { +//! let db = Database::connect("data/sample-lancedb").await.unwrap(); +//! # }); +//! ``` +//! +//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself. +//! It treats [`FixedSizeList`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html) +//! columns as vectors. +//! +//! #### Create a table +//! +//! To create a Table, you need to provide an [`arrow_schema::Schema`] and an [`arrow_array::RecordBatch`] stream. +//! +//! ```rust +//! # use std::sync::Arc; +//! use arrow_schema::{DataType, Schema, Field}; +//! use arrow_array::{RecordBatch, RecordBatchIterator}; +//! # use arrow_array::{FixedSizeListArray, Float32Array, Int32Array, types::Float32Type}; +//! # use vectordb::Database; +//! +//! # tokio::runtime::Runtime::new().unwrap().block_on(async { +//! # let tmpdir = tempfile::tempdir().unwrap(); +//! # let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap(); +//! let schema = Arc::new(Schema::new(vec![ +//! Field::new("id", DataType::Int32, false), +//! Field::new("vector", DataType::FixedSizeList( +//! Arc::new(Field::new("item", DataType::Float32, true)), 128), true), +//! ])); +//! // Create a RecordBatch stream. +//! let batches = RecordBatchIterator::new(vec![ +//! RecordBatch::try_new(schema.clone(), +//! vec![ +//! Arc::new(Int32Array::from_iter_values(0..10)), +//! Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>( +//! (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)), +//! ]).unwrap() +//! ].into_iter().map(Ok), +//! schema.clone()); +//! db.create_table("my_table", batches, None).await.unwrap(); +//! # }); +//! ``` +//! +//! #### Open table and run search +//! +//! ```rust +//! # use std::sync::Arc; +//! # use futures::TryStreamExt; +//!
# use arrow_schema::{DataType, Schema, Field}; +//! # use arrow_array::{RecordBatch, RecordBatchIterator}; +//! # use arrow_array::{FixedSizeListArray, Float32Array, Int32Array, types::Float32Type}; +//! # use vectordb::Database; +//! # tokio::runtime::Runtime::new().unwrap().block_on(async { +//! # let tmpdir = tempfile::tempdir().unwrap(); +//! # let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap(); +//! # let schema = Arc::new(Schema::new(vec![ +//! # Field::new("id", DataType::Int32, false), +//! # Field::new("vector", DataType::FixedSizeList( +//! # Arc::new(Field::new("item", DataType::Float32, true)), 128), true), +//! # ])); +//! # let batches = RecordBatchIterator::new(vec![ +//! # RecordBatch::try_new(schema.clone(), +//! # vec![ +//! # Arc::new(Int32Array::from_iter_values(0..10)), +//! # Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>( +//! # (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)), +//! # ]).unwrap() +//! # ].into_iter().map(Ok), +//! # schema.clone()); +//! # db.create_table("my_table", batches, None).await.unwrap(); +//! let table = db.open_table("my_table").await.unwrap(); +//! let results = table +//! .search(Some(vec![1.0; 128])) +//! .execute() +//! .await +//! .unwrap() +//! .try_collect::<Vec<_>>() +//! .await +//! .unwrap(); +//! # }); +//! +//! +//! ``` + pub mod data; pub mod database; pub mod error; diff --git a/rust/vectordb/src/table.rs b/rust/vectordb/src/table.rs index 0c370092..2952bc0e 100644 --- a/rust/vectordb/src/table.rs +++ b/rust/vectordb/src/table.rs @@ -314,13 +314,12 @@ impl Table { /// /// # Arguments /// - /// * `vector` The vector used for this query. + /// * `query_vector` The vector used for this query. /// /// # Returns - /// /// * A [Query] object.
- pub fn search(&self, query_vector: Option<Float32Array>) -> Query { - Query::new(self.dataset.clone(), query_vector) + pub fn search<T: Into<Float32Array>>(&self, query_vector: Option<T>) -> Query { + Query::new(self.dataset.clone(), query_vector.map(|q| q.into())) } pub fn filter(&self, expr: String) -> Query {