From 3dd8522bc96d4f338561fa212080ae294e19238c Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Fri, 26 Jan 2024 11:40:11 -0800 Subject: [PATCH] feat(rust): provide connect and connect_with_options in Rust SDK (#871) * Bring the feature parity of Rust connect methods. * A global connect method that can connect to local and remote / cloud table, as the same as in js/python today. --- rust/vectordb/src/connection.rs | 81 +++++++++++++++++++++++++++++++++ rust/vectordb/src/lib.rs | 34 +++++++++++--- 2 files changed, 109 insertions(+), 6 deletions(-) diff --git a/rust/vectordb/src/connection.rs b/rust/vectordb/src/connection.rs index 92ad0ee5..c8fc7921 100644 --- a/rust/vectordb/src/connection.rs +++ b/rust/vectordb/src/connection.rs @@ -68,6 +68,87 @@ pub trait Connection: Send + Sync { async fn drop_table(&self, name: &str) -> Result<()>; } +#[derive(Debug)] +pub struct ConnectOptions { + /// Database URI + /// + /// # Accepted URI formats + /// + /// - `/path/to/database` - local database on file system. + /// - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store + /// - `db://dbname` - Lance Cloud + pub uri: String, + + /// Lance Cloud API key + pub api_key: Option, + /// Lance Cloud region + pub region: Option, + /// Lance Cloud host override + pub host_override: Option, + + /// The maximum number of indices to cache in memory. Defaults to 256. + pub index_cache_size: u32, +} + +impl ConnectOptions { + /// Create a new [`ConnectOptions`] with the given database URI. 
+ pub fn new(uri: &str) -> Self { + Self { + uri: uri.to_string(), + api_key: None, + region: None, + host_override: None, + index_cache_size: 256, + } + } + + pub fn api_key(mut self, api_key: &str) -> Self { + self.api_key = Some(api_key.to_string()); + self + } + + pub fn region(mut self, region: &str) -> Self { + self.region = Some(region.to_string()); + self + } + + pub fn host_override(mut self, host_override: &str) -> Self { + self.host_override = Some(host_override.to_string()); + self + } + + pub fn index_cache_size(mut self, index_cache_size: u32) -> Self { + self.index_cache_size = index_cache_size; + self + } +} + +/// Connect to a LanceDB database. +/// +/// # Arguments +/// +/// - `uri` - URI where the database is located, can be a local file or a supported remote cloud storage +/// +/// ## Accepted URI formats +/// +/// - `/path/to/database` - local database on file system. +/// - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store +/// - `db://dbname` - Lance Cloud +/// +pub async fn connect(uri: &str) -> Result> { + let options = ConnectOptions::new(uri); + connect_with_options(&options).await +} + +/// Connect with [`ConnectOptions`]. +/// +/// # Arguments +/// - `options` - [`ConnectOptions`] to connect to the database. +pub async fn connect_with_options(options: &ConnectOptions) -> Result> { + let db = Database::connect(&options.uri).await?; + Ok(Arc::new(db)) +} + pub struct Database { object_store: ObjectStore, query_string: Option, diff --git a/rust/vectordb/src/lib.rs b/rust/vectordb/src/lib.rs index c01d73b5..631aafc8 100644 --- a/rust/vectordb/src/lib.rs +++ b/rust/vectordb/src/lib.rs @@ -41,15 +41,32 @@ //! //! ### Quick Start //! -//!
Rust API is not stable yet.
+//!
Rust API is not stable yet, please expect breaking changes.
//! //! #### Connect to a database. //! //! ```rust -//! use vectordb::connection::Database; +//! use vectordb::connect; //! # use arrow_schema::{Field, Schema}; //! # tokio::runtime::Runtime::new().unwrap().block_on(async { -//! let db = Database::connect("data/sample-lancedb").await.unwrap(); +//! let db = connect("data/sample-lancedb").await.unwrap(); +//! # }); +//! ``` +//! +//! LanceDB accepts the following forms of database path: +//! +//! - `/path/to/database` - local database on file system. +//! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store +//! - `db://dbname` - Lance Cloud +//! +//! You can also use [`ConnectOptions`] to configure the connection to the database. +//! +//! ```rust +//! use vectordb::{connect_with_options, ConnectOptions}; +//! # tokio::runtime::Runtime::new().unwrap().block_on(async { +//! let options = ConnectOptions::new("data/sample-lancedb") +//! .index_cache_size(1024); +//! let db = connect_with_options(&options).await.unwrap(); +//! # }); //! ``` //! @@ -57,6 +74,8 @@ //! It treats [`FixedSizeList`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html) //! columns as vector columns. //! +//! For more details, please refer to [LanceDB documentation](https://lancedb.github.io/lancedb/). +//! //! #### Create a table //! //! To create a Table, you need to provide a [`arrow_schema::Schema`] and a [`arrow_array::RecordBatch`] stream. @@ -67,10 +86,11 @@ //! use arrow_array::{RecordBatch, RecordBatchIterator}; //! # use arrow_array::{FixedSizeListArray, Float32Array, Int32Array, types::Float32Type}; //! # use vectordb::connection::{Database, Connection}; +//! # use vectordb::connect; //! //! # tokio::runtime::Runtime::new().unwrap().block_on(async { //! # let tmpdir = tempfile::tempdir().unwrap(); -//! # let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap(); +//! # let db = connect(tmpdir.path().to_str().unwrap()).await.unwrap(); //! 
let schema = Arc::new(Schema::new(vec![ //! Field::new("id", DataType::Int32, false), //! Field::new("vector", DataType::FixedSizeList( @@ -94,13 +114,13 @@ //! //! ```no_run //! # use std::sync::Arc; -//! # use vectordb::connection::{Database, Connection}; +//! # use vectordb::connect; //! # use arrow_array::{FixedSizeListArray, types::Float32Type, RecordBatch, //! # RecordBatchIterator, Int32Array}; //! # use arrow_schema::{Schema, Field, DataType}; //! # tokio::runtime::Runtime::new().unwrap().block_on(async { //! # let tmpdir = tempfile::tempdir().unwrap(); -//! # let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap(); +//! # let db = connect(tmpdir.path().to_str().unwrap()).await.unwrap(); //! # let tbl = db.open_table("idx_test").await.unwrap(); //! tbl.create_index(&["vector"]) //! .ivf_pq() @@ -166,4 +186,6 @@ pub use connection::{Connection, Database}; pub use error::{Error, Result}; pub use table::{Table, TableRef}; +/// Connect to a database +pub use connection::{connect, connect_with_options, ConnectOptions}; pub use lance::dataset::WriteMode;