diff --git a/Cargo.toml b/Cargo.toml index 35bc3b835..9ddddc4fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ exclude = ["python"] resolver = "2" [workspace.package] -edition = "2021" +edition = "2024" authors = ["LanceDB Devs "] license = "Apache-2.0" repository = "https://github.com/lancedb/lancedb" diff --git a/nodejs/src/connection.rs b/nodejs/src/connection.rs index 824ae7562..ccfdc6fc7 100644 --- a/nodejs/src/connection.rs +++ b/nodejs/src/connection.rs @@ -8,10 +8,10 @@ use lancedb::database::{CreateTableMode, Database}; use napi::bindgen_prelude::*; use napi_derive::*; +use crate::ConnectionOptions; use crate::error::NapiErrorExt; use crate::header::JsHeaderProvider; use crate::table::Table; -use crate::ConnectionOptions; use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection}; use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema}; diff --git a/nodejs/src/index.rs b/nodejs/src/index.rs index bf8b280a6..c957cea78 100644 --- a/nodejs/src/index.rs +++ b/nodejs/src/index.rs @@ -3,12 +3,12 @@ use std::sync::Mutex; +use lancedb::index::Index as LanceDbIndex; use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder}; use lancedb::index::vector::{ IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, IvfRqIndexBuilder, }; -use lancedb::index::Index as LanceDbIndex; use napi_derive::napi; use crate::util::parse_distance_type; diff --git a/nodejs/src/query.rs b/nodejs/src/query.rs index 21403c0e7..4ad42f32f 100644 --- a/nodejs/src/query.rs +++ b/nodejs/src/query.rs @@ -17,8 +17,8 @@ use lancedb::query::VectorQuery as LanceDbVectorQuery; use napi::bindgen_prelude::*; use napi_derive::napi; -use crate::error::convert_error; use crate::error::NapiErrorExt; +use crate::error::convert_error; use crate::iterator::RecordBatchIterator; use crate::rerankers::RerankHybridCallbackArgs; use crate::rerankers::Reranker; @@ -551,15 +551,12 @@ fn parse_fts_query(query: Object) -> napi::Result { } }; let mut query = FullTextSearchQuery::new_query(query); - if let Some(cols) = columns { - if !cols.is_empty() { - query = query.with_columns(&cols).map_err(|e| { - napi::Error::from_reason(format!( - "Failed to set full text search columns: {}", - e - )) - })?; - } + if let Some(cols) = columns + && !cols.is_empty() + { + query = query.with_columns(&cols).map_err(|e| { + napi::Error::from_reason(format!("Failed to set full text search columns: {}", e)) + })?; } Ok(query) } else { diff --git a/nodejs/src/session.rs b/nodejs/src/session.rs index 6e755cb92..1c31d37da 100644 --- a/nodejs/src/session.rs +++ b/nodejs/src/session.rs @@ -95,7 +95,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session { napi_val: napi::sys::napi_value, ) -> napi::Result { let object: napi::bindgen_prelude::ClassInstance = - napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?; + unsafe { napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)? }; Ok((*object).clone()) } } diff --git a/python/src/arrow.rs b/python/src/arrow.rs index 88c285d30..fd3a05964 100644 --- a/python/src/arrow.rs +++ b/python/src/arrow.rs @@ -10,7 +10,7 @@ use arrow::{ use futures::stream::StreamExt; use lancedb::arrow::SendableRecordBatchStream; use pyo3::{ - exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python, + Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods, }; use pyo3_async_runtimes::tokio::future_into_py; diff --git a/python/src/connection.rs b/python/src/connection.rs index 7df89b101..a8b218a8e 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -9,10 +9,10 @@ use lancedb::{ database::{CreateTableMode, Database, ReadConsistency}, }; use pyo3::{ + Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python, exceptions::{PyRuntimeError, PyValueError}, pyclass, pyfunction, pymethods, types::{PyDict, PyDictMethods}, - Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python, }; use pyo3_async_runtimes::tokio::future_into_py; diff --git a/python/src/error.rs b/python/src/error.rs index b406c6b38..062a1dc96 100644 --- a/python/src/error.rs +++ b/python/src/error.rs @@ -2,10 +2,10 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use pyo3::{ + PyErr, PyResult, Python, exceptions::{PyIOError, PyNotImplementedError, PyOSError, PyRuntimeError, PyValueError}, intern, types::{PyAnyMethods, PyNone}, - PyErr, PyResult, Python, }; use lancedb::error::Error as LanceError; diff --git a/python/src/index.rs b/python/src/index.rs index c93b23eb3..602b5e420 100644 --- a/python/src/index.rs +++ b/python/src/index.rs @@ -3,17 +3,17 @@ use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder}; use lancedb::index::{ + Index as LanceDbIndex, scalar::{BTreeIndexBuilder, FtsIndexBuilder}, vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder}, - Index as LanceDbIndex, }; -use pyo3::types::PyStringMethods; use pyo3::IntoPyObject; +use pyo3::types::PyStringMethods; use pyo3::{ + Bound, FromPyObject, PyAny, PyResult, Python, exceptions::{PyKeyError, PyValueError}, intern, pyclass, pymethods, types::PyAnyMethods, - Bound, FromPyObject, PyAny, PyResult, Python, }; use crate::util::parse_distance_type; @@ -41,7 +41,12 @@ pub fn extract_index_params(source: &Option>) -> PyResult>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -64,10 +69,11 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -86,7 +92,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -101,7 +107,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -117,7 +123,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -138,7 +144,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -155,7 +161,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult Err(PyValueError::new_err(format!( "Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq", not_supported diff --git a/python/src/lib.rs b/python/src/lib.rs index 3c7289ca0..4788b3e93 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -2,14 +2,14 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use arrow::RecordBatchStream; -use connection::{connect, Connection}; +use connection::{Connection, connect}; use env_logger::Env; use index::IndexConfig; use permutation::{PyAsyncPermutationBuilder, PyPermutationReader}; use pyo3::{ - pymodule, + Bound, PyResult, Python, pymodule, types::{PyModule, PyModuleMethods}, - wrap_pyfunction, Bound, PyResult, Python, + wrap_pyfunction, }; use query::{FTSQuery, HybridQuery, Query, VectorQuery}; use session::Session; diff --git a/python/src/permutation.rs b/python/src/permutation.rs index 06010396d..21b8c9c47 100644 --- a/python/src/permutation.rs +++ b/python/src/permutation.rs @@ -16,10 +16,10 @@ use lancedb::{ query::Select, }; use pyo3::{ + Bound, PyAny, PyRef, PyRefMut, PyResult, Python, exceptions::PyRuntimeError, pyclass, pymethods, types::{PyAnyMethods, PyDict, PyDictMethods, PyType}, - Bound, PyAny, PyRef, PyRefMut, PyResult, Python, }; use pyo3_async_runtimes::tokio::future_into_py; diff --git a/python/src/query.rs b/python/src/query.rs index 65e47a0db..0c17cf703 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -4,9 +4,9 @@ use std::sync::Arc; use std::time::Duration; -use arrow::array::make_array; use arrow::array::Array; use arrow::array::ArrayData; +use arrow::array::make_array; use arrow::pyarrow::FromPyArrow; use arrow::pyarrow::IntoPyArrow; use arrow::pyarrow::ToPyArrow; @@ -22,23 +22,23 @@ use lancedb::query::{ VectorQuery as LanceDbVectorQuery, }; use lancedb::table::AnyQuery; -use pyo3::prelude::{PyAnyMethods, PyDictMethods}; -use pyo3::pyfunction; -use pyo3::pymethods; -use pyo3::types::PyList; -use pyo3::types::{PyDict, PyString}; use pyo3::Bound; use pyo3::IntoPyObject; use pyo3::PyAny; use pyo3::PyRef; use pyo3::PyResult; use pyo3::Python; -use pyo3::{exceptions::PyRuntimeError, FromPyObject}; +use pyo3::prelude::{PyAnyMethods, PyDictMethods}; +use pyo3::pyfunction; +use pyo3::pymethods; +use pyo3::types::PyList; +use pyo3::types::{PyDict, PyString}; +use pyo3::{FromPyObject, exceptions::PyRuntimeError}; +use pyo3::{PyErr, pyclass}; use pyo3::{ exceptions::{PyNotImplementedError, PyValueError}, intern, }; -use pyo3::{pyclass, PyErr}; use pyo3_async_runtimes::tokio::future_into_py; use crate::util::parse_distance_type; diff --git a/python/src/session.rs b/python/src/session.rs index abc499e39..2433114b6 100644 --- a/python/src/session.rs +++ b/python/src/session.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use lancedb::{ObjectStoreRegistry, Session as LanceSession}; -use pyo3::{pyclass, pymethods, PyResult}; +use pyo3::{PyResult, pyclass, pymethods}; /// A session for managing caches and object stores across LanceDB operations. /// diff --git a/python/src/table.rs b/python/src/table.rs index 3766f3f19..e988cadb4 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -5,7 +5,7 @@ use std::{collections::HashMap, sync::Arc}; use crate::{ connection::Connection, error::PythonErrorExt, - index::{extract_index_params, IndexConfig}, + index::{IndexConfig, extract_index_params}, query::{Query, TakeQuery}, table::scannable::PyScannable, }; @@ -19,10 +19,10 @@ use lancedb::table::{ Table as LanceDbTable, }; use pyo3::{ + Bound, FromPyObject, PyAny, PyRef, PyResult, Python, exceptions::{PyKeyError, PyRuntimeError, PyValueError}, pyclass, pymethods, types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods}, - Bound, FromPyObject, PyAny, PyRef, PyResult, Python, }; use pyo3_async_runtimes::tokio::future_into_py; @@ -542,7 +542,7 @@ impl Table { let inner = self_.inner_ref()?.clone(); future_into_py(self_.py(), async move { let versions = inner.list_versions().await.infer_error()?; - let versions_as_dict = Python::attach(|py| { + Python::attach(|py| { versions .iter() .map(|v| { @@ -559,9 +559,7 @@ impl Table { Ok(dict.unbind()) }) .collect::>>() - }); - - versions_as_dict + }) }) } diff --git a/python/src/table/scannable.rs b/python/src/table/scannable.rs index e2bfc1295..5d02ca024 100644 --- a/python/src/table/scannable.rs +++ b/python/src/table/scannable.rs @@ -10,11 +10,11 @@ use arrow::{ }; use futures::StreamExt; use lancedb::{ + Error, arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, data::scannable::Scannable, - Error, }; -use pyo3::{types::PyAnyMethods, FromPyObject, Py, PyAny, Python}; +use pyo3::{FromPyObject, Py, PyAny, Python, types::PyAnyMethods}; /// Adapter that implements Scannable for a Python reader factory callable. /// @@ -99,15 +99,15 @@ impl Scannable for PyScannable { // Channel closed. Check if the task panicked — a panic // drops the sender without sending an error, so without // this check we'd silently return a truncated stream. - if let Some(handle) = join_handle { - if let Err(join_err) = handle.await { - return Some(( - Err(Error::Runtime { - message: format!("Reader task panicked: {}", join_err), - }), - (rx, None), - )); - } + if let Some(handle) = join_handle + && let Err(join_err) = handle.await + { + return Some(( + Err(Error::Runtime { + message: format!("Reader task panicked: {}", join_err), + }), + (rx, None), + )); } None } diff --git a/python/src/util.rs b/python/src/util.rs index 8ec8f40cb..977608a73 100644 --- a/python/src/util.rs +++ b/python/src/util.rs @@ -5,8 +5,9 @@ use std::sync::Mutex; use lancedb::DistanceType; use pyo3::{ + PyResult, exceptions::{PyRuntimeError, PyValueError}, - pyfunction, PyResult, + pyfunction, }; /// A wrapper around a rust builder diff --git a/rust/lancedb/examples/bedrock.rs b/rust/lancedb/examples/bedrock.rs index 365453b60..9af569c4e 100644 --- a/rust/lancedb/examples/bedrock.rs +++ b/rust/lancedb/examples/bedrock.rs @@ -9,10 +9,9 @@ use aws_config::Region; use aws_sdk_bedrockruntime::Client; use futures::StreamExt; use lancedb::{ - connect, - embeddings::{bedrock::BedrockEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction}, + Result, connect, + embeddings::{EmbeddingDefinition, EmbeddingFunction, bedrock::BedrockEmbeddingFunction}, query::{ExecutableQuery, QueryBase}, - Result, }; #[tokio::main] diff --git a/rust/lancedb/examples/full_text_search.rs b/rust/lancedb/examples/full_text_search.rs index 15d7d7d88..54d4a38f5 100644 --- a/rust/lancedb/examples/full_text_search.rs +++ b/rust/lancedb/examples/full_text_search.rs @@ -10,10 +10,10 @@ use futures::TryStreamExt; use lance_index::scalar::FullTextSearchQuery; use lancedb::connection::Connection; -use lancedb::index::scalar::FtsIndexBuilder; use lancedb::index::Index; +use lancedb::index::scalar::FtsIndexBuilder; use lancedb::query::{ExecutableQuery, QueryBase}; -use lancedb::{connect, Result, Table}; +use lancedb::{Result, Table, connect}; use rand::random; #[tokio::main] @@ -46,19 +46,21 @@ fn create_some_records() -> Result>(); let n_terms = 3; let batches = RecordBatchIterator::new( - vec![RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)), - Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| { - (0..n_terms) - .map(|_| words[random::() as usize % words.len()]) - .collect::>() - .join(" ") - }))), - ], - ) - .unwrap()] + vec![ + RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)), + Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| { + (0..n_terms) + .map(|_| words[random::() as usize % words.len()]) + .collect::>() + .join(" ") + }))), + ], + ) + .unwrap(), + ] .into_iter() .map(Ok), schema.clone(), diff --git a/rust/lancedb/examples/hybrid_search.rs b/rust/lancedb/examples/hybrid_search.rs index dc1a80a39..c28b19594 100644 --- a/rust/lancedb/examples/hybrid_search.rs +++ b/rust/lancedb/examples/hybrid_search.rs @@ -5,16 +5,15 @@ use arrow_array::{RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use futures::TryStreamExt; use lance_index::scalar::FullTextSearchQuery; -use lancedb::index::scalar::FtsIndexBuilder; use lancedb::index::Index; +use lancedb::index::scalar::FtsIndexBuilder; use lancedb::{ - connect, + Result, Table, connect, embeddings::{ - sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition, - EmbeddingFunction, + EmbeddingDefinition, EmbeddingFunction, + sentence_transformers::SentenceTransformersEmbeddings, }, query::{QueryBase, QueryExecutionOptions}, - Result, Table, }; use std::{iter::once, sync::Arc}; diff --git a/rust/lancedb/examples/ivf_pq.rs b/rust/lancedb/examples/ivf_pq.rs index d171d5a45..79f674615 100644 --- a/rust/lancedb/examples/ivf_pq.rs +++ b/rust/lancedb/examples/ivf_pq.rs @@ -14,10 +14,10 @@ use arrow_schema::{DataType, Field, Schema}; use futures::TryStreamExt; use lancedb::connection::Connection; -use lancedb::index::vector::IvfPqIndexBuilder; use lancedb::index::Index; +use lancedb::index::vector::IvfPqIndexBuilder; use lancedb::query::{ExecutableQuery, QueryBase}; -use lancedb::{connect, DistanceType, Result, Table}; +use lancedb::{DistanceType, Result, Table, connect}; #[tokio::main] async fn main() -> Result<()> { @@ -51,19 +51,21 @@ fn create_some_records() -> Result( - (0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])), - DIM as i32, + vec![ + RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)), + Arc::new( + FixedSizeListArray::from_iter_primitive::( + (0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])), + DIM as i32, + ), ), - ), - ], - ) - .unwrap()] + ], + ) + .unwrap(), + ] .into_iter() .map(Ok), schema.clone(), diff --git a/rust/lancedb/examples/openai.rs b/rust/lancedb/examples/openai.rs index 2194e519a..b54e3e55b 100644 --- a/rust/lancedb/examples/openai.rs +++ b/rust/lancedb/examples/openai.rs @@ -8,10 +8,9 @@ use std::{iter::once, sync::Arc}; use arrow_array::{RecordBatch, StringArray}; use futures::StreamExt; use lancedb::{ - connect, - embeddings::{openai::OpenAIEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction}, + Result, connect, + embeddings::{EmbeddingDefinition, EmbeddingFunction, openai::OpenAIEmbeddingFunction}, query::{ExecutableQuery, QueryBase}, - Result, }; // --8<-- [end:imports] diff --git a/rust/lancedb/examples/sentence_transformers.rs b/rust/lancedb/examples/sentence_transformers.rs index 2fa7ed30b..99f2ac63d 100644 --- a/rust/lancedb/examples/sentence_transformers.rs +++ b/rust/lancedb/examples/sentence_transformers.rs @@ -7,13 +7,12 @@ use arrow_array::{RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use futures::StreamExt; use lancedb::{ - connect, + Result, connect, embeddings::{ - sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition, - EmbeddingFunction, + EmbeddingDefinition, EmbeddingFunction, + sentence_transformers::SentenceTransformersEmbeddings, }, query::{ExecutableQuery, QueryBase}, - Result, }; #[tokio::main] diff --git a/rust/lancedb/examples/simple.rs b/rust/lancedb/examples/simple.rs index 157083d0a..14846e037 100644 --- a/rust/lancedb/examples/simple.rs +++ b/rust/lancedb/examples/simple.rs @@ -14,7 +14,7 @@ use futures::TryStreamExt; use lancedb::connection::Connection; use lancedb::index::Index; use lancedb::query::{ExecutableQuery, QueryBase}; -use lancedb::{connect, Result, Table as LanceDbTable}; +use lancedb::{Result, Table as LanceDbTable, connect}; #[tokio::main] async fn main() -> Result<()> { diff --git a/rust/lancedb/src/arrow.rs b/rust/lancedb/src/arrow.rs index a8c710865..fe5dd998d 100644 --- a/rust/lancedb/src/arrow.rs +++ b/rust/lancedb/src/arrow.rs @@ -12,7 +12,7 @@ use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount}; #[cfg(feature = "polars")] use {crate::polars_arrow_convertors, polars::frame::ArrowChunk, polars::prelude::DataFrame}; -use crate::{error::Result, Error}; +use crate::{Error, error::Result}; /// An iterator of batches that also has a schema pub trait RecordBatchReader: Iterator> { diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 1e22e7c8f..f442409d2 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -17,6 +17,7 @@ use lance_namespace::models::{ #[cfg(feature = "aws")] use object_store::aws::AwsCredential; +use crate::Table; use crate::connection::create_table::CreateTableBuilder; use crate::data::scannable::Scannable; use crate::database::listing::ListingDatabase; @@ -31,7 +32,6 @@ use crate::remote::{ client::ClientConfig, db::{OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION}, }; -use crate::Table; use lance::io::ObjectStoreParams; pub use lance_encoding::version::LanceFileVersion; #[cfg(feature = "remote")] @@ -758,10 +758,10 @@ impl ConnectBuilder { options: &mut HashMap, ) { for (env_key, opt_key) in env_var_to_remote_storage_option { - if let Ok(env_value) = std::env::var(env_key) { - if !options.contains_key(*opt_key) { - options.insert((*opt_key).to_string(), env_value); - } + if let Ok(env_value) = std::env::var(env_key) + && !options.contains_key(*opt_key) + { + options.insert((*opt_key).to_string(), env_value); } } } @@ -1011,14 +1011,13 @@ mod tests { #[cfg(feature = "remote")] #[test] fn test_apply_env_defaults() { - let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY"; - let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL"; + let env_key = "PATH"; + let env_val = std::env::var(env_key).expect("PATH should be set in test environment"); let opts_key = "test_apply_env_defaults_environment_variable_opts_key"; - std::env::set_var(env_key, env_val); let mut options = HashMap::new(); ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options); - assert_eq!(Some(&env_val.to_string()), options.get(opts_key)); + assert_eq!(Some(&env_val), options.get(opts_key)); options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string()); ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options); diff --git a/rust/lancedb/src/connection/create_table.rs b/rust/lancedb/src/connection/create_table.rs index 8eb7d2207..c33866159 100644 --- a/rust/lancedb/src/connection/create_table.rs +++ b/rust/lancedb/src/connection/create_table.rs @@ -6,12 +6,12 @@ use std::sync::Arc; use lance_io::object_store::StorageOptionsProvider; use crate::{ + Error, Result, Table, connection::{merge_storage_options, set_storage_options_provider}, data::scannable::{Scannable, WithEmbeddingsScannable}, database::{CreateTableMode, CreateTableRequest, Database}, embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry}, table::WriteOptions, - Error, Result, Table, }; pub struct CreateTableBuilder { @@ -167,7 +167,7 @@ impl CreateTableBuilder { #[cfg(test)] mod tests { use arrow_array::{ - record_batch, Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, + Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, record_batch, }; use arrow_schema::{ArrowError, DataType, Field, Schema}; use futures::TryStreamExt; @@ -380,11 +380,12 @@ mod tests { .await .unwrap(); let other_schema = Arc::new(Schema::new(vec![Field::new("y", DataType::Int32, false)])); - assert!(db - .create_empty_table("test", other_schema.clone()) - .execute() - .await - .is_err()); // TODO: assert what this error is + assert!( + db.create_empty_table("test", other_schema.clone()) + .execute() + .await + .is_err() + ); // TODO: assert what this error is let overwritten = db .create_empty_table("test", other_schema.clone()) .mode(CreateTableMode::Overwrite) diff --git a/rust/lancedb/src/data/inspect.rs b/rust/lancedb/src/data/inspect.rs index 2b78882ef..3914caa74 100644 --- a/rust/lancedb/src/data/inspect.rs +++ b/rust/lancedb/src/data/inspect.rs @@ -5,9 +5,9 @@ use std::collections::HashMap; use arrow::compute::kernels::{aggregate::bool_and, length::length}; use arrow_array::{ + Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader, cast::AsArray, types::{ArrowPrimitiveType, Int32Type, Int64Type}, - Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader, }; use arrow_ord::cmp::eq; use arrow_schema::DataType; @@ -78,7 +78,7 @@ pub fn infer_vector_columns( _ => { return Err(Error::Schema { message: format!("Column {} is not a list", col_name), - }) + }); } } { if let Some(Some(prev_dim)) = columns_to_infer.get(&col_name) { @@ -102,8 +102,8 @@ mod tests { use super::*; use arrow_array::{ - types::{Float32Type, Float64Type}, FixedSizeListArray, Float32Array, ListArray, RecordBatch, RecordBatchIterator, StringArray, + types::{Float32Type, Float64Type}, }; use arrow_schema::{DataType, Field, Schema}; use std::{sync::Arc, vec}; diff --git a/rust/lancedb/src/data/sanitize.rs b/rust/lancedb/src/data/sanitize.rs index 91b9aea95..bcb58261f 100644 --- a/rust/lancedb/src/data/sanitize.rs +++ b/rust/lancedb/src/data/sanitize.rs @@ -4,10 +4,10 @@ use std::{iter::repeat_with, sync::Arc}; use arrow_array::{ - cast::AsArray, - types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type}, Array, ArrowNumericType, FixedSizeListArray, PrimitiveArray, RecordBatch, RecordBatchIterator, RecordBatchReader, + cast::AsArray, + types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type}, }; use arrow_cast::{can_cast_types, cast}; use arrow_schema::{ArrowError, DataType, Field, Schema}; @@ -184,7 +184,7 @@ mod tests { use std::sync::Arc; use arrow_array::{ - FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int32Array, Int8Array, + FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int8Array, Int32Array, RecordBatch, RecordBatchIterator, StringArray, }; use arrow_schema::Field; diff --git a/rust/lancedb/src/data/scannable.rs b/rust/lancedb/src/data/scannable.rs index 67afecb2e..35c10be59 100644 --- a/rust/lancedb/src/data/scannable.rs +++ b/rust/lancedb/src/data/scannable.rs @@ -13,16 +13,16 @@ use crate::arrow::{ SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream, }; use crate::embeddings::{ - compute_embeddings_for_batch, compute_output_schema, EmbeddingDefinition, EmbeddingFunction, - EmbeddingRegistry, + EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, compute_embeddings_for_batch, + compute_output_schema, }; use crate::table::{ColumnDefinition, ColumnKind, TableDefinition}; use crate::{Error, Result}; use arrow_array::{ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader}; use arrow_schema::{ArrowError, SchemaRef}; use async_trait::async_trait; -use futures::stream::once; use futures::StreamExt; +use futures::stream::once; use lance_datafusion::utils::StreamingWriteSource; pub trait Scannable: Send { diff --git a/rust/lancedb/src/database.rs b/rust/lancedb/src/database.rs index df027b613..95f477dc1 100644 --- a/rust/lancedb/src/database.rs +++ b/rust/lancedb/src/database.rs @@ -19,12 +19,12 @@ use std::sync::Arc; use std::time::Duration; use lance::dataset::ReadParams; +use lance_namespace::LanceNamespace; use lance_namespace::models::{ CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse, }; -use lance_namespace::LanceNamespace; use crate::data::scannable::Scannable; use crate::error::Result; diff --git a/rust/lancedb/src/database/listing.rs b/rust/lancedb/src/database/listing.rs index aea5ce24c..3f6a3de55 100644 --- a/rust/lancedb/src/database/listing.rs +++ b/rust/lancedb/src/database/listing.rs @@ -8,7 +8,7 @@ use std::path::Path; use std::{collections::HashMap, sync::Arc}; use lance::dataset::refs::Ref; -use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode}; +use lance::dataset::{ReadParams, WriteMode, builder::DatasetBuilder}; use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore}; use lance_datafusion::utils::StreamingWriteSource; use lance_encoding::version::LanceFileVersion; @@ -1097,11 +1097,11 @@ impl Database for ListingDatabase { #[cfg(test)] mod tests { use super::*; + use crate::Table; use crate::connection::ConnectRequest; use crate::data::scannable::Scannable; use crate::database::{CreateTableMode, CreateTableRequest}; use crate::table::WriteOptions; - use crate::Table; use arrow_array::{Int32Array, RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use std::path::PathBuf; diff --git a/rust/lancedb/src/database/namespace.rs b/rust/lancedb/src/database/namespace.rs index 66771c135..90b5c19bd 100644 --- a/rust/lancedb/src/database/namespace.rs +++ b/rust/lancedb/src/database/namespace.rs @@ -9,13 +9,13 @@ use std::sync::Arc; use async_trait::async_trait; use lance_io::object_store::{ObjectStoreParams, StorageOptionsAccessor}; use lance_namespace::{ + LanceNamespace, models::{ CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse, }, - LanceNamespace, }; use lance_namespace_impls::ConnectBuilder; use log::warn; diff --git a/rust/lancedb/src/dataloader/permutation/builder.rs b/rust/lancedb/src/dataloader/permutation/builder.rs index 94474a33b..377712e98 100644 --- a/rust/lancedb/src/dataloader/permutation/builder.rs +++ b/rust/lancedb/src/dataloader/permutation/builder.rs @@ -11,16 +11,16 @@ use lance_core::ROW_ID; use lance_datafusion::exec::SessionContextExt; use crate::{ + Error, Result, Table, arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream}, connect, database::{CreateTableRequest, Database}, dataloader::permutation::{ shuffle::{Shuffler, ShufflerConfig}, - split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN}, - util::{rename_column, TemporaryDirectory}, + split::{SPLIT_ID_COLUMN, SplitStrategy, Splitter}, + util::{TemporaryDirectory, rename_column}, }, query::{ExecutableQuery, QueryBase, Select}, - Error, Result, Table, }; pub const SRC_ROW_ID_COL: &str = "row_id"; diff --git a/rust/lancedb/src/dataloader/permutation/reader.rs b/rust/lancedb/src/dataloader/permutation/reader.rs index e8d81f4b0..72f146995 100644 --- a/rust/lancedb/src/dataloader/permutation/reader.rs +++ b/rust/lancedb/src/dataloader/permutation/reader.rs @@ -25,8 +25,8 @@ use futures::{StreamExt, TryStreamExt}; use lance::dataset::scanner::DatasetRecordBatchStream; use lance::io::RecordBatchStream; use lance_arrow::RecordBatchExt; -use lance_core::error::LanceOptionExt; use lance_core::ROW_ID; +use lance_core::error::LanceOptionExt; use std::collections::HashMap; use std::sync::Arc; @@ -500,10 +500,10 @@ mod tests { use rand::seq::SliceRandom; use crate::{ + Table, arrow::SendableRecordBatchStream, query::{ExecutableQuery, QueryBase}, - test_utils::datagen::{virtual_table, LanceDbDatagenExt}, - Table, + test_utils::datagen::{LanceDbDatagenExt, virtual_table}, }; use super::*; diff --git a/rust/lancedb/src/dataloader/permutation/shuffle.rs b/rust/lancedb/src/dataloader/permutation/shuffle.rs index 53f7e1af5..05f98eb5a 100644 --- a/rust/lancedb/src/dataloader/permutation/shuffle.rs +++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs @@ -18,12 +18,12 @@ use lance_io::{ scheduler::{ScanScheduler, SchedulerConfig}, utils::CachedFileSize, }; -use rand::{seq::SliceRandom, Rng, RngCore}; +use rand::{Rng, RngCore, seq::SliceRandom}; use crate::{ - arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, - dataloader::permutation::util::{non_crypto_rng, TemporaryDirectory}, Error, Result, + arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, + dataloader::permutation::util::{TemporaryDirectory, non_crypto_rng}, }; #[derive(Debug, Clone)] @@ -281,7 +281,7 @@ mod tests { use datafusion_expr::col; use futures::TryStreamExt; use lance_datagen::{BatchCount, BatchGeneratorBuilder, ByteCount, RowCount, Seed}; - use rand::{rngs::SmallRng, SeedableRng}; + use rand::{SeedableRng, rngs::SmallRng}; fn test_gen() -> BatchGeneratorBuilder { lance_datagen::gen_batch() diff --git a/rust/lancedb/src/dataloader/permutation/split.rs b/rust/lancedb/src/dataloader/permutation/split.rs index 8a0d11b1e..bb6f59d34 100644 --- a/rust/lancedb/src/dataloader/permutation/split.rs +++ b/rust/lancedb/src/dataloader/permutation/split.rs @@ -2,8 +2,8 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use std::sync::{ - atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, Arc, + atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, }; use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array}; @@ -15,13 +15,13 @@ use lance_arrow::SchemaExt; use lance_core::ROW_ID; use crate::{ + Error, Result, arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, dataloader::{ permutation::shuffle::{Shuffler, ShufflerConfig}, permutation::util::TemporaryDirectory, }, query::{Query, QueryBase, Select}, - Error, Result, }; pub const SPLIT_ID_COLUMN: &str = "split_id"; diff --git a/rust/lancedb/src/dataloader/permutation/util.rs b/rust/lancedb/src/dataloader/permutation/util.rs index 9e4571125..d793b9704 100644 --- a/rust/lancedb/src/dataloader/permutation/util.rs +++ b/rust/lancedb/src/dataloader/permutation/util.rs @@ -7,12 +7,12 @@ use arrow_array::RecordBatch; use arrow_schema::{Fields, Schema}; use datafusion_execution::disk_manager::DiskManagerMode; use futures::TryStreamExt; -use rand::{rngs::SmallRng, RngCore, SeedableRng}; +use rand::{RngCore, SeedableRng, rngs::SmallRng}; use tempfile::TempDir; use crate::{ - arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, Error, Result, + arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, }; /// Directory to use for temporary files diff --git a/rust/lancedb/src/embeddings.rs b/rust/lancedb/src/embeddings.rs index b6272a123..ac665d76b 100644 --- a/rust/lancedb/src/embeddings.rs +++ b/rust/lancedb/src/embeddings.rs @@ -23,9 +23,9 @@ use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef}; use serde::{Deserialize, Serialize}; use crate::{ + Error, error::Result, table::{ColumnDefinition, ColumnKind, TableDefinition}, - Error, }; /// Trait for embedding functions diff --git a/rust/lancedb/src/embeddings/bedrock.rs b/rust/lancedb/src/embeddings/bedrock.rs index 33b045586..b35ce8611 100644 --- a/rust/lancedb/src/embeddings/bedrock.rs +++ b/rust/lancedb/src/embeddings/bedrock.rs @@ -8,7 +8,7 @@ use arrow::array::{AsArray, Float32Builder}; use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array}; use arrow_data::ArrayData; use arrow_schema::DataType; -use serde_json::{json, Value}; +use serde_json::{Value, json}; use super::EmbeddingFunction; use crate::{Error, Result}; diff --git a/rust/lancedb/src/embeddings/openai.rs b/rust/lancedb/src/embeddings/openai.rs index 7fadad5c4..ea900b43b 100644 --- a/rust/lancedb/src/embeddings/openai.rs +++ b/rust/lancedb/src/embeddings/openai.rs @@ -8,9 +8,9 @@ use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array}; use arrow_data::ArrayData; use arrow_schema::DataType; use async_openai::{ + Client, config::OpenAIConfig, types::{CreateEmbeddingRequest, Embedding, EmbeddingInput, EncodingFormat}, - Client, }; use tokio::{runtime::Handle, task}; diff --git a/rust/lancedb/src/embeddings/sentence_transformers.rs b/rust/lancedb/src/embeddings/sentence_transformers.rs index 0bc2f6042..e045e3211 100644 --- a/rust/lancedb/src/embeddings/sentence_transformers.rs +++ b/rust/lancedb/src/embeddings/sentence_transformers.rs @@ -7,7 +7,7 @@ use super::EmbeddingFunction; use arrow::{ array::{AsArray, PrimitiveBuilder}, datatypes::{ - ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt32Type, UInt8Type, + ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt8Type, UInt32Type, }, }; use arrow_array::{Array, FixedSizeListArray, PrimitiveArray}; @@ -16,8 +16,8 @@ use arrow_schema::DataType; use candle_core::{CpuStorage, Device, Layout, Storage, Tensor}; use candle_nn::VarBuilder; use candle_transformers::models::bert::{BertModel, DTYPE}; -use hf_hub::{api::sync::Api, Repo, RepoType}; -use tokenizers::{tokenizer::Tokenizer, PaddingParams}; +use hf_hub::{Repo, RepoType, api::sync::Api}; +use tokenizers::{PaddingParams, tokenizer::Tokenizer}; /// Compute embeddings using huggingface sentence-transformers. pub struct SentenceTransformersEmbeddingsBuilder { @@ -230,7 +230,7 @@ impl SentenceTransformersEmbeddings { Storage::Cpu(CpuStorage::BF16(_)) => { return Err(crate::Error::Runtime { message: "unsupported data type".to_string(), - }) + }); } _ => unreachable!("we already moved the tensor to the CPU device"), }; @@ -298,12 +298,12 @@ impl SentenceTransformersEmbeddings { DataType::Utf8View => { return Err(crate::Error::Runtime { message: "Utf8View not yet implemented".to_string(), - }) + }); } _ => { return Err(crate::Error::Runtime { message: "invalid type".to_string(), - }) + }); } }; diff --git a/rust/lancedb/src/expr.rs b/rust/lancedb/src/expr.rs index eafcdd14f..e256724a0 100644 --- a/rust/lancedb/src/expr.rs +++ b/rust/lancedb/src/expr.rs @@ -24,7 +24,7 @@ pub use sql::expr_to_sql_string; use std::sync::Arc; use arrow_schema::DataType; -use datafusion_expr::{expr_fn::cast, Expr, ScalarUDF}; +use datafusion_expr::{Expr, ScalarUDF, expr_fn::cast}; use datafusion_functions::string::expr_fn as string_expr_fn; pub use datafusion_expr::{col, lit}; diff --git a/rust/lancedb/src/index.rs b/rust/lancedb/src/index.rs index 2269ad858..a012fd4cd 100644 --- a/rust/lancedb/src/index.rs +++ b/rust/lancedb/src/index.rs @@ -9,7 +9,7 @@ use std::time::Duration; use vector::IvfFlatIndexBuilder; use crate::index::vector::IvfRqIndexBuilder; -use crate::{table::BaseTable, DistanceType, Error, Result}; +use crate::{DistanceType, Error, Result, table::BaseTable}; use self::{ scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder}, diff --git a/rust/lancedb/src/index/scalar.rs b/rust/lancedb/src/index/scalar.rs index 370dd5abb..b9292105a 100644 --- a/rust/lancedb/src/index/scalar.rs +++ b/rust/lancedb/src/index/scalar.rs @@ -51,7 +51,7 @@ pub struct BitmapIndexBuilder {} #[derive(Debug, Clone, Default, serde::Serialize)] pub struct LabelListIndexBuilder {} -pub use lance_index::scalar::inverted::query::*; pub use lance_index::scalar::FullTextSearchQuery; pub use lance_index::scalar::InvertedIndexParams as FtsIndexBuilder; pub use lance_index::scalar::InvertedIndexParams; +pub use lance_index::scalar::inverted::query::*; diff --git a/rust/lancedb/src/index/waiter.rs b/rust/lancedb/src/index/waiter.rs index a20bfa2b1..f201ed6c3 100644 --- a/rust/lancedb/src/index/waiter.rs +++ b/rust/lancedb/src/index/waiter.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors +use crate::Error; use crate::error::Result; use crate::table::BaseTable; -use crate::Error; use log::debug; use std::time::{Duration, Instant}; use tokio::time::sleep; diff --git a/rust/lancedb/src/io/object_store.rs b/rust/lancedb/src/io/object_store.rs index 508c9849d..d4739291a 100644 --- a/rust/lancedb/src/io/object_store.rs +++ b/rust/lancedb/src/io/object_store.rs @@ -5,11 +5,11 @@ use std::{fmt::Formatter, sync::Arc}; -use futures::{stream::BoxStream, TryFutureExt}; +use futures::{TryFutureExt, stream::BoxStream}; use lance::io::WrappingObjectStore; use object_store::{ - path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, - PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, + Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path, }; use async_trait::async_trait; diff --git a/rust/lancedb/src/io/object_store/io_tracking.rs b/rust/lancedb/src/io/object_store/io_tracking.rs index 7c8ae4e92..882ef51fa 100644 --- a/rust/lancedb/src/io/object_store/io_tracking.rs +++ b/rust/lancedb/src/io/object_store/io_tracking.rs @@ -10,8 +10,9 @@ use bytes::Bytes; use futures::stream::BoxStream; use lance::io::WrappingObjectStore; use object_store::{ - path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart, + path::Path, }; #[derive(Debug, Default)] diff --git a/rust/lancedb/src/query.rs b/rust/lancedb/src/query.rs index 301476935..aa77dcc39 100644 --- a/rust/lancedb/src/query.rs +++ b/rust/lancedb/src/query.rs @@ -5,26 +5,26 @@ use std::sync::Arc; use std::{future::Future, time::Duration}; use arrow::compute::concat_batches; -use arrow_array::{make_array, Array, Float16Array, Float32Array, Float64Array}; +use arrow_array::{Array, Float16Array, Float32Array, Float64Array, make_array}; use arrow_schema::{DataType, SchemaRef}; use datafusion_expr::Expr; use datafusion_physical_plan::ExecutionPlan; -use futures::{stream, try_join, FutureExt, TryFutureExt, TryStreamExt}; +use futures::{FutureExt, TryFutureExt, TryStreamExt, stream, try_join}; use half::f16; -use lance::dataset::{scanner::DatasetRecordBatchStream, ROW_ID}; +use lance::dataset::{ROW_ID, scanner::DatasetRecordBatchStream}; use lance_arrow::RecordBatchExt; use lance_datafusion::exec::execute_plan; -use lance_index::scalar::inverted::SCORE_COL; use lance_index::scalar::FullTextSearchQuery; +use lance_index::scalar::inverted::SCORE_COL; use lance_index::vector::DIST_COL; use lance_io::stream::RecordBatchStreamAdapter; +use crate::DistanceType; use crate::error::{Error, Result}; use crate::rerankers::rrf::RRFReranker; -use crate::rerankers::{check_reranker_result, NormalizeMethod, Reranker}; +use crate::rerankers::{NormalizeMethod, Reranker, check_reranker_result}; use crate::table::BaseTable; use crate::utils::TimeoutStream; -use crate::DistanceType; use crate::{arrow::SendableRecordBatchStream, table::AnyQuery}; mod hybrid; @@ -161,10 +161,11 @@ impl IntoQueryVector for &dyn Array { if data_type != self.data_type() { Err(Error::InvalidInput { message: format!( - "failed to create query vector, the input data type was {:?} but the expected data type was {:?}", - self.data_type(), - data_type - )}) + "failed to create query vector, the input data type was {:?} but the expected data type was {:?}", + self.data_type(), + data_type + ), + }) } else { let data = self.to_data(); Ok(make_array(data)) @@ -186,7 +187,7 @@ impl IntoQueryVector for &[f16] { DataType::Float32 => { let arr: Vec = self.iter().map(|x| f32::from(*x)).collect(); Ok(Arc::new(Float32Array::from(arr))) - }, + } DataType::Float64 => { let arr: Vec = self.iter().map(|x| f64::from(*x)).collect(); Ok(Arc::new(Float64Array::from(arr))) @@ -194,8 +195,7 @@ impl IntoQueryVector for &[f16] { _ => Err(Error::InvalidInput { message: format!( "failed to create query vector, the input data type was &[f16] but the embedding model \"{}\" expected data type {:?}", - embedding_model_label, - data_type + embedding_model_label, data_type ), }), } @@ -216,7 +216,7 @@ impl IntoQueryVector for &[f32] { DataType::Float32 => { let arr: Vec = self.to_vec(); Ok(Arc::new(Float32Array::from(arr))) - }, + } DataType::Float64 => { let arr: Vec = self.iter().map(|x| *x as f64).collect(); Ok(Arc::new(Float64Array::from(arr))) @@ -224,8 +224,7 @@ impl IntoQueryVector for &[f32] { _ => Err(Error::InvalidInput { message: format!( "failed to create query vector, the input data type was &[f32] but the embedding model \"{}\" expected data type {:?}", - embedding_model_label, - data_type + embedding_model_label, data_type ), }), } @@ -239,26 +238,25 @@ impl IntoQueryVector for &[f64] { embedding_model_label: &str, ) -> Result> { match data_type { - DataType::Float16 => { - let arr: Vec = self.iter().map(|x| f16::from_f64(*x)).collect(); - Ok(Arc::new(Float16Array::from(arr))) - } - DataType::Float32 => { - let arr: Vec = self.iter().map(|x| *x as f32).collect(); - Ok(Arc::new(Float32Array::from(arr))) - }, - DataType::Float64 => { - let arr: Vec = self.to_vec(); - Ok(Arc::new(Float64Array::from(arr))) - } - _ => Err(Error::InvalidInput { - message: format!( - "failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}", - embedding_model_label, - data_type - ), - }), + DataType::Float16 => { + let arr: Vec = self.iter().map(|x| f16::from_f64(*x)).collect(); + Ok(Arc::new(Float16Array::from(arr))) } + DataType::Float32 => { + let arr: Vec = self.iter().map(|x| *x as f32).collect(); + Ok(Arc::new(Float32Array::from(arr))) + } + DataType::Float64 => { + let arr: Vec = self.to_vec(); + Ok(Arc::new(Float64Array::from(arr))) + } + _ => Err(Error::InvalidInput { + message: format!( + "failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}", + embedding_model_label, data_type + ), + }), + } } } @@ -1011,13 +1009,13 @@ impl VectorQuery { message: "minimum_nprobes must be greater than 0".to_string(), }); } - if let Some(maximum_nprobes) = self.request.maximum_nprobes { - if minimum_nprobes > maximum_nprobes { - return Err(Error::InvalidInput { - message: "minimum_nprobes must be less than or equal to maximum_nprobes" - .to_string(), - }); - } + if let Some(maximum_nprobes) = self.request.maximum_nprobes + && minimum_nprobes > maximum_nprobes + { + return Err(Error::InvalidInput { + message: "minimum_nprobes must be less than or equal to maximum_nprobes" + .to_string(), + }); } self.request.minimum_nprobes = minimum_nprobes; Ok(self) @@ -1407,8 +1405,8 @@ mod tests { use super::*; use arrow::{array::downcast_array, compute::concat_batches, datatypes::Int32Type}; use arrow_array::{ - cast::AsArray, types::Float32Type, FixedSizeListArray, Float32Array, Int32Array, - RecordBatch, StringArray, + FixedSizeListArray, Float32Array, Int32Array, RecordBatch, StringArray, cast::AsArray, + types::Float32Type, }; use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema}; use futures::{StreamExt, TryStreamExt}; @@ -1416,7 +1414,7 @@ mod tests { use rand::seq::IndexedRandom; use tempfile::tempdir; - use crate::{connect, database::CreateTableMode, index::Index, Table}; + use crate::{Table, connect, database::CreateTableMode, index::Index}; #[tokio::test] async fn test_setters_getters() { @@ -1754,11 +1752,13 @@ mod tests { .limit(1) .execute() .await; - assert!(error_result - .err() - .unwrap() - .to_string() - .contains("No vector column found to match with the query vector dimension: 3")); + assert!( + error_result + .err() + .unwrap() + .to_string() + .contains("No vector column found to match with the query vector dimension: 3") + ); } #[tokio::test] @@ -2010,7 +2010,7 @@ mod tests { // Sample 1 - 3 tokens for each string value let tokens = ["a", "b", "c", "d", "e"]; - use rand::{rng, Rng}; + use rand::{Rng, rng}; let mut rng = rng(); let text: StringArray = (0..nrows) diff --git a/rust/lancedb/src/query/hybrid.rs b/rust/lancedb/src/query/hybrid.rs index 9e85e870b..35c64d4a9 100644 --- a/rust/lancedb/src/query/hybrid.rs +++ b/rust/lancedb/src/query/hybrid.rs @@ -5,7 +5,7 @@ use arrow::compute::{ kernels::numeric::{div, sub}, max, min, }; -use arrow_array::{cast::downcast_array, Float32Array, RecordBatch}; +use arrow_array::{Float32Array, RecordBatch, cast::downcast_array}; use arrow_schema::{DataType, Field, Schema, SortOptions}; use lance::dataset::ROW_ID; use lance_index::{scalar::inverted::SCORE_COL, vector::DIST_COL}; @@ -253,7 +253,10 @@ mod test { let result = rank(batch.clone(), "bad_col", None); match result { Err(Error::InvalidInput { message }) => { - assert_eq!("expected column bad_col not found in rank. found columns [\"name\", \"score\"]", message); + assert_eq!( + "expected column bad_col not found in rank. found columns [\"name\", \"score\"]", + message + ); } _ => { panic!("expected invalid input error, received {:?}", result) diff --git a/rust/lancedb/src/remote/client.rs b/rust/lancedb/src/remote/client.rs index de71727d9..7304c18c2 100644 --- a/rust/lancedb/src/remote/client.rs +++ b/rust/lancedb/src/remote/client.rs @@ -4,8 +4,8 @@ use http::HeaderName; use log::debug; use reqwest::{ - header::{HeaderMap, HeaderValue}, Body, Request, RequestBuilder, Response, + header::{HeaderMap, HeaderValue}, }; use std::{collections::HashMap, future::Future, str::FromStr, sync::Arc, time::Duration}; @@ -650,14 +650,13 @@ impl RestfulLanceDbClient { pub fn extract_request_id(&self, request: &mut Request) -> String { // Set a request id. // TODO: allow the user to supply this, through middleware? - let request_id = if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) { + if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) { request_id.to_str().unwrap().to_string() } else { let request_id = uuid::Uuid::new_v4().to_string(); self.set_request_id(request, &request_id); request_id - }; - request_id + } } /// Set the request ID header diff --git a/rust/lancedb/src/remote/db.rs b/rust/lancedb/src/remote/db.rs index 38541f079..b615fd3bc 100644 --- a/rust/lancedb/src/remote/db.rs +++ b/rust/lancedb/src/remote/db.rs @@ -16,6 +16,7 @@ use lance_namespace::models::{ ListNamespacesResponse, ListTablesRequest, ListTablesResponse, }; +use crate::Error; use crate::database::{ CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions, OpenTableRequest, ReadConsistency, TableNamesRequest, @@ -23,12 +24,11 @@ use crate::database::{ use crate::error::Result; use crate::remote::util::stream_as_body; use crate::table::BaseTable; -use crate::Error; +use super::ARROW_STREAM_CONTENT_TYPE; use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender}; use super::table::RemoteTable; use super::util::parse_server_version; -use super::ARROW_STREAM_CONTENT_TYPE; // Request structure for the remote clone table API #[derive(serde::Serialize)] @@ -249,9 +249,9 @@ impl RemoteDatabase { #[cfg(all(test, feature = "remote"))] mod test_utils { use super::*; + use crate::remote::ClientConfig; use crate::remote::client::test_utils::MockSender; use crate::remote::client::test_utils::{client_with_handler, client_with_handler_and_config}; - use crate::remote::ClientConfig; impl RemoteDatabase { pub fn new_mock(handler: F) -> Self @@ -799,9 +799,9 @@ mod tests { use crate::connection::ConnectBuilder; use crate::{ - database::CreateTableMode, - remote::{ClientConfig, HeaderProvider, ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE}, Connection, Error, + database::CreateTableMode, + remote::{ARROW_STREAM_CONTENT_TYPE, ClientConfig, HeaderProvider, JSON_CONTENT_TYPE}, }; #[test] diff --git a/rust/lancedb/src/remote/retry.rs b/rust/lancedb/src/remote/retry.rs index 8f34195f6..1a354eebb 100644 --- a/rust/lancedb/src/remote/retry.rs +++ b/rust/lancedb/src/remote/retry.rs @@ -1,8 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors -use crate::remote::RetryConfig; use crate::Error; +use crate::remote::RetryConfig; use log::debug; use std::time::Duration; diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 6e290c4df..a633e00ab 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -6,15 +6,14 @@ pub mod insert; use self::insert::RemoteInsertExec; use crate::expr::expr_to_sql_string; +use super::ARROW_STREAM_CONTENT_TYPE; use super::client::RequestResultExt; use super::client::{HttpSend, RestfulLanceDbClient, Sender}; use super::db::ServerVersion; -use super::ARROW_STREAM_CONTENT_TYPE; -use crate::index::waiter::wait_for_index; use crate::index::Index; use crate::index::IndexStatistics; +use crate::index::waiter::wait_for_index; use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest}; -use crate::table::query::create_multi_vector_plan; use crate::table::AddColumnsResult; use crate::table::AddResult; use crate::table::AlterColumnsResult; @@ -23,19 +22,20 @@ use crate::table::DropColumnsResult; use crate::table::MergeResult; use crate::table::Tags; use crate::table::UpdateResult; +use crate::table::query::create_multi_vector_plan; use crate::table::{AnyQuery, Filter, TableStatistics}; use crate::utils::background_cache::BackgroundCache; use crate::utils::{supported_btree_data_type, supported_vector_data_type}; +use crate::{DistanceType, Error}; use crate::{ error::Result, index::{IndexBuilder, IndexConfig}, query::QueryExecutionOptions, table::{ - merge::MergeInsertBuilder, AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats, - TableDefinition, UpdateBuilder, + AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats, TableDefinition, UpdateBuilder, + merge::MergeInsertBuilder, }, }; -use crate::{DistanceType, Error}; use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader}; use arrow_ipc::reader::FileReader; use arrow_schema::{DataType, SchemaRef}; @@ -50,7 +50,7 @@ use lance::arrow::json::{JsonDataType, JsonSchema}; use lance::dataset::refs::TagContents; use lance::dataset::scanner::DatasetRecordBatchStream; use lance::dataset::{ColumnAlteration, NewColumnTransform, Version}; -use lance_datafusion::exec::{execute_plan, OneShotExec}; +use lance_datafusion::exec::{OneShotExec, execute_plan}; use reqwest::{RequestBuilder, Response}; use serde::{Deserialize, Serialize}; use serde_json::Number; @@ -612,8 +612,8 @@ impl RemoteTable { message: format!( "Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.", version - ) - }) + ), + }), } } @@ -697,10 +697,10 @@ impl RemoteTable { Error::Retry { status_code, .. } => *status_code, _ => None, }; - if let Some(status_code) = status_code { - if Self::should_invalidate_cache_for_status(status_code) { - self.invalidate_schema_cache(); - } + if let Some(status_code) = status_code + && Self::should_invalidate_cache_for_status(status_code) + { + self.invalidate_schema_cache(); } } } @@ -783,9 +783,9 @@ impl std::fmt::Display for RemoteTable { #[cfg(all(test, feature = "remote"))] mod test_utils { use super::*; - use crate::remote::client::test_utils::client_with_handler; - use crate::remote::client::test_utils::{client_with_handler_and_config, MockSender}; use crate::remote::ClientConfig; + use crate::remote::client::test_utils::client_with_handler; + use crate::remote::client::test_utils::{MockSender, client_with_handler_and_config}; impl RemoteTable { pub fn new_mock(name: String, handler: F, version: Option) -> Self @@ -1251,13 +1251,13 @@ impl BaseTable for RemoteTable { 0 => { return Err(Error::InvalidInput { message: "No columns specified".into(), - }) + }); } 1 => index.columns.pop().unwrap(), _ => { return Err(Error::NotSupported { message: "Indices over multiple columns not yet supported".into(), - }) + }); } }; let mut body = serde_json::json!({ @@ -1320,7 +1320,7 @@ impl BaseTable for RemoteTable { _ => { return Err(Error::NotSupported { message: "Index type not supported".into(), - }) + }); } }; @@ -1771,8 +1771,8 @@ impl TryFrom for MergeInsertRequest { #[cfg(test)] mod tests { - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; use std::{collections::HashMap, pin::Pin}; @@ -1781,10 +1781,10 @@ mod tests { use crate::table::AddDataMode; use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type}; - use arrow_array::{record_batch, Int32Array, RecordBatch, RecordBatchIterator}; + use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, record_batch}; use arrow_schema::{DataType, Field, Schema}; use chrono::{DateTime, Utc}; - use futures::{future::BoxFuture, StreamExt, TryFutureExt}; + use futures::{StreamExt, TryFutureExt, future::BoxFuture}; use lance_index::scalar::inverted::query::MatchQuery; use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams}; use reqwest::Body; @@ -1794,14 +1794,14 @@ mod tests { use crate::index::vector::{ IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder, }; - use crate::remote::db::DEFAULT_SERVER_VERSION; use crate::remote::JSON_CONTENT_TYPE; + use crate::remote::db::DEFAULT_SERVER_VERSION; use crate::utils::background_cache::clock; use crate::{ - index::{vector::IvfPqIndexBuilder, Index, IndexStatistics, IndexType}, + DistanceType, Error, Table, + index::{Index, IndexStatistics, IndexType, vector::IvfPqIndexBuilder}, query::{ExecutableQuery, QueryBase}, remote::ARROW_FILE_CONTENT_TYPE, - DistanceType, Error, Table, }; #[tokio::test] @@ -2030,11 +2030,13 @@ mod tests { .unwrap(), "/v1/table/my_table/insert/" => { assert_eq!(request.method(), "POST"); - assert!(request - .url() - .query_pairs() - .filter(|(k, _)| k == "mode") - .all(|(_, v)| v == "append")); + assert!( + request + .url() + .query_pairs() + .filter(|(k, _)| k == "mode") + .all(|(_, v)| v == "append") + ); assert_eq!( request.headers().get("Content-Type").unwrap(), ARROW_STREAM_CONTENT_TYPE @@ -3592,7 +3594,7 @@ mod tests { } fn _make_table_with_indices(unindexed_rows: usize) -> Table { - let table = Table::new_with_handler("my_table", move |request| { + Table::new_with_handler("my_table", move |request| { assert_eq!(request.method(), "POST"); let response_body = match request.url().path() { @@ -3636,8 +3638,7 @@ mod tests { let body = serde_json::to_string(&response_body).unwrap(); let status = if body == "null" { 404 } else { 200 }; http::Response::builder().status(status).body(body).unwrap() - }); - table + }) } #[tokio::test] @@ -3848,8 +3849,8 @@ mod tests { #[tokio::test] async fn test_uri_caching() { - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; let call_count = Arc::new(AtomicUsize::new(0)); let call_count_clone = call_count.clone(); diff --git a/rust/lancedb/src/remote/table/insert.rs b/rust/lancedb/src/remote/table/insert.rs index da8cb9af0..c8637281e 100644 --- a/rust/lancedb/src/remote/table/insert.rs +++ b/rust/lancedb/src/remote/table/insert.rs @@ -16,12 +16,12 @@ use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, Plan use futures::StreamExt; use http::header::CONTENT_TYPE; +use crate::Error; +use crate::remote::ARROW_STREAM_CONTENT_TYPE; use crate::remote::client::{HttpSend, RestfulLanceDbClient, Sender}; use crate::remote::table::RemoteTable; -use crate::remote::ARROW_STREAM_CONTENT_TYPE; -use crate::table::datafusion::insert::COUNT_SCHEMA; use crate::table::AddResult; -use crate::Error; +use crate::table::datafusion::insert::COUNT_SCHEMA; /// ExecutionPlan for inserting data into a remote LanceDB table. /// @@ -309,12 +309,12 @@ mod tests { use arrow_schema::{DataType, Field, Schema as ArrowSchema}; use datafusion::prelude::SessionContext; use datafusion_catalog::MemTable; - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + use crate::Table; use crate::remote::ARROW_STREAM_CONTENT_TYPE; use crate::table::datafusion::BaseTableAdapter; - use crate::Table; fn schema_json() -> &'static str { r#"{"fields": [{"name": "id", "type": {"type": "int32"}, "nullable": true}]}"# diff --git a/rust/lancedb/src/remote/util.rs b/rust/lancedb/src/remote/util.rs index 51ddb97de..1c368e2f5 100644 --- a/rust/lancedb/src/remote/util.rs +++ b/rust/lancedb/src/remote/util.rs @@ -5,7 +5,7 @@ use arrow_ipc::CompressionType; use futures::{Stream, StreamExt}; use reqwest::Response; -use crate::{arrow::SendableRecordBatchStream, Result}; +use crate::{Result, arrow::SendableRecordBatchStream}; use super::db::ServerVersion; diff --git a/rust/lancedb/src/rerankers/rrf.rs b/rust/lancedb/src/rerankers/rrf.rs index 7b5d83614..4da1927be 100644 --- a/rust/lancedb/src/rerankers/rrf.rs +++ b/rust/lancedb/src/rerankers/rrf.rs @@ -14,7 +14,7 @@ use async_trait::async_trait; use lance::dataset::ROW_ID; use crate::error::{Error, Result}; -use crate::rerankers::{Reranker, RELEVANCE_SCORE}; +use crate::rerankers::{RELEVANCE_SCORE, Reranker}; /// Reranks the results using Reciprocal Rank Fusion(RRF) algorithm based /// on the scores of vector and FTS search. diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index e92a817b6..611ec1f26 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -8,29 +8,29 @@ use arrow_schema::{DataType, Field, Schema, SchemaRef}; use async_trait::async_trait; use datafusion_execution::TaskContext; use datafusion_expr::Expr; -use datafusion_physical_plan::display::DisplayableExecutionPlan; use datafusion_physical_plan::ExecutionPlan; -use futures::stream::FuturesUnordered; +use datafusion_physical_plan::display::DisplayableExecutionPlan; use futures::StreamExt; -use lance::dataset::builder::DatasetBuilder; +use futures::stream::FuturesUnordered; pub use lance::dataset::ColumnAlteration; pub use lance::dataset::NewColumnTransform; pub use lance::dataset::ReadParams; pub use lance::dataset::Version; use lance::dataset::WriteMode; +use lance::dataset::builder::DatasetBuilder; use lance::dataset::{InsertBuilder, WriteParams}; -use lance::index::vector::utils::infer_vector_dim; use lance::index::vector::VectorIndexParams; +use lance::index::vector::utils::infer_vector_dim; use lance::io::{ObjectStoreParams, WrappingObjectStore}; use lance_datafusion::utils::StreamingWriteSource; +use lance_index::DatasetIndexExt; +use lance_index::IndexType; use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams}; use lance_index::vector::bq::RQBuildParams; use lance_index::vector::hnsw::builder::HnswBuildParams; use lance_index::vector::ivf::IvfBuildParams; use lance_index::vector::pq::PQBuildParams; use lance_index::vector::sq::builder::SQBuildParams; -use lance_index::DatasetIndexExt; -use lance_index::IndexType; use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor}; pub use query::AnyQuery; @@ -43,19 +43,19 @@ use std::format; use std::path::Path; use std::sync::Arc; -use crate::data::scannable::{estimate_write_partitions, PeekedScannable, Scannable}; +use crate::data::scannable::{PeekedScannable, Scannable, estimate_write_partitions}; use crate::database::Database; use crate::embeddings::{EmbeddingDefinition, EmbeddingRegistry, MemoryRegistry}; use crate::error::{Error, Result}; -use crate::index::vector::VectorIndex; use crate::index::IndexStatistics; -use crate::index::{vector::suggested_num_sub_vectors, Index, IndexBuilder}; +use crate::index::vector::VectorIndex; +use crate::index::{Index, IndexBuilder, vector::suggested_num_sub_vectors}; use crate::index::{IndexConfig, IndexStatisticsImpl}; use crate::query::{IntoQueryVector, Query, QueryExecutionOptions, TakeQuery, VectorQuery}; use crate::table::datafusion::insert::InsertExec; use crate::utils::{ - supported_bitmap_data_type, supported_btree_data_type, supported_fts_data_type, - supported_label_list_data_type, supported_vector_data_type, PatchReadParam, PatchWriteParam, + PatchReadParam, PatchWriteParam, supported_bitmap_data_type, supported_btree_data_type, + supported_fts_data_type, supported_label_list_data_type, supported_vector_data_type, }; use self::dataset::DatasetConsistencyWrapper; @@ -2555,22 +2555,21 @@ pub struct FragmentSummaryStats { #[cfg(test)] #[allow(deprecated)] mod tests { - use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; use arrow_array::{ - builder::{ListBuilder, StringBuilder}, Array, BooleanArray, FixedSizeListArray, Int32Array, LargeStringArray, RecordBatch, RecordBatchIterator, RecordBatchReader, StringArray, + builder::{ListBuilder, StringBuilder}, }; use arrow_array::{BinaryArray, LargeBinaryArray}; use arrow_data::ArrayDataBuilder; use arrow_schema::{DataType, Field, Schema}; use futures::TryStreamExt; - use lance::io::{ObjectStoreParams, WrappingObjectStore}; use lance::Dataset; - use rand::Rng; + use lance::io::{ObjectStoreParams, WrappingObjectStore}; use tempfile::tempdir; use super::*; @@ -2777,9 +2776,8 @@ mod tests { false, )])); - let mut rng = rand::thread_rng(); let float_arr = Float32Array::from( - repeat_with(|| rng.gen::()) + repeat_with(rand::random::) .take(512 * dimension as usize) .collect::>(), ); @@ -2884,8 +2882,8 @@ mod tests { .await .unwrap(); - use lance::index::vector::ivf::v2::IvfPq as LanceIvfPq; use lance::index::DatasetIndexInternalExt; + use lance::index::vector::ivf::v2::IvfPq as LanceIvfPq; use lance_index::metrics::NoOpMetricsCollector; use lance_index::vector::VectorIndex as LanceVectorIndex; @@ -2933,9 +2931,8 @@ mod tests { false, )])); - let mut rng = rand::thread_rng(); let float_arr = Float32Array::from( - repeat_with(|| rng.gen::()) + repeat_with(rand::random::) .take(512 * dimension as usize) .collect::>(), ); @@ -2993,9 +2990,8 @@ mod tests { false, )])); - let mut rng = rand::thread_rng(); let float_arr = Float32Array::from( - repeat_with(|| rng.gen::()) + repeat_with(rand::random::) .take(512 * dimension as usize) .collect::>(), ); @@ -3256,16 +3252,20 @@ mod tests { .unwrap(); // Can not create btree or bitmap index on list column - assert!(table - .create_index(&["tags"], Index::BTree(Default::default())) - .execute() - .await - .is_err()); - assert!(table - .create_index(&["tags"], Index::Bitmap(Default::default())) - .execute() - .await - .is_err()); + assert!( + table + .create_index(&["tags"], Index::BTree(Default::default())) + .execute() + .await + .is_err() + ); + assert!( + table + .create_index(&["tags"], Index::Bitmap(Default::default())) + .execute() + .await + .is_err() + ); // Create bitmap index on the "category" column table diff --git a/rust/lancedb/src/table/add_data.rs b/rust/lancedb/src/table/add_data.rs index 5c83a8154..5921c54ea 100644 --- a/rust/lancedb/src/table/add_data.rs +++ b/rust/lancedb/src/table/add_data.rs @@ -7,8 +7,8 @@ use arrow_schema::{DataType, Fields, Schema}; use lance::dataset::WriteMode; use serde::{Deserialize, Serialize}; -use crate::data::scannable::scannable_with_embeddings; use crate::data::scannable::Scannable; +use crate::data::scannable::scannable_with_embeddings; use crate::embeddings::EmbeddingRegistry; use crate::table::datafusion::cast::cast_to_table_schema; use crate::table::datafusion::reject_nan::reject_nan_vectors; @@ -204,13 +204,14 @@ mod tests { use arrow::datatypes::Float64Type; use arrow_array::{ - record_batch, FixedSizeListArray, Float32Array, Int32Array, LargeStringArray, ListArray, - RecordBatch, RecordBatchIterator, + FixedSizeListArray, Float32Array, Int32Array, LargeStringArray, ListArray, RecordBatch, + RecordBatchIterator, record_batch, }; use arrow_schema::{ArrowError, DataType, Field, Schema}; use futures::TryStreamExt; use lance::dataset::{WriteMode, WriteParams}; + use crate::Error; use crate::arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}; use crate::connect; use crate::data::scannable::Scannable; @@ -220,9 +221,8 @@ mod tests { use crate::query::{ExecutableQuery, QueryBase, Select}; use crate::table::add_data::NaNVectorBehavior; use crate::table::{ColumnDefinition, ColumnKind, Table, TableDefinition, WriteOptions}; - use crate::test_utils::embeddings::MockEmbed; use crate::test_utils::TestCustomError; - use crate::Error; + use crate::test_utils::embeddings::MockEmbed; use super::AddDataMode; diff --git a/rust/lancedb/src/table/datafusion.rs b/rust/lancedb/src/table/datafusion.rs index aaa5b8d7f..bd93dd05d 100644 --- a/rust/lancedb/src/table/datafusion.rs +++ b/rust/lancedb/src/table/datafusion.rs @@ -17,17 +17,17 @@ use async_trait::async_trait; use datafusion_catalog::{Session, TableProvider}; use datafusion_common::{DataFusionError, Result as DataFusionResult, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_expr::{dml::InsertOp, Expr, TableProviderFilterPushDown, TableType}; +use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType, dml::InsertOp}; use datafusion_physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, stream::RecordBatchStreamAdapter, }; use futures::{TryFutureExt, TryStreamExt}; use lance::dataset::{WriteMode, WriteParams}; use super::{AnyQuery, BaseTable}; use crate::{ - query::{QueryExecutionOptions, QueryFilter, QueryRequest, Select}, Result, + query::{QueryExecutionOptions, QueryFilter, QueryRequest, Select}, }; use arrow_schema::{DataType, Field}; use lance_index::scalar::FullTextSearchQuery; @@ -268,7 +268,7 @@ impl TableProvider for BaseTableAdapter { InsertOp::Replace => { return Err(DataFusionError::NotImplemented( "Replace mode is not supported for LanceDB tables".to_string(), - )) + )); } }; @@ -300,13 +300,13 @@ pub mod tests { use datafusion_catalog::TableProvider; use datafusion_common::stats::Precision; use datafusion_execution::SendableRecordBatchStream; - use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder}; + use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, col, lit}; use futures::{StreamExt, TryStreamExt}; use tempfile::tempdir; use crate::{ connect, - index::{scalar::BTreeIndexBuilder, Index}, + index::{Index, scalar::BTreeIndexBuilder}, table::datafusion::BaseTableAdapter, }; diff --git a/rust/lancedb/src/table/datafusion/cast.rs b/rust/lancedb/src/table/datafusion/cast.rs index 76459ea87..7220de96d 100644 --- a/rust/lancedb/src/table/datafusion/cast.rs +++ b/rust/lancedb/src/table/datafusion/cast.rs @@ -5,10 +5,10 @@ use std::sync::Arc; use arrow_schema::{DataType, Field, FieldRef, Fields, Schema}; use datafusion::functions::core::{get_field, named_struct}; -use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; -use datafusion_physical_expr::expressions::{cast, Literal}; +use datafusion_common::config::ConfigOptions; use datafusion_physical_expr::ScalarFunctionExpr; +use datafusion_physical_expr::expressions::{Literal, cast}; use datafusion_physical_plan::expressions::Column; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::{ExecutionPlan, PhysicalExpr}; diff --git a/rust/lancedb/src/table/datafusion/insert.rs b/rust/lancedb/src/table/datafusion/insert.rs index ae0489156..4c3d66195 100644 --- a/rust/lancedb/src/table/datafusion/insert.rs +++ b/rust/lancedb/src/table/datafusion/insert.rs @@ -16,9 +16,9 @@ use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, }; +use lance::Dataset; use lance::dataset::transaction::{Operation, Transaction}; use lance::dataset::{CommitBuilder, InsertBuilder, WriteParams}; -use lance::Dataset; use lance_table::format::Fragment; use crate::table::dataset::DatasetConsistencyWrapper; @@ -195,13 +195,13 @@ impl ExecutionPlan for InsertExec { } }; - if let Some(transactions) = to_commit { - if let Some(merged_txn) = merge_transactions(transactions) { - let new_dataset = CommitBuilder::new(dataset.clone()) - .execute(merged_txn) - .await?; - ds_wrapper.update(new_dataset); - } + if let Some(transactions) = to_commit + && let Some(merged_txn) = merge_transactions(transactions) + { + let new_dataset = CommitBuilder::new(dataset.clone()) + .execute(merged_txn) + .await?; + ds_wrapper.update(new_dataset); } Ok(RecordBatch::try_new( @@ -222,7 +222,7 @@ mod tests { use std::vec; use super::*; - use arrow_array::{record_batch, RecordBatchIterator}; + use arrow_array::{RecordBatchIterator, record_batch}; use datafusion::prelude::SessionContext; use datafusion_catalog::MemTable; use tempfile::tempdir; diff --git a/rust/lancedb/src/table/datafusion/scannable_exec.rs b/rust/lancedb/src/table/datafusion/scannable_exec.rs index 1dafa55de..eb128ac18 100644 --- a/rust/lancedb/src/table/datafusion/scannable_exec.rs +++ b/rust/lancedb/src/table/datafusion/scannable_exec.rs @@ -4,11 +4,11 @@ use core::fmt; use std::sync::{Arc, Mutex}; -use datafusion_common::{stats::Precision, DataFusionError, Result as DFResult, Statistics}; +use datafusion_common::{DataFusionError, Result as DFResult, Statistics, stats::Precision}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; use datafusion_physical_plan::{ - execution_plan::EmissionType, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, execution_plan::EmissionType, }; use crate::{arrow::SendableRecordBatchStreamExt, data::scannable::Scannable}; diff --git a/rust/lancedb/src/table/datafusion/udtf/fts.rs b/rust/lancedb/src/table/datafusion/udtf/fts.rs index 2f8b8beb8..5b50ddfa3 100644 --- a/rust/lancedb/src/table/datafusion/udtf/fts.rs +++ b/rust/lancedb/src/table/datafusion/udtf/fts.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use datafusion::catalog::TableFunctionImpl; use datafusion_catalog::TableProvider; -use datafusion_common::{plan_err, DataFusionError, Result as DataFusionResult, ScalarValue}; +use datafusion_common::{DataFusionError, Result as DataFusionResult, ScalarValue, plan_err}; use datafusion_expr::Expr; use lance_index::scalar::FullTextSearchQuery; @@ -93,9 +93,9 @@ pub fn from_json(json: &str) -> crate::Result() { - println!("{}", plan_str.value(row_idx)); - } + if let Some(col) = batch.column_by_name("plan") + && let Some(plan_str) = col.as_any().downcast_ref::() + { + println!("{}", plan_str.value(row_idx)); } } } @@ -229,10 +229,10 @@ mod tests { let explain_analyze_results = explain_analyze_df.collect().await.unwrap(); for batch in &explain_analyze_results { for row_idx in 0..batch.num_rows() { - if let Some(col) = batch.column_by_name("plan") { - if let Some(plan_str) = col.as_any().downcast_ref::() { - println!("{}", plan_str.value(row_idx)); - } + if let Some(col) = batch.column_by_name("plan") + && let Some(plan_str) = col.as_any().downcast_ref::() + { + println!("{}", plan_str.value(row_idx)); } } } diff --git a/rust/lancedb/src/table/dataset.rs b/rust/lancedb/src/table/dataset.rs index 9c4580891..89fcf55dd 100644 --- a/rust/lancedb/src/table/dataset.rs +++ b/rust/lancedb/src/table/dataset.rs @@ -6,9 +6,9 @@ use std::{ time::Duration, }; -use lance::{dataset::refs, Dataset}; +use lance::{Dataset, dataset::refs}; -use crate::{error::Result, utils::background_cache::BackgroundCache, Error}; +use crate::{Error, error::Result, utils::background_cache::BackgroundCache}; /// A wrapper around a [Dataset] that provides consistency checks. /// diff --git a/rust/lancedb/src/table/delete.rs b/rust/lancedb/src/table/delete.rs index add8e9e50..d58263026 100644 --- a/rust/lancedb/src/table/delete.rs +++ b/rust/lancedb/src/table/delete.rs @@ -36,7 +36,7 @@ pub(crate) async fn execute_delete(table: &NativeTable, predicate: &str) -> Resu #[cfg(test)] mod tests { use crate::connect; - use arrow_array::{record_batch, Int32Array, RecordBatch}; + use arrow_array::{Int32Array, RecordBatch, record_batch}; use arrow_schema::{DataType, Field, Schema}; use std::sync::Arc; diff --git a/rust/lancedb/src/table/optimize.rs b/rust/lancedb/src/table/optimize.rs index e8946b85f..3d9a7d476 100644 --- a/rust/lancedb/src/table/optimize.rs +++ b/rust/lancedb/src/table/optimize.rs @@ -9,9 +9,9 @@ use std::sync::Arc; use lance::dataset::cleanup::RemovalStats; -use lance::dataset::optimize::{compact_files, CompactionMetrics, IndexRemapperOptions}; -use lance_index::optimize::OptimizeOptions; +use lance::dataset::optimize::{CompactionMetrics, IndexRemapperOptions, compact_files}; use lance_index::DatasetIndexExt; +use lance_index::optimize::OptimizeOptions; use log::info; pub use chrono::Duration; @@ -213,7 +213,7 @@ mod tests { use std::sync::Arc; use crate::connect; - use crate::index::{scalar::BTreeIndexBuilder, Index}; + use crate::index::{Index, scalar::BTreeIndexBuilder}; use crate::query::ExecutableQuery; use crate::table::{CompactionOptions, OptimizeAction, OptimizeStats}; use futures::TryStreamExt; diff --git a/rust/lancedb/src/table/query.rs b/rust/lancedb/src/table/query.rs index 203525180..e168f7a8f 100644 --- a/rust/lancedb/src/table/query.rs +++ b/rust/lancedb/src/table/query.rs @@ -7,26 +7,26 @@ use super::NativeTable; use crate::error::{Error, Result}; use crate::expr::expr_to_sql_string; use crate::query::{ - QueryExecutionOptions, QueryFilter, QueryRequest, Select, VectorQueryRequest, DEFAULT_TOP_K, + DEFAULT_TOP_K, QueryExecutionOptions, QueryFilter, QueryRequest, Select, VectorQueryRequest, }; -use crate::utils::{default_vector_column, TimeoutStream}; +use crate::utils::{TimeoutStream, default_vector_column}; use arrow::array::{AsArray, FixedSizeListBuilder, Float32Builder}; use arrow::datatypes::{Float32Type, UInt8Type}; use arrow_array::Array; use arrow_schema::{DataType, Schema}; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_plan::ExecutionPlan; use futures::future::try_join_all; use lance::dataset::scanner::DatasetRecordBatchStream; use lance::dataset::scanner::Scanner; use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan}; +use lance_namespace::LanceNamespace; use lance_namespace::models::{ QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns, QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery, }; -use lance_namespace::LanceNamespace; #[derive(Debug, Clone)] pub enum AnyQuery { diff --git a/rust/lancedb/src/table/schema_evolution.rs b/rust/lancedb/src/table/schema_evolution.rs index 6adf7f3a8..c9bf9d7a8 100644 --- a/rust/lancedb/src/table/schema_evolution.rs +++ b/rust/lancedb/src/table/schema_evolution.rs @@ -92,7 +92,7 @@ pub(crate) async fn execute_drop_columns( #[cfg(test)] mod tests { - use arrow_array::{record_batch, Int32Array, StringArray}; + use arrow_array::{Int32Array, StringArray, record_batch}; use arrow_schema::DataType; use futures::TryStreamExt; use lance::dataset::ColumnAlteration; diff --git a/rust/lancedb/src/table/update.rs b/rust/lancedb/src/table/update.rs index 07ff7fd8e..61eb93992 100644 --- a/rust/lancedb/src/table/update.rs +++ b/rust/lancedb/src/table/update.rs @@ -115,9 +115,9 @@ mod tests { use crate::query::QueryBase; use crate::query::{ExecutableQuery, Select}; use arrow_array::{ - record_batch, Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array, - Float64Array, Int32Array, Int64Array, LargeStringArray, RecordBatch, StringArray, - TimestampMillisecondArray, TimestampNanosecondArray, UInt32Array, + Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array, Float64Array, + Int32Array, Int64Array, LargeStringArray, RecordBatch, StringArray, + TimestampMillisecondArray, TimestampNanosecondArray, UInt32Array, record_batch, }; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType, Field, Schema, TimeUnit}; diff --git a/rust/lancedb/src/test_utils/connection.rs b/rust/lancedb/src/test_utils/connection.rs index c5c3294ed..0185f20c7 100644 --- a/rust/lancedb/src/test_utils/connection.rs +++ b/rust/lancedb/src/test_utils/connection.rs @@ -10,9 +10,9 @@ use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::{Child, ChildStdout, Command}; use tokio::sync::mpsc; -use crate::{connect, Connection}; -use anyhow::{anyhow, bail, Result}; -use tempfile::{tempdir, TempDir}; +use crate::{Connection, connect}; +use anyhow::{Result, anyhow, bail}; +use tempfile::{TempDir, tempdir}; pub struct TestConnection { pub uri: String, @@ -113,8 +113,13 @@ async fn new_remote_connection(script_path: &str) -> Result { let (port_sender, mut port_receiver) = mpsc::channel(5); let _reader = spawn_stdout_reader(stdout, port_sender).await; let port = match port_receiver.recv().await { - None => bail!("Unable to determine the port number used by the phalanx process we spawned, because the reader thread was closed too soon."), - Some(Err(err)) => bail!("Unable to determine the port number used by the phalanx process we spawned, because of an error, {}", err), + None => bail!( + "Unable to determine the port number used by the phalanx process we spawned, because the reader thread was closed too soon." + ), + Some(Err(err)) => bail!( + "Unable to determine the port number used by the phalanx process we spawned, because of an error, {}", + err + ), Some(Ok(port)) => port, }; let uri = "db://test"; diff --git a/rust/lancedb/src/test_utils/datagen.rs b/rust/lancedb/src/test_utils/datagen.rs index 6439fbc5f..c9824a04b 100644 --- a/rust/lancedb/src/test_utils/datagen.rs +++ b/rust/lancedb/src/test_utils/datagen.rs @@ -6,8 +6,9 @@ use futures::TryStreamExt; use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount}; use crate::{ + Error, Table, arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, - connect, Error, Table, + connect, }; #[async_trait::async_trait] diff --git a/rust/lancedb/src/test_utils/embeddings.rs b/rust/lancedb/src/test_utils/embeddings.rs index 48c9b5743..34bb9bc5e 100644 --- a/rust/lancedb/src/test_utils/embeddings.rs +++ b/rust/lancedb/src/test_utils/embeddings.rs @@ -6,8 +6,8 @@ use std::{borrow::Cow, sync::Arc}; use arrow_array::{Array, FixedSizeListArray, Float32Array}; use arrow_schema::{DataType, Field}; -use crate::embeddings::EmbeddingFunction; use crate::Result; +use crate::embeddings::EmbeddingFunction; #[derive(Debug, Clone)] pub struct MockEmbed { diff --git a/rust/lancedb/src/utils/background_cache.rs b/rust/lancedb/src/utils/background_cache.rs index 37d0aa2b8..211630556 100644 --- a/rust/lancedb/src/utils/background_cache.rs +++ b/rust/lancedb/src/utils/background_cache.rs @@ -9,8 +9,8 @@ use std::future::Future; use std::sync::{Arc, Mutex}; use std::time::Duration; -use futures::future::{BoxFuture, Shared}; use futures::FutureExt; +use futures::future::{BoxFuture, Shared}; type SharedFut = Shared>>>; diff --git a/rust/lancedb/src/utils/mod.rs b/rust/lancedb/src/utils/mod.rs index 031487a50..ffed533f6 100644 --- a/rust/lancedb/src/utils/mod.rs +++ b/rust/lancedb/src/utils/mod.rs @@ -53,7 +53,7 @@ impl PatchStoreParam for Option { pub trait PatchWriteParam { fn patch_with_store_wrapper(self, wrapper: Arc) - -> Result; + -> Result; } impl PatchWriteParam for WriteParams { @@ -340,7 +340,7 @@ mod tests { use arrow_array::Int32Array; use arrow_schema::Field; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - use futures::{stream, StreamExt}; + use futures::{StreamExt, stream}; use tokio::time::sleep; use super::*; @@ -351,10 +351,12 @@ mod tests { Field::new("id", DataType::Int16, true), Field::new("tag", DataType::Utf8, false), ]); - assert!(default_vector_column(&schema_no_vector, None) - .unwrap_err() - .to_string() - .contains("No vector column")); + assert!( + default_vector_column(&schema_no_vector, None) + .unwrap_err() + .to_string() + .contains("No vector column") + ); let schema_with_vec_col = Schema::new(vec![ Field::new("id", DataType::Int16, true), @@ -382,10 +384,12 @@ mod tests { false, ), ]); - assert!(default_vector_column(&multi_vec_col, None) - .unwrap_err() - .to_string() - .contains("More than one")); + assert!( + default_vector_column(&multi_vec_col, None) + .unwrap_err() + .to_string() + .contains("More than one") + ); } #[test] @@ -501,10 +505,12 @@ mod tests { // Poll the stream again and ensure it returns a timeout error let second_result = timeout_stream.next().await.unwrap(); assert!(second_result.is_err()); - assert!(second_result - .unwrap_err() - .to_string() - .contains("Query timeout")); + assert!( + second_result + .unwrap_err() + .to_string() + .contains("Query timeout") + ); } #[tokio::test] diff --git a/rust/lancedb/tests/embedding_registry_test.rs b/rust/lancedb/tests/embedding_registry_test.rs index b002e47ff..8e1a80ec4 100644 --- a/rust/lancedb/tests/embedding_registry_test.rs +++ b/rust/lancedb/tests/embedding_registry_test.rs @@ -15,10 +15,9 @@ use arrow_array::{ use arrow_schema::{DataType, Field, Schema}; use futures::StreamExt; use lancedb::{ - connect, + Error, Result, connect, embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry}, query::ExecutableQuery, - Error, Result, }; #[tokio::test] @@ -262,17 +261,19 @@ fn create_some_records() -> Result