From fa1b04f3419a1a7d7efc004d84b86ed5a46eeee1 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 4 Mar 2026 08:23:29 +0800 Subject: [PATCH] chore: migrate Rust crates to edition 2024 and fix clippy warnings (#3098) This PR migrates all Rust crates in the workspace to Rust 2024 edition and addresses the resulting compatibility updates. It also fixes all clippy warnings surfaced by the workspace checks so the codebase remains warning-free under the current lint configuration. Context: - Scope: workspace edition bump (`2021` -> `2024`) plus follow-up refactors required by new edition and clippy rules. - Validation: `cargo fmt --all` and `cargo clippy --quiet --features remote --tests --examples -- -D warnings` both pass. --- Cargo.toml | 2 +- nodejs/src/connection.rs | 2 +- nodejs/src/index.rs | 2 +- nodejs/src/query.rs | 17 ++- nodejs/src/session.rs | 2 +- python/src/arrow.rs | 2 +- python/src/connection.rs | 2 +- python/src/error.rs | 2 +- python/src/index.rs | 30 +++--- python/src/lib.rs | 6 +- python/src/permutation.rs | 2 +- python/src/query.rs | 16 +-- python/src/session.rs | 2 +- python/src/table.rs | 10 +- python/src/table/scannable.rs | 22 ++-- python/src/util.rs | 3 +- rust/lancedb/examples/bedrock.rs | 5 +- rust/lancedb/examples/full_text_search.rs | 32 +++--- rust/lancedb/examples/hybrid_search.rs | 9 +- rust/lancedb/examples/ivf_pq.rs | 30 +++--- rust/lancedb/examples/openai.rs | 5 +- .../lancedb/examples/sentence_transformers.rs | 7 +- rust/lancedb/examples/simple.rs | 2 +- rust/lancedb/src/arrow.rs | 2 +- rust/lancedb/src/connection.rs | 17 ++- rust/lancedb/src/connection/create_table.rs | 15 +-- rust/lancedb/src/data/inspect.rs | 6 +- rust/lancedb/src/data/sanitize.rs | 6 +- rust/lancedb/src/data/scannable.rs | 6 +- rust/lancedb/src/database.rs | 2 +- rust/lancedb/src/database/listing.rs | 4 +- rust/lancedb/src/database/namespace.rs | 2 +- .../src/dataloader/permutation/builder.rs | 6 +- .../src/dataloader/permutation/reader.rs | 6 +- .../src/dataloader/permutation/shuffle.rs | 8 +- .../src/dataloader/permutation/split.rs | 4 +- .../src/dataloader/permutation/util.rs | 4 +- rust/lancedb/src/embeddings.rs | 2 +- rust/lancedb/src/embeddings/bedrock.rs | 2 +- rust/lancedb/src/embeddings/openai.rs | 2 +- .../src/embeddings/sentence_transformers.rs | 12 +-- rust/lancedb/src/expr.rs | 2 +- rust/lancedb/src/index.rs | 2 +- rust/lancedb/src/index/scalar.rs | 2 +- rust/lancedb/src/index/waiter.rs | 2 +- rust/lancedb/src/io/object_store.rs | 6 +- .../src/io/object_store/io_tracking.rs | 3 +- rust/lancedb/src/query.rs | 102 +++++++++--------- rust/lancedb/src/query/hybrid.rs | 7 +- rust/lancedb/src/remote/client.rs | 7 +- rust/lancedb/src/remote/db.rs | 10 +- rust/lancedb/src/remote/retry.rs | 2 +- rust/lancedb/src/remote/table.rs | 67 ++++++------ rust/lancedb/src/remote/table/insert.rs | 10 +- rust/lancedb/src/remote/util.rs | 2 +- rust/lancedb/src/rerankers/rrf.rs | 2 +- rust/lancedb/src/table.rs | 64 +++++------ rust/lancedb/src/table/add_data.rs | 10 +- rust/lancedb/src/table/datafusion.rs | 12 +-- rust/lancedb/src/table/datafusion/cast.rs | 4 +- rust/lancedb/src/table/datafusion/insert.rs | 18 ++-- .../src/table/datafusion/scannable_exec.rs | 4 +- rust/lancedb/src/table/datafusion/udtf/fts.rs | 22 ++-- rust/lancedb/src/table/dataset.rs | 4 +- rust/lancedb/src/table/delete.rs | 2 +- rust/lancedb/src/table/optimize.rs | 6 +- rust/lancedb/src/table/query.rs | 8 +- rust/lancedb/src/table/schema_evolution.rs | 2 +- rust/lancedb/src/table/update.rs | 6 +- rust/lancedb/src/test_utils/connection.rs | 15 ++- rust/lancedb/src/test_utils/datagen.rs | 3 +- rust/lancedb/src/test_utils/embeddings.rs | 2 +- rust/lancedb/src/utils/background_cache.rs | 2 +- rust/lancedb/src/utils/mod.rs | 34 +++--- rust/lancedb/tests/embedding_registry_test.rs | 27 ++--- .../lancedb/tests/embeddings_parallel_test.rs | 4 +- rust/lancedb/tests/object_store_test.rs | 2 +- 77 files changed, 407 insertions(+), 388 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 35bc3b835..9ddddc4fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ exclude = ["python"] resolver = "2" [workspace.package] -edition = "2021" +edition = "2024" authors = ["LanceDB Devs "] license = "Apache-2.0" repository = "https://github.com/lancedb/lancedb" diff --git a/nodejs/src/connection.rs b/nodejs/src/connection.rs index 824ae7562..ccfdc6fc7 100644 --- a/nodejs/src/connection.rs +++ b/nodejs/src/connection.rs @@ -8,10 +8,10 @@ use lancedb::database::{CreateTableMode, Database}; use napi::bindgen_prelude::*; use napi_derive::*; +use crate::ConnectionOptions; use crate::error::NapiErrorExt; use crate::header::JsHeaderProvider; use crate::table::Table; -use crate::ConnectionOptions; use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection}; use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema}; diff --git a/nodejs/src/index.rs b/nodejs/src/index.rs index bf8b280a6..c957cea78 100644 --- a/nodejs/src/index.rs +++ b/nodejs/src/index.rs @@ -3,12 +3,12 @@ use std::sync::Mutex; +use lancedb::index::Index as LanceDbIndex; use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder}; use lancedb::index::vector::{ IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, IvfRqIndexBuilder, }; -use lancedb::index::Index as LanceDbIndex; use napi_derive::napi; use crate::util::parse_distance_type; diff --git a/nodejs/src/query.rs b/nodejs/src/query.rs index 21403c0e7..4ad42f32f 100644 --- a/nodejs/src/query.rs +++ b/nodejs/src/query.rs @@ -17,8 +17,8 @@ use lancedb::query::VectorQuery as LanceDbVectorQuery; use napi::bindgen_prelude::*; use napi_derive::napi; -use crate::error::convert_error; use crate::error::NapiErrorExt; +use crate::error::convert_error; use crate::iterator::RecordBatchIterator; use crate::rerankers::RerankHybridCallbackArgs; use crate::rerankers::Reranker; @@ -551,15 +551,12 @@ fn parse_fts_query(query: Object) -> napi::Result { } }; let mut query = FullTextSearchQuery::new_query(query); - if let Some(cols) = columns { - if !cols.is_empty() { - query = query.with_columns(&cols).map_err(|e| { - napi::Error::from_reason(format!( - "Failed to set full text search columns: {}", - e - )) - })?; - } + if let Some(cols) = columns + && !cols.is_empty() + { + query = query.with_columns(&cols).map_err(|e| { + napi::Error::from_reason(format!("Failed to set full text search columns: {}", e)) + })?; } Ok(query) } else { diff --git a/nodejs/src/session.rs b/nodejs/src/session.rs index 6e755cb92..1c31d37da 100644 --- a/nodejs/src/session.rs +++ b/nodejs/src/session.rs @@ -95,7 +95,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session { napi_val: napi::sys::napi_value, ) -> napi::Result { let object: napi::bindgen_prelude::ClassInstance = - napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?; + unsafe { napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)? }; Ok((*object).clone()) } } diff --git a/python/src/arrow.rs b/python/src/arrow.rs index 88c285d30..fd3a05964 100644 --- a/python/src/arrow.rs +++ b/python/src/arrow.rs @@ -10,7 +10,7 @@ use arrow::{ use futures::stream::StreamExt; use lancedb::arrow::SendableRecordBatchStream; use pyo3::{ - exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python, + Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods, }; use pyo3_async_runtimes::tokio::future_into_py; diff --git a/python/src/connection.rs b/python/src/connection.rs index 7df89b101..a8b218a8e 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -9,10 +9,10 @@ use lancedb::{ database::{CreateTableMode, Database, ReadConsistency}, }; use pyo3::{ + Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python, exceptions::{PyRuntimeError, PyValueError}, pyclass, pyfunction, pymethods, types::{PyDict, PyDictMethods}, - Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python, }; use pyo3_async_runtimes::tokio::future_into_py; diff --git a/python/src/error.rs b/python/src/error.rs index b406c6b38..062a1dc96 100644 --- a/python/src/error.rs +++ b/python/src/error.rs @@ -2,10 +2,10 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use pyo3::{ + PyErr, PyResult, Python, exceptions::{PyIOError, PyNotImplementedError, PyOSError, PyRuntimeError, PyValueError}, intern, types::{PyAnyMethods, PyNone}, - PyErr, PyResult, Python, }; use lancedb::error::Error as LanceError; diff --git a/python/src/index.rs b/python/src/index.rs index c93b23eb3..602b5e420 100644 --- a/python/src/index.rs +++ b/python/src/index.rs @@ -3,17 +3,17 @@ use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder}; use lancedb::index::{ + Index as LanceDbIndex, scalar::{BTreeIndexBuilder, FtsIndexBuilder}, vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder}, - Index as LanceDbIndex, }; -use pyo3::types::PyStringMethods; use pyo3::IntoPyObject; +use pyo3::types::PyStringMethods; use pyo3::{ + Bound, FromPyObject, PyAny, PyResult, Python, exceptions::{PyKeyError, PyValueError}, intern, pyclass, pymethods, types::PyAnyMethods, - Bound, FromPyObject, PyAny, PyResult, Python, }; use crate::util::parse_distance_type; @@ -41,7 +41,12 @@ pub fn extract_index_params(source: &Option>) -> PyResult>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -64,10 +69,11 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -86,7 +92,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -101,7 +107,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -117,7 +123,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -138,7 +144,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -155,7 +161,7 @@ pub fn extract_index_params(source: &Option>) -> PyResult Err(PyValueError::new_err(format!( "Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq", not_supported diff --git a/python/src/lib.rs b/python/src/lib.rs index 3c7289ca0..4788b3e93 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -2,14 +2,14 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use arrow::RecordBatchStream; -use connection::{connect, Connection}; +use connection::{Connection, connect}; use env_logger::Env; use index::IndexConfig; use permutation::{PyAsyncPermutationBuilder, PyPermutationReader}; use pyo3::{ - pymodule, + Bound, PyResult, Python, pymodule, types::{PyModule, PyModuleMethods}, - wrap_pyfunction, Bound, PyResult, Python, + wrap_pyfunction, }; use query::{FTSQuery, HybridQuery, Query, VectorQuery}; use session::Session; diff --git a/python/src/permutation.rs b/python/src/permutation.rs index 06010396d..21b8c9c47 100644 --- a/python/src/permutation.rs +++ b/python/src/permutation.rs @@ -16,10 +16,10 @@ use lancedb::{ query::Select, }; use pyo3::{ + Bound, PyAny, PyRef, PyRefMut, PyResult, Python, exceptions::PyRuntimeError, pyclass, pymethods, types::{PyAnyMethods, PyDict, PyDictMethods, PyType}, - Bound, PyAny, PyRef, PyRefMut, PyResult, Python, }; use pyo3_async_runtimes::tokio::future_into_py; diff --git a/python/src/query.rs b/python/src/query.rs index 65e47a0db..0c17cf703 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -4,9 +4,9 @@ use std::sync::Arc; use std::time::Duration; -use arrow::array::make_array; use arrow::array::Array; use arrow::array::ArrayData; +use arrow::array::make_array; use arrow::pyarrow::FromPyArrow; use arrow::pyarrow::IntoPyArrow; use arrow::pyarrow::ToPyArrow; @@ -22,23 +22,23 @@ use lancedb::query::{ VectorQuery as LanceDbVectorQuery, }; use lancedb::table::AnyQuery; -use pyo3::prelude::{PyAnyMethods, PyDictMethods}; -use pyo3::pyfunction; -use pyo3::pymethods; -use pyo3::types::PyList; -use pyo3::types::{PyDict, PyString}; use pyo3::Bound; use pyo3::IntoPyObject; use pyo3::PyAny; use pyo3::PyRef; use pyo3::PyResult; use pyo3::Python; -use pyo3::{exceptions::PyRuntimeError, FromPyObject}; +use pyo3::prelude::{PyAnyMethods, PyDictMethods}; +use pyo3::pyfunction; +use pyo3::pymethods; +use pyo3::types::PyList; +use pyo3::types::{PyDict, PyString}; +use pyo3::{FromPyObject, exceptions::PyRuntimeError}; +use pyo3::{PyErr, pyclass}; use pyo3::{ exceptions::{PyNotImplementedError, PyValueError}, intern, }; -use pyo3::{pyclass, PyErr}; use pyo3_async_runtimes::tokio::future_into_py; use crate::util::parse_distance_type; diff --git a/python/src/session.rs b/python/src/session.rs index abc499e39..2433114b6 100644 --- a/python/src/session.rs +++ b/python/src/session.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use lancedb::{ObjectStoreRegistry, Session as LanceSession}; -use pyo3::{pyclass, pymethods, PyResult}; +use pyo3::{PyResult, pyclass, pymethods}; /// A session for managing caches and object stores across LanceDB operations. /// diff --git a/python/src/table.rs b/python/src/table.rs index 3766f3f19..e988cadb4 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -5,7 +5,7 @@ use std::{collections::HashMap, sync::Arc}; use crate::{ connection::Connection, error::PythonErrorExt, - index::{extract_index_params, IndexConfig}, + index::{IndexConfig, extract_index_params}, query::{Query, TakeQuery}, table::scannable::PyScannable, }; @@ -19,10 +19,10 @@ use lancedb::table::{ Table as LanceDbTable, }; use pyo3::{ + Bound, FromPyObject, PyAny, PyRef, PyResult, Python, exceptions::{PyKeyError, PyRuntimeError, PyValueError}, pyclass, pymethods, types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods}, - Bound, FromPyObject, PyAny, PyRef, PyResult, Python, }; use pyo3_async_runtimes::tokio::future_into_py; @@ -542,7 +542,7 @@ impl Table { let inner = self_.inner_ref()?.clone(); future_into_py(self_.py(), async move { let versions = inner.list_versions().await.infer_error()?; - let versions_as_dict = Python::attach(|py| { + Python::attach(|py| { versions .iter() .map(|v| { @@ -559,9 +559,7 @@ impl Table { Ok(dict.unbind()) }) .collect::>>() - }); - - versions_as_dict + }) }) } diff --git a/python/src/table/scannable.rs b/python/src/table/scannable.rs index e2bfc1295..5d02ca024 100644 --- a/python/src/table/scannable.rs +++ b/python/src/table/scannable.rs @@ -10,11 +10,11 @@ use arrow::{ }; use futures::StreamExt; use lancedb::{ + Error, arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, data::scannable::Scannable, - Error, }; -use pyo3::{types::PyAnyMethods, FromPyObject, Py, PyAny, Python}; +use pyo3::{FromPyObject, Py, PyAny, Python, types::PyAnyMethods}; /// Adapter that implements Scannable for a Python reader factory callable. /// @@ -99,15 +99,15 @@ impl Scannable for PyScannable { // Channel closed. Check if the task panicked — a panic // drops the sender without sending an error, so without // this check we'd silently return a truncated stream. - if let Some(handle) = join_handle { - if let Err(join_err) = handle.await { - return Some(( - Err(Error::Runtime { - message: format!("Reader task panicked: {}", join_err), - }), - (rx, None), - )); - } + if let Some(handle) = join_handle + && let Err(join_err) = handle.await + { + return Some(( + Err(Error::Runtime { + message: format!("Reader task panicked: {}", join_err), + }), + (rx, None), + )); } None } diff --git a/python/src/util.rs b/python/src/util.rs index 8ec8f40cb..977608a73 100644 --- a/python/src/util.rs +++ b/python/src/util.rs @@ -5,8 +5,9 @@ use std::sync::Mutex; use lancedb::DistanceType; use pyo3::{ + PyResult, exceptions::{PyRuntimeError, PyValueError}, - pyfunction, PyResult, + pyfunction, }; /// A wrapper around a rust builder diff --git a/rust/lancedb/examples/bedrock.rs b/rust/lancedb/examples/bedrock.rs index 365453b60..9af569c4e 100644 --- a/rust/lancedb/examples/bedrock.rs +++ b/rust/lancedb/examples/bedrock.rs @@ -9,10 +9,9 @@ use aws_config::Region; use aws_sdk_bedrockruntime::Client; use futures::StreamExt; use lancedb::{ - connect, - embeddings::{bedrock::BedrockEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction}, + Result, connect, + embeddings::{EmbeddingDefinition, EmbeddingFunction, bedrock::BedrockEmbeddingFunction}, query::{ExecutableQuery, QueryBase}, - Result, }; #[tokio::main] diff --git a/rust/lancedb/examples/full_text_search.rs b/rust/lancedb/examples/full_text_search.rs index 15d7d7d88..54d4a38f5 100644 --- a/rust/lancedb/examples/full_text_search.rs +++ b/rust/lancedb/examples/full_text_search.rs @@ -10,10 +10,10 @@ use futures::TryStreamExt; use lance_index::scalar::FullTextSearchQuery; use lancedb::connection::Connection; -use lancedb::index::scalar::FtsIndexBuilder; use lancedb::index::Index; +use lancedb::index::scalar::FtsIndexBuilder; use lancedb::query::{ExecutableQuery, QueryBase}; -use lancedb::{connect, Result, Table}; +use lancedb::{Result, Table, connect}; use rand::random; #[tokio::main] @@ -46,19 +46,21 @@ fn create_some_records() -> Result>(); let n_terms = 3; let batches = RecordBatchIterator::new( - vec![RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)), - Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| { - (0..n_terms) - .map(|_| words[random::() as usize % words.len()]) - .collect::>() - .join(" ") - }))), - ], - ) - .unwrap()] + vec![ + RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)), + Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| { + (0..n_terms) + .map(|_| words[random::() as usize % words.len()]) + .collect::>() + .join(" ") + }))), + ], + ) + .unwrap(), + ] .into_iter() .map(Ok), schema.clone(), diff --git a/rust/lancedb/examples/hybrid_search.rs b/rust/lancedb/examples/hybrid_search.rs index dc1a80a39..c28b19594 100644 --- a/rust/lancedb/examples/hybrid_search.rs +++ b/rust/lancedb/examples/hybrid_search.rs @@ -5,16 +5,15 @@ use arrow_array::{RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use futures::TryStreamExt; use lance_index::scalar::FullTextSearchQuery; -use lancedb::index::scalar::FtsIndexBuilder; use lancedb::index::Index; +use lancedb::index::scalar::FtsIndexBuilder; use lancedb::{ - connect, + Result, Table, connect, embeddings::{ - sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition, - EmbeddingFunction, + EmbeddingDefinition, EmbeddingFunction, + sentence_transformers::SentenceTransformersEmbeddings, }, query::{QueryBase, QueryExecutionOptions}, - Result, Table, }; use std::{iter::once, sync::Arc}; diff --git a/rust/lancedb/examples/ivf_pq.rs b/rust/lancedb/examples/ivf_pq.rs index d171d5a45..79f674615 100644 --- a/rust/lancedb/examples/ivf_pq.rs +++ b/rust/lancedb/examples/ivf_pq.rs @@ -14,10 +14,10 @@ use arrow_schema::{DataType, Field, Schema}; use futures::TryStreamExt; use lancedb::connection::Connection; -use lancedb::index::vector::IvfPqIndexBuilder; use lancedb::index::Index; +use lancedb::index::vector::IvfPqIndexBuilder; use lancedb::query::{ExecutableQuery, QueryBase}; -use lancedb::{connect, DistanceType, Result, Table}; +use lancedb::{DistanceType, Result, Table, connect}; #[tokio::main] async fn main() -> Result<()> { @@ -51,19 +51,21 @@ fn create_some_records() -> Result( - (0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])), - DIM as i32, + vec![ + RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)), + Arc::new( + FixedSizeListArray::from_iter_primitive::( + (0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])), + DIM as i32, + ), ), - ), - ], - ) - .unwrap()] + ], + ) + .unwrap(), + ] .into_iter() .map(Ok), schema.clone(), diff --git a/rust/lancedb/examples/openai.rs b/rust/lancedb/examples/openai.rs index 2194e519a..b54e3e55b 100644 --- a/rust/lancedb/examples/openai.rs +++ b/rust/lancedb/examples/openai.rs @@ -8,10 +8,9 @@ use std::{iter::once, sync::Arc}; use arrow_array::{RecordBatch, StringArray}; use futures::StreamExt; use lancedb::{ - connect, - embeddings::{openai::OpenAIEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction}, + Result, connect, + embeddings::{EmbeddingDefinition, EmbeddingFunction, openai::OpenAIEmbeddingFunction}, query::{ExecutableQuery, QueryBase}, - Result, }; // --8<-- [end:imports] diff --git a/rust/lancedb/examples/sentence_transformers.rs b/rust/lancedb/examples/sentence_transformers.rs index 2fa7ed30b..99f2ac63d 100644 --- a/rust/lancedb/examples/sentence_transformers.rs +++ b/rust/lancedb/examples/sentence_transformers.rs @@ -7,13 +7,12 @@ use arrow_array::{RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use futures::StreamExt; use lancedb::{ - connect, + Result, connect, embeddings::{ - sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition, - EmbeddingFunction, + EmbeddingDefinition, EmbeddingFunction, + sentence_transformers::SentenceTransformersEmbeddings, }, query::{ExecutableQuery, QueryBase}, - Result, }; #[tokio::main] diff --git a/rust/lancedb/examples/simple.rs b/rust/lancedb/examples/simple.rs index 157083d0a..14846e037 100644 --- a/rust/lancedb/examples/simple.rs +++ b/rust/lancedb/examples/simple.rs @@ -14,7 +14,7 @@ use futures::TryStreamExt; use lancedb::connection::Connection; use lancedb::index::Index; use lancedb::query::{ExecutableQuery, QueryBase}; -use lancedb::{connect, Result, Table as LanceDbTable}; +use lancedb::{Result, Table as LanceDbTable, connect}; #[tokio::main] async fn main() -> Result<()> { diff --git a/rust/lancedb/src/arrow.rs b/rust/lancedb/src/arrow.rs index a8c710865..fe5dd998d 100644 --- a/rust/lancedb/src/arrow.rs +++ b/rust/lancedb/src/arrow.rs @@ -12,7 +12,7 @@ use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount}; #[cfg(feature = "polars")] use {crate::polars_arrow_convertors, polars::frame::ArrowChunk, polars::prelude::DataFrame}; -use crate::{error::Result, Error}; +use crate::{Error, error::Result}; /// An iterator of batches that also has a schema pub trait RecordBatchReader: Iterator> { diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 1e22e7c8f..f442409d2 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -17,6 +17,7 @@ use lance_namespace::models::{ #[cfg(feature = "aws")] use object_store::aws::AwsCredential; +use crate::Table; use crate::connection::create_table::CreateTableBuilder; use crate::data::scannable::Scannable; use crate::database::listing::ListingDatabase; @@ -31,7 +32,6 @@ use crate::remote::{ client::ClientConfig, db::{OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION}, }; -use crate::Table; use lance::io::ObjectStoreParams; pub use lance_encoding::version::LanceFileVersion; #[cfg(feature = "remote")] @@ -758,10 +758,10 @@ impl ConnectBuilder { options: &mut HashMap, ) { for (env_key, opt_key) in env_var_to_remote_storage_option { - if let Ok(env_value) = std::env::var(env_key) { - if !options.contains_key(*opt_key) { - options.insert((*opt_key).to_string(), env_value); - } + if let Ok(env_value) = std::env::var(env_key) + && !options.contains_key(*opt_key) + { + options.insert((*opt_key).to_string(), env_value); } } } @@ -1011,14 +1011,13 @@ mod tests { #[cfg(feature = "remote")] #[test] fn test_apply_env_defaults() { - let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY"; - let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL"; + let env_key = "PATH"; + let env_val = std::env::var(env_key).expect("PATH should be set in test environment"); let opts_key = "test_apply_env_defaults_environment_variable_opts_key"; - std::env::set_var(env_key, env_val); let mut options = HashMap::new(); ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options); - assert_eq!(Some(&env_val.to_string()), options.get(opts_key)); + assert_eq!(Some(&env_val), options.get(opts_key)); options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string()); ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options); diff --git a/rust/lancedb/src/connection/create_table.rs b/rust/lancedb/src/connection/create_table.rs index 8eb7d2207..c33866159 100644 --- a/rust/lancedb/src/connection/create_table.rs +++ b/rust/lancedb/src/connection/create_table.rs @@ -6,12 +6,12 @@ use std::sync::Arc; use lance_io::object_store::StorageOptionsProvider; use crate::{ + Error, Result, Table, connection::{merge_storage_options, set_storage_options_provider}, data::scannable::{Scannable, WithEmbeddingsScannable}, database::{CreateTableMode, CreateTableRequest, Database}, embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry}, table::WriteOptions, - Error, Result, Table, }; pub struct CreateTableBuilder { @@ -167,7 +167,7 @@ impl CreateTableBuilder { #[cfg(test)] mod tests { use arrow_array::{ - record_batch, Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, + Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, record_batch, }; use arrow_schema::{ArrowError, DataType, Field, Schema}; use futures::TryStreamExt; @@ -380,11 +380,12 @@ mod tests { .await .unwrap(); let other_schema = Arc::new(Schema::new(vec![Field::new("y", DataType::Int32, false)])); - assert!(db - .create_empty_table("test", other_schema.clone()) - .execute() - .await - .is_err()); // TODO: assert what this error is + assert!( + db.create_empty_table("test", other_schema.clone()) + .execute() + .await + .is_err() + ); // TODO: assert what this error is let overwritten = db .create_empty_table("test", other_schema.clone()) .mode(CreateTableMode::Overwrite) diff --git a/rust/lancedb/src/data/inspect.rs b/rust/lancedb/src/data/inspect.rs index 2b78882ef..3914caa74 100644 --- a/rust/lancedb/src/data/inspect.rs +++ b/rust/lancedb/src/data/inspect.rs @@ -5,9 +5,9 @@ use std::collections::HashMap; use arrow::compute::kernels::{aggregate::bool_and, length::length}; use arrow_array::{ + Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader, cast::AsArray, types::{ArrowPrimitiveType, Int32Type, Int64Type}, - Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader, }; use arrow_ord::cmp::eq; use arrow_schema::DataType; @@ -78,7 +78,7 @@ pub fn infer_vector_columns( _ => { return Err(Error::Schema { message: format!("Column {} is not a list", col_name), - }) + }); } } { if let Some(Some(prev_dim)) = columns_to_infer.get(&col_name) { @@ -102,8 +102,8 @@ mod tests { use super::*; use arrow_array::{ - types::{Float32Type, Float64Type}, FixedSizeListArray, Float32Array, ListArray, RecordBatch, RecordBatchIterator, StringArray, + types::{Float32Type, Float64Type}, }; use arrow_schema::{DataType, Field, Schema}; use std::{sync::Arc, vec}; diff --git a/rust/lancedb/src/data/sanitize.rs b/rust/lancedb/src/data/sanitize.rs index 91b9aea95..bcb58261f 100644 --- a/rust/lancedb/src/data/sanitize.rs +++ b/rust/lancedb/src/data/sanitize.rs @@ -4,10 +4,10 @@ use std::{iter::repeat_with, sync::Arc}; use arrow_array::{ - cast::AsArray, - types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type}, Array, ArrowNumericType, FixedSizeListArray, PrimitiveArray, RecordBatch, RecordBatchIterator, RecordBatchReader, + cast::AsArray, + types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type}, }; use arrow_cast::{can_cast_types, cast}; use arrow_schema::{ArrowError, DataType, Field, Schema}; @@ -184,7 +184,7 @@ mod tests { use std::sync::Arc; use arrow_array::{ - FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int32Array, Int8Array, + FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int8Array, Int32Array, RecordBatch, RecordBatchIterator, StringArray, }; use arrow_schema::Field; diff --git a/rust/lancedb/src/data/scannable.rs b/rust/lancedb/src/data/scannable.rs index 67afecb2e..35c10be59 100644 --- a/rust/lancedb/src/data/scannable.rs +++ b/rust/lancedb/src/data/scannable.rs @@ -13,16 +13,16 @@ use crate::arrow::{ SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream, }; use crate::embeddings::{ - compute_embeddings_for_batch, compute_output_schema, EmbeddingDefinition, EmbeddingFunction, - EmbeddingRegistry, + EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, compute_embeddings_for_batch, + compute_output_schema, }; use crate::table::{ColumnDefinition, ColumnKind, TableDefinition}; use crate::{Error, Result}; use arrow_array::{ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader}; use arrow_schema::{ArrowError, SchemaRef}; use async_trait::async_trait; -use futures::stream::once; use futures::StreamExt; +use futures::stream::once; use lance_datafusion::utils::StreamingWriteSource; pub trait Scannable: Send { diff --git a/rust/lancedb/src/database.rs b/rust/lancedb/src/database.rs index df027b613..95f477dc1 100644 --- a/rust/lancedb/src/database.rs +++ b/rust/lancedb/src/database.rs @@ -19,12 +19,12 @@ use std::sync::Arc; use std::time::Duration; use lance::dataset::ReadParams; +use lance_namespace::LanceNamespace; use lance_namespace::models::{ CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse, }; -use lance_namespace::LanceNamespace; use crate::data::scannable::Scannable; use crate::error::Result; diff --git a/rust/lancedb/src/database/listing.rs b/rust/lancedb/src/database/listing.rs index aea5ce24c..3f6a3de55 100644 --- a/rust/lancedb/src/database/listing.rs +++ b/rust/lancedb/src/database/listing.rs @@ -8,7 +8,7 @@ use std::path::Path; use std::{collections::HashMap, sync::Arc}; use lance::dataset::refs::Ref; -use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode}; +use lance::dataset::{ReadParams, WriteMode, builder::DatasetBuilder}; use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore}; use lance_datafusion::utils::StreamingWriteSource; use lance_encoding::version::LanceFileVersion; @@ -1097,11 +1097,11 @@ impl Database for ListingDatabase { #[cfg(test)] mod tests { use super::*; + use crate::Table; use crate::connection::ConnectRequest; use crate::data::scannable::Scannable; use crate::database::{CreateTableMode, CreateTableRequest}; use crate::table::WriteOptions; - use crate::Table; use arrow_array::{Int32Array, RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use std::path::PathBuf; diff --git a/rust/lancedb/src/database/namespace.rs b/rust/lancedb/src/database/namespace.rs index 66771c135..90b5c19bd 100644 --- a/rust/lancedb/src/database/namespace.rs +++ b/rust/lancedb/src/database/namespace.rs @@ -9,13 +9,13 @@ use std::sync::Arc; use async_trait::async_trait; use lance_io::object_store::{ObjectStoreParams, StorageOptionsAccessor}; use lance_namespace::{ + LanceNamespace, models::{ CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse, }, - LanceNamespace, }; use lance_namespace_impls::ConnectBuilder; use log::warn; diff --git a/rust/lancedb/src/dataloader/permutation/builder.rs b/rust/lancedb/src/dataloader/permutation/builder.rs index 94474a33b..377712e98 100644 --- a/rust/lancedb/src/dataloader/permutation/builder.rs +++ b/rust/lancedb/src/dataloader/permutation/builder.rs @@ -11,16 +11,16 @@ use lance_core::ROW_ID; use lance_datafusion::exec::SessionContextExt; use crate::{ + Error, Result, Table, arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream}, connect, database::{CreateTableRequest, Database}, dataloader::permutation::{ shuffle::{Shuffler, ShufflerConfig}, - split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN}, - util::{rename_column, TemporaryDirectory}, + split::{SPLIT_ID_COLUMN, SplitStrategy, Splitter}, + util::{TemporaryDirectory, rename_column}, }, query::{ExecutableQuery, QueryBase, Select}, - Error, Result, Table, }; pub const SRC_ROW_ID_COL: &str = "row_id"; diff --git a/rust/lancedb/src/dataloader/permutation/reader.rs b/rust/lancedb/src/dataloader/permutation/reader.rs index e8d81f4b0..72f146995 100644 --- a/rust/lancedb/src/dataloader/permutation/reader.rs +++ b/rust/lancedb/src/dataloader/permutation/reader.rs @@ -25,8 +25,8 @@ use futures::{StreamExt, TryStreamExt}; use lance::dataset::scanner::DatasetRecordBatchStream; use lance::io::RecordBatchStream; use lance_arrow::RecordBatchExt; -use lance_core::error::LanceOptionExt; use lance_core::ROW_ID; +use lance_core::error::LanceOptionExt; use std::collections::HashMap; use std::sync::Arc; @@ -500,10 +500,10 @@ mod tests { use rand::seq::SliceRandom; use crate::{ + Table, arrow::SendableRecordBatchStream, query::{ExecutableQuery, QueryBase}, - test_utils::datagen::{virtual_table, LanceDbDatagenExt}, - Table, + test_utils::datagen::{LanceDbDatagenExt, virtual_table}, }; use super::*; diff --git a/rust/lancedb/src/dataloader/permutation/shuffle.rs b/rust/lancedb/src/dataloader/permutation/shuffle.rs index 53f7e1af5..05f98eb5a 100644 --- a/rust/lancedb/src/dataloader/permutation/shuffle.rs +++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs @@ -18,12 +18,12 @@ use lance_io::{ scheduler::{ScanScheduler, SchedulerConfig}, utils::CachedFileSize, }; -use rand::{seq::SliceRandom, Rng, RngCore}; +use rand::{Rng, RngCore, seq::SliceRandom}; use crate::{ - arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, - dataloader::permutation::util::{non_crypto_rng, TemporaryDirectory}, Error, Result, + arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, + dataloader::permutation::util::{TemporaryDirectory, non_crypto_rng}, }; #[derive(Debug, Clone)] @@ -281,7 +281,7 @@ mod tests { use datafusion_expr::col; use futures::TryStreamExt; use lance_datagen::{BatchCount, BatchGeneratorBuilder, ByteCount, RowCount, Seed}; - use rand::{rngs::SmallRng, SeedableRng}; + use rand::{SeedableRng, rngs::SmallRng}; fn test_gen() -> BatchGeneratorBuilder { lance_datagen::gen_batch() diff --git a/rust/lancedb/src/dataloader/permutation/split.rs b/rust/lancedb/src/dataloader/permutation/split.rs index 8a0d11b1e..bb6f59d34 100644 --- a/rust/lancedb/src/dataloader/permutation/split.rs +++ b/rust/lancedb/src/dataloader/permutation/split.rs @@ -2,8 +2,8 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use std::sync::{ - atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, Arc, + atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, }; use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array}; @@ -15,13 +15,13 @@ use lance_arrow::SchemaExt; use lance_core::ROW_ID; use crate::{ + Error, Result, arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, dataloader::{ permutation::shuffle::{Shuffler, ShufflerConfig}, permutation::util::TemporaryDirectory, }, query::{Query, QueryBase, Select}, - Error, Result, }; pub const SPLIT_ID_COLUMN: &str = "split_id"; diff --git a/rust/lancedb/src/dataloader/permutation/util.rs b/rust/lancedb/src/dataloader/permutation/util.rs index 9e4571125..d793b9704 100644 --- a/rust/lancedb/src/dataloader/permutation/util.rs +++ b/rust/lancedb/src/dataloader/permutation/util.rs @@ -7,12 +7,12 @@ use arrow_array::RecordBatch; use arrow_schema::{Fields, Schema}; use datafusion_execution::disk_manager::DiskManagerMode; use futures::TryStreamExt; -use rand::{rngs::SmallRng, RngCore, SeedableRng}; +use rand::{RngCore, SeedableRng, rngs::SmallRng}; use tempfile::TempDir; use crate::{ - arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, Error, Result, + arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, }; /// Directory to use for temporary files diff --git a/rust/lancedb/src/embeddings.rs b/rust/lancedb/src/embeddings.rs index b6272a123..ac665d76b 100644 --- a/rust/lancedb/src/embeddings.rs +++ b/rust/lancedb/src/embeddings.rs @@ -23,9 +23,9 @@ use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef}; use serde::{Deserialize, Serialize}; use crate::{ + Error, error::Result, table::{ColumnDefinition, ColumnKind, TableDefinition}, - Error, }; /// Trait for embedding functions diff --git a/rust/lancedb/src/embeddings/bedrock.rs b/rust/lancedb/src/embeddings/bedrock.rs index 33b045586..b35ce8611 100644 --- a/rust/lancedb/src/embeddings/bedrock.rs +++ b/rust/lancedb/src/embeddings/bedrock.rs @@ -8,7 +8,7 @@ use arrow::array::{AsArray, Float32Builder}; use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array}; use arrow_data::ArrayData; use arrow_schema::DataType; -use serde_json::{json, Value}; +use serde_json::{Value, json}; use super::EmbeddingFunction; use crate::{Error, Result}; diff --git a/rust/lancedb/src/embeddings/openai.rs b/rust/lancedb/src/embeddings/openai.rs index 7fadad5c4..ea900b43b 100644 --- a/rust/lancedb/src/embeddings/openai.rs +++ b/rust/lancedb/src/embeddings/openai.rs @@ -8,9 +8,9 @@ use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array}; use arrow_data::ArrayData; use arrow_schema::DataType; use async_openai::{ + Client, config::OpenAIConfig, types::{CreateEmbeddingRequest, Embedding, EmbeddingInput, EncodingFormat}, - Client, }; use tokio::{runtime::Handle, task}; diff --git a/rust/lancedb/src/embeddings/sentence_transformers.rs b/rust/lancedb/src/embeddings/sentence_transformers.rs index 0bc2f6042..e045e3211 100644 --- a/rust/lancedb/src/embeddings/sentence_transformers.rs +++ b/rust/lancedb/src/embeddings/sentence_transformers.rs @@ -7,7 +7,7 @@ use super::EmbeddingFunction; use arrow::{ array::{AsArray, PrimitiveBuilder}, datatypes::{ - ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt32Type, UInt8Type, + ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt8Type, UInt32Type, }, }; use arrow_array::{Array, FixedSizeListArray, PrimitiveArray}; @@ -16,8 +16,8 @@ use arrow_schema::DataType; use candle_core::{CpuStorage, Device, Layout, Storage, Tensor}; use candle_nn::VarBuilder; use candle_transformers::models::bert::{BertModel, DTYPE}; -use hf_hub::{api::sync::Api, Repo, RepoType}; -use tokenizers::{tokenizer::Tokenizer, PaddingParams}; +use hf_hub::{Repo, RepoType, api::sync::Api}; +use tokenizers::{PaddingParams, tokenizer::Tokenizer}; /// Compute embeddings using huggingface sentence-transformers. pub struct SentenceTransformersEmbeddingsBuilder { @@ -230,7 +230,7 @@ impl SentenceTransformersEmbeddings { Storage::Cpu(CpuStorage::BF16(_)) => { return Err(crate::Error::Runtime { message: "unsupported data type".to_string(), - }) + }); } _ => unreachable!("we already moved the tensor to the CPU device"), }; @@ -298,12 +298,12 @@ impl SentenceTransformersEmbeddings { DataType::Utf8View => { return Err(crate::Error::Runtime { message: "Utf8View not yet implemented".to_string(), - }) + }); } _ => { return Err(crate::Error::Runtime { message: "invalid type".to_string(), - }) + }); } }; diff --git a/rust/lancedb/src/expr.rs b/rust/lancedb/src/expr.rs index eafcdd14f..e256724a0 100644 --- a/rust/lancedb/src/expr.rs +++ b/rust/lancedb/src/expr.rs @@ -24,7 +24,7 @@ pub use sql::expr_to_sql_string; use std::sync::Arc; use arrow_schema::DataType; -use datafusion_expr::{expr_fn::cast, Expr, ScalarUDF}; +use datafusion_expr::{Expr, ScalarUDF, expr_fn::cast}; use datafusion_functions::string::expr_fn as string_expr_fn; pub use datafusion_expr::{col, lit}; diff --git a/rust/lancedb/src/index.rs b/rust/lancedb/src/index.rs index 2269ad858..a012fd4cd 100644 --- a/rust/lancedb/src/index.rs +++ b/rust/lancedb/src/index.rs @@ -9,7 +9,7 @@ use std::time::Duration; use vector::IvfFlatIndexBuilder; use crate::index::vector::IvfRqIndexBuilder; -use crate::{table::BaseTable, DistanceType, Error, Result}; +use crate::{DistanceType, Error, Result, table::BaseTable}; use self::{ scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder}, diff --git a/rust/lancedb/src/index/scalar.rs b/rust/lancedb/src/index/scalar.rs index 370dd5abb..b9292105a 100644 --- a/rust/lancedb/src/index/scalar.rs +++ b/rust/lancedb/src/index/scalar.rs @@ -51,7 +51,7 @@ pub struct BitmapIndexBuilder {} #[derive(Debug, Clone, Default, serde::Serialize)] pub struct LabelListIndexBuilder {} -pub use lance_index::scalar::inverted::query::*; pub use lance_index::scalar::FullTextSearchQuery; pub use lance_index::scalar::InvertedIndexParams as FtsIndexBuilder; pub use lance_index::scalar::InvertedIndexParams; +pub use lance_index::scalar::inverted::query::*; diff --git a/rust/lancedb/src/index/waiter.rs b/rust/lancedb/src/index/waiter.rs index a20bfa2b1..f201ed6c3 100644 --- a/rust/lancedb/src/index/waiter.rs +++ b/rust/lancedb/src/index/waiter.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors +use crate::Error; use crate::error::Result; use crate::table::BaseTable; -use crate::Error; use log::debug; use std::time::{Duration, Instant}; use tokio::time::sleep; diff --git a/rust/lancedb/src/io/object_store.rs b/rust/lancedb/src/io/object_store.rs index 508c9849d..d4739291a 100644 --- a/rust/lancedb/src/io/object_store.rs +++ b/rust/lancedb/src/io/object_store.rs @@ -5,11 +5,11 @@ use std::{fmt::Formatter, sync::Arc}; -use futures::{stream::BoxStream, TryFutureExt}; +use futures::{TryFutureExt, stream::BoxStream}; use lance::io::WrappingObjectStore; use object_store::{ - path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, - PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, + Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path, }; use async_trait::async_trait; diff --git a/rust/lancedb/src/io/object_store/io_tracking.rs b/rust/lancedb/src/io/object_store/io_tracking.rs index 7c8ae4e92..882ef51fa 100644 --- a/rust/lancedb/src/io/object_store/io_tracking.rs +++ b/rust/lancedb/src/io/object_store/io_tracking.rs @@ -10,8 +10,9 @@ use bytes::Bytes; use futures::stream::BoxStream; use lance::io::WrappingObjectStore; use object_store::{ - path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart, + path::Path, }; #[derive(Debug, Default)] diff --git a/rust/lancedb/src/query.rs b/rust/lancedb/src/query.rs index 301476935..aa77dcc39 100644 --- a/rust/lancedb/src/query.rs +++ b/rust/lancedb/src/query.rs @@ -5,26 +5,26 @@ use std::sync::Arc; use std::{future::Future, time::Duration}; use arrow::compute::concat_batches; -use arrow_array::{make_array, Array, Float16Array, Float32Array, Float64Array}; +use arrow_array::{Array, Float16Array, Float32Array, Float64Array, make_array}; use arrow_schema::{DataType, SchemaRef}; use datafusion_expr::Expr; use datafusion_physical_plan::ExecutionPlan; -use futures::{stream, try_join, FutureExt, TryFutureExt, TryStreamExt}; +use futures::{FutureExt, TryFutureExt, TryStreamExt, stream, try_join}; use half::f16; -use lance::dataset::{scanner::DatasetRecordBatchStream, ROW_ID}; +use lance::dataset::{ROW_ID, scanner::DatasetRecordBatchStream}; use lance_arrow::RecordBatchExt; use lance_datafusion::exec::execute_plan; -use lance_index::scalar::inverted::SCORE_COL; use lance_index::scalar::FullTextSearchQuery; +use lance_index::scalar::inverted::SCORE_COL; use lance_index::vector::DIST_COL; use lance_io::stream::RecordBatchStreamAdapter; +use crate::DistanceType; use crate::error::{Error, Result}; use crate::rerankers::rrf::RRFReranker; -use crate::rerankers::{check_reranker_result, NormalizeMethod, Reranker}; +use crate::rerankers::{NormalizeMethod, Reranker, check_reranker_result}; use crate::table::BaseTable; use crate::utils::TimeoutStream; -use crate::DistanceType; use crate::{arrow::SendableRecordBatchStream, table::AnyQuery}; mod hybrid; @@ -161,10 +161,11 @@ impl IntoQueryVector for &dyn Array { if data_type != self.data_type() { Err(Error::InvalidInput { message: format!( - "failed to create query vector, the input data type was {:?} but the expected data type was {:?}", - self.data_type(), - data_type - )}) + "failed to create query vector, the input data type was {:?} but the expected data type was {:?}", + self.data_type(), + data_type + ), + }) } else { let data = self.to_data(); Ok(make_array(data)) @@ -186,7 +187,7 @@ impl IntoQueryVector for &[f16] { DataType::Float32 => { let arr: Vec = self.iter().map(|x| f32::from(*x)).collect(); Ok(Arc::new(Float32Array::from(arr))) - }, + } DataType::Float64 => { let arr: Vec = self.iter().map(|x| f64::from(*x)).collect(); Ok(Arc::new(Float64Array::from(arr))) @@ -194,8 +195,7 @@ impl IntoQueryVector for &[f16] { _ => Err(Error::InvalidInput { message: format!( "failed to create query vector, the input data type was &[f16] but the embedding model \"{}\" expected data type {:?}", - embedding_model_label, - data_type + embedding_model_label, data_type ), }), } @@ -216,7 +216,7 @@ impl IntoQueryVector for &[f32] { DataType::Float32 => { let arr: Vec = self.to_vec(); Ok(Arc::new(Float32Array::from(arr))) - }, + } DataType::Float64 => { let arr: Vec = self.iter().map(|x| *x as f64).collect(); Ok(Arc::new(Float64Array::from(arr))) @@ -224,8 +224,7 @@ impl IntoQueryVector for &[f32] { _ => Err(Error::InvalidInput { message: format!( "failed to create query vector, the input data type was &[f32] but the embedding model \"{}\" expected data type {:?}", - embedding_model_label, - data_type + embedding_model_label, data_type ), }), } @@ -239,26 +238,25 @@ impl IntoQueryVector for &[f64] { embedding_model_label: &str, ) -> Result> { match data_type { - DataType::Float16 => { - let arr: Vec = self.iter().map(|x| f16::from_f64(*x)).collect(); - Ok(Arc::new(Float16Array::from(arr))) - } - DataType::Float32 => { - let arr: Vec = self.iter().map(|x| *x as f32).collect(); - Ok(Arc::new(Float32Array::from(arr))) - }, - DataType::Float64 => { - let arr: Vec = self.to_vec(); - Ok(Arc::new(Float64Array::from(arr))) - } - _ => Err(Error::InvalidInput { - message: format!( - "failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}", - embedding_model_label, - data_type - ), - }), + DataType::Float16 => { + let arr: Vec = self.iter().map(|x| f16::from_f64(*x)).collect(); + Ok(Arc::new(Float16Array::from(arr))) } + DataType::Float32 => { + let arr: Vec = self.iter().map(|x| *x as f32).collect(); + Ok(Arc::new(Float32Array::from(arr))) + } + DataType::Float64 => { + let arr: Vec = self.to_vec(); + Ok(Arc::new(Float64Array::from(arr))) + } + _ => Err(Error::InvalidInput { + message: format!( + "failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}", + embedding_model_label, data_type + ), + }), + } } } @@ -1011,13 +1009,13 @@ impl VectorQuery { message: "minimum_nprobes must be greater than 0".to_string(), }); } - if let Some(maximum_nprobes) = self.request.maximum_nprobes { - if minimum_nprobes > maximum_nprobes { - return Err(Error::InvalidInput { - message: "minimum_nprobes must be less than or equal to maximum_nprobes" - .to_string(), - }); - } + if let Some(maximum_nprobes) = self.request.maximum_nprobes + && minimum_nprobes > maximum_nprobes + { + return Err(Error::InvalidInput { + message: "minimum_nprobes must be less than or equal to maximum_nprobes" + .to_string(), + }); } self.request.minimum_nprobes = minimum_nprobes; Ok(self) @@ -1407,8 +1405,8 @@ mod tests { use super::*; use arrow::{array::downcast_array, compute::concat_batches, datatypes::Int32Type}; use arrow_array::{ - cast::AsArray, types::Float32Type, FixedSizeListArray, Float32Array, Int32Array, - RecordBatch, StringArray, + FixedSizeListArray, Float32Array, Int32Array, RecordBatch, StringArray, cast::AsArray, + types::Float32Type, }; use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema}; use futures::{StreamExt, TryStreamExt}; @@ -1416,7 +1414,7 @@ mod tests { use rand::seq::IndexedRandom; use tempfile::tempdir; - use crate::{connect, database::CreateTableMode, index::Index, Table}; + use crate::{Table, connect, database::CreateTableMode, index::Index}; #[tokio::test] async fn test_setters_getters() { @@ -1754,11 +1752,13 @@ mod tests { .limit(1) .execute() .await; - assert!(error_result - .err() - .unwrap() - .to_string() - .contains("No vector column found to match with the query vector dimension: 3")); + assert!( + error_result + .err() + .unwrap() + .to_string() + .contains("No vector column found to match with the query vector dimension: 3") + ); } #[tokio::test] @@ -2010,7 +2010,7 @@ mod tests { // Sample 1 - 3 tokens for each string value let tokens = ["a", "b", "c", "d", "e"]; - use rand::{rng, Rng}; + use rand::{Rng, rng}; let mut rng = rng(); let text: StringArray = (0..nrows) diff --git a/rust/lancedb/src/query/hybrid.rs b/rust/lancedb/src/query/hybrid.rs index 9e85e870b..35c64d4a9 100644 --- a/rust/lancedb/src/query/hybrid.rs +++ b/rust/lancedb/src/query/hybrid.rs @@ -5,7 +5,7 @@ use arrow::compute::{ kernels::numeric::{div, sub}, max, min, }; -use arrow_array::{cast::downcast_array, Float32Array, RecordBatch}; +use arrow_array::{Float32Array, RecordBatch, cast::downcast_array}; use arrow_schema::{DataType, Field, Schema, SortOptions}; use lance::dataset::ROW_ID; use lance_index::{scalar::inverted::SCORE_COL, vector::DIST_COL}; @@ -253,7 +253,10 @@ mod test { let result = rank(batch.clone(), "bad_col", None); match result { Err(Error::InvalidInput { message }) => { - assert_eq!("expected column bad_col not found in rank. found columns [\"name\", \"score\"]", message); + assert_eq!( + "expected column bad_col not found in rank. found columns [\"name\", \"score\"]", + message + ); } _ => { panic!("expected invalid input error, received {:?}", result) diff --git a/rust/lancedb/src/remote/client.rs b/rust/lancedb/src/remote/client.rs index de71727d9..7304c18c2 100644 --- a/rust/lancedb/src/remote/client.rs +++ b/rust/lancedb/src/remote/client.rs @@ -4,8 +4,8 @@ use http::HeaderName; use log::debug; use reqwest::{ - header::{HeaderMap, HeaderValue}, Body, Request, RequestBuilder, Response, + header::{HeaderMap, HeaderValue}, }; use std::{collections::HashMap, future::Future, str::FromStr, sync::Arc, time::Duration}; @@ -650,14 +650,13 @@ impl RestfulLanceDbClient { pub fn extract_request_id(&self, request: &mut Request) -> String { // Set a request id. // TODO: allow the user to supply this, through middleware? - let request_id = if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) { + if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) { request_id.to_str().unwrap().to_string() } else { let request_id = uuid::Uuid::new_v4().to_string(); self.set_request_id(request, &request_id); request_id - }; - request_id + } } /// Set the request ID header diff --git a/rust/lancedb/src/remote/db.rs b/rust/lancedb/src/remote/db.rs index 38541f079..b615fd3bc 100644 --- a/rust/lancedb/src/remote/db.rs +++ b/rust/lancedb/src/remote/db.rs @@ -16,6 +16,7 @@ use lance_namespace::models::{ ListNamespacesResponse, ListTablesRequest, ListTablesResponse, }; +use crate::Error; use crate::database::{ CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions, OpenTableRequest, ReadConsistency, TableNamesRequest, @@ -23,12 +24,11 @@ use crate::database::{ use crate::error::Result; use crate::remote::util::stream_as_body; use crate::table::BaseTable; -use crate::Error; +use super::ARROW_STREAM_CONTENT_TYPE; use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender}; use super::table::RemoteTable; use super::util::parse_server_version; -use super::ARROW_STREAM_CONTENT_TYPE; // Request structure for the remote clone table API #[derive(serde::Serialize)] @@ -249,9 +249,9 @@ impl RemoteDatabase { #[cfg(all(test, feature = "remote"))] mod test_utils { use super::*; + use crate::remote::ClientConfig; use crate::remote::client::test_utils::MockSender; use crate::remote::client::test_utils::{client_with_handler, client_with_handler_and_config}; - use crate::remote::ClientConfig; impl RemoteDatabase { pub fn new_mock(handler: F) -> Self @@ -799,9 +799,9 @@ mod tests { use crate::connection::ConnectBuilder; use crate::{ - database::CreateTableMode, - remote::{ClientConfig, HeaderProvider, ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE}, Connection, Error, + database::CreateTableMode, + remote::{ARROW_STREAM_CONTENT_TYPE, ClientConfig, HeaderProvider, JSON_CONTENT_TYPE}, }; #[test] diff --git a/rust/lancedb/src/remote/retry.rs b/rust/lancedb/src/remote/retry.rs index 8f34195f6..1a354eebb 100644 --- a/rust/lancedb/src/remote/retry.rs +++ b/rust/lancedb/src/remote/retry.rs @@ -1,8 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors -use crate::remote::RetryConfig; use crate::Error; +use crate::remote::RetryConfig; use log::debug; use std::time::Duration; diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 6e290c4df..a633e00ab 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -6,15 +6,14 @@ pub mod insert; use self::insert::RemoteInsertExec; use crate::expr::expr_to_sql_string; +use super::ARROW_STREAM_CONTENT_TYPE; use super::client::RequestResultExt; use super::client::{HttpSend, RestfulLanceDbClient, Sender}; use super::db::ServerVersion; -use super::ARROW_STREAM_CONTENT_TYPE; -use crate::index::waiter::wait_for_index; use crate::index::Index; use crate::index::IndexStatistics; +use crate::index::waiter::wait_for_index; use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest}; -use crate::table::query::create_multi_vector_plan; use crate::table::AddColumnsResult; use crate::table::AddResult; use crate::table::AlterColumnsResult; @@ -23,19 +22,20 @@ use crate::table::DropColumnsResult; use crate::table::MergeResult; use crate::table::Tags; use crate::table::UpdateResult; +use crate::table::query::create_multi_vector_plan; use crate::table::{AnyQuery, Filter, TableStatistics}; use crate::utils::background_cache::BackgroundCache; use crate::utils::{supported_btree_data_type, supported_vector_data_type}; +use crate::{DistanceType, Error}; use crate::{ error::Result, index::{IndexBuilder, IndexConfig}, query::QueryExecutionOptions, table::{ - merge::MergeInsertBuilder, AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats, - TableDefinition, UpdateBuilder, + AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats, TableDefinition, UpdateBuilder, + merge::MergeInsertBuilder, }, }; -use crate::{DistanceType, Error}; use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader}; use arrow_ipc::reader::FileReader; use arrow_schema::{DataType, SchemaRef}; @@ -50,7 +50,7 @@ use lance::arrow::json::{JsonDataType, JsonSchema}; use lance::dataset::refs::TagContents; use lance::dataset::scanner::DatasetRecordBatchStream; use lance::dataset::{ColumnAlteration, NewColumnTransform, Version}; -use lance_datafusion::exec::{execute_plan, OneShotExec}; +use lance_datafusion::exec::{OneShotExec, execute_plan}; use reqwest::{RequestBuilder, Response}; use serde::{Deserialize, Serialize}; use serde_json::Number; @@ -612,8 +612,8 @@ impl RemoteTable { message: format!( "Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.", version - ) - }) + ), + }), } } @@ -697,10 +697,10 @@ impl RemoteTable { Error::Retry { status_code, .. } => *status_code, _ => None, }; - if let Some(status_code) = status_code { - if Self::should_invalidate_cache_for_status(status_code) { - self.invalidate_schema_cache(); - } + if let Some(status_code) = status_code + && Self::should_invalidate_cache_for_status(status_code) + { + self.invalidate_schema_cache(); } } } @@ -783,9 +783,9 @@ impl std::fmt::Display for RemoteTable { #[cfg(all(test, feature = "remote"))] mod test_utils { use super::*; - use crate::remote::client::test_utils::client_with_handler; - use crate::remote::client::test_utils::{client_with_handler_and_config, MockSender}; use crate::remote::ClientConfig; + use crate::remote::client::test_utils::client_with_handler; + use crate::remote::client::test_utils::{MockSender, client_with_handler_and_config}; impl RemoteTable { pub fn new_mock(name: String, handler: F, version: Option) -> Self @@ -1251,13 +1251,13 @@ impl BaseTable for RemoteTable { 0 => { return Err(Error::InvalidInput { message: "No columns specified".into(), - }) + }); } 1 => index.columns.pop().unwrap(), _ => { return Err(Error::NotSupported { message: "Indices over multiple columns not yet supported".into(), - }) + }); } }; let mut body = serde_json::json!({ @@ -1320,7 +1320,7 @@ impl BaseTable for RemoteTable { _ => { return Err(Error::NotSupported { message: "Index type not supported".into(), - }) + }); } }; @@ -1771,8 +1771,8 @@ impl TryFrom for MergeInsertRequest { #[cfg(test)] mod tests { - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; use std::{collections::HashMap, pin::Pin}; @@ -1781,10 +1781,10 @@ mod tests { use crate::table::AddDataMode; use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type}; - use arrow_array::{record_batch, Int32Array, RecordBatch, RecordBatchIterator}; + use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, record_batch}; use arrow_schema::{DataType, Field, Schema}; use chrono::{DateTime, Utc}; - use futures::{future::BoxFuture, StreamExt, TryFutureExt}; + use futures::{StreamExt, TryFutureExt, future::BoxFuture}; use lance_index::scalar::inverted::query::MatchQuery; use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams}; use reqwest::Body; @@ -1794,14 +1794,14 @@ mod tests { use crate::index::vector::{ IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder, }; - use crate::remote::db::DEFAULT_SERVER_VERSION; use crate::remote::JSON_CONTENT_TYPE; + use crate::remote::db::DEFAULT_SERVER_VERSION; use crate::utils::background_cache::clock; use crate::{ - index::{vector::IvfPqIndexBuilder, Index, IndexStatistics, IndexType}, + DistanceType, Error, Table, + index::{Index, IndexStatistics, IndexType, vector::IvfPqIndexBuilder}, query::{ExecutableQuery, QueryBase}, remote::ARROW_FILE_CONTENT_TYPE, - DistanceType, Error, Table, }; #[tokio::test] @@ -2030,11 +2030,13 @@ mod tests { .unwrap(), "/v1/table/my_table/insert/" => { assert_eq!(request.method(), "POST"); - assert!(request - .url() - .query_pairs() - .filter(|(k, _)| k == "mode") - .all(|(_, v)| v == "append")); + assert!( + request + .url() + .query_pairs() + .filter(|(k, _)| k == "mode") + .all(|(_, v)| v == "append") + ); assert_eq!( request.headers().get("Content-Type").unwrap(), ARROW_STREAM_CONTENT_TYPE @@ -3592,7 +3594,7 @@ mod tests { } fn _make_table_with_indices(unindexed_rows: usize) -> Table { - let table = Table::new_with_handler("my_table", move |request| { + Table::new_with_handler("my_table", move |request| { assert_eq!(request.method(), "POST"); let response_body = match request.url().path() { @@ -3636,8 +3638,7 @@ mod tests { let body = serde_json::to_string(&response_body).unwrap(); let status = if body == "null" { 404 } else { 200 }; http::Response::builder().status(status).body(body).unwrap() - }); - table + }) } #[tokio::test] @@ -3848,8 +3849,8 @@ mod tests { #[tokio::test] async fn test_uri_caching() { - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; let call_count = Arc::new(AtomicUsize::new(0)); let call_count_clone = call_count.clone(); diff --git a/rust/lancedb/src/remote/table/insert.rs b/rust/lancedb/src/remote/table/insert.rs index da8cb9af0..c8637281e 100644 --- a/rust/lancedb/src/remote/table/insert.rs +++ b/rust/lancedb/src/remote/table/insert.rs @@ -16,12 +16,12 @@ use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, Plan use futures::StreamExt; use http::header::CONTENT_TYPE; +use crate::Error; +use crate::remote::ARROW_STREAM_CONTENT_TYPE; use crate::remote::client::{HttpSend, RestfulLanceDbClient, Sender}; use crate::remote::table::RemoteTable; -use crate::remote::ARROW_STREAM_CONTENT_TYPE; -use crate::table::datafusion::insert::COUNT_SCHEMA; use crate::table::AddResult; -use crate::Error; +use crate::table::datafusion::insert::COUNT_SCHEMA; /// ExecutionPlan for inserting data into a remote LanceDB table. /// @@ -309,12 +309,12 @@ mod tests { use arrow_schema::{DataType, Field, Schema as ArrowSchema}; use datafusion::prelude::SessionContext; use datafusion_catalog::MemTable; - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + use crate::Table; use crate::remote::ARROW_STREAM_CONTENT_TYPE; use crate::table::datafusion::BaseTableAdapter; - use crate::Table; fn schema_json() -> &'static str { r#"{"fields": [{"name": "id", "type": {"type": "int32"}, "nullable": true}]}"# diff --git a/rust/lancedb/src/remote/util.rs b/rust/lancedb/src/remote/util.rs index 51ddb97de..1c368e2f5 100644 --- a/rust/lancedb/src/remote/util.rs +++ b/rust/lancedb/src/remote/util.rs @@ -5,7 +5,7 @@ use arrow_ipc::CompressionType; use futures::{Stream, StreamExt}; use reqwest::Response; -use crate::{arrow::SendableRecordBatchStream, Result}; +use crate::{Result, arrow::SendableRecordBatchStream}; use super::db::ServerVersion; diff --git a/rust/lancedb/src/rerankers/rrf.rs b/rust/lancedb/src/rerankers/rrf.rs index 7b5d83614..4da1927be 100644 --- a/rust/lancedb/src/rerankers/rrf.rs +++ b/rust/lancedb/src/rerankers/rrf.rs @@ -14,7 +14,7 @@ use async_trait::async_trait; use lance::dataset::ROW_ID; use crate::error::{Error, Result}; -use crate::rerankers::{Reranker, RELEVANCE_SCORE}; +use crate::rerankers::{RELEVANCE_SCORE, Reranker}; /// Reranks the results using Reciprocal Rank Fusion(RRF) algorithm based /// on the scores of vector and FTS search. diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index e92a817b6..611ec1f26 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -8,29 +8,29 @@ use arrow_schema::{DataType, Field, Schema, SchemaRef}; use async_trait::async_trait; use datafusion_execution::TaskContext; use datafusion_expr::Expr; -use datafusion_physical_plan::display::DisplayableExecutionPlan; use datafusion_physical_plan::ExecutionPlan; -use futures::stream::FuturesUnordered; +use datafusion_physical_plan::display::DisplayableExecutionPlan; use futures::StreamExt; -use lance::dataset::builder::DatasetBuilder; +use futures::stream::FuturesUnordered; pub use lance::dataset::ColumnAlteration; pub use lance::dataset::NewColumnTransform; pub use lance::dataset::ReadParams; pub use lance::dataset::Version; use lance::dataset::WriteMode; +use lance::dataset::builder::DatasetBuilder; use lance::dataset::{InsertBuilder, WriteParams}; -use lance::index::vector::utils::infer_vector_dim; use lance::index::vector::VectorIndexParams; +use lance::index::vector::utils::infer_vector_dim; use lance::io::{ObjectStoreParams, WrappingObjectStore}; use lance_datafusion::utils::StreamingWriteSource; +use lance_index::DatasetIndexExt; +use lance_index::IndexType; use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams}; use lance_index::vector::bq::RQBuildParams; use lance_index::vector::hnsw::builder::HnswBuildParams; use lance_index::vector::ivf::IvfBuildParams; use lance_index::vector::pq::PQBuildParams; use lance_index::vector::sq::builder::SQBuildParams; -use lance_index::DatasetIndexExt; -use lance_index::IndexType; use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor}; pub use query::AnyQuery; @@ -43,19 +43,19 @@ use std::format; use std::path::Path; use std::sync::Arc; -use crate::data::scannable::{estimate_write_partitions, PeekedScannable, Scannable}; +use crate::data::scannable::{PeekedScannable, Scannable, estimate_write_partitions}; use crate::database::Database; use crate::embeddings::{EmbeddingDefinition, EmbeddingRegistry, MemoryRegistry}; use crate::error::{Error, Result}; -use crate::index::vector::VectorIndex; use crate::index::IndexStatistics; -use crate::index::{vector::suggested_num_sub_vectors, Index, IndexBuilder}; +use crate::index::vector::VectorIndex; +use crate::index::{Index, IndexBuilder, vector::suggested_num_sub_vectors}; use crate::index::{IndexConfig, IndexStatisticsImpl}; use crate::query::{IntoQueryVector, Query, QueryExecutionOptions, TakeQuery, VectorQuery}; use crate::table::datafusion::insert::InsertExec; use crate::utils::{ - supported_bitmap_data_type, supported_btree_data_type, supported_fts_data_type, - supported_label_list_data_type, supported_vector_data_type, PatchReadParam, PatchWriteParam, + PatchReadParam, PatchWriteParam, supported_bitmap_data_type, supported_btree_data_type, + supported_fts_data_type, supported_label_list_data_type, supported_vector_data_type, }; use self::dataset::DatasetConsistencyWrapper; @@ -2555,22 +2555,21 @@ pub struct FragmentSummaryStats { #[cfg(test)] #[allow(deprecated)] mod tests { - use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; use arrow_array::{ - builder::{ListBuilder, StringBuilder}, Array, BooleanArray, FixedSizeListArray, Int32Array, LargeStringArray, RecordBatch, RecordBatchIterator, RecordBatchReader, StringArray, + builder::{ListBuilder, StringBuilder}, }; use arrow_array::{BinaryArray, LargeBinaryArray}; use arrow_data::ArrayDataBuilder; use arrow_schema::{DataType, Field, Schema}; use futures::TryStreamExt; - use lance::io::{ObjectStoreParams, WrappingObjectStore}; use lance::Dataset; - use rand::Rng; + use lance::io::{ObjectStoreParams, WrappingObjectStore}; use tempfile::tempdir; use super::*; @@ -2777,9 +2776,8 @@ mod tests { false, )])); - let mut rng = rand::thread_rng(); let float_arr = Float32Array::from( - repeat_with(|| rng.gen::()) + repeat_with(rand::random::) .take(512 * dimension as usize) .collect::>(), ); @@ -2884,8 +2882,8 @@ mod tests { .await .unwrap(); - use lance::index::vector::ivf::v2::IvfPq as LanceIvfPq; use lance::index::DatasetIndexInternalExt; + use lance::index::vector::ivf::v2::IvfPq as LanceIvfPq; use lance_index::metrics::NoOpMetricsCollector; use lance_index::vector::VectorIndex as LanceVectorIndex; @@ -2933,9 +2931,8 @@ mod tests { false, )])); - let mut rng = rand::thread_rng(); let float_arr = Float32Array::from( - repeat_with(|| rng.gen::()) + repeat_with(rand::random::) .take(512 * dimension as usize) .collect::>(), ); @@ -2993,9 +2990,8 @@ mod tests { false, )])); - let mut rng = rand::thread_rng(); let float_arr = Float32Array::from( - repeat_with(|| rng.gen::()) + repeat_with(rand::random::) .take(512 * dimension as usize) .collect::>(), ); @@ -3256,16 +3252,20 @@ mod tests { .unwrap(); // Can not create btree or bitmap index on list column - assert!(table - .create_index(&["tags"], Index::BTree(Default::default())) - .execute() - .await - .is_err()); - assert!(table - .create_index(&["tags"], Index::Bitmap(Default::default())) - .execute() - .await - .is_err()); + assert!( + table + .create_index(&["tags"], Index::BTree(Default::default())) + .execute() + .await + .is_err() + ); + assert!( + table + .create_index(&["tags"], Index::Bitmap(Default::default())) + .execute() + .await + .is_err() + ); // Create bitmap index on the "category" column table diff --git a/rust/lancedb/src/table/add_data.rs b/rust/lancedb/src/table/add_data.rs index 5c83a8154..5921c54ea 100644 --- a/rust/lancedb/src/table/add_data.rs +++ b/rust/lancedb/src/table/add_data.rs @@ -7,8 +7,8 @@ use arrow_schema::{DataType, Fields, Schema}; use lance::dataset::WriteMode; use serde::{Deserialize, Serialize}; -use crate::data::scannable::scannable_with_embeddings; use crate::data::scannable::Scannable; +use crate::data::scannable::scannable_with_embeddings; use crate::embeddings::EmbeddingRegistry; use crate::table::datafusion::cast::cast_to_table_schema; use crate::table::datafusion::reject_nan::reject_nan_vectors; @@ -204,13 +204,14 @@ mod tests { use arrow::datatypes::Float64Type; use arrow_array::{ - record_batch, FixedSizeListArray, Float32Array, Int32Array, LargeStringArray, ListArray, - RecordBatch, RecordBatchIterator, + FixedSizeListArray, Float32Array, Int32Array, LargeStringArray, ListArray, RecordBatch, + RecordBatchIterator, record_batch, }; use arrow_schema::{ArrowError, DataType, Field, Schema}; use futures::TryStreamExt; use lance::dataset::{WriteMode, WriteParams}; + use crate::Error; use crate::arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}; use crate::connect; use crate::data::scannable::Scannable; @@ -220,9 +221,8 @@ mod tests { use crate::query::{ExecutableQuery, QueryBase, Select}; use crate::table::add_data::NaNVectorBehavior; use crate::table::{ColumnDefinition, ColumnKind, Table, TableDefinition, WriteOptions}; - use crate::test_utils::embeddings::MockEmbed; use crate::test_utils::TestCustomError; - use crate::Error; + use crate::test_utils::embeddings::MockEmbed; use super::AddDataMode; diff --git a/rust/lancedb/src/table/datafusion.rs b/rust/lancedb/src/table/datafusion.rs index aaa5b8d7f..bd93dd05d 100644 --- a/rust/lancedb/src/table/datafusion.rs +++ b/rust/lancedb/src/table/datafusion.rs @@ -17,17 +17,17 @@ use async_trait::async_trait; use datafusion_catalog::{Session, TableProvider}; use datafusion_common::{DataFusionError, Result as DataFusionResult, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_expr::{dml::InsertOp, Expr, TableProviderFilterPushDown, TableType}; +use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType, dml::InsertOp}; use datafusion_physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, stream::RecordBatchStreamAdapter, }; use futures::{TryFutureExt, TryStreamExt}; use lance::dataset::{WriteMode, WriteParams}; use super::{AnyQuery, BaseTable}; use crate::{ - query::{QueryExecutionOptions, QueryFilter, QueryRequest, Select}, Result, + query::{QueryExecutionOptions, QueryFilter, QueryRequest, Select}, }; use arrow_schema::{DataType, Field}; use lance_index::scalar::FullTextSearchQuery; @@ -268,7 +268,7 @@ impl TableProvider for BaseTableAdapter { InsertOp::Replace => { return Err(DataFusionError::NotImplemented( "Replace mode is not supported for LanceDB tables".to_string(), - )) + )); } }; @@ -300,13 +300,13 @@ pub mod tests { use datafusion_catalog::TableProvider; use datafusion_common::stats::Precision; use datafusion_execution::SendableRecordBatchStream; - use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder}; + use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, col, lit}; use futures::{StreamExt, TryStreamExt}; use tempfile::tempdir; use crate::{ connect, - index::{scalar::BTreeIndexBuilder, Index}, + index::{Index, scalar::BTreeIndexBuilder}, table::datafusion::BaseTableAdapter, }; diff --git a/rust/lancedb/src/table/datafusion/cast.rs b/rust/lancedb/src/table/datafusion/cast.rs index 76459ea87..7220de96d 100644 --- a/rust/lancedb/src/table/datafusion/cast.rs +++ b/rust/lancedb/src/table/datafusion/cast.rs @@ -5,10 +5,10 @@ use std::sync::Arc; use arrow_schema::{DataType, Field, FieldRef, Fields, Schema}; use datafusion::functions::core::{get_field, named_struct}; -use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; -use datafusion_physical_expr::expressions::{cast, Literal}; +use datafusion_common::config::ConfigOptions; use datafusion_physical_expr::ScalarFunctionExpr; +use datafusion_physical_expr::expressions::{Literal, cast}; use datafusion_physical_plan::expressions::Column; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::{ExecutionPlan, PhysicalExpr}; diff --git a/rust/lancedb/src/table/datafusion/insert.rs b/rust/lancedb/src/table/datafusion/insert.rs index ae0489156..4c3d66195 100644 --- a/rust/lancedb/src/table/datafusion/insert.rs +++ b/rust/lancedb/src/table/datafusion/insert.rs @@ -16,9 +16,9 @@ use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, }; +use lance::Dataset; use lance::dataset::transaction::{Operation, Transaction}; use lance::dataset::{CommitBuilder, InsertBuilder, WriteParams}; -use lance::Dataset; use lance_table::format::Fragment; use crate::table::dataset::DatasetConsistencyWrapper; @@ -195,13 +195,13 @@ impl ExecutionPlan for InsertExec { } }; - if let Some(transactions) = to_commit { - if let Some(merged_txn) = merge_transactions(transactions) { - let new_dataset = CommitBuilder::new(dataset.clone()) - .execute(merged_txn) - .await?; - ds_wrapper.update(new_dataset); - } + if let Some(transactions) = to_commit + && let Some(merged_txn) = merge_transactions(transactions) + { + let new_dataset = CommitBuilder::new(dataset.clone()) + .execute(merged_txn) + .await?; + ds_wrapper.update(new_dataset); } Ok(RecordBatch::try_new( @@ -222,7 +222,7 @@ mod tests { use std::vec; use super::*; - use arrow_array::{record_batch, RecordBatchIterator}; + use arrow_array::{RecordBatchIterator, record_batch}; use datafusion::prelude::SessionContext; use datafusion_catalog::MemTable; use tempfile::tempdir; diff --git a/rust/lancedb/src/table/datafusion/scannable_exec.rs b/rust/lancedb/src/table/datafusion/scannable_exec.rs index 1dafa55de..eb128ac18 100644 --- a/rust/lancedb/src/table/datafusion/scannable_exec.rs +++ b/rust/lancedb/src/table/datafusion/scannable_exec.rs @@ -4,11 +4,11 @@ use core::fmt; use std::sync::{Arc, Mutex}; -use datafusion_common::{stats::Precision, DataFusionError, Result as DFResult, Statistics}; +use datafusion_common::{DataFusionError, Result as DFResult, Statistics, stats::Precision}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; use datafusion_physical_plan::{ - execution_plan::EmissionType, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, execution_plan::EmissionType, }; use crate::{arrow::SendableRecordBatchStreamExt, data::scannable::Scannable}; diff --git a/rust/lancedb/src/table/datafusion/udtf/fts.rs b/rust/lancedb/src/table/datafusion/udtf/fts.rs index 2f8b8beb8..5b50ddfa3 100644 --- a/rust/lancedb/src/table/datafusion/udtf/fts.rs +++ b/rust/lancedb/src/table/datafusion/udtf/fts.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use datafusion::catalog::TableFunctionImpl; use datafusion_catalog::TableProvider; -use datafusion_common::{plan_err, DataFusionError, Result as DataFusionResult, ScalarValue}; +use datafusion_common::{DataFusionError, Result as DataFusionResult, ScalarValue, plan_err}; use datafusion_expr::Expr; use lance_index::scalar::FullTextSearchQuery; @@ -93,9 +93,9 @@ pub fn from_json(json: &str) -> crate::Result() { - println!("{}", plan_str.value(row_idx)); - } + if let Some(col) = batch.column_by_name("plan") + && let Some(plan_str) = col.as_any().downcast_ref::() + { + println!("{}", plan_str.value(row_idx)); } } } @@ -229,10 +229,10 @@ mod tests { let explain_analyze_results = explain_analyze_df.collect().await.unwrap(); for batch in &explain_analyze_results { for row_idx in 0..batch.num_rows() { - if let Some(col) = batch.column_by_name("plan") { - if let Some(plan_str) = col.as_any().downcast_ref::() { - println!("{}", plan_str.value(row_idx)); - } + if let Some(col) = batch.column_by_name("plan") + && let Some(plan_str) = col.as_any().downcast_ref::() + { + println!("{}", plan_str.value(row_idx)); } } } diff --git a/rust/lancedb/src/table/dataset.rs b/rust/lancedb/src/table/dataset.rs index 9c4580891..89fcf55dd 100644 --- a/rust/lancedb/src/table/dataset.rs +++ b/rust/lancedb/src/table/dataset.rs @@ -6,9 +6,9 @@ use std::{ time::Duration, }; -use lance::{dataset::refs, Dataset}; +use lance::{Dataset, dataset::refs}; -use crate::{error::Result, utils::background_cache::BackgroundCache, Error}; +use crate::{Error, error::Result, utils::background_cache::BackgroundCache}; /// A wrapper around a [Dataset] that provides consistency checks. /// diff --git a/rust/lancedb/src/table/delete.rs b/rust/lancedb/src/table/delete.rs index add8e9e50..d58263026 100644 --- a/rust/lancedb/src/table/delete.rs +++ b/rust/lancedb/src/table/delete.rs @@ -36,7 +36,7 @@ pub(crate) async fn execute_delete(table: &NativeTable, predicate: &str) -> Resu #[cfg(test)] mod tests { use crate::connect; - use arrow_array::{record_batch, Int32Array, RecordBatch}; + use arrow_array::{Int32Array, RecordBatch, record_batch}; use arrow_schema::{DataType, Field, Schema}; use std::sync::Arc; diff --git a/rust/lancedb/src/table/optimize.rs b/rust/lancedb/src/table/optimize.rs index e8946b85f..3d9a7d476 100644 --- a/rust/lancedb/src/table/optimize.rs +++ b/rust/lancedb/src/table/optimize.rs @@ -9,9 +9,9 @@ use std::sync::Arc; use lance::dataset::cleanup::RemovalStats; -use lance::dataset::optimize::{compact_files, CompactionMetrics, IndexRemapperOptions}; -use lance_index::optimize::OptimizeOptions; +use lance::dataset::optimize::{CompactionMetrics, IndexRemapperOptions, compact_files}; use lance_index::DatasetIndexExt; +use lance_index::optimize::OptimizeOptions; use log::info; pub use chrono::Duration; @@ -213,7 +213,7 @@ mod tests { use std::sync::Arc; use crate::connect; - use crate::index::{scalar::BTreeIndexBuilder, Index}; + use crate::index::{Index, scalar::BTreeIndexBuilder}; use crate::query::ExecutableQuery; use crate::table::{CompactionOptions, OptimizeAction, OptimizeStats}; use futures::TryStreamExt; diff --git a/rust/lancedb/src/table/query.rs b/rust/lancedb/src/table/query.rs index 203525180..e168f7a8f 100644 --- a/rust/lancedb/src/table/query.rs +++ b/rust/lancedb/src/table/query.rs @@ -7,26 +7,26 @@ use super::NativeTable; use crate::error::{Error, Result}; use crate::expr::expr_to_sql_string; use crate::query::{ - QueryExecutionOptions, QueryFilter, QueryRequest, Select, VectorQueryRequest, DEFAULT_TOP_K, + DEFAULT_TOP_K, QueryExecutionOptions, QueryFilter, QueryRequest, Select, VectorQueryRequest, }; -use crate::utils::{default_vector_column, TimeoutStream}; +use crate::utils::{TimeoutStream, default_vector_column}; use arrow::array::{AsArray, FixedSizeListBuilder, Float32Builder}; use arrow::datatypes::{Float32Type, UInt8Type}; use arrow_array::Array; use arrow_schema::{DataType, Schema}; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_plan::ExecutionPlan; use futures::future::try_join_all; use lance::dataset::scanner::DatasetRecordBatchStream; use lance::dataset::scanner::Scanner; use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan}; +use lance_namespace::LanceNamespace; use lance_namespace::models::{ QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns, QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery, }; -use lance_namespace::LanceNamespace; #[derive(Debug, Clone)] pub enum AnyQuery { diff --git a/rust/lancedb/src/table/schema_evolution.rs b/rust/lancedb/src/table/schema_evolution.rs index 6adf7f3a8..c9bf9d7a8 100644 --- a/rust/lancedb/src/table/schema_evolution.rs +++ b/rust/lancedb/src/table/schema_evolution.rs @@ -92,7 +92,7 @@ pub(crate) async fn execute_drop_columns( #[cfg(test)] mod tests { - use arrow_array::{record_batch, Int32Array, StringArray}; + use arrow_array::{Int32Array, StringArray, record_batch}; use arrow_schema::DataType; use futures::TryStreamExt; use lance::dataset::ColumnAlteration; diff --git a/rust/lancedb/src/table/update.rs b/rust/lancedb/src/table/update.rs index 07ff7fd8e..61eb93992 100644 --- a/rust/lancedb/src/table/update.rs +++ b/rust/lancedb/src/table/update.rs @@ -115,9 +115,9 @@ mod tests { use crate::query::QueryBase; use crate::query::{ExecutableQuery, Select}; use arrow_array::{ - record_batch, Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array, - Float64Array, Int32Array, Int64Array, LargeStringArray, RecordBatch, StringArray, - TimestampMillisecondArray, TimestampNanosecondArray, UInt32Array, + Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array, Float64Array, + Int32Array, Int64Array, LargeStringArray, RecordBatch, StringArray, + TimestampMillisecondArray, TimestampNanosecondArray, UInt32Array, record_batch, }; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType, Field, Schema, TimeUnit}; diff --git a/rust/lancedb/src/test_utils/connection.rs b/rust/lancedb/src/test_utils/connection.rs index c5c3294ed..0185f20c7 100644 --- a/rust/lancedb/src/test_utils/connection.rs +++ b/rust/lancedb/src/test_utils/connection.rs @@ -10,9 +10,9 @@ use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::{Child, ChildStdout, Command}; use tokio::sync::mpsc; -use crate::{connect, Connection}; -use anyhow::{anyhow, bail, Result}; -use tempfile::{tempdir, TempDir}; +use crate::{Connection, connect}; +use anyhow::{Result, anyhow, bail}; +use tempfile::{TempDir, tempdir}; pub struct TestConnection { pub uri: String, @@ -113,8 +113,13 @@ async fn new_remote_connection(script_path: &str) -> Result { let (port_sender, mut port_receiver) = mpsc::channel(5); let _reader = spawn_stdout_reader(stdout, port_sender).await; let port = match port_receiver.recv().await { - None => bail!("Unable to determine the port number used by the phalanx process we spawned, because the reader thread was closed too soon."), - Some(Err(err)) => bail!("Unable to determine the port number used by the phalanx process we spawned, because of an error, {}", err), + None => bail!( + "Unable to determine the port number used by the phalanx process we spawned, because the reader thread was closed too soon." + ), + Some(Err(err)) => bail!( + "Unable to determine the port number used by the phalanx process we spawned, because of an error, {}", + err + ), Some(Ok(port)) => port, }; let uri = "db://test"; diff --git a/rust/lancedb/src/test_utils/datagen.rs b/rust/lancedb/src/test_utils/datagen.rs index 6439fbc5f..c9824a04b 100644 --- a/rust/lancedb/src/test_utils/datagen.rs +++ b/rust/lancedb/src/test_utils/datagen.rs @@ -6,8 +6,9 @@ use futures::TryStreamExt; use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount}; use crate::{ + Error, Table, arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, - connect, Error, Table, + connect, }; #[async_trait::async_trait] diff --git a/rust/lancedb/src/test_utils/embeddings.rs b/rust/lancedb/src/test_utils/embeddings.rs index 48c9b5743..34bb9bc5e 100644 --- a/rust/lancedb/src/test_utils/embeddings.rs +++ b/rust/lancedb/src/test_utils/embeddings.rs @@ -6,8 +6,8 @@ use std::{borrow::Cow, sync::Arc}; use arrow_array::{Array, FixedSizeListArray, Float32Array}; use arrow_schema::{DataType, Field}; -use crate::embeddings::EmbeddingFunction; use crate::Result; +use crate::embeddings::EmbeddingFunction; #[derive(Debug, Clone)] pub struct MockEmbed { diff --git a/rust/lancedb/src/utils/background_cache.rs b/rust/lancedb/src/utils/background_cache.rs index 37d0aa2b8..211630556 100644 --- a/rust/lancedb/src/utils/background_cache.rs +++ b/rust/lancedb/src/utils/background_cache.rs @@ -9,8 +9,8 @@ use std::future::Future; use std::sync::{Arc, Mutex}; use std::time::Duration; -use futures::future::{BoxFuture, Shared}; use futures::FutureExt; +use futures::future::{BoxFuture, Shared}; type SharedFut = Shared>>>; diff --git a/rust/lancedb/src/utils/mod.rs b/rust/lancedb/src/utils/mod.rs index 031487a50..ffed533f6 100644 --- a/rust/lancedb/src/utils/mod.rs +++ b/rust/lancedb/src/utils/mod.rs @@ -53,7 +53,7 @@ impl PatchStoreParam for Option { pub trait PatchWriteParam { fn patch_with_store_wrapper(self, wrapper: Arc) - -> Result; + -> Result; } impl PatchWriteParam for WriteParams { @@ -340,7 +340,7 @@ mod tests { use arrow_array::Int32Array; use arrow_schema::Field; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - use futures::{stream, StreamExt}; + use futures::{StreamExt, stream}; use tokio::time::sleep; use super::*; @@ -351,10 +351,12 @@ mod tests { Field::new("id", DataType::Int16, true), Field::new("tag", DataType::Utf8, false), ]); - assert!(default_vector_column(&schema_no_vector, None) - .unwrap_err() - .to_string() - .contains("No vector column")); + assert!( + default_vector_column(&schema_no_vector, None) + .unwrap_err() + .to_string() + .contains("No vector column") + ); let schema_with_vec_col = Schema::new(vec![ Field::new("id", DataType::Int16, true), @@ -382,10 +384,12 @@ mod tests { false, ), ]); - assert!(default_vector_column(&multi_vec_col, None) - .unwrap_err() - .to_string() - .contains("More than one")); + assert!( + default_vector_column(&multi_vec_col, None) + .unwrap_err() + .to_string() + .contains("More than one") + ); } #[test] @@ -501,10 +505,12 @@ mod tests { // Poll the stream again and ensure it returns a timeout error let second_result = timeout_stream.next().await.unwrap(); assert!(second_result.is_err()); - assert!(second_result - .unwrap_err() - .to_string() - .contains("Query timeout")); + assert!( + second_result + .unwrap_err() + .to_string() + .contains("Query timeout") + ); } #[tokio::test] diff --git a/rust/lancedb/tests/embedding_registry_test.rs b/rust/lancedb/tests/embedding_registry_test.rs index b002e47ff..8e1a80ec4 100644 --- a/rust/lancedb/tests/embedding_registry_test.rs +++ b/rust/lancedb/tests/embedding_registry_test.rs @@ -15,10 +15,9 @@ use arrow_array::{ use arrow_schema::{DataType, Field, Schema}; use futures::StreamExt; use lancedb::{ - connect, + Error, Result, connect, embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry}, query::ExecutableQuery, - Error, Result, }; #[tokio::test] @@ -262,17 +261,19 @@ fn create_some_records() -> Result