mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-27 08:50:39 +00:00
chore: migrate Rust crates to edition 2024 and fix clippy warnings (#3098)
This PR migrates all Rust crates in the workspace to Rust 2024 edition and addresses the resulting compatibility updates. It also fixes all clippy warnings surfaced by the workspace checks so the codebase remains warning-free under the current lint configuration. Context: - Scope: workspace edition bump (`2021` -> `2024`) plus follow-up refactors required by new edition and clippy rules. - Validation: `cargo fmt --all` and `cargo clippy --quiet --features remote --tests --examples -- -D warnings` both pass.
This commit is contained in:
@@ -9,10 +9,9 @@ use aws_config::Region;
|
||||
use aws_sdk_bedrockruntime::Client;
|
||||
use futures::StreamExt;
|
||||
use lancedb::{
|
||||
connect,
|
||||
embeddings::{bedrock::BedrockEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
|
||||
Result, connect,
|
||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, bedrock::BedrockEmbeddingFunction},
|
||||
query::{ExecutableQuery, QueryBase},
|
||||
Result,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
|
||||
@@ -10,10 +10,10 @@ use futures::TryStreamExt;
|
||||
use lance_index::scalar::FullTextSearchQuery;
|
||||
use lancedb::connection::Connection;
|
||||
|
||||
use lancedb::index::scalar::FtsIndexBuilder;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::index::scalar::FtsIndexBuilder;
|
||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||
use lancedb::{connect, Result, Table};
|
||||
use lancedb::{Result, Table, connect};
|
||||
use rand::random;
|
||||
|
||||
#[tokio::main]
|
||||
@@ -46,19 +46,21 @@ fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send
|
||||
.collect::<Vec<_>>();
|
||||
let n_terms = 3;
|
||||
let batches = RecordBatchIterator::new(
|
||||
vec![RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| {
|
||||
(0..n_terms)
|
||||
.map(|_| words[random::<u32>() as usize % words.len()])
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}))),
|
||||
],
|
||||
)
|
||||
.unwrap()]
|
||||
vec![
|
||||
RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| {
|
||||
(0..n_terms)
|
||||
.map(|_| words[random::<u32>() as usize % words.len()])
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}))),
|
||||
],
|
||||
)
|
||||
.unwrap(),
|
||||
]
|
||||
.into_iter()
|
||||
.map(Ok),
|
||||
schema.clone(),
|
||||
|
||||
@@ -5,16 +5,15 @@ use arrow_array::{RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use lance_index::scalar::FullTextSearchQuery;
|
||||
use lancedb::index::scalar::FtsIndexBuilder;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::index::scalar::FtsIndexBuilder;
|
||||
use lancedb::{
|
||||
connect,
|
||||
Result, Table, connect,
|
||||
embeddings::{
|
||||
sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition,
|
||||
EmbeddingFunction,
|
||||
EmbeddingDefinition, EmbeddingFunction,
|
||||
sentence_transformers::SentenceTransformersEmbeddings,
|
||||
},
|
||||
query::{QueryBase, QueryExecutionOptions},
|
||||
Result, Table,
|
||||
};
|
||||
use std::{iter::once, sync::Arc};
|
||||
|
||||
|
||||
@@ -14,10 +14,10 @@ use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use lancedb::connection::Connection;
|
||||
|
||||
use lancedb::index::vector::IvfPqIndexBuilder;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::index::vector::IvfPqIndexBuilder;
|
||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||
use lancedb::{connect, DistanceType, Result, Table};
|
||||
use lancedb::{DistanceType, Result, Table, connect};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
@@ -51,19 +51,21 @@ fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send
|
||||
|
||||
// Create a RecordBatch stream.
|
||||
let batches = RecordBatchIterator::new(
|
||||
vec![RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(
|
||||
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
||||
DIM as i32,
|
||||
vec![
|
||||
RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(
|
||||
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
||||
DIM as i32,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
.unwrap()]
|
||||
],
|
||||
)
|
||||
.unwrap(),
|
||||
]
|
||||
.into_iter()
|
||||
.map(Ok),
|
||||
schema.clone(),
|
||||
|
||||
@@ -8,10 +8,9 @@ use std::{iter::once, sync::Arc};
|
||||
use arrow_array::{RecordBatch, StringArray};
|
||||
use futures::StreamExt;
|
||||
use lancedb::{
|
||||
connect,
|
||||
embeddings::{openai::OpenAIEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
|
||||
Result, connect,
|
||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, openai::OpenAIEmbeddingFunction},
|
||||
query::{ExecutableQuery, QueryBase},
|
||||
Result,
|
||||
};
|
||||
|
||||
// --8<-- [end:imports]
|
||||
|
||||
@@ -7,13 +7,12 @@ use arrow_array::{RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::StreamExt;
|
||||
use lancedb::{
|
||||
connect,
|
||||
Result, connect,
|
||||
embeddings::{
|
||||
sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition,
|
||||
EmbeddingFunction,
|
||||
EmbeddingDefinition, EmbeddingFunction,
|
||||
sentence_transformers::SentenceTransformersEmbeddings,
|
||||
},
|
||||
query::{ExecutableQuery, QueryBase},
|
||||
Result,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
|
||||
@@ -14,7 +14,7 @@ use futures::TryStreamExt;
|
||||
use lancedb::connection::Connection;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||
use lancedb::{connect, Result, Table as LanceDbTable};
|
||||
use lancedb::{Result, Table as LanceDbTable, connect};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
|
||||
@@ -12,7 +12,7 @@ use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount};
|
||||
#[cfg(feature = "polars")]
|
||||
use {crate::polars_arrow_convertors, polars::frame::ArrowChunk, polars::prelude::DataFrame};
|
||||
|
||||
use crate::{error::Result, Error};
|
||||
use crate::{Error, error::Result};
|
||||
|
||||
/// An iterator of batches that also has a schema
|
||||
pub trait RecordBatchReader: Iterator<Item = Result<arrow_array::RecordBatch>> {
|
||||
|
||||
@@ -17,6 +17,7 @@ use lance_namespace::models::{
|
||||
#[cfg(feature = "aws")]
|
||||
use object_store::aws::AwsCredential;
|
||||
|
||||
use crate::Table;
|
||||
use crate::connection::create_table::CreateTableBuilder;
|
||||
use crate::data::scannable::Scannable;
|
||||
use crate::database::listing::ListingDatabase;
|
||||
@@ -31,7 +32,6 @@ use crate::remote::{
|
||||
client::ClientConfig,
|
||||
db::{OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION},
|
||||
};
|
||||
use crate::Table;
|
||||
use lance::io::ObjectStoreParams;
|
||||
pub use lance_encoding::version::LanceFileVersion;
|
||||
#[cfg(feature = "remote")]
|
||||
@@ -758,10 +758,10 @@ impl ConnectBuilder {
|
||||
options: &mut HashMap<String, String>,
|
||||
) {
|
||||
for (env_key, opt_key) in env_var_to_remote_storage_option {
|
||||
if let Ok(env_value) = std::env::var(env_key) {
|
||||
if !options.contains_key(*opt_key) {
|
||||
options.insert((*opt_key).to_string(), env_value);
|
||||
}
|
||||
if let Ok(env_value) = std::env::var(env_key)
|
||||
&& !options.contains_key(*opt_key)
|
||||
{
|
||||
options.insert((*opt_key).to_string(), env_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1011,14 +1011,13 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
#[test]
|
||||
fn test_apply_env_defaults() {
|
||||
let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
|
||||
let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
|
||||
let env_key = "PATH";
|
||||
let env_val = std::env::var(env_key).expect("PATH should be set in test environment");
|
||||
let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
|
||||
std::env::set_var(env_key, env_val);
|
||||
|
||||
let mut options = HashMap::new();
|
||||
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
||||
assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
|
||||
assert_eq!(Some(&env_val), options.get(opts_key));
|
||||
|
||||
options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
|
||||
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
||||
|
||||
@@ -6,12 +6,12 @@ use std::sync::Arc;
|
||||
use lance_io::object_store::StorageOptionsProvider;
|
||||
|
||||
use crate::{
|
||||
Error, Result, Table,
|
||||
connection::{merge_storage_options, set_storage_options_provider},
|
||||
data::scannable::{Scannable, WithEmbeddingsScannable},
|
||||
database::{CreateTableMode, CreateTableRequest, Database},
|
||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry},
|
||||
table::WriteOptions,
|
||||
Error, Result, Table,
|
||||
};
|
||||
|
||||
pub struct CreateTableBuilder {
|
||||
@@ -167,7 +167,7 @@ impl CreateTableBuilder {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow_array::{
|
||||
record_batch, Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator,
|
||||
Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, record_batch,
|
||||
};
|
||||
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
@@ -380,11 +380,12 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
let other_schema = Arc::new(Schema::new(vec![Field::new("y", DataType::Int32, false)]));
|
||||
assert!(db
|
||||
.create_empty_table("test", other_schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.is_err()); // TODO: assert what this error is
|
||||
assert!(
|
||||
db.create_empty_table("test", other_schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.is_err()
|
||||
); // TODO: assert what this error is
|
||||
let overwritten = db
|
||||
.create_empty_table("test", other_schema.clone())
|
||||
.mode(CreateTableMode::Overwrite)
|
||||
|
||||
@@ -5,9 +5,9 @@ use std::collections::HashMap;
|
||||
|
||||
use arrow::compute::kernels::{aggregate::bool_and, length::length};
|
||||
use arrow_array::{
|
||||
Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader,
|
||||
cast::AsArray,
|
||||
types::{ArrowPrimitiveType, Int32Type, Int64Type},
|
||||
Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader,
|
||||
};
|
||||
use arrow_ord::cmp::eq;
|
||||
use arrow_schema::DataType;
|
||||
@@ -78,7 +78,7 @@ pub fn infer_vector_columns(
|
||||
_ => {
|
||||
return Err(Error::Schema {
|
||||
message: format!("Column {} is not a list", col_name),
|
||||
})
|
||||
});
|
||||
}
|
||||
} {
|
||||
if let Some(Some(prev_dim)) = columns_to_infer.get(&col_name) {
|
||||
@@ -102,8 +102,8 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
use arrow_array::{
|
||||
types::{Float32Type, Float64Type},
|
||||
FixedSizeListArray, Float32Array, ListArray, RecordBatch, RecordBatchIterator, StringArray,
|
||||
types::{Float32Type, Float64Type},
|
||||
};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use std::{sync::Arc, vec};
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
use std::{iter::repeat_with, sync::Arc};
|
||||
|
||||
use arrow_array::{
|
||||
cast::AsArray,
|
||||
types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type},
|
||||
Array, ArrowNumericType, FixedSizeListArray, PrimitiveArray, RecordBatch, RecordBatchIterator,
|
||||
RecordBatchReader,
|
||||
cast::AsArray,
|
||||
types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type},
|
||||
};
|
||||
use arrow_cast::{can_cast_types, cast};
|
||||
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
||||
@@ -184,7 +184,7 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{
|
||||
FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int32Array, Int8Array,
|
||||
FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int8Array, Int32Array,
|
||||
RecordBatch, RecordBatchIterator, StringArray,
|
||||
};
|
||||
use arrow_schema::Field;
|
||||
|
||||
@@ -13,16 +13,16 @@ use crate::arrow::{
|
||||
SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream,
|
||||
};
|
||||
use crate::embeddings::{
|
||||
compute_embeddings_for_batch, compute_output_schema, EmbeddingDefinition, EmbeddingFunction,
|
||||
EmbeddingRegistry,
|
||||
EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, compute_embeddings_for_batch,
|
||||
compute_output_schema,
|
||||
};
|
||||
use crate::table::{ColumnDefinition, ColumnKind, TableDefinition};
|
||||
use crate::{Error, Result};
|
||||
use arrow_array::{ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader};
|
||||
use arrow_schema::{ArrowError, SchemaRef};
|
||||
use async_trait::async_trait;
|
||||
use futures::stream::once;
|
||||
use futures::StreamExt;
|
||||
use futures::stream::once;
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
|
||||
pub trait Scannable: Send {
|
||||
|
||||
@@ -19,12 +19,12 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use lance::dataset::ReadParams;
|
||||
use lance_namespace::LanceNamespace;
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
};
|
||||
use lance_namespace::LanceNamespace;
|
||||
|
||||
use crate::data::scannable::Scannable;
|
||||
use crate::error::Result;
|
||||
|
||||
@@ -8,7 +8,7 @@ use std::path::Path;
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use lance::dataset::refs::Ref;
|
||||
use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
|
||||
use lance::dataset::{ReadParams, WriteMode, builder::DatasetBuilder};
|
||||
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
use lance_encoding::version::LanceFileVersion;
|
||||
@@ -1097,11 +1097,11 @@ impl Database for ListingDatabase {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::Table;
|
||||
use crate::connection::ConnectRequest;
|
||||
use crate::data::scannable::Scannable;
|
||||
use crate::database::{CreateTableMode, CreateTableRequest};
|
||||
use crate::table::WriteOptions;
|
||||
use crate::Table;
|
||||
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -9,13 +9,13 @@ use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use lance_io::object_store::{ObjectStoreParams, StorageOptionsAccessor};
|
||||
use lance_namespace::{
|
||||
LanceNamespace,
|
||||
models::{
|
||||
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
|
||||
DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
|
||||
DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
|
||||
ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
},
|
||||
LanceNamespace,
|
||||
};
|
||||
use lance_namespace_impls::ConnectBuilder;
|
||||
use log::warn;
|
||||
|
||||
@@ -11,16 +11,16 @@ use lance_core::ROW_ID;
|
||||
use lance_datafusion::exec::SessionContextExt;
|
||||
|
||||
use crate::{
|
||||
Error, Result, Table,
|
||||
arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
|
||||
connect,
|
||||
database::{CreateTableRequest, Database},
|
||||
dataloader::permutation::{
|
||||
shuffle::{Shuffler, ShufflerConfig},
|
||||
split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN},
|
||||
util::{rename_column, TemporaryDirectory},
|
||||
split::{SPLIT_ID_COLUMN, SplitStrategy, Splitter},
|
||||
util::{TemporaryDirectory, rename_column},
|
||||
},
|
||||
query::{ExecutableQuery, QueryBase, Select},
|
||||
Error, Result, Table,
|
||||
};
|
||||
|
||||
pub const SRC_ROW_ID_COL: &str = "row_id";
|
||||
|
||||
@@ -25,8 +25,8 @@ use futures::{StreamExt, TryStreamExt};
|
||||
use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||
use lance::io::RecordBatchStream;
|
||||
use lance_arrow::RecordBatchExt;
|
||||
use lance_core::error::LanceOptionExt;
|
||||
use lance_core::ROW_ID;
|
||||
use lance_core::error::LanceOptionExt;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -500,10 +500,10 @@ mod tests {
|
||||
use rand::seq::SliceRandom;
|
||||
|
||||
use crate::{
|
||||
Table,
|
||||
arrow::SendableRecordBatchStream,
|
||||
query::{ExecutableQuery, QueryBase},
|
||||
test_utils::datagen::{virtual_table, LanceDbDatagenExt},
|
||||
Table,
|
||||
test_utils::datagen::{LanceDbDatagenExt, virtual_table},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -18,12 +18,12 @@ use lance_io::{
|
||||
scheduler::{ScanScheduler, SchedulerConfig},
|
||||
utils::CachedFileSize,
|
||||
};
|
||||
use rand::{seq::SliceRandom, Rng, RngCore};
|
||||
use rand::{Rng, RngCore, seq::SliceRandom};
|
||||
|
||||
use crate::{
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
dataloader::permutation::util::{non_crypto_rng, TemporaryDirectory},
|
||||
Error, Result,
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
dataloader::permutation::util::{TemporaryDirectory, non_crypto_rng},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -281,7 +281,7 @@ mod tests {
|
||||
use datafusion_expr::col;
|
||||
use futures::TryStreamExt;
|
||||
use lance_datagen::{BatchCount, BatchGeneratorBuilder, ByteCount, RowCount, Seed};
|
||||
use rand::{rngs::SmallRng, SeedableRng};
|
||||
use rand::{SeedableRng, rngs::SmallRng};
|
||||
|
||||
fn test_gen() -> BatchGeneratorBuilder {
|
||||
lance_datagen::gen_batch()
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use std::sync::{
|
||||
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
|
||||
Arc,
|
||||
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
|
||||
};
|
||||
|
||||
use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
|
||||
@@ -15,13 +15,13 @@ use lance_arrow::SchemaExt;
|
||||
use lance_core::ROW_ID;
|
||||
|
||||
use crate::{
|
||||
Error, Result,
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
dataloader::{
|
||||
permutation::shuffle::{Shuffler, ShufflerConfig},
|
||||
permutation::util::TemporaryDirectory,
|
||||
},
|
||||
query::{Query, QueryBase, Select},
|
||||
Error, Result,
|
||||
};
|
||||
|
||||
pub const SPLIT_ID_COLUMN: &str = "split_id";
|
||||
|
||||
@@ -7,12 +7,12 @@ use arrow_array::RecordBatch;
|
||||
use arrow_schema::{Fields, Schema};
|
||||
use datafusion_execution::disk_manager::DiskManagerMode;
|
||||
use futures::TryStreamExt;
|
||||
use rand::{rngs::SmallRng, RngCore, SeedableRng};
|
||||
use rand::{RngCore, SeedableRng, rngs::SmallRng};
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::{
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
Error, Result,
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
};
|
||||
|
||||
/// Directory to use for temporary files
|
||||
|
||||
@@ -23,9 +23,9 @@ use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
Error,
|
||||
error::Result,
|
||||
table::{ColumnDefinition, ColumnKind, TableDefinition},
|
||||
Error,
|
||||
};
|
||||
|
||||
/// Trait for embedding functions
|
||||
|
||||
@@ -8,7 +8,7 @@ use arrow::array::{AsArray, Float32Builder};
|
||||
use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array};
|
||||
use arrow_data::ArrayData;
|
||||
use arrow_schema::DataType;
|
||||
use serde_json::{json, Value};
|
||||
use serde_json::{Value, json};
|
||||
|
||||
use super::EmbeddingFunction;
|
||||
use crate::{Error, Result};
|
||||
|
||||
@@ -8,9 +8,9 @@ use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array};
|
||||
use arrow_data::ArrayData;
|
||||
use arrow_schema::DataType;
|
||||
use async_openai::{
|
||||
Client,
|
||||
config::OpenAIConfig,
|
||||
types::{CreateEmbeddingRequest, Embedding, EmbeddingInput, EncodingFormat},
|
||||
Client,
|
||||
};
|
||||
use tokio::{runtime::Handle, task};
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use super::EmbeddingFunction;
|
||||
use arrow::{
|
||||
array::{AsArray, PrimitiveBuilder},
|
||||
datatypes::{
|
||||
ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt32Type, UInt8Type,
|
||||
ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt8Type, UInt32Type,
|
||||
},
|
||||
};
|
||||
use arrow_array::{Array, FixedSizeListArray, PrimitiveArray};
|
||||
@@ -16,8 +16,8 @@ use arrow_schema::DataType;
|
||||
use candle_core::{CpuStorage, Device, Layout, Storage, Tensor};
|
||||
use candle_nn::VarBuilder;
|
||||
use candle_transformers::models::bert::{BertModel, DTYPE};
|
||||
use hf_hub::{api::sync::Api, Repo, RepoType};
|
||||
use tokenizers::{tokenizer::Tokenizer, PaddingParams};
|
||||
use hf_hub::{Repo, RepoType, api::sync::Api};
|
||||
use tokenizers::{PaddingParams, tokenizer::Tokenizer};
|
||||
|
||||
/// Compute embeddings using huggingface sentence-transformers.
|
||||
pub struct SentenceTransformersEmbeddingsBuilder {
|
||||
@@ -230,7 +230,7 @@ impl SentenceTransformersEmbeddings {
|
||||
Storage::Cpu(CpuStorage::BF16(_)) => {
|
||||
return Err(crate::Error::Runtime {
|
||||
message: "unsupported data type".to_string(),
|
||||
})
|
||||
});
|
||||
}
|
||||
_ => unreachable!("we already moved the tensor to the CPU device"),
|
||||
};
|
||||
@@ -298,12 +298,12 @@ impl SentenceTransformersEmbeddings {
|
||||
DataType::Utf8View => {
|
||||
return Err(crate::Error::Runtime {
|
||||
message: "Utf8View not yet implemented".to_string(),
|
||||
})
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
return Err(crate::Error::Runtime {
|
||||
message: "invalid type".to_string(),
|
||||
})
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ pub use sql::expr_to_sql_string;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::DataType;
|
||||
use datafusion_expr::{expr_fn::cast, Expr, ScalarUDF};
|
||||
use datafusion_expr::{Expr, ScalarUDF, expr_fn::cast};
|
||||
use datafusion_functions::string::expr_fn as string_expr_fn;
|
||||
|
||||
pub use datafusion_expr::{col, lit};
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::time::Duration;
|
||||
use vector::IvfFlatIndexBuilder;
|
||||
|
||||
use crate::index::vector::IvfRqIndexBuilder;
|
||||
use crate::{table::BaseTable, DistanceType, Error, Result};
|
||||
use crate::{DistanceType, Error, Result, table::BaseTable};
|
||||
|
||||
use self::{
|
||||
scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
|
||||
|
||||
@@ -51,7 +51,7 @@ pub struct BitmapIndexBuilder {}
|
||||
#[derive(Debug, Clone, Default, serde::Serialize)]
|
||||
pub struct LabelListIndexBuilder {}
|
||||
|
||||
pub use lance_index::scalar::inverted::query::*;
|
||||
pub use lance_index::scalar::FullTextSearchQuery;
|
||||
pub use lance_index::scalar::InvertedIndexParams as FtsIndexBuilder;
|
||||
pub use lance_index::scalar::InvertedIndexParams;
|
||||
pub use lance_index::scalar::inverted::query::*;
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use crate::Error;
|
||||
use crate::error::Result;
|
||||
use crate::table::BaseTable;
|
||||
use crate::Error;
|
||||
use log::debug;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::time::sleep;
|
||||
|
||||
@@ -5,11 +5,11 @@
|
||||
|
||||
use std::{fmt::Formatter, sync::Arc};
|
||||
|
||||
use futures::{stream::BoxStream, TryFutureExt};
|
||||
use futures::{TryFutureExt, stream::BoxStream};
|
||||
use lance::io::WrappingObjectStore;
|
||||
use object_store::{
|
||||
path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
||||
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart,
|
||||
Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
||||
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
@@ -10,8 +10,9 @@ use bytes::Bytes;
|
||||
use futures::stream::BoxStream;
|
||||
use lance::io::WrappingObjectStore;
|
||||
use object_store::{
|
||||
path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
||||
GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
||||
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
|
||||
path::Path,
|
||||
};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
|
||||
@@ -5,26 +5,26 @@ use std::sync::Arc;
|
||||
use std::{future::Future, time::Duration};
|
||||
|
||||
use arrow::compute::concat_batches;
|
||||
use arrow_array::{make_array, Array, Float16Array, Float32Array, Float64Array};
|
||||
use arrow_array::{Array, Float16Array, Float32Array, Float64Array, make_array};
|
||||
use arrow_schema::{DataType, SchemaRef};
|
||||
use datafusion_expr::Expr;
|
||||
use datafusion_physical_plan::ExecutionPlan;
|
||||
use futures::{stream, try_join, FutureExt, TryFutureExt, TryStreamExt};
|
||||
use futures::{FutureExt, TryFutureExt, TryStreamExt, stream, try_join};
|
||||
use half::f16;
|
||||
use lance::dataset::{scanner::DatasetRecordBatchStream, ROW_ID};
|
||||
use lance::dataset::{ROW_ID, scanner::DatasetRecordBatchStream};
|
||||
use lance_arrow::RecordBatchExt;
|
||||
use lance_datafusion::exec::execute_plan;
|
||||
use lance_index::scalar::inverted::SCORE_COL;
|
||||
use lance_index::scalar::FullTextSearchQuery;
|
||||
use lance_index::scalar::inverted::SCORE_COL;
|
||||
use lance_index::vector::DIST_COL;
|
||||
use lance_io::stream::RecordBatchStreamAdapter;
|
||||
|
||||
use crate::DistanceType;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::rerankers::rrf::RRFReranker;
|
||||
use crate::rerankers::{check_reranker_result, NormalizeMethod, Reranker};
|
||||
use crate::rerankers::{NormalizeMethod, Reranker, check_reranker_result};
|
||||
use crate::table::BaseTable;
|
||||
use crate::utils::TimeoutStream;
|
||||
use crate::DistanceType;
|
||||
use crate::{arrow::SendableRecordBatchStream, table::AnyQuery};
|
||||
|
||||
mod hybrid;
|
||||
@@ -161,10 +161,11 @@ impl IntoQueryVector for &dyn Array {
|
||||
if data_type != self.data_type() {
|
||||
Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"failed to create query vector, the input data type was {:?} but the expected data type was {:?}",
|
||||
self.data_type(),
|
||||
data_type
|
||||
)})
|
||||
"failed to create query vector, the input data type was {:?} but the expected data type was {:?}",
|
||||
self.data_type(),
|
||||
data_type
|
||||
),
|
||||
})
|
||||
} else {
|
||||
let data = self.to_data();
|
||||
Ok(make_array(data))
|
||||
@@ -186,7 +187,7 @@ impl IntoQueryVector for &[f16] {
|
||||
DataType::Float32 => {
|
||||
let arr: Vec<f32> = self.iter().map(|x| f32::from(*x)).collect();
|
||||
Ok(Arc::new(Float32Array::from(arr)))
|
||||
},
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let arr: Vec<f64> = self.iter().map(|x| f64::from(*x)).collect();
|
||||
Ok(Arc::new(Float64Array::from(arr)))
|
||||
@@ -194,8 +195,7 @@ impl IntoQueryVector for &[f16] {
|
||||
_ => Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"failed to create query vector, the input data type was &[f16] but the embedding model \"{}\" expected data type {:?}",
|
||||
embedding_model_label,
|
||||
data_type
|
||||
embedding_model_label, data_type
|
||||
),
|
||||
}),
|
||||
}
|
||||
@@ -216,7 +216,7 @@ impl IntoQueryVector for &[f32] {
|
||||
DataType::Float32 => {
|
||||
let arr: Vec<f32> = self.to_vec();
|
||||
Ok(Arc::new(Float32Array::from(arr)))
|
||||
},
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let arr: Vec<f64> = self.iter().map(|x| *x as f64).collect();
|
||||
Ok(Arc::new(Float64Array::from(arr)))
|
||||
@@ -224,8 +224,7 @@ impl IntoQueryVector for &[f32] {
|
||||
_ => Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"failed to create query vector, the input data type was &[f32] but the embedding model \"{}\" expected data type {:?}",
|
||||
embedding_model_label,
|
||||
data_type
|
||||
embedding_model_label, data_type
|
||||
),
|
||||
}),
|
||||
}
|
||||
@@ -239,26 +238,25 @@ impl IntoQueryVector for &[f64] {
|
||||
embedding_model_label: &str,
|
||||
) -> Result<Arc<dyn Array>> {
|
||||
match data_type {
|
||||
DataType::Float16 => {
|
||||
let arr: Vec<f16> = self.iter().map(|x| f16::from_f64(*x)).collect();
|
||||
Ok(Arc::new(Float16Array::from(arr)))
|
||||
}
|
||||
DataType::Float32 => {
|
||||
let arr: Vec<f32> = self.iter().map(|x| *x as f32).collect();
|
||||
Ok(Arc::new(Float32Array::from(arr)))
|
||||
},
|
||||
DataType::Float64 => {
|
||||
let arr: Vec<f64> = self.to_vec();
|
||||
Ok(Arc::new(Float64Array::from(arr)))
|
||||
}
|
||||
_ => Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}",
|
||||
embedding_model_label,
|
||||
data_type
|
||||
),
|
||||
}),
|
||||
DataType::Float16 => {
|
||||
let arr: Vec<f16> = self.iter().map(|x| f16::from_f64(*x)).collect();
|
||||
Ok(Arc::new(Float16Array::from(arr)))
|
||||
}
|
||||
DataType::Float32 => {
|
||||
let arr: Vec<f32> = self.iter().map(|x| *x as f32).collect();
|
||||
Ok(Arc::new(Float32Array::from(arr)))
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let arr: Vec<f64> = self.to_vec();
|
||||
Ok(Arc::new(Float64Array::from(arr)))
|
||||
}
|
||||
_ => Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}",
|
||||
embedding_model_label, data_type
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1011,13 +1009,13 @@ impl VectorQuery {
|
||||
message: "minimum_nprobes must be greater than 0".to_string(),
|
||||
});
|
||||
}
|
||||
if let Some(maximum_nprobes) = self.request.maximum_nprobes {
|
||||
if minimum_nprobes > maximum_nprobes {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "minimum_nprobes must be less than or equal to maximum_nprobes"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
if let Some(maximum_nprobes) = self.request.maximum_nprobes
|
||||
&& minimum_nprobes > maximum_nprobes
|
||||
{
|
||||
return Err(Error::InvalidInput {
|
||||
message: "minimum_nprobes must be less than or equal to maximum_nprobes"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
self.request.minimum_nprobes = minimum_nprobes;
|
||||
Ok(self)
|
||||
@@ -1407,8 +1405,8 @@ mod tests {
|
||||
use super::*;
|
||||
use arrow::{array::downcast_array, compute::concat_batches, datatypes::Int32Type};
|
||||
use arrow_array::{
|
||||
cast::AsArray, types::Float32Type, FixedSizeListArray, Float32Array, Int32Array,
|
||||
RecordBatch, StringArray,
|
||||
FixedSizeListArray, Float32Array, Int32Array, RecordBatch, StringArray, cast::AsArray,
|
||||
types::Float32Type,
|
||||
};
|
||||
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
@@ -1416,7 +1414,7 @@ mod tests {
|
||||
use rand::seq::IndexedRandom;
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::{connect, database::CreateTableMode, index::Index, Table};
|
||||
use crate::{Table, connect, database::CreateTableMode, index::Index};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_setters_getters() {
|
||||
@@ -1754,11 +1752,13 @@ mod tests {
|
||||
.limit(1)
|
||||
.execute()
|
||||
.await;
|
||||
assert!(error_result
|
||||
.err()
|
||||
.unwrap()
|
||||
.to_string()
|
||||
.contains("No vector column found to match with the query vector dimension: 3"));
|
||||
assert!(
|
||||
error_result
|
||||
.err()
|
||||
.unwrap()
|
||||
.to_string()
|
||||
.contains("No vector column found to match with the query vector dimension: 3")
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -2010,7 +2010,7 @@ mod tests {
|
||||
|
||||
// Sample 1 - 3 tokens for each string value
|
||||
let tokens = ["a", "b", "c", "d", "e"];
|
||||
use rand::{rng, Rng};
|
||||
use rand::{Rng, rng};
|
||||
|
||||
let mut rng = rng();
|
||||
let text: StringArray = (0..nrows)
|
||||
|
||||
@@ -5,7 +5,7 @@ use arrow::compute::{
|
||||
kernels::numeric::{div, sub},
|
||||
max, min,
|
||||
};
|
||||
use arrow_array::{cast::downcast_array, Float32Array, RecordBatch};
|
||||
use arrow_array::{Float32Array, RecordBatch, cast::downcast_array};
|
||||
use arrow_schema::{DataType, Field, Schema, SortOptions};
|
||||
use lance::dataset::ROW_ID;
|
||||
use lance_index::{scalar::inverted::SCORE_COL, vector::DIST_COL};
|
||||
@@ -253,7 +253,10 @@ mod test {
|
||||
let result = rank(batch.clone(), "bad_col", None);
|
||||
match result {
|
||||
Err(Error::InvalidInput { message }) => {
|
||||
assert_eq!("expected column bad_col not found in rank. found columns [\"name\", \"score\"]", message);
|
||||
assert_eq!(
|
||||
"expected column bad_col not found in rank. found columns [\"name\", \"score\"]",
|
||||
message
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
panic!("expected invalid input error, received {:?}", result)
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
use http::HeaderName;
|
||||
use log::debug;
|
||||
use reqwest::{
|
||||
header::{HeaderMap, HeaderValue},
|
||||
Body, Request, RequestBuilder, Response,
|
||||
header::{HeaderMap, HeaderValue},
|
||||
};
|
||||
use std::{collections::HashMap, future::Future, str::FromStr, sync::Arc, time::Duration};
|
||||
|
||||
@@ -650,14 +650,13 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
pub fn extract_request_id(&self, request: &mut Request) -> String {
|
||||
// Set a request id.
|
||||
// TODO: allow the user to supply this, through middleware?
|
||||
let request_id = if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) {
|
||||
if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) {
|
||||
request_id.to_str().unwrap().to_string()
|
||||
} else {
|
||||
let request_id = uuid::Uuid::new_v4().to_string();
|
||||
self.set_request_id(request, &request_id);
|
||||
request_id
|
||||
};
|
||||
request_id
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the request ID header
|
||||
|
||||
@@ -16,6 +16,7 @@ use lance_namespace::models::{
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
};
|
||||
|
||||
use crate::Error;
|
||||
use crate::database::{
|
||||
CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
|
||||
OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||
@@ -23,12 +24,11 @@ use crate::database::{
|
||||
use crate::error::Result;
|
||||
use crate::remote::util::stream_as_body;
|
||||
use crate::table::BaseTable;
|
||||
use crate::Error;
|
||||
|
||||
use super::ARROW_STREAM_CONTENT_TYPE;
|
||||
use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender};
|
||||
use super::table::RemoteTable;
|
||||
use super::util::parse_server_version;
|
||||
use super::ARROW_STREAM_CONTENT_TYPE;
|
||||
|
||||
// Request structure for the remote clone table API
|
||||
#[derive(serde::Serialize)]
|
||||
@@ -249,9 +249,9 @@ impl RemoteDatabase {
|
||||
#[cfg(all(test, feature = "remote"))]
|
||||
mod test_utils {
|
||||
use super::*;
|
||||
use crate::remote::ClientConfig;
|
||||
use crate::remote::client::test_utils::MockSender;
|
||||
use crate::remote::client::test_utils::{client_with_handler, client_with_handler_and_config};
|
||||
use crate::remote::ClientConfig;
|
||||
|
||||
impl RemoteDatabase<MockSender> {
|
||||
pub fn new_mock<F, T>(handler: F) -> Self
|
||||
@@ -799,9 +799,9 @@ mod tests {
|
||||
|
||||
use crate::connection::ConnectBuilder;
|
||||
use crate::{
|
||||
database::CreateTableMode,
|
||||
remote::{ClientConfig, HeaderProvider, ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
|
||||
Connection, Error,
|
||||
database::CreateTableMode,
|
||||
remote::{ARROW_STREAM_CONTENT_TYPE, ClientConfig, HeaderProvider, JSON_CONTENT_TYPE},
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use crate::remote::RetryConfig;
|
||||
use crate::Error;
|
||||
use crate::remote::RetryConfig;
|
||||
use log::debug;
|
||||
use std::time::Duration;
|
||||
|
||||
|
||||
@@ -6,15 +6,14 @@ pub mod insert;
|
||||
use self::insert::RemoteInsertExec;
|
||||
use crate::expr::expr_to_sql_string;
|
||||
|
||||
use super::ARROW_STREAM_CONTENT_TYPE;
|
||||
use super::client::RequestResultExt;
|
||||
use super::client::{HttpSend, RestfulLanceDbClient, Sender};
|
||||
use super::db::ServerVersion;
|
||||
use super::ARROW_STREAM_CONTENT_TYPE;
|
||||
use crate::index::waiter::wait_for_index;
|
||||
use crate::index::Index;
|
||||
use crate::index::IndexStatistics;
|
||||
use crate::index::waiter::wait_for_index;
|
||||
use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
|
||||
use crate::table::query::create_multi_vector_plan;
|
||||
use crate::table::AddColumnsResult;
|
||||
use crate::table::AddResult;
|
||||
use crate::table::AlterColumnsResult;
|
||||
@@ -23,19 +22,20 @@ use crate::table::DropColumnsResult;
|
||||
use crate::table::MergeResult;
|
||||
use crate::table::Tags;
|
||||
use crate::table::UpdateResult;
|
||||
use crate::table::query::create_multi_vector_plan;
|
||||
use crate::table::{AnyQuery, Filter, TableStatistics};
|
||||
use crate::utils::background_cache::BackgroundCache;
|
||||
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
|
||||
use crate::{DistanceType, Error};
|
||||
use crate::{
|
||||
error::Result,
|
||||
index::{IndexBuilder, IndexConfig},
|
||||
query::QueryExecutionOptions,
|
||||
table::{
|
||||
merge::MergeInsertBuilder, AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats,
|
||||
TableDefinition, UpdateBuilder,
|
||||
AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats, TableDefinition, UpdateBuilder,
|
||||
merge::MergeInsertBuilder,
|
||||
},
|
||||
};
|
||||
use crate::{DistanceType, Error};
|
||||
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
|
||||
use arrow_ipc::reader::FileReader;
|
||||
use arrow_schema::{DataType, SchemaRef};
|
||||
@@ -50,7 +50,7 @@ use lance::arrow::json::{JsonDataType, JsonSchema};
|
||||
use lance::dataset::refs::TagContents;
|
||||
use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||
use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
|
||||
use lance_datafusion::exec::{execute_plan, OneShotExec};
|
||||
use lance_datafusion::exec::{OneShotExec, execute_plan};
|
||||
use reqwest::{RequestBuilder, Response};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Number;
|
||||
@@ -612,8 +612,8 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
message: format!(
|
||||
"Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.",
|
||||
version
|
||||
)
|
||||
})
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -697,10 +697,10 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
Error::Retry { status_code, .. } => *status_code,
|
||||
_ => None,
|
||||
};
|
||||
if let Some(status_code) = status_code {
|
||||
if Self::should_invalidate_cache_for_status(status_code) {
|
||||
self.invalidate_schema_cache();
|
||||
}
|
||||
if let Some(status_code) = status_code
|
||||
&& Self::should_invalidate_cache_for_status(status_code)
|
||||
{
|
||||
self.invalidate_schema_cache();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -783,9 +783,9 @@ impl<S: HttpSend> std::fmt::Display for RemoteTable<S> {
|
||||
#[cfg(all(test, feature = "remote"))]
|
||||
mod test_utils {
|
||||
use super::*;
|
||||
use crate::remote::client::test_utils::client_with_handler;
|
||||
use crate::remote::client::test_utils::{client_with_handler_and_config, MockSender};
|
||||
use crate::remote::ClientConfig;
|
||||
use crate::remote::client::test_utils::client_with_handler;
|
||||
use crate::remote::client::test_utils::{MockSender, client_with_handler_and_config};
|
||||
|
||||
impl RemoteTable<MockSender> {
|
||||
pub fn new_mock<F, T>(name: String, handler: F, version: Option<semver::Version>) -> Self
|
||||
@@ -1251,13 +1251,13 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
0 => {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "No columns specified".into(),
|
||||
})
|
||||
});
|
||||
}
|
||||
1 => index.columns.pop().unwrap(),
|
||||
_ => {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Indices over multiple columns not yet supported".into(),
|
||||
})
|
||||
});
|
||||
}
|
||||
};
|
||||
let mut body = serde_json::json!({
|
||||
@@ -1320,7 +1320,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
_ => {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Index type not supported".into(),
|
||||
})
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1771,8 +1771,8 @@ impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::time::Duration;
|
||||
use std::{collections::HashMap, pin::Pin};
|
||||
|
||||
@@ -1781,10 +1781,10 @@ mod tests {
|
||||
use crate::table::AddDataMode;
|
||||
|
||||
use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type};
|
||||
use arrow_array::{record_batch, Int32Array, RecordBatch, RecordBatchIterator};
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, record_batch};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::{future::BoxFuture, StreamExt, TryFutureExt};
|
||||
use futures::{StreamExt, TryFutureExt, future::BoxFuture};
|
||||
use lance_index::scalar::inverted::query::MatchQuery;
|
||||
use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams};
|
||||
use reqwest::Body;
|
||||
@@ -1794,14 +1794,14 @@ mod tests {
|
||||
use crate::index::vector::{
|
||||
IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
|
||||
};
|
||||
use crate::remote::db::DEFAULT_SERVER_VERSION;
|
||||
use crate::remote::JSON_CONTENT_TYPE;
|
||||
use crate::remote::db::DEFAULT_SERVER_VERSION;
|
||||
use crate::utils::background_cache::clock;
|
||||
use crate::{
|
||||
index::{vector::IvfPqIndexBuilder, Index, IndexStatistics, IndexType},
|
||||
DistanceType, Error, Table,
|
||||
index::{Index, IndexStatistics, IndexType, vector::IvfPqIndexBuilder},
|
||||
query::{ExecutableQuery, QueryBase},
|
||||
remote::ARROW_FILE_CONTENT_TYPE,
|
||||
DistanceType, Error, Table,
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
@@ -2030,11 +2030,13 @@ mod tests {
|
||||
.unwrap(),
|
||||
"/v1/table/my_table/insert/" => {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert!(request
|
||||
.url()
|
||||
.query_pairs()
|
||||
.filter(|(k, _)| k == "mode")
|
||||
.all(|(_, v)| v == "append"));
|
||||
assert!(
|
||||
request
|
||||
.url()
|
||||
.query_pairs()
|
||||
.filter(|(k, _)| k == "mode")
|
||||
.all(|(_, v)| v == "append")
|
||||
);
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
ARROW_STREAM_CONTENT_TYPE
|
||||
@@ -3592,7 +3594,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn _make_table_with_indices(unindexed_rows: usize) -> Table {
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
Table::new_with_handler("my_table", move |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
|
||||
let response_body = match request.url().path() {
|
||||
@@ -3636,8 +3638,7 @@ mod tests {
|
||||
let body = serde_json::to_string(&response_body).unwrap();
|
||||
let status = if body == "null" { 404 } else { 200 };
|
||||
http::Response::builder().status(status).body(body).unwrap()
|
||||
});
|
||||
table
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -3848,8 +3849,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_uri_caching() {
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
let call_count = Arc::new(AtomicUsize::new(0));
|
||||
let call_count_clone = call_count.clone();
|
||||
|
||||
@@ -16,12 +16,12 @@ use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, Plan
|
||||
use futures::StreamExt;
|
||||
use http::header::CONTENT_TYPE;
|
||||
|
||||
use crate::Error;
|
||||
use crate::remote::ARROW_STREAM_CONTENT_TYPE;
|
||||
use crate::remote::client::{HttpSend, RestfulLanceDbClient, Sender};
|
||||
use crate::remote::table::RemoteTable;
|
||||
use crate::remote::ARROW_STREAM_CONTENT_TYPE;
|
||||
use crate::table::datafusion::insert::COUNT_SCHEMA;
|
||||
use crate::table::AddResult;
|
||||
use crate::Error;
|
||||
use crate::table::datafusion::insert::COUNT_SCHEMA;
|
||||
|
||||
/// ExecutionPlan for inserting data into a remote LanceDB table.
|
||||
///
|
||||
@@ -309,12 +309,12 @@ mod tests {
|
||||
use arrow_schema::{DataType, Field, Schema as ArrowSchema};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion_catalog::MemTable;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use crate::Table;
|
||||
use crate::remote::ARROW_STREAM_CONTENT_TYPE;
|
||||
use crate::table::datafusion::BaseTableAdapter;
|
||||
use crate::Table;
|
||||
|
||||
fn schema_json() -> &'static str {
|
||||
r#"{"fields": [{"name": "id", "type": {"type": "int32"}, "nullable": true}]}"#
|
||||
|
||||
@@ -5,7 +5,7 @@ use arrow_ipc::CompressionType;
|
||||
use futures::{Stream, StreamExt};
|
||||
use reqwest::Response;
|
||||
|
||||
use crate::{arrow::SendableRecordBatchStream, Result};
|
||||
use crate::{Result, arrow::SendableRecordBatchStream};
|
||||
|
||||
use super::db::ServerVersion;
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ use async_trait::async_trait;
|
||||
use lance::dataset::ROW_ID;
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::rerankers::{Reranker, RELEVANCE_SCORE};
|
||||
use crate::rerankers::{RELEVANCE_SCORE, Reranker};
|
||||
|
||||
/// Reranks the results using Reciprocal Rank Fusion(RRF) algorithm based
|
||||
/// on the scores of vector and FTS search.
|
||||
|
||||
@@ -8,29 +8,29 @@ use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
use async_trait::async_trait;
|
||||
use datafusion_execution::TaskContext;
|
||||
use datafusion_expr::Expr;
|
||||
use datafusion_physical_plan::display::DisplayableExecutionPlan;
|
||||
use datafusion_physical_plan::ExecutionPlan;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use datafusion_physical_plan::display::DisplayableExecutionPlan;
|
||||
use futures::StreamExt;
|
||||
use lance::dataset::builder::DatasetBuilder;
|
||||
use futures::stream::FuturesUnordered;
|
||||
pub use lance::dataset::ColumnAlteration;
|
||||
pub use lance::dataset::NewColumnTransform;
|
||||
pub use lance::dataset::ReadParams;
|
||||
pub use lance::dataset::Version;
|
||||
use lance::dataset::WriteMode;
|
||||
use lance::dataset::builder::DatasetBuilder;
|
||||
use lance::dataset::{InsertBuilder, WriteParams};
|
||||
use lance::index::vector::utils::infer_vector_dim;
|
||||
use lance::index::vector::VectorIndexParams;
|
||||
use lance::index::vector::utils::infer_vector_dim;
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
use lance_index::DatasetIndexExt;
|
||||
use lance_index::IndexType;
|
||||
use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams};
|
||||
use lance_index::vector::bq::RQBuildParams;
|
||||
use lance_index::vector::hnsw::builder::HnswBuildParams;
|
||||
use lance_index::vector::ivf::IvfBuildParams;
|
||||
use lance_index::vector::pq::PQBuildParams;
|
||||
use lance_index::vector::sq::builder::SQBuildParams;
|
||||
use lance_index::DatasetIndexExt;
|
||||
use lance_index::IndexType;
|
||||
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
|
||||
pub use query::AnyQuery;
|
||||
|
||||
@@ -43,19 +43,19 @@ use std::format;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::data::scannable::{estimate_write_partitions, PeekedScannable, Scannable};
|
||||
use crate::data::scannable::{PeekedScannable, Scannable, estimate_write_partitions};
|
||||
use crate::database::Database;
|
||||
use crate::embeddings::{EmbeddingDefinition, EmbeddingRegistry, MemoryRegistry};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::index::vector::VectorIndex;
|
||||
use crate::index::IndexStatistics;
|
||||
use crate::index::{vector::suggested_num_sub_vectors, Index, IndexBuilder};
|
||||
use crate::index::vector::VectorIndex;
|
||||
use crate::index::{Index, IndexBuilder, vector::suggested_num_sub_vectors};
|
||||
use crate::index::{IndexConfig, IndexStatisticsImpl};
|
||||
use crate::query::{IntoQueryVector, Query, QueryExecutionOptions, TakeQuery, VectorQuery};
|
||||
use crate::table::datafusion::insert::InsertExec;
|
||||
use crate::utils::{
|
||||
supported_bitmap_data_type, supported_btree_data_type, supported_fts_data_type,
|
||||
supported_label_list_data_type, supported_vector_data_type, PatchReadParam, PatchWriteParam,
|
||||
PatchReadParam, PatchWriteParam, supported_bitmap_data_type, supported_btree_data_type,
|
||||
supported_fts_data_type, supported_label_list_data_type, supported_vector_data_type,
|
||||
};
|
||||
|
||||
use self::dataset::DatasetConsistencyWrapper;
|
||||
@@ -2555,22 +2555,21 @@ pub struct FragmentSummaryStats {
|
||||
#[cfg(test)]
|
||||
#[allow(deprecated)]
|
||||
mod tests {
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::time::Duration;
|
||||
|
||||
use arrow_array::{
|
||||
builder::{ListBuilder, StringBuilder},
|
||||
Array, BooleanArray, FixedSizeListArray, Int32Array, LargeStringArray, RecordBatch,
|
||||
RecordBatchIterator, RecordBatchReader, StringArray,
|
||||
builder::{ListBuilder, StringBuilder},
|
||||
};
|
||||
use arrow_array::{BinaryArray, LargeBinaryArray};
|
||||
use arrow_data::ArrayDataBuilder;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
use lance::Dataset;
|
||||
use rand::Rng;
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use super::*;
|
||||
@@ -2777,9 +2776,8 @@ mod tests {
|
||||
false,
|
||||
)]));
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
let float_arr = Float32Array::from(
|
||||
repeat_with(|| rng.gen::<f32>())
|
||||
repeat_with(rand::random::<f32>)
|
||||
.take(512 * dimension as usize)
|
||||
.collect::<Vec<f32>>(),
|
||||
);
|
||||
@@ -2884,8 +2882,8 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
use lance::index::vector::ivf::v2::IvfPq as LanceIvfPq;
|
||||
use lance::index::DatasetIndexInternalExt;
|
||||
use lance::index::vector::ivf::v2::IvfPq as LanceIvfPq;
|
||||
use lance_index::metrics::NoOpMetricsCollector;
|
||||
use lance_index::vector::VectorIndex as LanceVectorIndex;
|
||||
|
||||
@@ -2933,9 +2931,8 @@ mod tests {
|
||||
false,
|
||||
)]));
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
let float_arr = Float32Array::from(
|
||||
repeat_with(|| rng.gen::<f32>())
|
||||
repeat_with(rand::random::<f32>)
|
||||
.take(512 * dimension as usize)
|
||||
.collect::<Vec<f32>>(),
|
||||
);
|
||||
@@ -2993,9 +2990,8 @@ mod tests {
|
||||
false,
|
||||
)]));
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
let float_arr = Float32Array::from(
|
||||
repeat_with(|| rng.gen::<f32>())
|
||||
repeat_with(rand::random::<f32>)
|
||||
.take(512 * dimension as usize)
|
||||
.collect::<Vec<f32>>(),
|
||||
);
|
||||
@@ -3256,16 +3252,20 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
// Can not create btree or bitmap index on list column
|
||||
assert!(table
|
||||
.create_index(&["tags"], Index::BTree(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.is_err());
|
||||
assert!(table
|
||||
.create_index(&["tags"], Index::Bitmap(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.is_err());
|
||||
assert!(
|
||||
table
|
||||
.create_index(&["tags"], Index::BTree(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.is_err()
|
||||
);
|
||||
assert!(
|
||||
table
|
||||
.create_index(&["tags"], Index::Bitmap(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.is_err()
|
||||
);
|
||||
|
||||
// Create bitmap index on the "category" column
|
||||
table
|
||||
|
||||
@@ -7,8 +7,8 @@ use arrow_schema::{DataType, Fields, Schema};
|
||||
use lance::dataset::WriteMode;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data::scannable::scannable_with_embeddings;
|
||||
use crate::data::scannable::Scannable;
|
||||
use crate::data::scannable::scannable_with_embeddings;
|
||||
use crate::embeddings::EmbeddingRegistry;
|
||||
use crate::table::datafusion::cast::cast_to_table_schema;
|
||||
use crate::table::datafusion::reject_nan::reject_nan_vectors;
|
||||
@@ -204,13 +204,14 @@ mod tests {
|
||||
|
||||
use arrow::datatypes::Float64Type;
|
||||
use arrow_array::{
|
||||
record_batch, FixedSizeListArray, Float32Array, Int32Array, LargeStringArray, ListArray,
|
||||
RecordBatch, RecordBatchIterator,
|
||||
FixedSizeListArray, Float32Array, Int32Array, LargeStringArray, ListArray, RecordBatch,
|
||||
RecordBatchIterator, record_batch,
|
||||
};
|
||||
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use lance::dataset::{WriteMode, WriteParams};
|
||||
|
||||
use crate::Error;
|
||||
use crate::arrow::{SendableRecordBatchStream, SimpleRecordBatchStream};
|
||||
use crate::connect;
|
||||
use crate::data::scannable::Scannable;
|
||||
@@ -220,9 +221,8 @@ mod tests {
|
||||
use crate::query::{ExecutableQuery, QueryBase, Select};
|
||||
use crate::table::add_data::NaNVectorBehavior;
|
||||
use crate::table::{ColumnDefinition, ColumnKind, Table, TableDefinition, WriteOptions};
|
||||
use crate::test_utils::embeddings::MockEmbed;
|
||||
use crate::test_utils::TestCustomError;
|
||||
use crate::Error;
|
||||
use crate::test_utils::embeddings::MockEmbed;
|
||||
|
||||
use super::AddDataMode;
|
||||
|
||||
|
||||
@@ -17,17 +17,17 @@ use async_trait::async_trait;
|
||||
use datafusion_catalog::{Session, TableProvider};
|
||||
use datafusion_common::{DataFusionError, Result as DataFusionResult, Statistics};
|
||||
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
|
||||
use datafusion_expr::{dml::InsertOp, Expr, TableProviderFilterPushDown, TableType};
|
||||
use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType, dml::InsertOp};
|
||||
use datafusion_physical_plan::{
|
||||
stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
|
||||
DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, stream::RecordBatchStreamAdapter,
|
||||
};
|
||||
use futures::{TryFutureExt, TryStreamExt};
|
||||
use lance::dataset::{WriteMode, WriteParams};
|
||||
|
||||
use super::{AnyQuery, BaseTable};
|
||||
use crate::{
|
||||
query::{QueryExecutionOptions, QueryFilter, QueryRequest, Select},
|
||||
Result,
|
||||
query::{QueryExecutionOptions, QueryFilter, QueryRequest, Select},
|
||||
};
|
||||
use arrow_schema::{DataType, Field};
|
||||
use lance_index::scalar::FullTextSearchQuery;
|
||||
@@ -268,7 +268,7 @@ impl TableProvider for BaseTableAdapter {
|
||||
InsertOp::Replace => {
|
||||
return Err(DataFusionError::NotImplemented(
|
||||
"Replace mode is not supported for LanceDB tables".to_string(),
|
||||
))
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -300,13 +300,13 @@ pub mod tests {
|
||||
use datafusion_catalog::TableProvider;
|
||||
use datafusion_common::stats::Precision;
|
||||
use datafusion_execution::SendableRecordBatchStream;
|
||||
use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
|
||||
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, col, lit};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::{
|
||||
connect,
|
||||
index::{scalar::BTreeIndexBuilder, Index},
|
||||
index::{Index, scalar::BTreeIndexBuilder},
|
||||
table::datafusion::BaseTableAdapter,
|
||||
};
|
||||
|
||||
|
||||
@@ -5,10 +5,10 @@ use std::sync::Arc;
|
||||
|
||||
use arrow_schema::{DataType, Field, FieldRef, Fields, Schema};
|
||||
use datafusion::functions::core::{get_field, named_struct};
|
||||
use datafusion_common::config::ConfigOptions;
|
||||
use datafusion_common::ScalarValue;
|
||||
use datafusion_physical_expr::expressions::{cast, Literal};
|
||||
use datafusion_common::config::ConfigOptions;
|
||||
use datafusion_physical_expr::ScalarFunctionExpr;
|
||||
use datafusion_physical_expr::expressions::{Literal, cast};
|
||||
use datafusion_physical_plan::expressions::Column;
|
||||
use datafusion_physical_plan::projection::ProjectionExec;
|
||||
use datafusion_physical_plan::{ExecutionPlan, PhysicalExpr};
|
||||
|
||||
@@ -16,9 +16,9 @@ use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use datafusion_physical_plan::{
|
||||
DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties,
|
||||
};
|
||||
use lance::Dataset;
|
||||
use lance::dataset::transaction::{Operation, Transaction};
|
||||
use lance::dataset::{CommitBuilder, InsertBuilder, WriteParams};
|
||||
use lance::Dataset;
|
||||
use lance_table::format::Fragment;
|
||||
|
||||
use crate::table::dataset::DatasetConsistencyWrapper;
|
||||
@@ -195,13 +195,13 @@ impl ExecutionPlan for InsertExec {
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(transactions) = to_commit {
|
||||
if let Some(merged_txn) = merge_transactions(transactions) {
|
||||
let new_dataset = CommitBuilder::new(dataset.clone())
|
||||
.execute(merged_txn)
|
||||
.await?;
|
||||
ds_wrapper.update(new_dataset);
|
||||
}
|
||||
if let Some(transactions) = to_commit
|
||||
&& let Some(merged_txn) = merge_transactions(transactions)
|
||||
{
|
||||
let new_dataset = CommitBuilder::new(dataset.clone())
|
||||
.execute(merged_txn)
|
||||
.await?;
|
||||
ds_wrapper.update(new_dataset);
|
||||
}
|
||||
|
||||
Ok(RecordBatch::try_new(
|
||||
@@ -222,7 +222,7 @@ mod tests {
|
||||
use std::vec;
|
||||
|
||||
use super::*;
|
||||
use arrow_array::{record_batch, RecordBatchIterator};
|
||||
use arrow_array::{RecordBatchIterator, record_batch};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion_catalog::MemTable;
|
||||
use tempfile::tempdir;
|
||||
|
||||
@@ -4,11 +4,11 @@
|
||||
use core::fmt;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use datafusion_common::{stats::Precision, DataFusionError, Result as DFResult, Statistics};
|
||||
use datafusion_common::{DataFusionError, Result as DFResult, Statistics, stats::Precision};
|
||||
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
|
||||
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
|
||||
use datafusion_physical_plan::{
|
||||
execution_plan::EmissionType, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
|
||||
DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, execution_plan::EmissionType,
|
||||
};
|
||||
|
||||
use crate::{arrow::SendableRecordBatchStreamExt, data::scannable::Scannable};
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::sync::Arc;
|
||||
|
||||
use datafusion::catalog::TableFunctionImpl;
|
||||
use datafusion_catalog::TableProvider;
|
||||
use datafusion_common::{plan_err, DataFusionError, Result as DataFusionResult, ScalarValue};
|
||||
use datafusion_common::{DataFusionError, Result as DataFusionResult, ScalarValue, plan_err};
|
||||
use datafusion_expr::Expr;
|
||||
use lance_index::scalar::FullTextSearchQuery;
|
||||
|
||||
@@ -93,9 +93,9 @@ pub fn from_json(json: &str) -> crate::Result<lance_index::scalar::inverted::que
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
index::{scalar::FtsIndexBuilder, Index},
|
||||
table::datafusion::BaseTableAdapter,
|
||||
Connection, Table,
|
||||
index::{Index, scalar::FtsIndexBuilder},
|
||||
table::datafusion::BaseTableAdapter,
|
||||
};
|
||||
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema as ArrowSchema};
|
||||
@@ -212,10 +212,10 @@ mod tests {
|
||||
let explain_results = explain_df.collect().await.unwrap();
|
||||
for batch in &explain_results {
|
||||
for row_idx in 0..batch.num_rows() {
|
||||
if let Some(col) = batch.column_by_name("plan") {
|
||||
if let Some(plan_str) = col.as_any().downcast_ref::<StringArray>() {
|
||||
println!("{}", plan_str.value(row_idx));
|
||||
}
|
||||
if let Some(col) = batch.column_by_name("plan")
|
||||
&& let Some(plan_str) = col.as_any().downcast_ref::<StringArray>()
|
||||
{
|
||||
println!("{}", plan_str.value(row_idx));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -229,10 +229,10 @@ mod tests {
|
||||
let explain_analyze_results = explain_analyze_df.collect().await.unwrap();
|
||||
for batch in &explain_analyze_results {
|
||||
for row_idx in 0..batch.num_rows() {
|
||||
if let Some(col) = batch.column_by_name("plan") {
|
||||
if let Some(plan_str) = col.as_any().downcast_ref::<StringArray>() {
|
||||
println!("{}", plan_str.value(row_idx));
|
||||
}
|
||||
if let Some(col) = batch.column_by_name("plan")
|
||||
&& let Some(plan_str) = col.as_any().downcast_ref::<StringArray>()
|
||||
{
|
||||
println!("{}", plan_str.value(row_idx));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@ use std::{
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use lance::{dataset::refs, Dataset};
|
||||
use lance::{Dataset, dataset::refs};
|
||||
|
||||
use crate::{error::Result, utils::background_cache::BackgroundCache, Error};
|
||||
use crate::{Error, error::Result, utils::background_cache::BackgroundCache};
|
||||
|
||||
/// A wrapper around a [Dataset] that provides consistency checks.
|
||||
///
|
||||
|
||||
@@ -36,7 +36,7 @@ pub(crate) async fn execute_delete(table: &NativeTable, predicate: &str) -> Resu
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::connect;
|
||||
use arrow_array::{record_batch, Int32Array, RecordBatch};
|
||||
use arrow_array::{Int32Array, RecordBatch, record_batch};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use std::sync::Arc;
|
||||
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use lance::dataset::cleanup::RemovalStats;
|
||||
use lance::dataset::optimize::{compact_files, CompactionMetrics, IndexRemapperOptions};
|
||||
use lance_index::optimize::OptimizeOptions;
|
||||
use lance::dataset::optimize::{CompactionMetrics, IndexRemapperOptions, compact_files};
|
||||
use lance_index::DatasetIndexExt;
|
||||
use lance_index::optimize::OptimizeOptions;
|
||||
use log::info;
|
||||
|
||||
pub use chrono::Duration;
|
||||
@@ -213,7 +213,7 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::connect;
|
||||
use crate::index::{scalar::BTreeIndexBuilder, Index};
|
||||
use crate::index::{Index, scalar::BTreeIndexBuilder};
|
||||
use crate::query::ExecutableQuery;
|
||||
use crate::table::{CompactionOptions, OptimizeAction, OptimizeStats};
|
||||
use futures::TryStreamExt;
|
||||
|
||||
@@ -7,26 +7,26 @@ use super::NativeTable;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::expr::expr_to_sql_string;
|
||||
use crate::query::{
|
||||
QueryExecutionOptions, QueryFilter, QueryRequest, Select, VectorQueryRequest, DEFAULT_TOP_K,
|
||||
DEFAULT_TOP_K, QueryExecutionOptions, QueryFilter, QueryRequest, Select, VectorQueryRequest,
|
||||
};
|
||||
use crate::utils::{default_vector_column, TimeoutStream};
|
||||
use crate::utils::{TimeoutStream, default_vector_column};
|
||||
use arrow::array::{AsArray, FixedSizeListBuilder, Float32Builder};
|
||||
use arrow::datatypes::{Float32Type, UInt8Type};
|
||||
use arrow_array::Array;
|
||||
use arrow_schema::{DataType, Schema};
|
||||
use datafusion_physical_plan::ExecutionPlan;
|
||||
use datafusion_physical_plan::projection::ProjectionExec;
|
||||
use datafusion_physical_plan::repartition::RepartitionExec;
|
||||
use datafusion_physical_plan::union::UnionExec;
|
||||
use datafusion_physical_plan::ExecutionPlan;
|
||||
use futures::future::try_join_all;
|
||||
use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||
use lance::dataset::scanner::Scanner;
|
||||
use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
|
||||
use lance_namespace::LanceNamespace;
|
||||
use lance_namespace::models::{
|
||||
QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns,
|
||||
QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery,
|
||||
};
|
||||
use lance_namespace::LanceNamespace;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum AnyQuery {
|
||||
|
||||
@@ -92,7 +92,7 @@ pub(crate) async fn execute_drop_columns(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow_array::{record_batch, Int32Array, StringArray};
|
||||
use arrow_array::{Int32Array, StringArray, record_batch};
|
||||
use arrow_schema::DataType;
|
||||
use futures::TryStreamExt;
|
||||
use lance::dataset::ColumnAlteration;
|
||||
|
||||
@@ -115,9 +115,9 @@ mod tests {
|
||||
use crate::query::QueryBase;
|
||||
use crate::query::{ExecutableQuery, Select};
|
||||
use arrow_array::{
|
||||
record_batch, Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array,
|
||||
Float64Array, Int32Array, Int64Array, LargeStringArray, RecordBatch, StringArray,
|
||||
TimestampMillisecondArray, TimestampNanosecondArray, UInt32Array,
|
||||
Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array, Float64Array,
|
||||
Int32Array, Int64Array, LargeStringArray, RecordBatch, StringArray,
|
||||
TimestampMillisecondArray, TimestampNanosecondArray, UInt32Array, record_batch,
|
||||
};
|
||||
use arrow_data::ArrayDataBuilder;
|
||||
use arrow_schema::{ArrowError, DataType, Field, Schema, TimeUnit};
|
||||
|
||||
@@ -10,9 +10,9 @@ use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::{Child, ChildStdout, Command};
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::{connect, Connection};
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use tempfile::{tempdir, TempDir};
|
||||
use crate::{Connection, connect};
|
||||
use anyhow::{Result, anyhow, bail};
|
||||
use tempfile::{TempDir, tempdir};
|
||||
|
||||
pub struct TestConnection {
|
||||
pub uri: String,
|
||||
@@ -113,8 +113,13 @@ async fn new_remote_connection(script_path: &str) -> Result<TestConnection> {
|
||||
let (port_sender, mut port_receiver) = mpsc::channel(5);
|
||||
let _reader = spawn_stdout_reader(stdout, port_sender).await;
|
||||
let port = match port_receiver.recv().await {
|
||||
None => bail!("Unable to determine the port number used by the phalanx process we spawned, because the reader thread was closed too soon."),
|
||||
Some(Err(err)) => bail!("Unable to determine the port number used by the phalanx process we spawned, because of an error, {}", err),
|
||||
None => bail!(
|
||||
"Unable to determine the port number used by the phalanx process we spawned, because the reader thread was closed too soon."
|
||||
),
|
||||
Some(Err(err)) => bail!(
|
||||
"Unable to determine the port number used by the phalanx process we spawned, because of an error, {}",
|
||||
err
|
||||
),
|
||||
Some(Ok(port)) => port,
|
||||
};
|
||||
let uri = "db://test";
|
||||
|
||||
@@ -6,8 +6,9 @@ use futures::TryStreamExt;
|
||||
use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount};
|
||||
|
||||
use crate::{
|
||||
Error, Table,
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
connect, Error, Table,
|
||||
connect,
|
||||
};
|
||||
|
||||
#[async_trait::async_trait]
|
||||
|
||||
@@ -6,8 +6,8 @@ use std::{borrow::Cow, sync::Arc};
|
||||
use arrow_array::{Array, FixedSizeListArray, Float32Array};
|
||||
use arrow_schema::{DataType, Field};
|
||||
|
||||
use crate::embeddings::EmbeddingFunction;
|
||||
use crate::Result;
|
||||
use crate::embeddings::EmbeddingFunction;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MockEmbed {
|
||||
|
||||
@@ -9,8 +9,8 @@ use std::future::Future;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::future::{BoxFuture, Shared};
|
||||
use futures::FutureExt;
|
||||
use futures::future::{BoxFuture, Shared};
|
||||
|
||||
type SharedFut<V, E> = Shared<BoxFuture<'static, Result<V, Arc<E>>>>;
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ impl PatchStoreParam for Option<ObjectStoreParams> {
|
||||
|
||||
pub trait PatchWriteParam {
|
||||
fn patch_with_store_wrapper(self, wrapper: Arc<dyn WrappingObjectStore>)
|
||||
-> Result<WriteParams>;
|
||||
-> Result<WriteParams>;
|
||||
}
|
||||
|
||||
impl PatchWriteParam for WriteParams {
|
||||
@@ -340,7 +340,7 @@ mod tests {
|
||||
use arrow_array::Int32Array;
|
||||
use arrow_schema::Field;
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use futures::{stream, StreamExt};
|
||||
use futures::{StreamExt, stream};
|
||||
use tokio::time::sleep;
|
||||
|
||||
use super::*;
|
||||
@@ -351,10 +351,12 @@ mod tests {
|
||||
Field::new("id", DataType::Int16, true),
|
||||
Field::new("tag", DataType::Utf8, false),
|
||||
]);
|
||||
assert!(default_vector_column(&schema_no_vector, None)
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("No vector column"));
|
||||
assert!(
|
||||
default_vector_column(&schema_no_vector, None)
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("No vector column")
|
||||
);
|
||||
|
||||
let schema_with_vec_col = Schema::new(vec![
|
||||
Field::new("id", DataType::Int16, true),
|
||||
@@ -382,10 +384,12 @@ mod tests {
|
||||
false,
|
||||
),
|
||||
]);
|
||||
assert!(default_vector_column(&multi_vec_col, None)
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("More than one"));
|
||||
assert!(
|
||||
default_vector_column(&multi_vec_col, None)
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("More than one")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -501,10 +505,12 @@ mod tests {
|
||||
// Poll the stream again and ensure it returns a timeout error
|
||||
let second_result = timeout_stream.next().await.unwrap();
|
||||
assert!(second_result.is_err());
|
||||
assert!(second_result
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("Query timeout"));
|
||||
assert!(
|
||||
second_result
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("Query timeout")
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -15,10 +15,9 @@ use arrow_array::{
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::StreamExt;
|
||||
use lancedb::{
|
||||
connect,
|
||||
Error, Result, connect,
|
||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry},
|
||||
query::ExecutableQuery,
|
||||
Error, Result,
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
@@ -262,17 +261,19 @@ fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send
|
||||
|
||||
// Create a RecordBatch stream.
|
||||
let batches = RecordBatchIterator::new(
|
||||
vec![RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(StringArray::from_iter(std::iter::repeat_n(
|
||||
Some("hello world".to_string()),
|
||||
TOTAL,
|
||||
))),
|
||||
],
|
||||
)
|
||||
.unwrap()]
|
||||
vec![
|
||||
RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(StringArray::from_iter(std::iter::repeat_n(
|
||||
Some("hello world".to_string()),
|
||||
TOTAL,
|
||||
))),
|
||||
],
|
||||
)
|
||||
.unwrap(),
|
||||
]
|
||||
.into_iter()
|
||||
.map(Ok),
|
||||
schema.clone(),
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
sync::{
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
Arc,
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
},
|
||||
time::Duration,
|
||||
};
|
||||
@@ -16,8 +16,8 @@ use arrow_array::{
|
||||
};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use lancedb::{
|
||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, MaybeEmbedded, WithEmbeddings},
|
||||
Error, Result,
|
||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, MaybeEmbedded, WithEmbeddings},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -8,7 +8,7 @@ use arrow_array::{Int32Array, RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
use aws_config::{BehaviorVersion, ConfigLoader, Region, SdkConfig};
|
||||
use aws_sdk_s3::{config::Credentials, types::ServerSideEncryption, Client as S3Client};
|
||||
use aws_sdk_s3::{Client as S3Client, config::Credentials, types::ServerSideEncryption};
|
||||
use lancedb::Result;
|
||||
|
||||
const CONFIG: &[(&str, &str)] = &[
|
||||
|
||||
Reference in New Issue
Block a user