mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-17 11:10:40 +00:00
Compare commits
1 Commits
yang/appro
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b2e0aa0588 |
85
Cargo.lock
generated
85
Cargo.lock
generated
@@ -3432,8 +3432,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"rand 0.9.4",
|
||||
@@ -4735,8 +4735,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a"
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"arrow",
|
||||
@@ -4810,8 +4810,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4832,7 +4832,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "lance-arrow-scalar"
|
||||
version = "58.0.0"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4846,7 +4846,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "lance-arrow-stats"
|
||||
version = "58.0.0"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -4855,8 +4855,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-bitpacking"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"paste",
|
||||
@@ -4865,8 +4865,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4904,8 +4904,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4935,8 +4935,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datagen"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4953,8 +4953,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-derive"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -4963,8 +4963,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4999,8 +4999,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -5030,8 +5030,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"arrow",
|
||||
@@ -5083,6 +5083,7 @@ dependencies = [
|
||||
"rand_distr 0.5.1",
|
||||
"rangemap",
|
||||
"rayon",
|
||||
"regex-syntax",
|
||||
"roaring",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -5095,8 +5096,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -5137,8 +5138,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -5153,8 +5154,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -5166,8 +5167,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-impls"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
@@ -5207,9 +5208,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-reqwest-client"
|
||||
version = "0.8.5"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0d287494559c22838ce34e51ea0fa29dc780d5be8283de5ab33e9395623000c8"
|
||||
checksum = "ba3f0a235e3ed5f8805205649ccc7d7d0f3df23ce1294242c9265ad488d7f19d"
|
||||
dependencies = [
|
||||
"reqwest 0.12.28",
|
||||
"serde",
|
||||
@@ -5221,8 +5222,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-select"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -5237,8 +5238,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -5277,8 +5278,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -5291,8 +5292,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-tokenizer"
|
||||
version = "8.0.0-beta.14"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.14#c188de59fcf0976a0a9fef53ae67ae7ae8bcb61a"
|
||||
version = "8.0.0-beta.16"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.16#6e734df607f2841fe3bba82f05a90f3174933bab"
|
||||
dependencies = [
|
||||
"icu_segmenter",
|
||||
"jieba-rs",
|
||||
|
||||
28
Cargo.toml
28
Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=8.0.0-beta.14", default-features = false, "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=8.0.0-beta.14", default-features = false, "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=8.0.0-beta.14", default-features = false, "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=8.0.0-beta.14", "tag" = "v8.0.0-beta.14", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=8.0.0-beta.16", default-features = false, "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=8.0.0-beta.16", default-features = false, "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=8.0.0-beta.16", default-features = false, "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=8.0.0-beta.16", "tag" = "v8.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "58.0.0", optional = false }
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>8.0.0-beta.14</lance-core.version>
|
||||
<lance-core.version>8.0.0-beta.16</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -184,13 +184,12 @@ pub mod table;
|
||||
pub mod test_utils;
|
||||
pub mod utils;
|
||||
|
||||
use std::{fmt::Display, str::FromStr};
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use connection::{ConnectNamespaceBuilder, Connection};
|
||||
pub use error::{Error, Result};
|
||||
use lance_index::vector::ApproxMode as LanceApproxMode;
|
||||
use lance_linalg::distance::DistanceType as LanceDistanceType;
|
||||
pub use table::Table;
|
||||
|
||||
@@ -259,79 +258,6 @@ impl Display for DistanceType {
|
||||
}
|
||||
}
|
||||
|
||||
/// Controls the speed / accuracy tradeoff for approximate vector search.
|
||||
///
|
||||
/// This currently only affects RQ-quantized vector indexes, such as IVF_RQ.
|
||||
/// Other index types ignore this setting.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
#[non_exhaustive]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ApproxMode {
|
||||
/// Prefer lower query latency, which can reduce recall.
|
||||
Fast,
|
||||
/// Use the default balance between query latency and recall.
|
||||
#[default]
|
||||
Normal,
|
||||
/// Prefer higher recall, which can increase query latency.
|
||||
Accurate,
|
||||
}
|
||||
|
||||
impl From<ApproxMode> for LanceApproxMode {
|
||||
fn from(value: ApproxMode) -> Self {
|
||||
match value {
|
||||
ApproxMode::Fast => Self::Fast,
|
||||
ApproxMode::Normal => Self::Normal,
|
||||
ApproxMode::Accurate => Self::Accurate,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LanceApproxMode> for ApproxMode {
|
||||
fn from(value: LanceApproxMode) -> Self {
|
||||
match value {
|
||||
LanceApproxMode::Fast => Self::Fast,
|
||||
LanceApproxMode::Normal => Self::Normal,
|
||||
LanceApproxMode::Accurate => Self::Accurate,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for ApproxMode {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &str) -> std::prelude::v1::Result<Self, Self::Error> {
|
||||
Self::from_str(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for ApproxMode {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(value: &str) -> std::prelude::v1::Result<Self, Self::Err> {
|
||||
match value.to_ascii_lowercase().as_str() {
|
||||
"fast" => Ok(Self::Fast),
|
||||
"normal" => Ok(Self::Normal),
|
||||
"accurate" => Ok(Self::Accurate),
|
||||
_ => Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"approx_mode must be one of 'fast', 'normal', or 'accurate', got '{}'",
|
||||
value
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ApproxMode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Fast => write!(f, "fast"),
|
||||
Self::Normal => write!(f, "normal"),
|
||||
Self::Accurate => write!(f, "accurate"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Connect to a database
|
||||
pub use connection::connect;
|
||||
/// Connect to a namespace-backed database
|
||||
|
||||
@@ -20,12 +20,12 @@ use lance_index::scalar::FullTextSearchQuery;
|
||||
use lance_index::scalar::inverted::SCORE_COL;
|
||||
use lance_index::vector::DIST_COL;
|
||||
|
||||
use crate::DistanceType;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::rerankers::rrf::RRFReranker;
|
||||
use crate::rerankers::{NormalizeMethod, Reranker, check_reranker_result};
|
||||
use crate::table::BaseTable;
|
||||
use crate::utils::{MaxBatchLengthStream, TimeoutStream};
|
||||
use crate::{ApproxMode, DistanceType};
|
||||
use crate::{
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
table::AnyQuery,
|
||||
@@ -935,8 +935,6 @@ pub struct VectorQueryRequest {
|
||||
pub refine_factor: Option<u32>,
|
||||
/// The distance type to use for the search
|
||||
pub distance_type: Option<DistanceType>,
|
||||
/// The speed / accuracy tradeoff to use for approximate vector search
|
||||
pub approx_mode: Option<ApproxMode>,
|
||||
/// Default is true. Set to false to enforce a brute force search.
|
||||
pub use_index: bool,
|
||||
}
|
||||
@@ -954,7 +952,6 @@ impl Default for VectorQueryRequest {
|
||||
ef: None,
|
||||
refine_factor: None,
|
||||
distance_type: None,
|
||||
approx_mode: None,
|
||||
use_index: true,
|
||||
}
|
||||
}
|
||||
@@ -1195,15 +1192,6 @@ impl VectorQuery {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the speed / accuracy tradeoff for approximate vector search.
|
||||
///
|
||||
/// This setting is currently only used by RQ-quantized indexes, such as
|
||||
/// IVF_RQ. Other index types ignore this setting.
|
||||
pub fn approx_mode(mut self, approx_mode: ApproxMode) -> Self {
|
||||
self.request.approx_mode = Some(approx_mode);
|
||||
self
|
||||
}
|
||||
|
||||
/// If this is called then any vector index is skipped
|
||||
///
|
||||
/// An exhaustive (flat) search will be performed. The query vector will
|
||||
@@ -1558,7 +1546,6 @@ mod tests {
|
||||
.nprobes(1000)
|
||||
.postfilter()
|
||||
.distance_type(DistanceType::Cosine)
|
||||
.approx_mode(ApproxMode::Accurate)
|
||||
.refine_factor(999);
|
||||
|
||||
assert_eq!(
|
||||
@@ -1577,49 +1564,9 @@ mod tests {
|
||||
assert_eq!(query.request.maximum_nprobes, Some(1000));
|
||||
assert!(query.request.use_index);
|
||||
assert_eq!(query.request.distance_type, Some(DistanceType::Cosine));
|
||||
assert_eq!(query.request.approx_mode, Some(ApproxMode::Accurate));
|
||||
assert_eq!(query.request.refine_factor, Some(999));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_approx_mode_serde_parse_default_and_display() {
|
||||
assert_eq!(ApproxMode::default(), ApproxMode::Normal);
|
||||
assert_eq!(
|
||||
serde_json::to_string(&ApproxMode::Fast).unwrap(),
|
||||
"\"fast\""
|
||||
);
|
||||
assert_eq!(
|
||||
serde_json::from_str::<ApproxMode>("\"accurate\"").unwrap(),
|
||||
ApproxMode::Accurate
|
||||
);
|
||||
assert_eq!("normal".parse::<ApproxMode>().unwrap(), ApproxMode::Normal);
|
||||
assert_eq!(ApproxMode::try_from("FAST").unwrap(), ApproxMode::Fast);
|
||||
assert_eq!(ApproxMode::Accurate.to_string(), "accurate");
|
||||
assert!(ApproxMode::try_from("invalid").is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_vector_query_approx_mode_builder() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let dataset_path = tmp_dir.path().join("test.lance");
|
||||
let uri = dataset_path.to_str().unwrap();
|
||||
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", make_test_batches())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let query = table
|
||||
.query()
|
||||
.nearest_to(&[0.1, 0.2])
|
||||
.unwrap()
|
||||
.approx_mode(ApproxMode::Fast);
|
||||
|
||||
assert_eq!(query.request.approx_mode, Some(ApproxMode::Fast));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute() {
|
||||
// TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051
|
||||
|
||||
@@ -706,9 +706,6 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
if let Some(distance_type) = query.distance_type {
|
||||
body["distance_type"] = serde_json::json!(distance_type);
|
||||
}
|
||||
if let Some(approx_mode) = query.approx_mode {
|
||||
body["approx_mode"] = serde_json::json!(approx_mode);
|
||||
}
|
||||
// In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
|
||||
// Old client / new server: since minimum_nprobes is missing, fallback to nprobes
|
||||
// New client / old server: old server will only see nprobes, make sure to set both
|
||||
@@ -3613,61 +3610,6 @@ mod tests {
|
||||
assert_eq!(data[0].as_ref().unwrap(), &expected_data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_vector_approx_mode_sent_when_set() {
|
||||
let expected_data = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let expected_data_ref = expected_data.clone();
|
||||
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/query/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let mut expected_body = serde_json::json!({
|
||||
"prefilter": true,
|
||||
"nprobes": 20,
|
||||
"minimum_nprobes": 20,
|
||||
"maximum_nprobes": 20,
|
||||
"approx_mode": "accurate",
|
||||
"lower_bound": Option::<f32>::None,
|
||||
"upper_bound": Option::<f32>::None,
|
||||
"k": 10,
|
||||
"ef": Option::<usize>::None,
|
||||
"refine_factor": null,
|
||||
"version": null,
|
||||
});
|
||||
expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
|
||||
assert_eq!(body, expected_body);
|
||||
|
||||
let response_body = write_ipc_file(&expected_data_ref);
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
|
||||
.body(response_body)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
let data = table
|
||||
.query()
|
||||
.nearest_to(vec![0.1, 0.2, 0.3])
|
||||
.unwrap()
|
||||
.approx_mode(crate::ApproxMode::Accurate)
|
||||
.execute()
|
||||
.await;
|
||||
let data = data.unwrap().collect::<Vec<_>>().await;
|
||||
assert_eq!(data.len(), 1);
|
||||
assert_eq!(data[0].as_ref().unwrap(), &expected_data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_fts_default_values() {
|
||||
let expected_data = RecordBatch::try_new(
|
||||
|
||||
@@ -44,35 +44,17 @@ pub async fn execute_query(
|
||||
// QueryTable pushdown runs the query server-side, but only on the main
|
||||
// branch: the namespace request carries no branch yet, so a branch handle
|
||||
// must fall through to local execution.
|
||||
if can_execute_namespace_query(table, query)
|
||||
if table
|
||||
.pushdown_operations
|
||||
.contains(&NamespaceClientPushdownOperation::QueryTable)
|
||||
&& let Some(ref namespace_client) = table.namespace_client
|
||||
&& table.dataset.current_branch().is_none()
|
||||
{
|
||||
return execute_namespace_query(table, namespace_client.clone(), query, options).await;
|
||||
}
|
||||
execute_generic_query(table, query, options).await
|
||||
}
|
||||
|
||||
fn can_execute_namespace_query(table: &NativeTable, query: &AnyQuery) -> bool {
|
||||
table
|
||||
.pushdown_operations
|
||||
.contains(&NamespaceClientPushdownOperation::QueryTable)
|
||||
&& table.namespace_client.is_some()
|
||||
&& table.dataset.current_branch().is_none()
|
||||
&& !requires_local_namespace_execution(query)
|
||||
}
|
||||
|
||||
fn requires_local_namespace_execution(query: &AnyQuery) -> bool {
|
||||
// The namespace QueryTable request has no approx_mode field yet, so
|
||||
// pushing this query down would silently ignore the user's setting.
|
||||
matches!(
|
||||
query,
|
||||
AnyQuery::VectorQuery(VectorQueryRequest {
|
||||
approx_mode: Some(_),
|
||||
..
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn analyze_query_plan(
|
||||
table: &NativeTable,
|
||||
query: &AnyQuery,
|
||||
@@ -185,10 +167,6 @@ pub async fn create_plan(
|
||||
scanner.nearest(&column, query_vector.as_ref(), top_k)?;
|
||||
}
|
||||
|
||||
if let Some(approx_mode) = query.approx_mode {
|
||||
scanner.approx_mode(approx_mode.into());
|
||||
}
|
||||
|
||||
scanner.minimum_nprobes(query.minimum_nprobes);
|
||||
if let Some(maximum_nprobes) = query.maximum_nprobes {
|
||||
scanner.maximum_nprobes(maximum_nprobes);
|
||||
@@ -609,20 +587,12 @@ async fn parse_arrow_ipc_response(bytes: bytes::Bytes) -> Result<DatasetRecordBa
|
||||
#[cfg(test)]
|
||||
#[allow(deprecated)]
|
||||
mod tests {
|
||||
use arrow_array::{ArrayRef, FixedSizeListArray, Float32Array};
|
||||
use arrow_array::Float32Array;
|
||||
use futures::TryStreamExt;
|
||||
use lance_arrow::FixedSizeListArrayExt;
|
||||
use std::sync::{
|
||||
Arc,
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::query::{QueryExecutionOptions, QueryRequest};
|
||||
|
||||
fn fixed_size_list_array(values: Vec<f32>, dimension: i32) -> FixedSizeListArray {
|
||||
FixedSizeListArray::try_new_from_values(Float32Array::from(values), dimension).unwrap()
|
||||
}
|
||||
use crate::query::QueryExecutionOptions;
|
||||
|
||||
#[test]
|
||||
fn test_convert_to_namespace_query_vector() {
|
||||
@@ -745,80 +715,6 @@ mod tests {
|
||||
assert_eq!(count, 2); // 4 and 5
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct CountingNamespaceClient {
|
||||
query_table_calls: AtomicUsize,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl LanceNamespace for CountingNamespaceClient {
|
||||
fn namespace_id(&self) -> String {
|
||||
"counting".to_string()
|
||||
}
|
||||
|
||||
async fn query_table(&self, _request: NsQueryTableRequest) -> lance::Result<bytes::Bytes> {
|
||||
self.query_table_calls.fetch_add(1, Ordering::SeqCst);
|
||||
panic!("approx_mode queries must not be pushed down to namespace query_table");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_query_approx_mode_with_namespace_pushdown_runs_locally() {
|
||||
use crate::connect;
|
||||
use crate::table::query::execute_query;
|
||||
use arrow_array::{Int32Array, RecordBatch};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
let conn = connect("memory://").execute().await.unwrap();
|
||||
|
||||
let vectors = Arc::new(fixed_size_list_array(
|
||||
vec![0.0, 0.0, 10.0, 10.0, 20.0, 20.0],
|
||||
2,
|
||||
));
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("vector", vectors.data_type().clone(), false),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema,
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3])), vectors],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = conn
|
||||
.create_table("test_approx_mode_namespace_fallback", batch)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let namespace_client = Arc::new(CountingNamespaceClient::default());
|
||||
let mut native_table = table.as_native().unwrap().clone();
|
||||
native_table.namespace_client = Some(namespace_client.clone());
|
||||
native_table
|
||||
.pushdown_operations
|
||||
.insert(NamespaceClientPushdownOperation::QueryTable);
|
||||
|
||||
let query_vector = Arc::new(Float32Array::from(vec![0.0, 0.0]));
|
||||
let query = AnyQuery::VectorQuery(VectorQueryRequest {
|
||||
base: QueryRequest {
|
||||
limit: Some(1),
|
||||
..Default::default()
|
||||
},
|
||||
column: Some("vector".to_string()),
|
||||
query_vector: vec![query_vector as ArrayRef],
|
||||
approx_mode: Some(crate::ApproxMode::Accurate),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let stream = execute_query(&native_table, &query, QueryExecutionOptions::default())
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = stream.try_collect::<Vec<_>>().await.unwrap();
|
||||
let count: usize = batches.iter().map(|b| b.num_rows()).sum();
|
||||
|
||||
assert_eq!(count, 1);
|
||||
assert_eq!(namespace_client.query_table_calls.load(Ordering::SeqCst), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_plan_multivector_structure() {
|
||||
use arrow_array::{Float32Array, RecordBatch};
|
||||
@@ -883,97 +779,4 @@ mod tests {
|
||||
"Plan should add query_index column"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_plan_applies_approx_mode_to_ann_query() {
|
||||
use arrow_array::RecordBatch;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use datafusion_physical_plan::ExecutionPlan;
|
||||
use lance::io::exec::{ANNIvfPartitionExec, ANNIvfSubIndexExec};
|
||||
use lance_index::vector::ApproxMode;
|
||||
|
||||
use crate::connect;
|
||||
use crate::index::{Index, vector::IvfRqIndexBuilder};
|
||||
use crate::table::query::create_plan;
|
||||
|
||||
fn find_ann_approx_mode(plan: &dyn ExecutionPlan) -> Option<ApproxMode> {
|
||||
if let Some(ann) = plan.as_any().downcast_ref::<ANNIvfSubIndexExec>() {
|
||||
return Some(ann.query().approx_mode);
|
||||
}
|
||||
if let Some(ann) = plan.as_any().downcast_ref::<ANNIvfPartitionExec>() {
|
||||
return Some(ann.query.approx_mode);
|
||||
}
|
||||
plan.children()
|
||||
.into_iter()
|
||||
.find_map(|child| find_ann_approx_mode(child.as_ref()))
|
||||
}
|
||||
|
||||
let conn = connect("memory://").execute().await.unwrap();
|
||||
let dimension = 8;
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new(
|
||||
"vector",
|
||||
DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||
dimension,
|
||||
),
|
||||
false,
|
||||
),
|
||||
]));
|
||||
|
||||
let vectors = Arc::new(fixed_size_list_array(
|
||||
(0..512 * dimension)
|
||||
.map(|value| value as f32 / dimension as f32)
|
||||
.collect(),
|
||||
dimension,
|
||||
));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema,
|
||||
vec![
|
||||
Arc::new(arrow_array::Int32Array::from_iter_values(0..512)),
|
||||
vectors,
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let table = conn
|
||||
.create_table("test_approx_mode_plan", vec![batch])
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
table
|
||||
.create_index(
|
||||
&["vector"],
|
||||
Index::IvfRq(
|
||||
IvfRqIndexBuilder::default()
|
||||
.num_partitions(1)
|
||||
.sample_rate(1)
|
||||
.max_iterations(1)
|
||||
.num_bits(1),
|
||||
),
|
||||
)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let native_table = table.as_native().unwrap();
|
||||
let query_vector = Arc::new(Float32Array::from(vec![0.0; dimension as usize]));
|
||||
let query = AnyQuery::VectorQuery(VectorQueryRequest {
|
||||
column: Some("vector".to_string()),
|
||||
query_vector: vec![query_vector as ArrayRef],
|
||||
base: QueryRequest {
|
||||
limit: Some(1),
|
||||
..Default::default()
|
||||
},
|
||||
approx_mode: Some(crate::ApproxMode::Accurate),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let plan = create_plan(native_table, &query, QueryExecutionOptions::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
find_ann_approx_mode(plan.as_ref()),
|
||||
Some(ApproxMode::Accurate)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user