mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-23 06:50:40 +00:00
Compare commits
9 Commits
v0.30.0-be
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b6310ed905 | ||
|
|
ccec91d957 | ||
|
|
ec82e36317 | ||
|
|
da2a1c4a2c | ||
|
|
8463a10ebe | ||
|
|
7168d64af1 | ||
|
|
403c33dff0 | ||
|
|
a0001043b6 | ||
|
|
1bb7acb74f |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.30.0-beta.0"
|
||||
current_version = "0.30.0-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
11
AGENTS.md
11
AGENTS.md
@@ -37,10 +37,13 @@ Before committing changes, run formatting for every language you touched. At min
|
||||
and run targeted tests through `cd python && uv run ...`.
|
||||
* TypeScript changes: run the relevant `npm`/`pnpm` lint, format, build, and docs commands in `nodejs`.
|
||||
|
||||
Before creating a PR, make sure the PR title follows Conventional Commits, such as
|
||||
`fix: support nested field paths in native index creation` or
|
||||
`feat(python): add dataset multiprocessing support`. The semantic-release check uses the
|
||||
PR title and body as the merge commit message, so a non-conventional PR title will fail CI.
|
||||
Before creating a PR, the exact value passed to `gh pr create --title` must follow
|
||||
Conventional Commits, such as `fix: support nested field paths in native index creation`
|
||||
or `feat(python): add dataset multiprocessing support`. Do not use a plain natural
|
||||
language summary like `Support nested field paths in native index creation` as the PR
|
||||
title. The semantic-release check uses the PR title and body as the merge commit message,
|
||||
so a non-conventional PR title will fail CI. After creating a PR, read the remote PR title
|
||||
back and fix it immediately if it is not conventional.
|
||||
|
||||
## Coding tips
|
||||
|
||||
|
||||
140
Cargo.lock
generated
140
Cargo.lock
generated
@@ -3284,8 +3284,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"rand 0.9.4",
|
||||
@@ -4506,8 +4506,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a"
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"arrow",
|
||||
@@ -4525,6 +4525,7 @@ dependencies = [
|
||||
"async_cell",
|
||||
"aws-credential-types",
|
||||
"aws-sdk-dynamodb",
|
||||
"bitpacking",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"chrono",
|
||||
@@ -4551,9 +4552,11 @@ dependencies = [
|
||||
"lance-io",
|
||||
"lance-linalg",
|
||||
"lance-namespace",
|
||||
"lance-select",
|
||||
"lance-table",
|
||||
"lance-tokenizer",
|
||||
"log",
|
||||
"moka",
|
||||
"object_store",
|
||||
"permutation",
|
||||
"pin-project",
|
||||
@@ -4577,8 +4580,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4598,8 +4601,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-bitpacking"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"paste",
|
||||
@@ -4608,8 +4611,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4644,8 +4647,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4675,8 +4678,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datagen"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4694,8 +4697,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4730,8 +4733,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4762,8 +4765,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"arrow",
|
||||
@@ -4800,6 +4803,7 @@ dependencies = [
|
||||
"lance-file",
|
||||
"lance-io",
|
||||
"lance-linalg",
|
||||
"lance-select",
|
||||
"lance-table",
|
||||
"lance-tokenizer",
|
||||
"libm",
|
||||
@@ -4827,8 +4831,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4870,8 +4874,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4887,8 +4891,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -4900,8 +4904,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-impls"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
@@ -4936,9 +4940,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-reqwest-client"
|
||||
version = "0.7.6"
|
||||
version = "0.7.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f65e31bdaa13e01dab6e7cf566da31df243c34a542f0d915d3601ec0e01e61d2"
|
||||
checksum = "6369eee4682fb11edf538388b43c61ce288b8302fe89bb40944d7daa7faaae99"
|
||||
dependencies = [
|
||||
"reqwest 0.12.28",
|
||||
"serde",
|
||||
@@ -4948,10 +4952,25 @@ dependencies = [
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lance-select"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"deepsize",
|
||||
"itertools 0.13.0",
|
||||
"lance-core",
|
||||
"roaring",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4970,6 +4989,7 @@ dependencies = [
|
||||
"lance-core",
|
||||
"lance-file",
|
||||
"lance-io",
|
||||
"lance-select",
|
||||
"log",
|
||||
"object_store",
|
||||
"prost",
|
||||
@@ -4990,8 +5010,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -5002,8 +5022,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-tokenizer"
|
||||
version = "7.0.0-beta.13"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
|
||||
version = "7.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
|
||||
dependencies = [
|
||||
"jieba-rs",
|
||||
"lindera",
|
||||
@@ -5014,7 +5034,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.29.1-beta.0"
|
||||
version = "0.30.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@@ -5084,6 +5104,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"serial_test",
|
||||
"snafu 0.8.9",
|
||||
"tempfile",
|
||||
"test-log",
|
||||
@@ -5096,7 +5117,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.29.1-beta.0"
|
||||
version = "0.30.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -5119,7 +5140,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.32.1-beta.0"
|
||||
version = "0.33.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -8128,6 +8149,15 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scc"
|
||||
version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc"
|
||||
dependencies = [
|
||||
"sdd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schannel"
|
||||
version = "0.1.29"
|
||||
@@ -8194,6 +8224,12 @@ dependencies = [
|
||||
"untrusted 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sdd"
|
||||
version = "3.0.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
|
||||
|
||||
[[package]]
|
||||
name = "sec1"
|
||||
version = "0.3.0"
|
||||
@@ -8384,6 +8420,32 @@ dependencies = [
|
||||
"unsafe-libyaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serial_test"
|
||||
version = "3.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f"
|
||||
dependencies = [
|
||||
"futures-executor",
|
||||
"futures-util",
|
||||
"log",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"scc",
|
||||
"serial_test_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serial_test_derive"
|
||||
version = "3.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.6"
|
||||
|
||||
28
Cargo.toml
28
Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=7.1.0-beta.2", default-features = false, "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.1.0-beta.2", default-features = false, "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.1.0-beta.2", default-features = false, "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "58.0.0", optional = false }
|
||||
|
||||
@@ -112,25 +112,25 @@ def fetch_remote_tags() -> List[TagInfo]:
|
||||
"api",
|
||||
"-X",
|
||||
"GET",
|
||||
f"repos/{LANCE_REPO}/git/refs/tags",
|
||||
"--paginate",
|
||||
f"repos/{LANCE_REPO}/releases",
|
||||
"--jq",
|
||||
".[].ref",
|
||||
".[].tag_name",
|
||||
"-F",
|
||||
"per_page=20",
|
||||
]
|
||||
)
|
||||
tags: List[TagInfo] = []
|
||||
for line in output.splitlines():
|
||||
ref = line.strip()
|
||||
if not ref.startswith("refs/tags/v"):
|
||||
tag = line.strip()
|
||||
if not tag.startswith("v"):
|
||||
continue
|
||||
tag = ref.split("refs/tags/")[-1]
|
||||
version = tag.lstrip("v")
|
||||
try:
|
||||
tags.append(TagInfo(tag=tag, version=version, semver=parse_semver(version)))
|
||||
except ValueError:
|
||||
continue
|
||||
if not tags:
|
||||
raise RuntimeError("No Lance tags could be parsed from GitHub API output")
|
||||
raise RuntimeError("No Lance releases could be parsed from GitHub API output")
|
||||
return tags
|
||||
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.30.0-beta.0</version>
|
||||
<version>0.30.0-beta.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.30.0-beta.0</version>
|
||||
<version>0.30.0-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.30.0-beta.0</version>
|
||||
<version>0.30.0-beta.1</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>7.0.0-beta.13</lance-core.version>
|
||||
<lance-core.version>7.1.0-beta.2</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.30.0-beta.0"
|
||||
version = "0.30.0-beta.1"
|
||||
publish = false
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.30.0-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.30.0-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.30.0-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.30.0-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.30.0-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.30.0-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.30.0-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.29.1-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.29.1-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.30.0-beta.0",
|
||||
"version": "0.30.0-beta.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.33.0-beta.0"
|
||||
current_version = "0.33.0-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.33.0-beta.0"
|
||||
version = "0.33.0-beta.1"
|
||||
publish = false
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
|
||||
@@ -147,6 +147,13 @@ def connect(
|
||||
>>> db = lancedb.connect("s3://my-bucket/lancedb",
|
||||
... storage_options={"aws_access_key_id": "***"})
|
||||
|
||||
For tests and temporary data, use an in-memory database:
|
||||
|
||||
>>> db = lancedb.connect("memory://")
|
||||
|
||||
In-memory databases are not persisted. Tables are dropped when the last
|
||||
connection or table handle referencing them is closed.
|
||||
|
||||
Connect to LanceDB cloud:
|
||||
|
||||
>>> db = lancedb.connect("db://my_database", api_key="ldb_...",
|
||||
@@ -378,6 +385,8 @@ async def connect_async(
|
||||
... db = await lancedb.connect_async("s3://my-bucket/lancedb",
|
||||
... storage_options={
|
||||
... "aws_access_key_id": "***"})
|
||||
... # For tests and temporary data, use an in-memory database
|
||||
... db = await lancedb.connect_async("memory://")
|
||||
... # Connect to LanceDB cloud
|
||||
... db = await lancedb.connect_async("db://my_database", api_key="ldb_...",
|
||||
... client_config={
|
||||
|
||||
@@ -362,6 +362,22 @@ def test_table_create_indices():
|
||||
schema=dict(
|
||||
fields=[
|
||||
dict(name="id", type={"type": "int64"}, nullable=False),
|
||||
dict(name="text", type={"type": "string"}, nullable=False),
|
||||
dict(
|
||||
name="vector",
|
||||
type={
|
||||
"type": "fixed_size_list",
|
||||
"fields": [
|
||||
dict(
|
||||
name="item",
|
||||
type={"type": "float"},
|
||||
nullable=True,
|
||||
)
|
||||
],
|
||||
"length": 2,
|
||||
},
|
||||
nullable=False,
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.30.0-beta.0"
|
||||
version = "0.30.0-beta.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -104,6 +104,7 @@ datafusion.workspace = true
|
||||
http-body = "1" # Matching reqwest
|
||||
rstest = "0.23.0"
|
||||
test-log = "0.2"
|
||||
serial_test = "3"
|
||||
|
||||
|
||||
[features]
|
||||
|
||||
@@ -888,6 +888,7 @@ pub mod test_utils {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serial_test::serial;
|
||||
use std::time::Duration;
|
||||
|
||||
#[test]
|
||||
@@ -1143,6 +1144,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(user_id_env)]
|
||||
fn test_resolve_user_id_none() {
|
||||
let config = ClientConfig::default();
|
||||
// Clear env vars that might be set from other tests
|
||||
@@ -1155,6 +1157,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(user_id_env)]
|
||||
fn test_resolve_user_id_from_env() {
|
||||
// SAFETY: This is only called in tests
|
||||
unsafe {
|
||||
@@ -1169,6 +1172,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(user_id_env)]
|
||||
fn test_resolve_user_id_from_env_key() {
|
||||
// SAFETY: This is only called in tests
|
||||
unsafe {
|
||||
@@ -1189,6 +1193,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(user_id_env)]
|
||||
fn test_resolve_user_id_direct_takes_precedence() {
|
||||
// SAFETY: This is only called in tests
|
||||
unsafe {
|
||||
@@ -1206,6 +1211,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(user_id_env)]
|
||||
fn test_resolve_user_id_empty_env_ignored() {
|
||||
// SAFETY: This is only called in tests
|
||||
unsafe {
|
||||
|
||||
@@ -1528,8 +1528,10 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
});
|
||||
}
|
||||
};
|
||||
let schema = self.schema().await?;
|
||||
let (canonical_column, field) = resolve_arrow_field_path(&schema, &column)?;
|
||||
let mut body = serde_json::json!({
|
||||
"column": column
|
||||
"column": canonical_column
|
||||
});
|
||||
|
||||
// Add name parameter if provided (for backwards compatibility, only include if Some)
|
||||
@@ -1564,8 +1566,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
Index::LabelList(p) => ("LABEL_LIST", Some(to_json(p)?)),
|
||||
Index::FTS(p) => ("FTS", Some(to_json(p)?)),
|
||||
Index::Auto => {
|
||||
let schema = self.schema().await?;
|
||||
let field = resolve_arrow_field_path(&schema, &column)?;
|
||||
if supported_vector_data_type(field.data_type()) {
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
|
||||
@@ -1862,16 +1862,26 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
status_code: None,
|
||||
})?;
|
||||
|
||||
let schema = self.schema().await?;
|
||||
|
||||
// Make request to get stats for each index, so we get the index type.
|
||||
// This is a bit inefficient, but it's the only way to get the index type.
|
||||
let mut futures = Vec::with_capacity(body.indexes.len());
|
||||
for index in body.indexes {
|
||||
let columns = index
|
||||
.columns
|
||||
.iter()
|
||||
.map(|column| {
|
||||
resolve_arrow_field_path(&schema, column)
|
||||
.map(|(canonical_column, _)| canonical_column)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let future = async move {
|
||||
match self.index_stats(&index.index_name).await {
|
||||
Ok(Some(stats)) => Ok(Some(IndexConfig {
|
||||
name: index.index_name,
|
||||
index_type: stats.index_type,
|
||||
columns: index.columns,
|
||||
columns,
|
||||
})),
|
||||
Ok(None) => Ok(None), // The index must have been deleted since we listed it.
|
||||
Err(e) => Err(e),
|
||||
@@ -2313,6 +2323,38 @@ mod tests {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn nested_index_schema() -> Schema {
|
||||
let vector_type =
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 8);
|
||||
Schema::new(vec![
|
||||
Field::new(
|
||||
"metadata",
|
||||
DataType::Struct(vec![Field::new("user_id", DataType::Int32, false)].into()),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
"image",
|
||||
DataType::Struct(vec![Field::new("embedding", vector_type, false)].into()),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
"payload",
|
||||
DataType::Struct(vec![Field::new("text", DataType::Utf8, false)].into()),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
"meta-data",
|
||||
DataType::Struct(vec![Field::new("user-id", DataType::Int32, false)].into()),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
"literal",
|
||||
DataType::Struct(vec![Field::new("a.b", DataType::Int32, false)].into()),
|
||||
false,
|
||||
),
|
||||
])
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case("", 0)]
|
||||
#[case("{}", 0)]
|
||||
@@ -3079,6 +3121,59 @@ mod tests {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_vector_nested_field_path() {
|
||||
let expected_data = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let expected_data_ref = expected_data.clone();
|
||||
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/query/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let mut expected_body = serde_json::json!({
|
||||
"vector_column": "image.embedding",
|
||||
"prefilter": true,
|
||||
"k": 10,
|
||||
"nprobes": 20,
|
||||
"minimum_nprobes": 20,
|
||||
"maximum_nprobes": 20,
|
||||
"lower_bound": Option::<f32>::None,
|
||||
"upper_bound": Option::<f32>::None,
|
||||
"ef": Option::<usize>::None,
|
||||
"refine_factor": Option::<u32>::None,
|
||||
"version": null,
|
||||
});
|
||||
expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
|
||||
assert_eq!(body, expected_body);
|
||||
|
||||
let response_body = write_ipc_file(&expected_data_ref);
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
|
||||
.body(response_body)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
let _ = table
|
||||
.query()
|
||||
.nearest_to(vec![0.1, 0.2, 0.3])
|
||||
.unwrap()
|
||||
.column("image.embedding")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_fts() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
@@ -3160,7 +3255,7 @@ mod tests {
|
||||
"query": {
|
||||
"match": {
|
||||
"terms": "hello world",
|
||||
"column": "a",
|
||||
"column": "payload.text",
|
||||
"boost": 1.0,
|
||||
"fuzziness": 0,
|
||||
"max_expansions": 50,
|
||||
@@ -3194,7 +3289,7 @@ mod tests {
|
||||
.query()
|
||||
.full_text_search(FullTextSearchQuery::new_query(
|
||||
MatchQuery::new("hello world".to_owned())
|
||||
.with_column(Some("a".to_owned()))
|
||||
.with_column(Some("payload.text".to_owned()))
|
||||
.into(),
|
||||
))
|
||||
.with_row_id()
|
||||
@@ -3465,32 +3560,152 @@ mod tests {
|
||||
for (index_type, expected_body, index) in cases {
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/create_index/");
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let mut expected_body = expected_body.clone();
|
||||
expected_body["column"] = "a".into();
|
||||
expected_body[INDEX_TYPE_KEY] = index_type.into();
|
||||
match request.url().path() {
|
||||
"/v1/table/my_table/describe/" => {
|
||||
let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(describe_response(&schema))
|
||||
.unwrap()
|
||||
}
|
||||
"/v1/table/my_table/create_index/" => {
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let mut expected_body = expected_body.clone();
|
||||
expected_body["column"] = "a".into();
|
||||
expected_body[INDEX_TYPE_KEY] = index_type.into();
|
||||
|
||||
assert_eq!(body, expected_body);
|
||||
assert_eq!(body, expected_body);
|
||||
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body("{}".to_string())
|
||||
.unwrap()
|
||||
}
|
||||
path => panic!("Unexpected path: {}", path),
|
||||
}
|
||||
});
|
||||
|
||||
table.create_index(&["a"], index).execute().await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_index_nested_field_paths() {
|
||||
let schema = nested_index_schema();
|
||||
let expected_requests = Arc::new(vec![
|
||||
json!({
|
||||
"column": "metadata.user_id",
|
||||
"index_type": "BTREE",
|
||||
}),
|
||||
json!({
|
||||
"column": "image.embedding",
|
||||
"index_type": "IVF_PQ",
|
||||
"metric_type": "l2",
|
||||
}),
|
||||
{
|
||||
let mut body = serde_json::to_value(InvertedIndexParams::default()).unwrap();
|
||||
body["column"] = "payload.text".into();
|
||||
body["index_type"] = "FTS".into();
|
||||
body
|
||||
},
|
||||
json!({
|
||||
"column": "`meta-data`.`user-id`",
|
||||
"index_type": "BTREE",
|
||||
}),
|
||||
json!({
|
||||
"column": "literal.`a.b`",
|
||||
"index_type": "BTREE",
|
||||
}),
|
||||
]);
|
||||
let request_idx = Arc::new(AtomicUsize::new(0));
|
||||
let table = Table::new_with_handler("my_table", {
|
||||
let schema = schema.clone();
|
||||
let expected_requests = expected_requests.clone();
|
||||
let request_idx = request_idx.clone();
|
||||
move |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
match request.url().path() {
|
||||
"/v1/table/my_table/describe/" => http::Response::builder()
|
||||
.status(200)
|
||||
.body(describe_response(&schema))
|
||||
.unwrap(),
|
||||
"/v1/table/my_table/create_index/" => {
|
||||
assert_eq!(
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
let idx = request_idx.fetch_add(1, Ordering::SeqCst);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
assert_eq!(body, expected_requests[idx]);
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body("{}".to_string())
|
||||
.unwrap()
|
||||
}
|
||||
path => panic!("Unexpected path: {}", path),
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
table
|
||||
.create_index(&["Metadata.USER_ID"], Index::BTree(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
table
|
||||
.create_index(&["Image.Embedding"], Index::Auto)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
table
|
||||
.create_index(&["Payload.Text"], Index::FTS(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
table
|
||||
.create_index(&["`META-DATA`.`USER-ID`"], Index::BTree(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
table
|
||||
.create_index(&["literal.`A.B`"], Index::BTree(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(request_idx.load(Ordering::SeqCst), expected_requests.len());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_list_indices() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
let schema = Schema::new(vec![
|
||||
Field::new(
|
||||
"vector",
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 8),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
"metadata",
|
||||
DataType::Struct(vec![Field::new("my.column", DataType::Utf8, true)].into()),
|
||||
false,
|
||||
),
|
||||
]);
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
|
||||
let response_body = match request.url().path() {
|
||||
"/v1/table/my_table/describe/" => {
|
||||
return http::Response::builder()
|
||||
.status(200)
|
||||
.body(describe_response(&schema))
|
||||
.unwrap();
|
||||
}
|
||||
"/v1/table/my_table/index/list/" => {
|
||||
serde_json::json!({
|
||||
"indexes": [
|
||||
@@ -4010,6 +4225,20 @@ mod tests {
|
||||
assert_eq!(request.method(), "POST");
|
||||
|
||||
let response_body = match request.url().path() {
|
||||
"/v1/table/my_table/describe/" => {
|
||||
let schema = Schema::new(vec![
|
||||
Field::new(
|
||||
"vector",
|
||||
DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||
8,
|
||||
),
|
||||
false,
|
||||
),
|
||||
Field::new("my_column", DataType::Utf8, false),
|
||||
]);
|
||||
serde_json::from_str::<serde_json::Value>(&describe_response(&schema)).unwrap()
|
||||
}
|
||||
"/v1/table/my_table/index/list/" => {
|
||||
serde_json::json!({
|
||||
"indexes": [
|
||||
@@ -4171,13 +4400,23 @@ mod tests {
|
||||
assert_eq!(value["index_type"], "IVF_PQ");
|
||||
}
|
||||
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
}
|
||||
"/v1/table/dev$users/describe/" => {
|
||||
// Needed for schema check in Auto index type
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 1, "schema": {"fields": [{"name": "embedding", "type": {"type": "list", "item": {"type": "float32"}}, "nullable": false}]}}"#)
|
||||
.body("".to_string())
|
||||
.unwrap()
|
||||
}
|
||||
"/v1/table/dev$users/describe/" => {
|
||||
let schema = Schema::new(vec![Field::new(
|
||||
"embedding",
|
||||
DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||
8,
|
||||
),
|
||||
false,
|
||||
)]);
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(describe_response(&schema))
|
||||
.unwrap()
|
||||
}
|
||||
_ => {
|
||||
|
||||
@@ -6,7 +6,7 @@ pub(crate) mod background_cache;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::RecordBatch;
|
||||
use arrow_schema::{DataType, Field, Fields, Schema, SchemaRef};
|
||||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
use datafusion_common::{DataFusionError, Result as DataFusionResult};
|
||||
use datafusion_execution::RecordBatchStream;
|
||||
use futures::{FutureExt, Stream};
|
||||
@@ -199,38 +199,32 @@ fn collect_vector_columns(
|
||||
path.pop();
|
||||
}
|
||||
|
||||
pub(crate) fn resolve_arrow_field_path(schema: &Schema, column: &str) -> Result<Field> {
|
||||
let segments =
|
||||
lance_core::datatypes::parse_field_path(column).map_err(|e| Error::InvalidInput {
|
||||
message: format!("Invalid field path `{}`: {}", column, e),
|
||||
pub(crate) fn resolve_arrow_field_path(schema: &Schema, column: &str) -> Result<(String, Field)> {
|
||||
lance_core::datatypes::parse_field_path(column).map_err(|e| Error::InvalidInput {
|
||||
message: format!("Invalid field path `{}`: {}", column, e),
|
||||
})?;
|
||||
|
||||
let lance_schema =
|
||||
lance_core::datatypes::Schema::try_from(schema).map_err(|e| Error::Schema {
|
||||
message: format!("Invalid schema: {}", e),
|
||||
})?;
|
||||
let mut fields = schema.fields();
|
||||
|
||||
for (idx, segment) in segments.iter().enumerate() {
|
||||
let field = find_field(fields, segment).ok_or_else(|| Error::Schema {
|
||||
message: format!("Field path `{}` not found in schema", column),
|
||||
let field_path = lance_schema
|
||||
.resolve_case_insensitive(column)
|
||||
.ok_or_else(|| Error::Schema {
|
||||
message: format!(
|
||||
"Field path `{}` not found in schema. Available field paths: {}",
|
||||
column,
|
||||
lance_schema.field_paths().join(", ")
|
||||
),
|
||||
})?;
|
||||
if idx + 1 == segments.len() {
|
||||
return Ok(field.clone());
|
||||
}
|
||||
fields = match field.data_type() {
|
||||
DataType::Struct(fields) => fields,
|
||||
_ => {
|
||||
return Err(Error::Schema {
|
||||
message: format!("Field path `{}` not found in schema", column),
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
unreachable!("parse_field_path returns at least one segment")
|
||||
}
|
||||
|
||||
fn find_field<'a>(fields: &'a Fields, name: &str) -> Option<&'a Field> {
|
||||
fields
|
||||
let field = field_path.last().expect("field path should be non-empty");
|
||||
let path_segments = field_path
|
||||
.iter()
|
||||
.find(|field| field.name() == name)
|
||||
.map(|field| field.as_ref())
|
||||
.map(|field| field.name.as_str())
|
||||
.collect::<Vec<_>>();
|
||||
let canonical_path = lance_core::datatypes::format_field_path(&path_segments);
|
||||
|
||||
Ok((canonical_path, Field::from(*field)))
|
||||
}
|
||||
|
||||
pub fn supported_btree_data_type(dtype: &DataType) -> bool {
|
||||
|
||||
Reference in New Issue
Block a user