chore: update lance dependency to v7.1.0-beta.2

fix: use releases API in check_lance_release.py (#3427)
Previously `check_lance_release.py` used `git/refs/tags` with `--paginate --jq`, which drops the last page in some `gh` versions. The 7.x Lance tags all landed on the final (partial) page, causing the script to report `v6.0.1` as the latest and never triggering an update. Switch to the releases API with `per_page=20`, which returns the 20 most recent releases sorted newest-first — one API call, no pagination needed.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-23 15:00:39 +00:00 · 2026-05-23 01:45:38 +00:00 · 2026-05-22 15:00:44 -07:00 · 2026-05-22 10:51:09 -07:00 · 2026-05-22 10:35:15 -07:00 · 2026-05-22 20:09:20 +08:00
38 changed files with 1180 additions and 228 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.29.1-beta.0"
+current_version = "0.30.0-beta.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -37,10 +37,13 @@ Before committing changes, run formatting for every language you touched. At min
  and run targeted tests through `cd python && uv run ...`.
 * TypeScript changes: run the relevant `npm`/`pnpm` lint, format, build, and docs commands in `nodejs`.

-Before creating a PR, make sure the PR title follows Conventional Commits, such as
-`fix: support nested field paths in native index creation` or
-`feat(python): add dataset multiprocessing support`. The semantic-release check uses the
-PR title and body as the merge commit message, so a non-conventional PR title will fail CI.
+Before creating a PR, the exact value passed to `gh pr create --title` must follow
+Conventional Commits, such as `fix: support nested field paths in native index creation`
+or `feat(python): add dataset multiprocessing support`. Do not use a plain natural
+language summary like `Support nested field paths in native index creation` as the PR
+title. The semantic-release check uses the PR title and body as the merge commit message,
+so a non-conventional PR title will fail CI. After creating a PR, read the remote PR title
+back and fix it immediately if it is not conventional.

 ## Coding tips

--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3284,8 +3284,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"

 [[package]]
 name = "fsst"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow-array",
 "rand 0.9.4",
@@ -4506,8 +4506,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a"

 [[package]]
 name = "lance"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arc-swap",
 "arrow",
@@ -4525,6 +4525,7 @@ dependencies = [
 "async_cell",
 "aws-credential-types",
 "aws-sdk-dynamodb",
+ "bitpacking",
 "byteorder",
 "bytes",
 "chrono",
@@ -4551,9 +4552,11 @@ dependencies = [
 "lance-io",
 "lance-linalg",
 "lance-namespace",
+ "lance-select",
 "lance-table",
 "lance-tokenizer",
 "log",
+ "moka",
 "object_store",
 "permutation",
 "pin-project",
@@ -4577,8 +4580,8 @@ dependencies = [

 [[package]]
 name = "lance-arrow"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4598,8 +4601,8 @@ dependencies = [

 [[package]]
 name = "lance-bitpacking"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrayref",
 "paste",
@@ -4608,8 +4611,8 @@ dependencies = [

 [[package]]
 name = "lance-core"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4644,8 +4647,8 @@ dependencies = [

 [[package]]
 name = "lance-datafusion"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4675,8 +4678,8 @@ dependencies = [

 [[package]]
 name = "lance-datagen"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4694,8 +4697,8 @@ dependencies = [

 [[package]]
 name = "lance-encoding"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4730,8 +4733,8 @@ dependencies = [

 [[package]]
 name = "lance-file"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4762,8 +4765,8 @@ dependencies = [

 [[package]]
 name = "lance-index"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arc-swap",
 "arrow",
@@ -4800,6 +4803,7 @@ dependencies = [
 "lance-file",
 "lance-io",
 "lance-linalg",
+ "lance-select",
 "lance-table",
 "lance-tokenizer",
 "libm",
@@ -4827,8 +4831,8 @@ dependencies = [

 [[package]]
 name = "lance-io"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4870,8 +4874,8 @@ dependencies = [

 [[package]]
 name = "lance-linalg"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4887,8 +4891,8 @@ dependencies = [

 [[package]]
 name = "lance-namespace"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow",
 "async-trait",
@@ -4900,8 +4904,8 @@ dependencies = [

 [[package]]
 name = "lance-namespace-impls"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow",
 "arrow-ipc",
@@ -4936,9 +4940,9 @@ dependencies = [

 [[package]]
 name = "lance-namespace-reqwest-client"
-version = "0.7.6"
+version = "0.7.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f65e31bdaa13e01dab6e7cf566da31df243c34a542f0d915d3601ec0e01e61d2"
+checksum = "6369eee4682fb11edf538388b43c61ce288b8302fe89bb40944d7daa7faaae99"
 dependencies = [
 "reqwest 0.12.28",
 "serde",
@@ -4948,10 +4952,25 @@ dependencies = [
 "url",
 ]

+[[package]]
+name = "lance-select"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "byteorder",
+ "bytes",
+ "deepsize",
+ "itertools 0.13.0",
+ "lance-core",
+ "roaring",
+]
+
 [[package]]
 name = "lance-table"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4970,6 +4989,7 @@ dependencies = [
 "lance-core",
 "lance-file",
 "lance-io",
+ "lance-select",
 "log",
 "object_store",
 "prost",
@@ -4990,8 +5010,8 @@ dependencies = [

 [[package]]
 name = "lance-testing"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "arrow-array",
 "arrow-schema",
@@ -5002,8 +5022,8 @@ dependencies = [

 [[package]]
 name = "lance-tokenizer"
-version = "7.0.0-beta.13"
-source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.13#929166e3ff51ed61b1fa42de2c63feaf51967ea1"
+version = "7.1.0-beta.2"
+source = "git+https://github.com/lance-format/lance.git?tag=v7.1.0-beta.2#24b8afec580737d61c59845175f8ba2f0f390793"
 dependencies = [
 "jieba-rs",
 "lindera",
@@ -5014,7 +5034,7 @@ dependencies = [

 [[package]]
 name = "lancedb"
-version = "0.29.1-beta.0"
+version = "0.30.0-beta.1"
 dependencies = [
 "ahash",
 "anyhow",
@@ -5084,6 +5104,7 @@ dependencies = [
 "serde",
 "serde_json",
 "serde_with",
+ "serial_test",
 "snafu 0.8.9",
 "tempfile",
 "test-log",
@@ -5096,7 +5117,7 @@ dependencies = [

 [[package]]
 name = "lancedb-nodejs"
-version = "0.29.1-beta.0"
+version = "0.30.0-beta.1"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -5119,7 +5140,7 @@ dependencies = [

 [[package]]
 name = "lancedb-python"
-version = "0.32.1-beta.0"
+version = "0.33.0-beta.1"
 dependencies = [
 "arrow",
 "async-trait",
@@ -8128,6 +8149,15 @@ dependencies = [
 "winapi-util",
 ]

+[[package]]
+name = "scc"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc"
+dependencies = [
+ "sdd",
+]
+
 [[package]]
 name = "schannel"
 version = "0.1.29"
@@ -8194,6 +8224,12 @@ dependencies = [
 "untrusted 0.9.0",
 ]

+[[package]]
+name = "sdd"
+version = "3.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
+
 [[package]]
 name = "sec1"
 version = "0.3.0"
@@ -8384,6 +8420,32 @@ dependencies = [
 "unsafe-libyaml",
 ]

+[[package]]
+name = "serial_test"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f"
+dependencies = [
+ "futures-executor",
+ "futures-util",
+ "log",
+ "once_cell",
+ "parking_lot",
+ "scc",
+ "serial_test_derive",
+]
+
+[[package]]
+name = "serial_test_derive"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "sha1"
 version = "0.10.6"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
 rust-version = "1.91.0"

 [workspace.dependencies]
-lance = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=7.1.0-beta.2", default-features = false, "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=7.1.0-beta.2", default-features = false, "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=7.1.0-beta.2", default-features = false, "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=7.1.0-beta.2", "tag" = "v7.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "58.0.0", optional = false }
--- a/ci/check_lance_release.py
+++ b/ci/check_lance_release.py
@@ -112,25 +112,25 @@ def fetch_remote_tags() -> List[TagInfo]:
            "api",
            "-X",
            "GET",
-            f"repos/{LANCE_REPO}/git/refs/tags",
-            "--paginate",
+            f"repos/{LANCE_REPO}/releases",
            "--jq",
-            ".[].ref",
+            ".[].tag_name",
+            "-F",
+            "per_page=20",
        ]
    )
    tags: List[TagInfo] = []
    for line in output.splitlines():
-        ref = line.strip()
-        if not ref.startswith("refs/tags/v"):
+        tag = line.strip()
+        if not tag.startswith("v"):
            continue
-        tag = ref.split("refs/tags/")[-1]
        version = tag.lstrip("v")
        try:
            tags.append(TagInfo(tag=tag, version=version, semver=parse_semver(version)))
        except ValueError:
            continue
    if not tags:
-        raise RuntimeError("No Lance tags could be parsed from GitHub API output")
+        raise RuntimeError("No Lance releases could be parsed from GitHub API output")
    return tags


--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.29.1-beta.0</version>
+    <version>0.30.0-beta.1</version>
 </dependency>
 ```

--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.29.1-beta.0</version>
+      <version>0.30.0-beta.1</version>
      <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.29.1-beta.0</version>
+    <version>0.30.0-beta.1</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-core.version>7.0.0-beta.13</lance-core.version>
+        <lance-core.version>7.1.0-beta.2</lance-core.version>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.29.1-beta.0"
+version = "0.30.0-beta.1"
 publish = false
 license.workspace = true
 description.workspace = true
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -28,6 +28,7 @@ import {
  List,
  Schema,
  SchemaLike,
+  Struct,
  Type,
  Uint8,
  Utf8,
@@ -780,6 +781,113 @@ describe("When creating an index", () => {
    expect(indices2.length).toBe(0);
  });

+  it("should create and search a nested vector index", async () => {
+    const db = await connect(tmpDir.name);
+    const nestedSchema = new Schema([
+      new Field("id", new Int32(), true),
+      new Field(
+        "image",
+        new Struct([
+          new Field(
+            "embedding",
+            new FixedSizeList(2, new Field("item", new Float32(), true)),
+            true,
+          ),
+        ]),
+        true,
+      ),
+    ]);
+    const nestedTable = await db.createTable(
+      "nested_vector",
+      makeArrowTable(
+        Array.from({ length: 300 }, (_, id) => ({
+          id,
+          image: { embedding: [id, id + 1] },
+        })),
+        { schema: nestedSchema },
+      ),
+    );
+
+    await nestedTable.createIndex("image.embedding", {
+      name: "image_embedding_idx",
+    });
+    const indices = await nestedTable.listIndices();
+    expect(indices).toContainEqual({
+      name: "image_embedding_idx",
+      indexType: "IvfPq",
+      columns: ["image.embedding"],
+    });
+
+    const explicit = await nestedTable
+      .query()
+      .nearestTo([0.0, 1.0])
+      .column("image.embedding")
+      .limit(1)
+      .toArray();
+    const inferred = await nestedTable
+      .query()
+      .nearestTo([0.0, 1.0])
+      .limit(1)
+      .toArray();
+    expect(inferred[0].id).toEqual(explicit[0].id);
+  });
+
+  it("should report multiple nested vector candidates", async () => {
+    const db = await connect(tmpDir.name);
+    const nestedSchema = new Schema([
+      new Field(
+        "image",
+        new Struct([
+          new Field(
+            "embedding",
+            new FixedSizeList(2, new Field("item", new Float32(), true)),
+            true,
+          ),
+        ]),
+        true,
+      ),
+      new Field(
+        "text",
+        new Struct([
+          new Field(
+            "embedding",
+            new FixedSizeList(2, new Field("item", new Float32(), true)),
+            true,
+          ),
+        ]),
+        true,
+      ),
+    ]);
+    const nestedTable = await db.createTable(
+      "multiple_nested_vectors",
+      makeArrowTable(
+        [
+          {
+            image: { embedding: [0.0, 1.0] },
+            text: { embedding: [2.0, 3.0] },
+          },
+        ],
+        { schema: nestedSchema },
+      ),
+    );
+
+    await expect(
+      nestedTable.query().nearestTo([0.0, 1.0]).limit(1).toArray(),
+    ).rejects.toThrow(/image\.embedding.*text\.embedding/);
+  });
+
+  it("should report when no default vector column exists", async () => {
+    const db = await connect(tmpDir.name);
+    const noVectorTable = await db.createTable(
+      "no_vector",
+      makeArrowTable([{ id: 0, label: "cat" }]),
+    );
+
+    await expect(
+      noVectorTable.query().nearestTo([0.0, 1.0]).limit(1).toArray(),
+    ).rejects.toThrow(/No vector column/);
+  });
+
  it("should wait for index readiness", async () => {
    // Create an index and then wait for it to be ready
    await tbl.createIndex("vec");
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0-beta.1",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0-beta.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0-beta.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0-beta.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0-beta.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.29.1-beta.0",
+  "version": "0.30.0-beta.1",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0-beta.1",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.29.1-beta.0",
+  "version": "0.30.0-beta.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.29.1-beta.0",
+      "version": "0.30.0-beta.1",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.29.1-beta.0",
+  "version": "0.30.0-beta.1",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.32.1-beta.0"
+current_version = "0.33.0-beta.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.32.1-beta.0"
+version = "0.33.0-beta.1"
 publish = false
 edition.workspace = true
 description = "Python bindings for LanceDB"
--- a/python/python/lancedb/init.py
+++ b/python/python/lancedb/init.py
@@ -147,6 +147,13 @@ def connect(
    >>> db = lancedb.connect("s3://my-bucket/lancedb",
    ...                      storage_options={"aws_access_key_id": "***"})

+    For tests and temporary data, use an in-memory database:
+
+    >>> db = lancedb.connect("memory://")
+
+    In-memory databases are not persisted. Tables are dropped when the last
+    connection or table handle referencing them is closed.
+
    Connect to LanceDB cloud:

    >>> db = lancedb.connect("db://my_database", api_key="ldb_...",
@@ -378,6 +385,8 @@ async def connect_async(
    ...     db = await lancedb.connect_async("s3://my-bucket/lancedb",
    ...                                      storage_options={
    ...                                          "aws_access_key_id": "***"})
+    ...     # For tests and temporary data, use an in-memory database
+    ...     db = await lancedb.connect_async("memory://")
    ...     # Connect to LanceDB cloud
    ...     db = await lancedb.connect_async("db://my_database", api_key="ldb_...",
    ...                                      client_config={
--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
@@ -8,7 +8,17 @@ from abc import abstractmethod
 from datetime import timedelta
 from pathlib import Path
 import sys
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Literal,
+    Optional,
+    Union,
+)

 if sys.version_info >= (3, 12):
    from typing import override
@@ -313,7 +323,7 @@ class DBConnection(EnforceOverrides):
        >>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
        ...         {"vector": [0.2, 1.8], "lat": 40.1, "long":  -74.1}]
        >>> db.create_table("my_table", data)
-        LanceTable(name='my_table', version=1, ...)
+        LanceTable(name='my_table', ...)
        >>> db["my_table"].head()
        pyarrow.Table
        vector: fixed_size_list<item: float>[2]
@@ -334,7 +344,7 @@ class DBConnection(EnforceOverrides):
        ...    "long": [-122.7, -74.1]
        ... })
        >>> db.create_table("table2", data)
-        LanceTable(name='table2', version=1, ...)
+        LanceTable(name='table2', ...)
        >>> db["table2"].head()
        pyarrow.Table
        vector: fixed_size_list<item: float>[2]
@@ -357,7 +367,7 @@ class DBConnection(EnforceOverrides):
        ...   pa.field("long", pa.float32())
        ... ])
        >>> db.create_table("table3", data, schema = custom_schema)
-        LanceTable(name='table3', version=1, ...)
+        LanceTable(name='table3', ...)
        >>> db["table3"].head()
        pyarrow.Table
        vector: fixed_size_list<item: float>[2]
@@ -391,7 +401,7 @@ class DBConnection(EnforceOverrides):
        ...     pa.field("price", pa.float32()),
        ... ])
        >>> db.create_table("table4", make_batches(), schema=schema)
-        LanceTable(name='table4', version=1, ...)
+        LanceTable(name='table4', ...)

        """
        raise NotImplementedError
@@ -568,15 +578,15 @@ class LanceDBConnection(DBConnection):
    >>> db = lancedb.connect("./.lancedb")
    >>> db.create_table("my_table", data=[{"vector": [1.1, 1.2], "b": 2},
    ...                                   {"vector": [0.5, 1.3], "b": 4}])
-    LanceTable(name='my_table', version=1, ...)
+    LanceTable(name='my_table', ...)
    >>> db.create_table("another_table", data=[{"vector": [0.4, 0.4], "b": 6}])
-    LanceTable(name='another_table', version=1, ...)
+    LanceTable(name='another_table', ...)
    >>> sorted(db.table_names())
    ['another_table', 'my_table']
    >>> len(db)
    2
    >>> db["my_table"]
-    LanceTable(name='my_table', version=1, ...)
+    LanceTable(name='my_table', ...)
    >>> "my_table" in db
    True
    >>> db.drop_table("my_table")
@@ -847,11 +857,20 @@ class LanceDBConnection(DBConnection):
            )
        )

+    def _all_table_names(self) -> Generator[str, None, None]:
+        page_token = None
+        while True:
+            response = self.list_tables(page_token=page_token)
+            yield from response.tables
+            page_token = response.page_token
+            if not page_token:
+                return
+
    def __len__(self) -> int:
-        return len(self.table_names())
+        return sum(1 for _ in self._all_table_names())

    def __contains__(self, name: str) -> bool:
-        return name in self.table_names()
+        return name in self._all_table_names()

    @override
    def create_table(
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -3,12 +3,14 @@

 from __future__ import annotations

+import asyncio
 from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor
-from enum import Enum
 from datetime import timedelta
+from enum import Enum
 from typing import (
    TYPE_CHECKING,
+    Any,
    Dict,
    List,
    Literal,
@@ -17,41 +19,40 @@ from typing import (
    Type,
    TypeVar,
    Union,
-    Any,
 )

-import asyncio
 import deprecation
 import numpy as np
 import pyarrow as pa
 import pyarrow.compute as pc
 import pydantic
+from typing_extensions import Annotated

-from lancedb.pydantic import PYDANTIC_VERSION
+from lancedb._lancedb import fts_query_to_json
 from lancedb.background_loop import LOOP
+from lancedb.pydantic import PYDANTIC_VERSION

 from . import __version__
 from .arrow import AsyncRecordBatchReader
 from .dependencies import pandas as pd
+from .expr import Expr
 from .rerankers.base import Reranker
 from .rerankers.rrf import RRFReranker
 from .rerankers.util import check_reranker_result
 from .util import flatten_columns
-from .expr import Expr
-from lancedb._lancedb import fts_query_to_json
-from typing_extensions import Annotated

 if TYPE_CHECKING:
    import sys
+
    import PIL
    import polars as pl

-    from ._lancedb import Query as LanceQuery
    from ._lancedb import FTSQuery as LanceFTSQuery
    from ._lancedb import HybridQuery as LanceHybridQuery
-    from ._lancedb import VectorQuery as LanceVectorQuery
-    from ._lancedb import TakeQuery as LanceTakeQuery
    from ._lancedb import PyQueryRequest
+    from ._lancedb import Query as LanceQuery
+    from ._lancedb import TakeQuery as LanceTakeQuery
+    from ._lancedb import VectorQuery as LanceVectorQuery
    from .common import VEC
    from .pydantic import LanceModel
    from .table import Table
@@ -3348,16 +3349,18 @@ class BaseQueryBuilder(object):
            If not specified, no timeout is applied. If the query does not
            complete within the specified time, an error will be raised.
        """
-        async_iter = LOOP.run(self._inner.execute(max_batch_length, timeout))
+        async_reader = LOOP.run(
+            self._inner.to_batches(max_batch_length=max_batch_length, timeout=timeout)
+        )

        def iter_sync():
            try:
                while True:
-                    yield LOOP.run(async_iter.__anext__())
+                    yield LOOP.run(async_reader.__anext__())
            except StopAsyncIteration:
                return

-        return pa.RecordBatchReader.from_batches(async_iter.schema, iter_sync())
+        return pa.RecordBatchReader.from_batches(async_reader.schema, iter_sync())

    def to_arrow(self, timeout: Optional[timedelta] = None) -> pa.Table:
        """
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -2178,7 +2178,7 @@ class LanceTable(Table):
        return LOOP.run(self._table.count_rows(filter))

    def __repr__(self) -> str:
-        val = f"{self.__class__.__name__}(name={self.name!r}, version={self.version}"
+        val = f"{self.__class__.__name__}(name={self.name!r}"
        if self._conn.read_consistency_interval is not None:
            val += ", read_consistency_interval={!r}".format(
                self._conn.read_consistency_interval
--- a/python/python/lancedb/util.py
+++ b/python/python/lancedb/util.py
@@ -10,7 +10,7 @@ import pathlib
 import warnings
 from datetime import date, datetime
 from functools import singledispatch
-from typing import Tuple, Union, Optional, Any
+from typing import Tuple, Union, Optional, Any, List
 from urllib.parse import urlparse

 import numpy as np
@@ -189,7 +189,33 @@ def flatten_columns(tbl: pa.Table, flatten: Optional[Union[int, bool]] = None):
    return tbl


-def inf_vector_column_query(schema: pa.Schema) -> str:
+def _format_field_path(path: List[str]) -> str:
+    """Join segments with dots, backtick-quoting any that are not alnum/underscore."""
+    rendered = []
+    for name in path:
+        is_plain = all(ch == "_" or ch.isalnum() for ch in name)
+        rendered.append(name if is_plain else "`" + name.replace("`", "``") + "`")
+    return ".".join(rendered)
+
+
+def _iter_vector_columns(
+    field: pa.Field, path: List[str], dim: Optional[int] = None
+) -> List[str]:
+    """List formatted paths of vector columns at or beneath ``field``."""
+    here = [*path, field.name]
+    dtype = field.type
+    if is_vector_column(dtype):
+        # Kept when no dimension is requested or the column's dimension matches.
+        keep = infer_vector_column_dim(dtype) == dim or dim is None
+        return [_format_field_path(here)] if keep else []
+    found: List[str] = []
+    if pa.types.is_struct(dtype):
+        for child in (dtype.field(i) for i in range(dtype.num_fields)):
+            found.extend(_iter_vector_columns(child, here, dim))
+    return found
+
+
+def inf_vector_column_query(schema: pa.Schema, dim: Optional[int] = None) -> str:
    """
    Get the vector column name

@@ -202,26 +228,21 @@ def inf_vector_column_query(schema: pa.Schema) -> str:
    -------
    str: the vector column name.
    """
-    vector_col_name = ""
-    vector_col_count = 0
-    for field_name in schema.names:
-        field = schema.field(field_name)
-        if is_vector_column(field.type):
-            vector_col_count += 1
-            if vector_col_count > 1:
-                raise ValueError(
-                    "Schema has more than one vector column. "
-                    "Please specify the vector column name "
-                    "for vector search"
-                )
-            elif vector_col_count == 1:
-                vector_col_name = field_name
-    if vector_col_count == 0:
+    vector_col_names = []
+    for field in schema:
+        vector_col_names.extend(_iter_vector_columns(field, [], dim))
+    if len(vector_col_names) > 1:
+        raise ValueError(
+            "Schema has more than one vector column. "
+            "Please specify the vector column name "
+            f"for vector search. Candidates: {vector_col_names}"
+        )
+    if len(vector_col_names) == 0:
        raise ValueError(
            "There is no vector column in the data. "
            "Please specify the vector column name for vector search"
        )
-    return vector_col_name
+    return vector_col_names[0]


 def is_vector_column(data_type: pa.DataType) -> bool:
@@ -247,6 +268,29 @@ def is_vector_column(data_type: pa.DataType) -> bool:
    return False


+def infer_vector_column_dim(data_type: pa.DataType) -> Optional[int]:
+    """Unwrap list types; return the fixed-size list's size, or None if absent."""
+    current = data_type
+    while pa.types.is_list(current) and not pa.types.is_fixed_size_list(current):
+        current = current.value_type
+    return current.list_size if pa.types.is_fixed_size_list(current) else None
+
+
+def _query_vector_dim(query: Optional[Any]) -> Optional[int]:
+    """Best-effort dimension of ``query``; ``None`` when it cannot be told.
+
+    Arrays report their last axis; a list of sequences reports the length of
+    its first element; a flat list reports its own length.
+    """
+    if isinstance(query, np.ndarray):
+        return query.shape[-1] if query.ndim != 0 else None
+    if not isinstance(query, list) or not query:
+        return None
+    head = query[0]
+    nested = isinstance(head, (list, tuple, np.ndarray))
+    return len(head) if nested else len(query)
+
+
 def infer_vector_column_name(
    schema: pa.Schema,
    query_type: str,
@@ -262,7 +306,9 @@ def infer_vector_column_name(

    if query is not None or query_type == "hybrid":
        try:
-            vector_column_name = inf_vector_column_query(schema)
+            vector_column_name = inf_vector_column_query(
+                schema, dim=_query_vector_dim(query)
+            )
        except Exception as e:
            raise e

--- a/python/python/tests/test_db.py
+++ b/python/python/tests/test_db.py
@@ -6,6 +6,7 @@ import re
 import sys
 from datetime import timedelta
 import os
+from types import SimpleNamespace

 import lancedb
 import numpy as np
@@ -188,6 +189,43 @@ def test_table_names(tmp_db: lancedb.DBConnection):
    assert len(result) == 3


+def test_db_contains_and_len_include_all_table_name_pages(tmp_db: lancedb.DBConnection):
+    """``len`` and ``in`` must see all 20 created tables and reject other names."""
+    names = [f"table_{n}" for n in range(20)]
+    for n, name in enumerate(names):
+        tmp_db.create_table(name, data=[{"id": n}])
+    assert len(tmp_db) == len(names)
+    assert all(name in tmp_db for name in names)
+    assert "does_not_exist" not in tmp_db
+
+
+def test_db_contains_stops_after_matching_table_page(
+    tmp_db: lancedb.DBConnection, monkeypatch
+):
+    """``in`` should stop paging at the first match; ``len`` walks every page."""
+    first_page = SimpleNamespace(tables=["table_0", "table_1"], page_token="next")
+    last_page = SimpleNamespace(tables=["table_2"], page_token=None)
+    pages = {None: first_page, "next": last_page}
+    requested = []
+
+    def fake_list_tables(*, page_token=None, **_ignored):
+        # Log every requested page token so the paging walk can be asserted.
+        requested.append(page_token)
+        return pages[page_token]
+
+    monkeypatch.setattr(tmp_db, "list_tables", fake_list_tables)
+    # Hit on the first page: the second page must not be requested.
+    assert "table_1" in tmp_db
+    assert requested == [None]
+    requested.clear()
+    # Hit only on the second page: both pages are requested, in order.
+    assert "table_2" in tmp_db
+    assert requested == [None, "next"]
+    requested.clear()
+    assert len(tmp_db) == 3
+    assert requested == [None, "next"]
+
+
@pytest.mark.asyncio
 async def test_table_names_async(tmp_path):
    db = lancedb.connect(tmp_path)
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -563,7 +563,7 @@ def test_create_index_multiple_columns(tmp_path, table):


 def test_nested_schema(tmp_path, table):
-    table.create_fts_index("nested.text")
+    table.create_fts_index("nested.text", with_position=True)
    indices = table.list_indices()
    assert len(indices) == 1
    assert indices[0].index_type == "FTS"
@@ -577,6 +577,98 @@ def test_nested_schema(tmp_path, table):
    assert len(results) > 0
    assert all("puppy" in row["nested"]["text"] for row in results)

+    results = table.search(MatchQuery("puppy", "nested.text")).limit(5).to_list()
+    assert len(results) > 0
+    assert all("puppy" in row["nested"]["text"] for row in results)
+
+    phrase_results = (
+        table.search(PhraseQuery("puppy runs", "nested.text")).limit(5).to_list()
+    )
+    assert len(phrase_results) > 0
+    assert all("puppy runs" in row["nested"]["text"] for row in phrase_results)
+
+    hybrid_results = (
+        table.search(query_type="hybrid", fts_columns="nested.text")
+        .vector([0 for _ in range(128)])
+        .text("puppy")
+        .limit(5)
+        .to_list()
+    )
+    assert len(hybrid_results) > 0
+
+
+@pytest.mark.asyncio
+async def test_nested_schema_async(async_table):
+    """Index ``nested.text`` for FTS, then query it four different ways.
+
+    Covers string, ``MatchQuery``, ``PhraseQuery`` and vector-plus-text queries;
+    each one must return at least one row.
+    """
+    await async_table.create_index("nested.text", config=FTS(with_position=True))
+    listed = await async_table.list_indices()
+    assert len(listed) == 1
+    assert listed[0].index_type == "FTS"
+    assert listed[0].columns == ["nested.text"]
+
+    # Plain string query with the nested column passed explicitly.
+    string_query = async_table.query().nearest_to_text("puppy", columns="nested.text")
+    string_hits = await string_query.limit(5).to_list()
+    assert len(string_hits) > 0
+    assert all("puppy" in hit["nested"]["text"] for hit in string_hits)
+
+    # Structured match query that names the column itself.
+    match_query = async_table.query().nearest_to_text(
+        MatchQuery("puppy", "nested.text")
+    )
+    match_hits = await match_query.limit(5).to_list()
+    assert len(match_hits) > 0
+    assert all("puppy" in hit["nested"]["text"] for hit in match_hits)
+
+    # Phrase query against the index created with ``with_position=True``.
+    phrase_query = async_table.query().nearest_to_text(
+        PhraseQuery("puppy runs", "nested.text")
+    )
+    phrase_hits = await phrase_query.limit(5).to_list()
+    assert len(phrase_hits) > 0
+    assert all("puppy runs" in hit["nested"]["text"] for hit in phrase_hits)
+
+    # Vector search and text search combined on one query builder.
+    hybrid_query = (
+        async_table.query()
+        .nearest_to([0] * 128)
+        .nearest_to_text("puppy", columns="nested.text")
+    )
+    hybrid_hits = await hybrid_query.limit(5).to_list()
+    assert len(hybrid_hits) > 0
+
+
+def test_nested_schema_rejects_invalid_fts_fields(tmp_path):
+    """FTS index creation must fail for struct, integer and missing paths."""
+    # One struct column (string + integer children) next to a vector column.
+    payload = pa.array(
+        [
+            {"text": "puppy runs", "count": 1},
+            {"text": "car drives", "count": 2},
+        ]
+    )
+    vector = pa.array(
+        [[0.1, 0.1], [0.2, 0.2]],
+        type=pa.list_(pa.float32(), list_size=2),
+    )
+    data = pa.table({"payload": payload, "vector": vector})
+    db = ldb.connect(tmp_path)
+    table = db.create_table("test", data=data)
+
+    # (field path, regex the ``ValueError`` message has to match)
+    rejected = [
+        ("payload", "FTS index cannot be created.*payload"),
+        ("payload.count", "FTS index cannot be created.*count"),
+        ("payload.missing", "Field path `payload.missing` not found"),
+    ]
+    for field_path, message in rejected:
+        with pytest.raises(ValueError, match=message):
+            table.create_fts_index(field_path)
+

 def test_search_index_with_filter(table):
    table.create_fts_index("text")
--- a/python/python/tests/test_index.py
+++ b/python/python/tests/test_index.py
@@ -105,6 +105,46 @@ async def test_create_scalar_index(some_table: AsyncTable):
    assert len(indices) == 0


+@pytest.mark.asyncio
+async def test_create_nested_scalar_index_lists_canonical_paths(db_async):
+    """Listed index columns should echo the path each BTree index was built on.
+
+    Covers a top-level column, a nested child and a child named ``user.id``.
+    """
+    int32 = pa.int32()
+    metadata_type = pa.struct([pa.field("user_id", int32), pa.field("user.id", int32)])
+    # "user_id" exists both at the top level and inside "metadata".
+    top_level_ids = pa.array([1, 2, 3], type=int32)
+    metadata = pa.array(
+        [
+            {"user_id": 10, "user.id": 100},
+            {"user_id": 20, "user.id": 200},
+            {"user_id": 30, "user.id": 300},
+        ],
+        type=metadata_type,
+    )
+    data = pa.Table.from_arrays(
+        [top_level_ids, metadata], names=["user_id", "metadata"]
+    )
+    table = await db_async.create_table("nested_scalar_index", data)
+
+    # index name -> column path. The last child name contains a literal dot,
+    # so that segment is written in backticks.
+    index_paths = {
+        "top_user_id_idx": "user_id",
+        "nested_user_id_idx": "metadata.user_id",
+        "escaped_user_id_idx": "metadata.`user.id`",
+    }
+    for index_name, column_path in index_paths.items():
+        await table.create_index(column_path, config=BTree(), name=index_name)
+
+    # Map each listed index to the columns it reports.
+    listed = await table.list_indices()
+    columns_by_name = {entry.name: entry.columns for entry in listed}
+    for index_name, column_path in index_paths.items():
+        assert columns_by_name[index_name] == [column_path]
+
+
@pytest.mark.asyncio
 async def test_create_fixed_size_binary_index(some_table: AsyncTable):
    await some_table.create_index("fsb", config=BTree())
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -1512,6 +1512,37 @@ def test_take_queries(tmp_path):
    ]


+def test_take_queries_to_batches(tmp_path):
+    """Sync take queries must stream record batches through ``to_batches``.
+
+    Regression test: ``to_batches`` previously raised ``AttributeError:
+    'AsyncTakeQuery' object has no attribute 'execute'`` because the inherited
+    ``BaseQueryBuilder.to_batches`` called ``execute`` on the async wrapper
+    instead of the native query.
+    """
+    numbers = list(range(100))
+    data = pa.table({"idx": numbers, "label": [str(n) for n in numbers]})
+    db = lancedb.connect(tmp_path)
+    table = db.create_table("test", data)
+
+    def column_values(batches, name):
+        # Flatten one column across all batches, sorted for a stable comparison.
+        flat = [v for batch in batches for v in batch.column(name).to_pylist()]
+        return sorted(flat)
+
+    # Offsets and row ids are checked against the same three expected rows.
+    for take in (table.take_offsets, table.take_row_ids):
+        batches = list(take([5, 2, 17]).to_batches())
+        assert all(isinstance(batch, pa.RecordBatch) for batch in batches)
+        assert sum(batch.num_rows for batch in batches) == 3
+        assert column_values(batches, "idx") == [2, 5, 17]
+
+    # A ``select`` projection applied before ``to_batches`` must be preserved.
+    batches = list(table.take_row_ids([5, 2, 17]).select(["label"]).to_batches())
+    assert all(batch.schema.names == ["label"] for batch in batches)
+    assert column_values(batches, "label") == ["17", "2", "5"]
+
+
 def test_getitems(tmp_path):
    db = lancedb.connect(tmp_path)
    data = pa.table(
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -362,6 +362,22 @@ def test_table_create_indices():
                    schema=dict(
                        fields=[
                            dict(name="id", type={"type": "int64"}, nullable=False),
+                            dict(name="text", type={"type": "string"}, nullable=False),
+                            dict(
+                                name="vector",
+                                type={
+                                    "type": "fixed_size_list",
+                                    "fields": [
+                                        dict(
+                                            name="item",
+                                            type={"type": "float"},
+                                            nullable=True,
+                                        )
+                                    ],
+                                    "length": 2,
+                                },
+                                nullable=False,
+                            ),
                        ]
                    ),
                )
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -33,7 +33,7 @@ def test_basic(mem_db: DBConnection):
    table = mem_db.create_table("test", data=data)

    assert table.name == "test"
-    assert "LanceTable(name='test', version=1, _conn=LanceDBConnection(" in repr(table)
+    assert "LanceTable(name='test', _conn=LanceDBConnection(" in repr(table)
    expected_schema = pa.schema(
        {
            "vector": pa.list_(pa.float32(), 2),
@@ -1934,6 +1934,10 @@ def test_create_index_nested_field_paths(mem_db: DBConnection):
    assert len(vector_results) == 1
    assert vector_results[0]["metadata"]["user_id"] == 0

+    default_vector_results = table.search([0.0, 1.0]).limit(1).to_list()
+    assert len(default_vector_results) == 1
+    assert default_vector_results[0]["metadata"]["user_id"] == 0
+
    filtered_results = table.search().where("metadata.user_id = 42").limit(1).to_list()
    assert len(filtered_results) == 1
    assert filtered_results[0]["metadata"]["user_id"] == 42
@@ -2013,6 +2017,74 @@ def test_search_with_schema_inf_multiple_vector(mem_db: DBConnection):
        table.search(q).limit(1).to_arrow()


+def test_search_infers_single_nested_vector(mem_db: DBConnection):
+    """Search without a column name when the only vector is nested.
+
+    ``image.embedding`` is the sole vector field in the schema.
+    """
+    embedding = pa.field("embedding", pa.list_(pa.float32(), 2))
+    schema = pa.schema(
+        [
+            pa.field("id", pa.int32()),
+            pa.field("image", pa.struct([embedding])),
+        ]
+    )
+    rows = [
+        {"id": 0, "image": {"embedding": [0.0, 1.0]}},
+        {"id": 1, "image": {"embedding": [10.0, 11.0]}},
+    ]
+    data = pa.Table.from_pylist(rows, schema=schema)
+    table = mem_db.create_table("nested_vector_default_search", data=data)
+    # The query equals row 0's embedding, so row 0 must be the top hit.
+    nearest = table.search([0.0, 1.0]).limit(1).to_list()
+    assert nearest[0]["id"] == 0
+
+
+def test_search_nested_vector_multiple_candidates(mem_db: DBConnection):
+    """Two nested vectors of the same size make column inference ambiguous.
+
+    The raised ``ValueError`` must mention both candidate paths.
+    """
+    # Both struct columns wrap an identical two-element float32 vector field.
+    holder = pa.struct([pa.field("embedding", pa.list_(pa.float32(), 2))])
+    schema = pa.schema(
+        [
+            pa.field("image", holder),
+            pa.field("text", holder),
+        ]
+    )
+    row = {
+        "image": {"embedding": [0.0, 1.0]},
+        "text": {"embedding": [2.0, 3.0]},
+    }
+    data = pa.Table.from_pylist([row], schema=schema)
+    table = mem_db.create_table("nested_vector_multiple_candidates", data=data)
+
+    # Regex: both paths, ``image.embedding`` before ``text.embedding``.
+    expected = "image.embedding.*text.embedding"
+    # Keep the whole chain inside the block: the error may surface at any
+    # of its steps.
+    with pytest.raises(ValueError, match=expected):
+        table.search([0.0, 1.0]).limit(1).to_arrow()
+
+
+def test_search_nested_vector_no_candidates(mem_db: DBConnection):
+    """Vector search on a schema without any vector field must raise.
+
+    The schema holds an int32 ``id`` and a struct with one string child.
+    """
+    label_struct = pa.struct([pa.field("label", pa.string())])
+    fields = [pa.field("id", pa.int32()), pa.field("metadata", label_struct)]
+    rows = [{"id": 0, "metadata": {"label": "cat"}}]
+    data = pa.Table.from_pylist(rows, schema=pa.schema(fields))
+    table = mem_db.create_table("nested_vector_no_candidates", data=data)
+
+    # Keep the whole chain inside the block: the error may surface at any
+    # of its steps.
+    with pytest.raises(ValueError, match="no vector column"):
+        table.search([0.0, 1.0]).limit(1).to_arrow()
+
+
 def test_compact_cleanup(tmp_db: DBConnection):
    pytest.importorskip("lance")
    table = tmp_db.create_table(
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.29.1-beta.0"
+version = "0.30.0-beta.1"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -104,6 +104,7 @@ datafusion.workspace = true
 http-body = "1"                                        # Matching reqwest
 rstest = "0.23.0"
 test-log = "0.2"
+serial_test = "3"


 [features]
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -23,17 +23,12 @@ impl VectorIndex {
            .fields
            .iter()
            .map(|field_id| {
-                manifest
-                    .schema
-                    .field_by_id(*field_id)
-                    .unwrap_or_else(|| {
-                        panic!(
-                            "field {field_id} of index {} must exist in schema",
-                            index.name
-                        )
-                    })
-                    .name
-                    .clone()
+                manifest.schema.field_path(*field_id).unwrap_or_else(|_| {
+                    panic!(
+                        "field {field_id} of index {} must exist in schema",
+                        index.name
+                    )
+                })
            })
            .collect();
        Self {
--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -888,6 +888,7 @@ pub mod test_utils {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use serial_test::serial;
    use std::time::Duration;

    #[test]
@@ -1143,6 +1144,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_none() {
        let config = ClientConfig::default();
        // Clear env vars that might be set from other tests
@@ -1155,6 +1157,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_from_env() {
        // SAFETY: This is only called in tests
        unsafe {
@@ -1169,6 +1172,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_from_env_key() {
        // SAFETY: This is only called in tests
        unsafe {
@@ -1189,6 +1193,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_direct_takes_precedence() {
        // SAFETY: This is only called in tests
        unsafe {
@@ -1206,6 +1211,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_empty_env_ignored() {
        // SAFETY: This is only called in tests
        unsafe {
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -27,7 +27,9 @@ use crate::table::UpdateResult;
 use crate::table::query::create_multi_vector_plan;
 use crate::table::{AnyQuery, Filter, PreprocessingOutput, TableStatistics};
 use crate::utils::background_cache::BackgroundCache;
-use crate::utils::{supported_btree_data_type, supported_vector_data_type};
+use crate::utils::{
+    resolve_arrow_field_path, supported_btree_data_type, supported_vector_data_type,
+};
 use crate::{DistanceType, Error};
 use crate::{
    error::Result,
@@ -1526,8 +1528,10 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
                });
            }
        };
+        let schema = self.schema().await?;
+        let (canonical_column, field) = resolve_arrow_field_path(&schema, &column)?;
        let mut body = serde_json::json!({
-            "column": column
+            "column": canonical_column
        });

        // Add name parameter if provided (for backwards compatibility, only include if Some)
@@ -1562,12 +1566,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            Index::LabelList(p) => ("LABEL_LIST", Some(to_json(p)?)),
            Index::FTS(p) => ("FTS", Some(to_json(p)?)),
            Index::Auto => {
-                let schema = self.schema().await?;
-                let field = schema
-                    .field_with_name(&column)
-                    .map_err(|_| Error::InvalidInput {
-                        message: format!("Column {} not found in schema", column),
-                    })?;
                if supported_vector_data_type(field.data_type()) {
                    body[METRIC_TYPE_KEY] =
                        serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
@@ -1864,16 +1862,26 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            status_code: None,
        })?;

+        let schema = self.schema().await?;
+
        // Make request to get stats for each index, so we get the index type.
        // This is a bit inefficient, but it's the only way to get the index type.
        let mut futures = Vec::with_capacity(body.indexes.len());
        for index in body.indexes {
+            let columns = index
+                .columns
+                .iter()
+                .map(|column| {
+                    resolve_arrow_field_path(&schema, column)
+                        .map(|(canonical_column, _)| canonical_column)
+                })
+                .collect::<Result<Vec<_>>>()?;
            let future = async move {
                match self.index_stats(&index.index_name).await {
                    Ok(Some(stats)) => Ok(Some(IndexConfig {
                        name: index.index_name,
                        index_type: stats.index_type,
-                        columns: index.columns,
+                        columns,
                    })),
                    Ok(None) => Ok(None), // The index must have been deleted since we listed it.
                    Err(e) => Err(e),
@@ -2315,6 +2323,38 @@ mod tests {
        .unwrap()
    }

+    /// Arrow schema fixture: five non-nullable struct columns, one child each.
+    ///
+    /// The children are an Int32, an 8-wide Float32 fixed-size list, a Utf8
+    /// field, and two Int32 fields whose names contain `-` and `.`.
+    fn nested_index_schema() -> Schema {
+        // Wraps a single child field in a non-nullable struct column.
+        fn wrap(parent: &str, child: Field) -> Field {
+            Field::new(parent, DataType::Struct(vec![child].into()), false)
+        }
+
+        // Builds a non-nullable Int32 child field.
+        fn int32_child(name: &str) -> Field {
+            Field::new(name, DataType::Int32, false)
+        }
+
+        let embedding_type =
+            DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 8);
+
+        Schema::new(vec![
+            // Nested scalar.
+            wrap("metadata", int32_child("user_id")),
+            // Nested vector.
+            wrap("image", Field::new("embedding", embedding_type, false)),
+            // Nested text.
+            wrap("payload", Field::new("text", DataType::Utf8, false)),
+            // Hyphens in both the parent and the child name.
+            wrap("meta-data", int32_child("user-id")),
+            // Child name that itself contains a dot.
+            wrap("literal", int32_child("a.b")),
+        ])
+    }
+
    #[rstest]
    #[case("", 0)]
    #[case("{}", 0)]
@@ -3081,6 +3121,59 @@ mod tests {
            .unwrap();
    }

+    #[tokio::test]
+    async fn test_query_vector_nested_field_path() {
+        // Canned reply for the mock server. The test asserts on the outgoing
+        // request only; the returned rows are discarded at the end.
+        let reply_schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let reply_batch = RecordBatch::try_new(
+            Arc::new(reply_schema),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )
+        .unwrap();
+
+        // Mock server: validate the request, then answer with the canned batch.
+        let table = Table::new_with_handler("my_table", move |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(request.url().path(), "/v1/table/my_table/query/");
+            let content_type = request.headers().get("Content-Type").unwrap();
+            assert_eq!(content_type, JSON_CONTENT_TYPE);
+
+            // Decode the JSON body that was actually sent.
+            let raw = request.body().unwrap().as_bytes().unwrap();
+            let sent: serde_json::Value = serde_json::from_slice(raw).unwrap();
+            // The dotted nested path must be sent as `vector_column` unchanged.
+            let mut wanted = serde_json::json!({
+                "vector_column": "image.embedding",
+                "prefilter": true,
+                "k": 10,
+                "nprobes": 20,
+                "minimum_nprobes": 20,
+                "maximum_nprobes": 20,
+                "lower_bound": null,
+                "upper_bound": null,
+                "ef": null,
+                "refine_factor": null,
+                "version": null,
+            });
+            wanted["vector"] = vec![0.1f32, 0.2, 0.3].into();
+            assert_eq!(sent, wanted);
+
+            // Reply with the canned batch as encoded by `write_ipc_file`.
+            let ipc_bytes = write_ipc_file(&reply_batch);
+            http::Response::builder()
+                .status(200)
+                .header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
+                .body(ipc_bytes)
+                .unwrap()
+        });
+
+        // `column` receives the nested path; the handler above checks what is
+        // sent for it.
+        let search = table.query().nearest_to(vec![0.1, 0.2, 0.3]).unwrap();
+        let _ = search.column("image.embedding").execute().await.unwrap();
+    }
+
    #[tokio::test]
    async fn test_query_fts() {
        let table = Table::new_with_handler("my_table", |request| {
@@ -3162,7 +3255,7 @@ mod tests {
                        "query": {
                            "match": {
                                "terms": "hello world",
-                                "column": "a",
+                                "column": "payload.text",
                                "boost": 1.0,
                                "fuzziness": 0,
                                "max_expansions": 50,
@@ -3196,7 +3289,7 @@ mod tests {
            .query()
            .full_text_search(FullTextSearchQuery::new_query(
                MatchQuery::new("hello world".to_owned())
-                    .with_column(Some("a".to_owned()))
+                    .with_column(Some("payload.text".to_owned()))
                    .into(),
            ))
            .with_row_id()
@@ -3467,32 +3560,152 @@ mod tests {
        for (index_type, expected_body, index) in cases {
            let table = Table::new_with_handler("my_table", move |request| {
                assert_eq!(request.method(), "POST");
-                assert_eq!(request.url().path(), "/v1/table/my_table/create_index/");
-                assert_eq!(
-                    request.headers().get("Content-Type").unwrap(),
-                    JSON_CONTENT_TYPE
-                );
-                let body = request.body().unwrap().as_bytes().unwrap();
-                let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-                let mut expected_body = expected_body.clone();
-                expected_body["column"] = "a".into();
-                expected_body[INDEX_TYPE_KEY] = index_type.into();
+                match request.url().path() {
+                    "/v1/table/my_table/describe/" => {
+                        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+                        http::Response::builder()
+                            .status(200)
+                            .body(describe_response(&schema))
+                            .unwrap()
+                    }
+                    "/v1/table/my_table/create_index/" => {
+                        assert_eq!(
+                            request.headers().get("Content-Type").unwrap(),
+                            JSON_CONTENT_TYPE
+                        );
+                        let body = request.body().unwrap().as_bytes().unwrap();
+                        let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+                        let mut expected_body = expected_body.clone();
+                        expected_body["column"] = "a".into();
+                        expected_body[INDEX_TYPE_KEY] = index_type.into();

-                assert_eq!(body, expected_body);
+                        assert_eq!(body, expected_body);

-                http::Response::builder().status(200).body("{}").unwrap()
+                        http::Response::builder()
+                            .status(200)
+                            .body("{}".to_string())
+                            .unwrap()
+                    }
+                    path => panic!("Unexpected path: {}", path),
+                }
            });

            table.create_index(&["a"], index).execute().await.unwrap();
        }
    }

+    #[tokio::test]
+    async fn test_create_index_nested_field_paths() { // Checks the `column` sent in each create_index request when the caller passes nested, differently-cased or backtick-quoted paths.
+        let schema = nested_index_schema(); // schema served by the mocked describe endpoint below
+        let expected_requests = Arc::new(vec![ // expected create_index JSON bodies, in the order the calls are made further down
+            json!({
+                "column": "metadata.user_id",
+                "index_type": "BTREE",
+            }),
+            json!({
+                "column": "image.embedding",
+                "index_type": "IVF_PQ",
+                "metric_type": "l2",
+            }),
+            { // FTS case: start from the serialized default InvertedIndexParams, then add the two routing keys
+                let mut body = serde_json::to_value(InvertedIndexParams::default()).unwrap();
+                body["column"] = "payload.text".into();
+                body["index_type"] = "FTS".into();
+                body
+            },
+            json!({
+                "column": "`meta-data`.`user-id`",
+                "index_type": "BTREE",
+            }),
+            json!({
+                "column": "literal.`a.b`",
+                "index_type": "BTREE",
+            }),
+        ]);
+        let request_idx = Arc::new(AtomicUsize::new(0)); // number of create_index requests the handler has seen so far
+        let table = Table::new_with_handler("my_table", {
+            let schema = schema.clone(); // clones moved into the handler closure; the originals stay usable after it
+            let expected_requests = expected_requests.clone();
+            let request_idx = request_idx.clone();
+            move |request| {
+                assert_eq!(request.method(), "POST");
+                match request.url().path() {
+                    "/v1/table/my_table/describe/" => http::Response::builder() // describe requests are answered with the schema; they do not advance request_idx
+                        .status(200)
+                        .body(describe_response(&schema))
+                        .unwrap(),
+                    "/v1/table/my_table/create_index/" => {
+                        assert_eq!(
+                            request.headers().get("Content-Type").unwrap(),
+                            JSON_CONTENT_TYPE
+                        );
+                        let idx = request_idx.fetch_add(1, Ordering::SeqCst); // fetch_add returns the previous value, i.e. the 0-based position of this request
+                        let body = request.body().unwrap().as_bytes().unwrap();
+                        let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+                        assert_eq!(body, expected_requests[idx]); // whole-body equality: extra or missing keys fail the test
+                        http::Response::builder()
+                            .status(200)
+                            .body("{}".to_string())
+                            .unwrap()
+                    }
+                    path => panic!("Unexpected path: {}", path), // any other endpoint is a test failure
+                }
+            }
+        });
+
+        table
+            .create_index(&["Metadata.USER_ID"], Index::BTree(Default::default())) // mixed-case input; expected on the wire as "metadata.user_id"
+            .execute()
+            .await
+            .unwrap();
+        table
+            .create_index(&["Image.Embedding"], Index::Auto) // Index::Auto is expected to go out as IVF_PQ with metric "l2" (second expected body)
+            .execute()
+            .await
+            .unwrap();
+        table
+            .create_index(&["Payload.Text"], Index::FTS(Default::default()))
+            .execute()
+            .await
+            .unwrap();
+        table
+            .create_index(&["`META-DATA`.`USER-ID`"], Index::BTree(Default::default())) // backtick-quoted segments; expected lower-cased with the quoting kept
+            .execute()
+            .await
+            .unwrap();
+        table
+            .create_index(&["literal.`A.B`"], Index::BTree(Default::default())) // quoted segment containing a literal dot; expected as literal.`a.b`
+            .execute()
+            .await
+            .unwrap();
+
+        assert_eq!(request_idx.load(Ordering::SeqCst), expected_requests.len()); // every expected create_index request was actually issued
+    }
+
    #[tokio::test]
    async fn test_list_indices() {
-        let table = Table::new_with_handler("my_table", |request| {
+        let schema = Schema::new(vec![
+            Field::new(
+                "vector",
+                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 8),
+                false,
+            ),
+            Field::new(
+                "metadata",
+                DataType::Struct(vec![Field::new("my.column", DataType::Utf8, true)].into()),
+                false,
+            ),
+        ]);
+        let table = Table::new_with_handler("my_table", move |request| {
            assert_eq!(request.method(), "POST");

            let response_body = match request.url().path() {
+                "/v1/table/my_table/describe/" => {
+                    return http::Response::builder()
+                        .status(200)
+                        .body(describe_response(&schema))
+                        .unwrap();
+                }
                "/v1/table/my_table/index/list/" => {
                    serde_json::json!({
                        "indexes": [
@@ -3505,7 +3718,7 @@ mod tests {
                            {
                                "index_name": "my_idx",
                                "index_uuid": "34255f64-5717-4562-b3fc-2c963f66afa6",
-                                "columns": ["my_column"],
+                                "columns": ["metadata.`my.column`"],
                                "index_status": "done",
                            },
                        ]
@@ -3544,7 +3757,7 @@ mod tests {
            IndexConfig {
                name: "my_idx".into(),
                index_type: IndexType::LabelList,
-                columns: vec!["my_column".into()],
+                columns: vec!["metadata.`my.column`".into()],
            },
        ];
        assert_eq!(indices, expected);
@@ -4012,6 +4225,20 @@ mod tests {
            assert_eq!(request.method(), "POST");

            let response_body = match request.url().path() {
+                "/v1/table/my_table/describe/" => {
+                    let schema = Schema::new(vec![
+                        Field::new(
+                            "vector",
+                            DataType::FixedSizeList(
+                                Arc::new(Field::new("item", DataType::Float32, true)),
+                                8,
+                            ),
+                            false,
+                        ),
+                        Field::new("my_column", DataType::Utf8, false),
+                    ]);
+                    serde_json::from_str::<serde_json::Value>(&describe_response(&schema)).unwrap()
+                }
                "/v1/table/my_table/index/list/" => {
                    serde_json::json!({
                        "indexes": [
@@ -4173,13 +4400,23 @@ mod tests {
                        assert_eq!(value["index_type"], "IVF_PQ");
                    }

-                    http::Response::builder().status(200).body("").unwrap()
-                }
-                "/v1/table/dev$users/describe/" => {
-                    // Needed for schema check in Auto index type
                    http::Response::builder()
                        .status(200)
-                        .body(r#"{"version": 1, "schema": {"fields": [{"name": "embedding", "type": {"type": "list", "item": {"type": "float32"}}, "nullable": false}]}}"#)
+                        .body("".to_string())
+                        .unwrap()
+                }
+                "/v1/table/dev$users/describe/" => {
+                    let schema = Schema::new(vec![Field::new(
+                        "embedding",
+                        DataType::FixedSizeList(
+                            Arc::new(Field::new("item", DataType::Float32, true)),
+                            8,
+                        ),
+                        false,
+                    )]);
+                    http::Response::builder()
+                        .status(200)
+                        .body(describe_response(&schema))
                        .unwrap()
                }
                _ => {
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -2688,16 +2688,13 @@ impl BaseTable for NativeTable {
                message: "Multi-column (composite) indices are not yet supported".to_string(),
            });
        }
-
-        let dataset = self.dataset.get().await?;
+        self.dataset.ensure_mutable()?;
+        let mut dataset = (*self.dataset.get().await?).clone();
        let (column, field) = Self::resolve_index_field(dataset.schema(), &opts.columns[0])?;
-        drop(dataset);

        let lance_idx_params = self.make_index_params(&field, opts.index.clone()).await?;
        let index_type = self.get_index_type_for_field(&field, &opts.index);
        let columns = [column.as_str()];
-        self.dataset.ensure_mutable()?;
-        let mut dataset = (*self.dataset.get().await?).clone();
        let mut builder = dataset
            .create_index_builder(&columns, index_type, lance_idx_params.as_ref())
            .train(opts.train)
@@ -2815,63 +2812,88 @@ impl BaseTable for NativeTable {
    async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
        let dataset = self.dataset.get().await?;
        let indices = dataset.load_indices().await?;
-        let results = futures::stream::iter(indices.as_slice()).then(|idx| async {
-
-            // skip Lance internal indexes
-            if idx.name == FRAG_REUSE_INDEX_NAME {
-                return None;
-            }
-
-            let stats = match dataset.index_statistics(idx.name.as_str()).await {
-                Ok(stats) => stats,
-                Err(e) => {
-                    log::warn!("Failed to get statistics for index {} ({}): {}", idx.name, idx.uuid, e);
+        let results = futures::stream::iter(indices.as_slice())
+            .then(|idx| async {
+                // skip Lance internal indexes
+                if idx.name == FRAG_REUSE_INDEX_NAME {
                    return None;
                }
-            };

-            let stats: serde_json::Value = match serde_json::from_str(&stats) {
-                Ok(stats) => stats,
-                Err(e) => {
-                    log::warn!("Failed to deserialize index statistics for index {} ({}): {}", idx.name, idx.uuid, e);
-                    return None;
-                }
-            };
-
-            let Some(index_type) = stats.get("index_type").and_then(|v| v.as_str()) else {
-                log::warn!("Index statistics was missing 'index_type' field for index {} ({})", idx.name, idx.uuid);
-                return None;
-            };
-
-            let index_type: crate::index::IndexType = match index_type.parse() {
-                Ok(index_type) => index_type,
-                Err(e) => {
-                    log::warn!("Failed to parse index type for index {} ({}): {}", idx.name, idx.uuid, e);
-                    return None;
-                }
-            };
-
-            let mut columns = Vec::with_capacity(idx.fields.len());
-            for field_id in &idx.fields {
-                let column = match dataset.schema().field_path(*field_id) {
-                    Ok(column) => column,
+                let stats = match dataset.index_statistics(idx.name.as_str()).await {
+                    Ok(stats) => stats,
                    Err(e) => {
                        log::warn!(
-                            "The index {} ({}) referenced a field with id {} which does not exist in the schema: {}",
+                            "Failed to get statistics for index {} ({}): {}",
                            idx.name,
                            idx.uuid,
-                            field_id,
                            e
                        );
                        return None;
                    }
                };
-                columns.push(column);
-            }

-            let name = idx.name.clone();
-            Some(IndexConfig { index_type, columns, name })
-        }).collect::<Vec<_>>().await;
+                let stats: serde_json::Value = match serde_json::from_str(&stats) {
+                    Ok(stats) => stats,
+                    Err(e) => {
+                        log::warn!(
+                            "Failed to deserialize index statistics for index {} ({}): {}",
+                            idx.name,
+                            idx.uuid,
+                            e
+                        );
+                        return None;
+                    }
+                };
+
+                let Some(index_type) = stats.get("index_type").and_then(|v| v.as_str()) else {
+                    log::warn!(
+                        "Index statistics was missing 'index_type' field for index {} ({})",
+                        idx.name,
+                        idx.uuid
+                    );
+                    return None;
+                };
+
+                let index_type: crate::index::IndexType = match index_type.parse() {
+                    Ok(index_type) => index_type,
+                    Err(e) => {
+                        log::warn!(
+                            "Failed to parse index type for index {} ({}): {}",
+                            idx.name,
+                            idx.uuid,
+                            e
+                        );
+                        return None;
+                    }
+                };
+
+                let mut columns = Vec::with_capacity(idx.fields.len());
+                for field_id in &idx.fields {
+                    let field_path = match dataset.schema().field_path(*field_id) {
+                        Ok(field_path) => field_path,
+                        Err(e) => {
+                            log::warn!(
+                                "Failed to resolve field path for index {} ({}) field id {}: {}",
+                                idx.name,
+                                idx.uuid,
+                                field_id,
+                                e
+                            );
+                            return None;
+                        }
+                    };
+                    columns.push(field_path);
+                }
+
+                let name = idx.name.clone();
+                Some(IndexConfig {
+                    index_type,
+                    columns,
+                    name,
+                })
+            })
+            .collect::<Vec<_>>()
+            .await;

        Ok(results.into_iter().flatten().collect())
    }
@@ -3074,6 +3096,7 @@ pub struct FragmentSummaryStats {
 #[cfg(test)]
 #[allow(deprecated)]
 mod tests {
+    use std::collections::HashMap;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::time::Duration;
@@ -3854,6 +3877,25 @@ mod tests {
            1
        );

+        let default_vector_results = table
+            .query()
+            .nearest_to(&[0.0; 8])
+            .unwrap()
+            .limit(1)
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        assert_eq!(
+            default_vector_results
+                .iter()
+                .map(|batch| batch.num_rows())
+                .sum::<usize>(),
+            1
+        );
+
        let fts_results = table
            .query()
            .full_text_search(FullTextSearchQuery::new("document".to_string()))
--- a/rust/lancedb/src/utils/mod.rs
+++ b/rust/lancedb/src/utils/mod.rs
@@ -6,7 +6,7 @@ pub(crate) mod background_cache;
 use std::sync::Arc;

 use arrow_array::RecordBatch;
-use arrow_schema::{DataType, Schema, SchemaRef};
+use arrow_schema::{DataType, Field, Schema, SchemaRef};
 use datafusion_common::{DataFusionError, Result as DataFusionResult};
 use datafusion_execution::RecordBatchStream;
 use futures::{FutureExt, Stream};
@@ -152,14 +152,10 @@ pub fn validate_namespace(namespace: &[String]) -> Result<()> {
 /// Find one default column to create index or perform vector query.
 pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result<String> {
    // Try to find a vector column.
-    let candidates = schema
-        .fields()
-        .iter()
-        .filter_map(|field| match infer_vector_dim(field.data_type()) {
-            Ok(d) if dim.is_none() || dim == Some(d as i32) => Some(field.name()),
-            _ => None,
-        })
-        .collect::<Vec<_>>();
+    let mut candidates = Vec::new();
+    for field in schema.fields() {
+        collect_vector_columns(field, &mut Vec::new(), dim, &mut candidates);
+    }
    if candidates.is_empty() {
        Err(Error::InvalidInput {
            message: format!(
@@ -180,6 +176,57 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
    }
 }

+fn collect_vector_columns( // Walks `field` and its struct descendants, appending the formatted path of each matching vector field to `candidates`.
+    field: &Field, // field to inspect; its name is on `path` for the duration of this call
+    path: &mut Vec<String>, // names of the enclosing fields; left with its incoming contents on return
+    dim: Option<i32>, // None accepts any dimension; Some(n) accepts only an inferred dimension equal to n
+    candidates: &mut Vec<String>, // output list of formatted field paths
+) {
+    path.push(field.name().clone());
+    match infer_vector_dim(field.data_type()) {
+        Ok(d) if dim.is_none() || dim == Some(d as i32) => { // match: record this field and do not descend into it
+            let path_segments = path.iter().map(String::as_str).collect::<Vec<_>>();
+            candidates.push(lance_core::datatypes::format_field_path(&path_segments)); // e.g. "image.embedding" in the default_vector_column tests
+        }
+        _ => { // infer_vector_dim returned Err, or the dimension differs from `dim`
+            if let DataType::Struct(fields) = field.data_type() { // only struct children are explored; other types end the walk here
+                for child in fields {
+                    collect_vector_columns(child, path, dim, candidates);
+                }
+            }
+        }
+    }
+    path.pop(); // undo the push above so sibling fields see the parent's path
+}
+
+pub(crate) fn resolve_arrow_field_path(schema: &Schema, column: &str) -> Result<(String, Field)> { // Looks `column` up in `schema`; returns the path re-formatted from the schema's own field names, plus the matched last field as an Arrow `Field`.
+    lance_core::datatypes::parse_field_path(column).map_err(|e| Error::InvalidInput { // result is discarded: parsing is done only so a malformed path fails with InvalidInput
+        message: format!("Invalid field path `{}`: {}", column, e),
+    })?;
+
+    let lance_schema =
+        lance_core::datatypes::Schema::try_from(schema).map_err(|e| Error::Schema { // Arrow -> Lance schema conversion; a failure is reported as Error::Schema
+            message: format!("Invalid schema: {}", e),
+        })?;
+    let field_path = lance_schema
+        .resolve_case_insensitive(column) // presumably matches names ignoring case, per the method name; TODO confirm in lance_core
+        .ok_or_else(|| Error::Schema { // no match: the error lists the field paths the schema does contain
+            message: format!(
+                "Field path `{}` not found in schema. Available field paths: {}",
+                column,
+                lance_schema.field_paths().join(", ")
+            ),
+        })?;
+    let field = field_path.last().expect("field path should be non-empty"); // panics if the resolver ever returns an empty path
+    let path_segments = field_path
+        .iter()
+        .map(|field| field.name.as_str()) // names are taken from the resolved schema fields, not from the caller's spelling of `column`
+        .collect::<Vec<_>>();
+    let canonical_path = lance_core::datatypes::format_field_path(&path_segments);
+
+    Ok((canonical_path, Field::from(*field))) // the last resolved Lance field is converted back to an Arrow field
+}
+
 pub fn supported_btree_data_type(dtype: &DataType) -> bool {
    dtype.is_integer()
        || dtype.is_floating()
@@ -450,6 +497,49 @@ mod tests {
            "vec"
        );

+        let schema_with_nested_vec_col = Schema::new(vec![
+            Field::new("id", DataType::Int16, true),
+            Field::new(
+                "image",
+                DataType::Struct(
+                    vec![Field::new(
+                        "embedding",
+                        DataType::FixedSizeList(
+                            Arc::new(Field::new("item", DataType::Float32, false)),
+                            10,
+                        ),
+                        false,
+                    )]
+                    .into(),
+                ),
+                false,
+            ),
+        ]);
+        assert_eq!(
+            default_vector_column(&schema_with_nested_vec_col, None).unwrap(),
+            "image.embedding"
+        );
+
+        let schema_with_escaped_nested_vec_col = Schema::new(vec![Field::new(
+            "image-meta",
+            DataType::Struct(
+                vec![Field::new(
+                    "embedding.v1",
+                    DataType::FixedSizeList(
+                        Arc::new(Field::new("item", DataType::Float32, false)),
+                        10,
+                    ),
+                    false,
+                )]
+                .into(),
+            ),
+            false,
+        )]);
+        assert_eq!(
+            default_vector_column(&schema_with_escaped_nested_vec_col, None).unwrap(),
+            "`image-meta`.`embedding.v1`"
+        );
+
        let multi_vec_col = Schema::new(vec![
            Field::new("id", DataType::Int16, true),
            Field::new(
@@ -469,6 +559,48 @@ mod tests {
                .to_string()
                .contains("More than one")
        );
+
+        let multi_nested_vec_col = Schema::new(vec![
+            Field::new(
+                "image",
+                DataType::Struct(
+                    vec![Field::new(
+                        "embedding",
+                        DataType::FixedSizeList(
+                            Arc::new(Field::new("item", DataType::Float32, false)),
+                            10,
+                        ),
+                        false,
+                    )]
+                    .into(),
+                ),
+                false,
+            ),
+            Field::new(
+                "text",
+                DataType::Struct(
+                    vec![Field::new(
+                        "embedding",
+                        DataType::FixedSizeList(
+                            Arc::new(Field::new("item", DataType::Float32, false)),
+                            50,
+                        ),
+                        false,
+                    )]
+                    .into(),
+                ),
+                false,
+            ),
+        ]);
+        assert_eq!(
+            default_vector_column(&multi_nested_vec_col, Some(50)).unwrap(),
+            "text.embedding"
+        );
+        let err = default_vector_column(&multi_nested_vec_col, None)
+            .unwrap_err()
+            .to_string();
+        assert!(err.contains("image.embedding"));
+        assert!(err.contains("text.embedding"));
    }

    #[test]
Author	SHA1	Message	Date
lancedb automation	b6310ed905	chore: update lance dependency to v7.1.0-beta.2	2026-05-23 01:45:38 +00:00
Will Jones	ccec91d957	fix: use releases API in check_lance_release.py (#3427 ) Previously `check_lance_release.py` used `git/refs/tags` with `--paginate --jq`, which drops the last page in some `gh` versions. The 7.x Lance tags all landed on the final (partial) page, causing the script to report `v6.0.1` as the latest and never triggering an update. Switch to the releases API with `per_page=20`, which returns the 20 most recent releases sorted newest-first — one API call, no pagination needed. Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-05-22 15:00:44 -07:00
Zhaocun Sun	ec82e36317	docs(python): document in-memory connections (#3434 ) ## Problem Issue #2247 notes that the Python docs do not show how to use LanceDB's in-memory backend via `connect("memory://")`. ## Solution Add `memory://` examples to the sync and async `connect` docstrings, and call out that in-memory databases are intended for tests/temporary data and are not persisted. ## Validation - `python3 -m py_compile python/python/lancedb/__init__.py` - `git diff --check` ## Confidence 82/100 — docs-only update, directly tied to the documented missing `memory://` usage. It changes API documentation only and was syntax/diff validated. Closes #2247.	2026-05-22 10:51:09 -07:00
Will Jones	da2a1c4a2c	test(rust): fix flaky env-var-dependent client tests (#3426 ) The `test_resolve_user_id_*` tests in `remote/client.rs` mutate the process-global `LANCEDB_USER_ID` and `LANCEDB_USER_ID_ENV_KEY` environment variables. cargo runs tests in a binary across multiple threads, so one test's `remove_var` can race another's `set_var` between when it's set and when `resolve_user_id()` reads it. This surfaced as an intermittent failure of `test_resolve_user_id_from_env_key` on Windows CI: ``` assertion `left == right` failed left: None right: Some("custom-env-user-id") ``` Annotates the five env-mutating tests with `serial_test`'s `#[serial(user_id_env)]` so they run serially with respect to each other. Should be backported to `release/v0.28` (CI for #3421 hit this same flake). Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-22 10:35:15 -07:00
Xuanwo	8463a10ebe	docs: clarify PR title requirement for agents (#3433 )	2026-05-22 20:09:20 +08:00
Lance Release	7168d64af1	Bump version: 0.30.0-beta.0 → 0.30.0-beta.1	2026-05-22 10:09:01 +00:00
Lance Release	403c33dff0	Bump version: 0.33.0-beta.0 → 0.33.0-beta.1	2026-05-22 10:08:07 +00:00
Xuanwo	a0001043b6	fix: canonicalize remote nested field paths (#3430 ) Fixes #3407. Remote tables now resolve create-index field paths against the table schema before sending requests, so nested, escaped, and case-insensitive inputs use the same canonical path contract as local tables. Remote `list_indices()` also canonicalizes returned columns against the current schema, and the remote query tests lock explicit nested vector and FTS request payloads.	2026-05-22 15:23:00 +08:00
Lance Release	1bb7acb74f	Bump version: 0.29.1-beta.0 → 0.30.0-beta.0	2026-05-21 21:36:18 +00:00
Lance Release	4ce175276c	Bump version: 0.32.1-beta.0 → 0.33.0-beta.0	2026-05-21 21:35:22 +00:00
Justin Miller	4bccb43e56	fix(python): route sync BaseQueryBuilder.to_batches through async path (#3425 ) ## Summary Fixes #3424. `LanceTakeQueryBuilder.to_batches()` raised `AttributeError: 'AsyncTakeQuery' object has no attribute 'execute'`. The inherited `BaseQueryBuilder.to_batches` called `self._inner.execute(...)`, but `self._inner` is an `AsyncQueryBase` (Python wrapper) — only its native inner exposes `execute`. Every other sync builder overrides `to_batches`, so the bug only surfaced on take-query builders, which inherit the base unchanged. `take_offsets(...).to_batches()` is broken for the same reason. Route the sync wrapper through the async `to_batches` on the background event loop, so the native `execute` is invoked from inside an awaiting context (matching how the async path works correctly). ## Repro ```python import lancedb, pyarrow as pa, tempfile db = lancedb.connect(tempfile.mkdtemp()) tbl = db.create_table("t", data=pa.table({"a": list(range(100))})) tbl.take_row_ids([0, 1, 2]).to_arrow() # works tbl.search().to_batches() # works list(tbl.take_row_ids([0, 1, 2]).to_batches()) # AttributeError (before) ``` ## Test plan - [x] New regression test `test_take_queries_to_batches` covers `take_offsets(...).to_batches()`, `take_row_ids(...).to_batches()`, and the `select(...)` projection — all fail on `main` with the patch reverted, all pass with the fix. - [x] `test_take_queries`, `test_query_builder_batches`, and `test_query_schema` still pass. - [x] `ruff format --check` and `ruff check` clean on changed files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-21 12:11:13 -07:00
Xuanwo	d5dc4c0f06	fix: discover nested vector columns by default (#3423 ) LanceDB default vector column discovery only considered top-level fields, so tables with a single nested vector leaf still required users to pass an explicit field path. This updates Rust and Python discovery to recurse into struct fields, return canonical field paths, and preserve actionable errors when no default or multiple defaults exist. The explicit nested path flow for index creation and search remains supported across Rust, Python, and Node, with regression coverage for single nested vector leaves, multiple candidate leaves, and schemas without vector leaves. Closes #3405.	2026-05-21 19:02:41 +08:00
Sean Mackrory	55ae6197c1	fix(python): drop version from Table __repr__ (#3411 ) There have been a couple of reports of this function freezing debuggers because it triggers a network round-trip but is assumed to be extremely light-weight: https://github.com/lancedb/lancedb/discussions/2853. We'll just cache the last version we see. I considered digging into see if we could assume or get the version at create time or after other operations, but that could be a bit of a rabbit hole as I'm a bit unfamiliar with this. Claude was having a hard time of it too 😅 I propose we see how the currently implementation goes and improve it if people find "unknown" or stale values coming up disruptively often before improving this further. Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-20 12:20:46 -07:00
Pragnyan Ramtha	15bd821825	fix(python): check all table pages for db membership (#3395 ) ## Summary - Fix `name in db` and `len(db)` for local Python connections with more than one page of tables. - Use `list_tables()` pagination instead of deprecated `table_names()` with its default 10-item page. - Add regression coverage with 20 tables so later pages are included. Fixes #2727. ## Validation - `python3 -m py_compile python/python/lancedb/db.py python/python/tests/test_db.py` - No-build Python harness that extracts and executes the edited `LanceDBConnection` pagination methods: passed - `uvx ruff check python/python/lancedb/db.py python/python/tests/test_db.py` - `uvx ruff format --check python/python/lancedb/db.py python/python/tests/test_db.py` Note: `uv run pytest python/tests/test_db.py::test_db_contains_and_len_include_all_table_name_pages -q` was attempted first, but it stayed in the broad Rust/PyO3 native extension build and was stopped before pytest started.	2026-05-20 10:31:10 -07:00
Xuanwo	cf162c8a10	test(python): cover nested FTS field paths (#3418 ) Adds regression coverage for Python FTS APIs targeting nested text leaves, including sync and async match, phrase, and hybrid query paths. This also locks in the intended error boundary: nested text leaf paths are valid, while struct containers, non-text leaves, and missing paths remain rejected. Fixes #3404.	2026-05-21 00:49:00 +08:00
Xuanwo	2eba7ebd02	fix: return canonical nested index paths (#3413 ) Index metadata APIs now resolve stored field ids back to Lance canonical field paths instead of leaf names, so nested indexes such as `metadata.user_id` and escaped literal-dot fields round-trip through `list_indices()`. Native index creation also canonicalizes the input path before handing it to Lance, keeping local metadata consistent with the field-path contract while remote responses continue to expose server-provided canonical columns. Fixes #3403.	2026-05-21 00:20:47 +08:00