mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-01 11:20:44 +00:00
Compare commits
36 Commits
python-v0.
...
rust-neste
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f54f5600ad | ||
|
|
e34fe84c7f | ||
|
|
5b1f248257 | ||
|
|
95e34d47b9 | ||
|
|
a0defd448f | ||
|
|
0fadb65153 | ||
|
|
15fbcf61fc | ||
|
|
d715bbb588 | ||
|
|
5ce3d8d141 | ||
|
|
5eaac178b1 | ||
|
|
11af763fcd | ||
|
|
2ed5452e1c | ||
|
|
b7c0b5987c | ||
|
|
97a4b38f19 | ||
|
|
10879d99b8 | ||
|
|
4e6a1d5dce | ||
|
|
13d2759356 | ||
|
|
7f52ec8c36 | ||
|
|
c6ae0de3ee | ||
|
|
231f0655ce | ||
|
|
8c52977c59 | ||
|
|
359710a0bf | ||
|
|
1f1726369d | ||
|
|
df354abae4 | ||
|
|
11bc674548 | ||
|
|
5593460823 | ||
|
|
2807ad6854 | ||
|
|
4761fa9bcb | ||
|
|
4c2939d66e | ||
|
|
a813ce2f71 | ||
|
|
a898dc81c2 | ||
|
|
de3f8097e7 | ||
|
|
0ac59de5f1 | ||
|
|
d082c2d2ac | ||
|
|
9d8699f99e | ||
|
|
aa2c7b3591 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.27.2"
|
||||
current_version = "0.28.0-beta.7"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
@@ -18,6 +18,6 @@ body:
|
||||
label: Link
|
||||
description: >
|
||||
Provide a link to the existing documentation, if applicable.
|
||||
placeholder: ex. https://lancedb.com/docs/tables/...
|
||||
placeholder: ex. https://docs.lancedb.com/tables/...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
1
.github/workflows/nodejs.yml
vendored
1
.github/workflows/nodejs.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- nodejs/**
|
||||
- rust/**
|
||||
- docs/src/js/**
|
||||
|
||||
1
.github/workflows/python.yml
vendored
1
.github/workflows/python.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- python/**
|
||||
- rust/**
|
||||
- .github/workflows/python.yml
|
||||
|
||||
1
.github/workflows/rust.yml
vendored
1
.github/workflows/rust.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- rust/**
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
|
||||
389
Cargo.lock
generated
389
Cargo.lock
generated
@@ -137,15 +137,6 @@ dependencies = [
|
||||
"object",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arc-swap"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6"
|
||||
dependencies = [
|
||||
"rustversion",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "argminmax"
|
||||
version = "0.6.3"
|
||||
@@ -302,7 +293,7 @@ dependencies = [
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"flatbuffers",
|
||||
"lz4_flex 0.12.1",
|
||||
"lz4_flex",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
@@ -1278,31 +1269,6 @@ dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bon"
|
||||
version = "3.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe"
|
||||
dependencies = [
|
||||
"bon-macros",
|
||||
"rustversion",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bon-macros"
|
||||
version = "3.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c"
|
||||
dependencies = [
|
||||
"darling 0.20.11",
|
||||
"ident_case",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "3.5.0"
|
||||
@@ -1472,12 +1438,6 @@ dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "census"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.4"
|
||||
@@ -1795,7 +1755,7 @@ dependencies = [
|
||||
"crossterm_winapi",
|
||||
"document-features",
|
||||
"parking_lot",
|
||||
"rustix 1.1.4",
|
||||
"rustix",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
@@ -2714,12 +2674,6 @@ dependencies = [
|
||||
"litrs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "downcast-rs"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc"
|
||||
|
||||
[[package]]
|
||||
name = "dtor"
|
||||
version = "0.1.1"
|
||||
@@ -2955,12 +2909,6 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55"
|
||||
|
||||
[[package]]
|
||||
name = "fastdivide"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.3.0"
|
||||
@@ -3054,16 +3002,6 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs4"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8"
|
||||
dependencies = [
|
||||
"rustix 0.38.44",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs_extra"
|
||||
version = "1.3.0"
|
||||
@@ -3072,8 +3010,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"rand 0.9.2",
|
||||
@@ -3560,12 +3498,6 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "htmlescape"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.12"
|
||||
@@ -4134,13 +4066,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-row",
|
||||
@@ -4177,12 +4110,14 @@ dependencies = [
|
||||
"lance-linalg",
|
||||
"lance-namespace",
|
||||
"lance-table",
|
||||
"lance-tokenizer",
|
||||
"log",
|
||||
"moka",
|
||||
"object_store",
|
||||
"permutation",
|
||||
"pin-project",
|
||||
"prost",
|
||||
"prost-build",
|
||||
"prost-types",
|
||||
"rand 0.9.2",
|
||||
"roaring",
|
||||
@@ -4190,7 +4125,6 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"snafu 0.9.0",
|
||||
"tantivy",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
@@ -4201,13 +4135,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
@@ -4222,8 +4157,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-bitpacking"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"paste",
|
||||
@@ -4232,8 +4167,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4270,12 +4205,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
@@ -4301,8 +4237,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datagen"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4320,8 +4256,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4358,8 +4294,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4391,8 +4327,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4429,6 +4365,7 @@ dependencies = [
|
||||
"lance-io",
|
||||
"lance-linalg",
|
||||
"lance-table",
|
||||
"lance-tokenizer",
|
||||
"libm",
|
||||
"log",
|
||||
"ndarray",
|
||||
@@ -4446,7 +4383,6 @@ dependencies = [
|
||||
"serde_json",
|
||||
"smallvec",
|
||||
"snafu 0.9.0",
|
||||
"tantivy",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tracing",
|
||||
@@ -4456,8 +4392,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4501,8 +4437,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4518,8 +4454,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -4532,8 +4468,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-impls"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
@@ -4578,8 +4514,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4618,8 +4554,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -4628,9 +4564,19 @@ dependencies = [
|
||||
"rand 0.9.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lance-tokenizer"
|
||||
version = "6.0.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3"
|
||||
dependencies = [
|
||||
"rust-stemmers",
|
||||
"serde",
|
||||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.27.2"
|
||||
version = "0.28.0-beta.7"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@@ -4712,7 +4658,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.27.2"
|
||||
version = "0.28.0-beta.7"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4734,7 +4680,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.30.2"
|
||||
version = "0.31.0-beta.7"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -4772,12 +4718,6 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
|
||||
|
||||
[[package]]
|
||||
name = "levenshtein_automata"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25"
|
||||
|
||||
[[package]]
|
||||
name = "lexical-core"
|
||||
version = "1.0.6"
|
||||
@@ -4866,12 +4806,6 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.12.1"
|
||||
@@ -4952,12 +4886,6 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4_flex"
|
||||
version = "0.11.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a"
|
||||
|
||||
[[package]]
|
||||
name = "lz4_flex"
|
||||
version = "0.12.1"
|
||||
@@ -5032,15 +4960,6 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "measure_time"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.8.0"
|
||||
@@ -5194,12 +5113,6 @@ dependencies = [
|
||||
"target-features",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "murmurhash32"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b"
|
||||
|
||||
[[package]]
|
||||
name = "napi"
|
||||
version = "3.8.3"
|
||||
@@ -5488,12 +5401,6 @@ version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "oneshot"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107"
|
||||
|
||||
[[package]]
|
||||
name = "onig"
|
||||
version = "6.5.1"
|
||||
@@ -5583,15 +5490,6 @@ version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
|
||||
|
||||
[[package]]
|
||||
name = "ownedbytes"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "p256"
|
||||
version = "0.11.1"
|
||||
@@ -7046,19 +6944,6 @@ dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.4.15",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "1.1.4"
|
||||
@@ -7068,7 +6953,7 @@ dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.12.1",
|
||||
"linux-raw-sys",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
@@ -7533,15 +7418,6 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
|
||||
|
||||
[[package]]
|
||||
name = "sketches-ddsketch"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.12"
|
||||
@@ -7914,152 +7790,6 @@ version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
|
||||
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.24.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"arc-swap",
|
||||
"base64 0.22.1",
|
||||
"bitpacking",
|
||||
"bon",
|
||||
"byteorder",
|
||||
"census",
|
||||
"crc32fast",
|
||||
"crossbeam-channel",
|
||||
"downcast-rs",
|
||||
"fastdivide",
|
||||
"fnv",
|
||||
"fs4",
|
||||
"htmlescape",
|
||||
"hyperloglogplus",
|
||||
"itertools 0.14.0",
|
||||
"levenshtein_automata",
|
||||
"log",
|
||||
"lru",
|
||||
"lz4_flex 0.11.6",
|
||||
"measure_time",
|
||||
"memmap2 0.9.10",
|
||||
"once_cell",
|
||||
"oneshot",
|
||||
"rayon",
|
||||
"regex",
|
||||
"rust-stemmers",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sketches-ddsketch",
|
||||
"smallvec",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-columnar",
|
||||
"tantivy-common",
|
||||
"tantivy-fst",
|
||||
"tantivy-query-grammar",
|
||||
"tantivy-stacker",
|
||||
"tantivy-tokenizer-api",
|
||||
"tempfile",
|
||||
"thiserror 2.0.18",
|
||||
"time",
|
||||
"uuid",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-bitpacker"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494"
|
||||
dependencies = [
|
||||
"bitpacking",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-columnar"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344"
|
||||
dependencies = [
|
||||
"downcast-rs",
|
||||
"fastdivide",
|
||||
"itertools 0.14.0",
|
||||
"serde",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
"tantivy-sstable",
|
||||
"tantivy-stacker",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-common"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"ownedbytes",
|
||||
"serde",
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-fst"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"regex-syntax",
|
||||
"utf8-ranges",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-query-grammar"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a"
|
||||
dependencies = [
|
||||
"nom 7.1.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-sstable"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"itertools 0.14.0",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
"tantivy-fst",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-stacker"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1"
|
||||
dependencies = [
|
||||
"murmurhash32",
|
||||
"rand_distr 0.4.3",
|
||||
"tantivy-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-tokenizer-api"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tap"
|
||||
version = "1.0.1"
|
||||
@@ -8087,7 +7817,7 @@ dependencies = [
|
||||
"fastrand",
|
||||
"getrandom 0.4.2",
|
||||
"once_cell",
|
||||
"rustix 1.1.4",
|
||||
"rustix",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
@@ -8539,6 +8269,15 @@ version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
|
||||
dependencies = [
|
||||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization-alignments"
|
||||
version = "0.1.12"
|
||||
|
||||
28
Cargo.toml
28
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# **The Multimodal AI Lakehouse**
|
||||
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.com/docs) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://docs.lancedb.com) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
|
||||
**The ultimate multimodal data platform for AI/ML applications.**
|
||||
|
||||
@@ -57,7 +57,7 @@ LanceDB is a central location where developers can build, train and analyze thei
|
||||
|
||||
## **How to Install**:
|
||||
|
||||
Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up LanceDB locally.
|
||||
Follow the [Quickstart](https://docs.lancedb.com/quickstart) doc to set up LanceDB locally.
|
||||
|
||||
**API & SDK:** We also support Python, Typescript and Rust SDKs
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# LanceDB Documentation
|
||||
|
||||
LanceDB docs are available at [lancedb.com/docs](https://lancedb.com/docs).
|
||||
LanceDB docs are available at [docs.lancedb.com](https://docs.lancedb.com).
|
||||
|
||||
The SDK docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
|
||||
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.27.2</version>
|
||||
<version>0.28.0-beta.7</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -53,3 +53,18 @@ optional tlsConfig: TlsConfig;
|
||||
```ts
|
||||
optional userAgent: string;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### userId?
|
||||
|
||||
```ts
|
||||
optional userId: string;
|
||||
```
|
||||
|
||||
User identifier for tracking purposes.
|
||||
|
||||
This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
variable that contains the user ID value.
|
||||
|
||||
@@ -89,4 +89,4 @@ optional storageOptions: Record<string, string>;
|
||||
|
||||
(For LanceDB OSS only): configuration for object storage.
|
||||
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
|
||||
@@ -97,4 +97,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
|
||||
@@ -42,4 +42,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.2-final.0</version>
|
||||
<version>0.28.0-beta.7</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.2-final.0</version>
|
||||
<version>0.28.0-beta.7</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>5.0.0-beta.4</lance-core.version>
|
||||
<lance-core.version>6.0.0-beta.1</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.27.2"
|
||||
version = "0.28.0-beta.7"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -30,7 +30,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ export interface CreateTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
* The available options are described at https://docs.lancedb.com/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
|
||||
@@ -78,7 +78,7 @@ export interface OpenTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
* The available options are described at https://docs.lancedb.com/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
/**
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.7",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -35,7 +35,7 @@ pub struct ConnectionOptions {
|
||||
pub read_consistency_interval: Option<f64>,
|
||||
/// (For LanceDB OSS only): configuration for object storage.
|
||||
///
|
||||
/// The available options are described at https://lancedb.com/docs/storage/
|
||||
/// The available options are described at https://docs.lancedb.com/storage/
|
||||
pub storage_options: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): the session to use for this connection. Holds
|
||||
/// shared caches and other session-specific state.
|
||||
|
||||
@@ -92,6 +92,13 @@ pub struct ClientConfig {
|
||||
pub extra_headers: Option<HashMap<String, String>>,
|
||||
pub id_delimiter: Option<String>,
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
/// User identifier for tracking purposes.
|
||||
///
|
||||
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
/// variable that contains the user ID value.
|
||||
pub user_id: Option<String>,
|
||||
}
|
||||
|
||||
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
|
||||
@@ -145,6 +152,7 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
|
||||
id_delimiter: config.id_delimiter,
|
||||
tls_config: config.tls_config.map(Into::into),
|
||||
header_provider: None, // the header provider is set separately later
|
||||
user_id: config.user_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.31.0-beta.0"
|
||||
current_version = "0.31.0-beta.7"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.0"
|
||||
version = "0.31.0-beta.7"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
pylance = [
|
||||
"pylance>=5.0.0b3",
|
||||
"pylance>=5.0.0b5",
|
||||
]
|
||||
tests = [
|
||||
"aiohttp>=3.9.0",
|
||||
@@ -59,7 +59,7 @@ tests = [
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy>=0.20.0",
|
||||
"pyarrow-stubs>=16.0",
|
||||
"pylance>=5.0.0b3",
|
||||
"pylance>=5.0.0b5",
|
||||
"requests>=2.31.0",
|
||||
"datafusion>=52,<53",
|
||||
]
|
||||
@@ -83,7 +83,7 @@ embeddings = [
|
||||
"colpali-engine>=0.3.10",
|
||||
"huggingface_hub>=0.19.0",
|
||||
"InstructorEmbedding>=1.0.1",
|
||||
"google.generativeai>=0.3.0",
|
||||
"google-genai>=1.0.0",
|
||||
"boto3>=1.28.57",
|
||||
"awscli>=1.44.38",
|
||||
"botocore>=1.31.57",
|
||||
|
||||
@@ -110,7 +110,7 @@ def connect(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
@@ -215,6 +215,85 @@ def connect(
|
||||
)
|
||||
|
||||
|
||||
WORKER_PROPERTY_PREFIX = "_lancedb_worker_"
|
||||
|
||||
|
||||
def _apply_worker_overrides(props: dict[str, str]) -> dict[str, str]:
|
||||
"""Apply worker property overrides.
|
||||
|
||||
Any key starting with ``_lancedb_worker_`` is extracted, the prefix
|
||||
is stripped, and the resulting key-value pair is put back into the
|
||||
map (overriding the existing value if present). The original
|
||||
prefixed key is removed.
|
||||
"""
|
||||
worker_keys = [k for k in props if k.startswith(WORKER_PROPERTY_PREFIX)]
|
||||
if not worker_keys:
|
||||
return props
|
||||
result = dict(props)
|
||||
for key in worker_keys:
|
||||
value = result.pop(key)
|
||||
real_key = key[len(WORKER_PROPERTY_PREFIX) :]
|
||||
result[real_key] = value
|
||||
return result
|
||||
|
||||
|
||||
def deserialize_conn(
    data: str,
    *,
    for_worker: bool = False,
) -> DBConnection:
    """Rebuild a DBConnection from its serialized JSON form.

    The input must be a string produced by
    :meth:`DBConnection.serialize`.

    Parameters
    ----------
    data : str
        String produced by ``serialize()``.
    for_worker : bool, default False
        When ``True``, namespace client properties whose key starts with
        ``_lancedb_worker_`` have that prefix stripped and override the
        property of the same un-prefixed name. For example,
        ``_lancedb_worker_uri`` replaces ``uri``. Only applied to
        ``"namespace"`` connections.

    Returns
    -------
    DBConnection
        A new connection matching the serialized state.

    Raises
    ------
    ValueError
        If ``connection_type`` is neither ``"namespace"`` nor ``"local"``.
    """
    import json

    payload = json.loads(data)
    kind = payload.get("connection_type")

    # Settings shared by both connection types are decoded up front.
    interval_seconds = payload.get("read_consistency_interval_seconds")
    if interval_seconds is None:
        interval = None
    else:
        interval = timedelta(seconds=interval_seconds)
    storage_options = payload.get("storage_options")

    if kind == "local":
        return LanceDBConnection(
            payload["uri"],
            read_consistency_interval=interval,
            storage_options=storage_options,
        )

    if kind == "namespace":
        # Copy so the parsed payload is never mutated; a missing or null
        # property map is treated as empty.
        client_props = dict(payload.get("namespace_client_properties") or {})
        if for_worker:
            client_props = _apply_worker_overrides(client_props)
        return connect_namespace(
            namespace_client_impl=payload["namespace_client_impl"],
            namespace_client_properties=client_props,
            read_consistency_interval=interval,
            storage_options=storage_options,
            namespace_client_pushdown_operations=payload.get(
                "namespace_client_pushdown_operations"
            ),
        )

    raise ValueError(f"Unknown connection_type: {kind}")
|
||||
|
||||
|
||||
async def connect_async(
|
||||
uri: URI,
|
||||
*,
|
||||
@@ -257,7 +336,7 @@ async def connect_async(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
|
||||
@@ -151,6 +151,9 @@ class Connection(object):
|
||||
async def drop_all_tables(
|
||||
self, namespace_path: Optional[List[str]] = None
|
||||
) -> None: ...
|
||||
async def namespace_client_config(
|
||||
self,
|
||||
) -> Dict[str, Any]: ...
|
||||
|
||||
class Table:
|
||||
def name(self) -> str: ...
|
||||
|
||||
@@ -96,7 +96,7 @@ def data_to_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
"See https://docs.lancedb.com/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -23,11 +23,13 @@ from lancedb.embeddings.registry import EmbeddingFunctionRegistry
|
||||
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
|
||||
from lancedb.background_loop import LOOP
|
||||
from lance_namespace import (
|
||||
LanceNamespace,
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
ListTablesResponse,
|
||||
connect as namespace_connect,
|
||||
)
|
||||
|
||||
from . import __version__
|
||||
@@ -280,7 +282,7 @@ class DBConnection(EnforceOverrides):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
@@ -431,7 +433,7 @@ class DBConnection(EnforceOverrides):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -507,6 +509,39 @@ class DBConnection(EnforceOverrides):
|
||||
def uri(self) -> str:
|
||||
return self._uri
|
||||
|
||||
def namespace_client(self) -> LanceNamespace:
    """Return the namespace client equivalent to this connection.

    Depending on the connection type:

    - native storage connections return a DirectoryNamespace pointing to
      the same root with the same storage options;
    - namespace connections return the backing namespace client;
    - enterprise (remote) connections return a RestNamespace with the
      same URI and authentication headers.

    This base implementation always raises; connection types that
    support it override the method.

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    message = "namespace_client is not supported for this connection type"
    raise NotImplementedError(message)
|
||||
|
||||
def serialize(self) -> str:
    """Produce a string from which this connection can be rebuilt.

    Pass the result to :func:`lancedb.deserialize_conn` to recreate an
    equivalent connection, e.g. in a remote worker.

    This base implementation always raises; connection types that
    support it override the method.

    Returns
    -------
    str
        Serialized representation of this connection.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    message = "serialize is not supported for this connection type"
    raise NotImplementedError(message)
|
||||
|
||||
|
||||
class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
@@ -559,6 +594,7 @@ class LanceDBConnection(DBConnection):
|
||||
):
|
||||
if _inner is not None:
|
||||
self._conn = _inner
|
||||
self._cached_namespace_client = None
|
||||
return
|
||||
|
||||
if not isinstance(uri, Path):
|
||||
@@ -606,6 +642,7 @@ class LanceDBConnection(DBConnection):
|
||||
# beyond _conn.
|
||||
self.storage_options = storage_options
|
||||
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||
self._cached_namespace_client: Optional[LanceNamespace] = None
|
||||
|
||||
@property
|
||||
def read_consistency_interval(self) -> Optional[timedelta]:
|
||||
@@ -630,6 +667,22 @@ class LanceDBConnection(DBConnection):
|
||||
val += ")"
|
||||
return val
|
||||
|
||||
@override
def serialize(self) -> str:
    """Serialize this local connection to a JSON string.

    The JSON object carries ``connection_type`` (always ``"local"``),
    ``uri``, ``storage_options`` and
    ``read_consistency_interval_seconds``, which are the keys
    ``deserialize_conn`` reads back for a local connection.

    Returns
    -------
    str
        JSON text describing this connection.
    """
    import json

    rci = self.read_consistency_interval
    # Test against None rather than truthiness: timedelta(0) is falsy, so
    # ``if rci`` would turn a zero interval into None. The reader only
    # treats None as "unset", so zero must survive the round trip as 0.0.
    rci_seconds = rci.total_seconds() if rci is not None else None
    return json.dumps(
        {
            "connection_type": "local",
            "uri": self.uri,
            "storage_options": self.storage_options,
            "read_consistency_interval_seconds": rci_seconds,
        }
    )
|
||||
|
||||
async def _async_get_table_names(self, start_after: Optional[str], limit: int):
|
||||
conn = AsyncConnection(await lancedb_connect(self.uri))
|
||||
return await conn.table_names(start_after=start_after, limit=limit)
|
||||
@@ -665,10 +718,10 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return LOOP.run(
|
||||
self._conn.list_namespaces(
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
return self._namespace_conn().list_namespaces(
|
||||
namespace_path=namespace_path,
|
||||
page_token=page_token,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -678,27 +731,10 @@ class LanceDBConnection(DBConnection):
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace_path=namespace_path, mode=mode, properties=properties
|
||||
)
|
||||
return self._namespace_conn().create_namespace(
|
||||
namespace_path=namespace_path,
|
||||
mode=mode,
|
||||
properties=properties,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -708,46 +744,19 @@ class LanceDBConnection(DBConnection):
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(
|
||||
namespace_path=namespace_path, mode=mode, behavior=behavior
|
||||
)
|
||||
return self._namespace_conn().drop_namespace(
|
||||
namespace_path=namespace_path,
|
||||
mode=mode,
|
||||
behavior=behavior,
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(
|
||||
self, namespace_path: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
|
||||
return self._namespace_conn().describe_namespace(
|
||||
namespace_path=namespace_path,
|
||||
)
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
@@ -776,6 +785,12 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace_path:
|
||||
return self._namespace_conn().list_tables(
|
||||
namespace_path=namespace_path,
|
||||
page_token=page_token,
|
||||
limit=limit,
|
||||
)
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
@@ -864,6 +879,22 @@ class LanceDBConnection(DBConnection):
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
|
||||
if namespace_path:
|
||||
return self._namespace_conn().create_table(
|
||||
name,
|
||||
data=data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
data_storage_version=data_storage_version,
|
||||
enable_v2_manifest_paths=enable_v2_manifest_paths,
|
||||
)
|
||||
|
||||
tbl = LanceTable.create(
|
||||
self,
|
||||
name,
|
||||
@@ -879,6 +910,19 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
return tbl
|
||||
|
||||
def _namespace_conn(self) -> DBConnection:
    """Build a LanceNamespaceDBConnection over this connection's
    directory namespace.

    Used to delegate child-namespace operations. The wrapper reuses this
    connection's namespace client, read consistency interval and storage
    options; a new wrapper object is constructed on every call.
    """
    # Imported here rather than at module level, as in the original.
    from lancedb.namespace import LanceNamespaceDBConnection

    client = self.namespace_client()
    interval = self.read_consistency_interval
    options = self.storage_options
    return LanceNamespaceDBConnection(
        client,
        read_consistency_interval=interval,
        storage_options=options,
        namespace_client_impl=None,
        namespace_client_properties=None,
    )
|
||||
|
||||
@override
|
||||
def open_table(
|
||||
self,
|
||||
@@ -895,7 +939,8 @@ class LanceDBConnection(DBConnection):
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from.
|
||||
The namespace to open the table from. When non-empty, the
|
||||
table is resolved through the directory namespace client.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -914,6 +959,14 @@ class LanceDBConnection(DBConnection):
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if namespace_path:
|
||||
return self._namespace_conn().open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
|
||||
return LanceTable.open(
|
||||
self,
|
||||
name,
|
||||
@@ -998,6 +1051,9 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace_path:
|
||||
self._namespace_conn().drop_table(name, namespace_path=namespace_path)
|
||||
return
|
||||
LOOP.run(
|
||||
self._conn.drop_table(
|
||||
name, namespace_path=namespace_path, ignore_missing=ignore_missing
|
||||
@@ -1044,6 +1100,23 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
)
|
||||
|
||||
@override
def namespace_client(self) -> LanceNamespace:
    """Return the namespace client equivalent to this connection.

    The client is a DirectoryNamespace pointing to the same root with the
    same storage options. It is created on first use and the same object
    is returned for the rest of this connection's lifetime.

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.
    """
    client = self._cached_namespace_client
    if client is not None:
        return client

    # First request: resolve the client on the background loop and keep it.
    client = LOOP.run(self._conn.namespace_client())
    self._cached_namespace_client = client
    return client
|
||||
|
||||
@deprecation.deprecated(
|
||||
deprecated_in="0.15.1",
|
||||
removed_in="0.17",
|
||||
@@ -1317,6 +1390,7 @@ class AsyncConnection(object):
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
location: Optional[str] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
) -> AsyncTable:
|
||||
"""Create an [AsyncTable][lancedb.table.AsyncTable] in the database.
|
||||
|
||||
@@ -1361,7 +1435,7 @@ class AsyncConnection(object):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
@@ -1514,6 +1588,7 @@ class AsyncConnection(object):
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
)
|
||||
else:
|
||||
data = data_to_reader(data, schema)
|
||||
@@ -1524,6 +1599,7 @@ class AsyncConnection(object):
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
)
|
||||
|
||||
return AsyncTable(new_table)
|
||||
@@ -1552,7 +1628,7 @@ class AsyncConnection(object):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
index_cache_size: int, default 256
|
||||
**Deprecated**: Use session-level cache configuration instead.
|
||||
Create a Session with custom cache sizes and pass it to lancedb.connect().
|
||||
@@ -1716,6 +1792,25 @@ class AsyncConnection(object):
|
||||
namespace_path = []
|
||||
await self._inner.drop_all_tables(namespace_path=namespace_path)
|
||||
|
||||
async def namespace_client(self) -> LanceNamespace:
    """Return the namespace client equivalent to this connection.

    Depending on the connection type:

    - native storage connections return a DirectoryNamespace pointing to
      the same root with the same storage options;
    - namespace connections return the backing namespace client;
    - enterprise (remote) connections return a RestNamespace with the
      same URI and authentication headers.

    The client is built from the ``impl`` and ``properties`` entries of
    the configuration reported by the inner connection.

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.
    """
    client_config = await self._inner.namespace_client_config()
    impl = client_config["impl"]
    properties = client_config["properties"]
    return namespace_connect(impl, properties)
|
||||
|
||||
@deprecation.deprecated(
|
||||
deprecated_in="0.15.1",
|
||||
removed_in="0.17",
|
||||
|
||||
@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
|
||||
@register("gemini-text")
|
||||
class GeminiText(TextEmbeddingFunction):
|
||||
"""
|
||||
An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
be set.
|
||||
|
||||
https://ai.google.dev/docs/embeddings_guide
|
||||
https://ai.google.dev/gemini-api/docs/embeddings
|
||||
|
||||
Supports various task types:
|
||||
| Task Type | Description |
|
||||
@@ -46,9 +46,12 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str, default "models/embedding-001"
|
||||
The name of the model to use. See the Gemini documentation for a list of
|
||||
available models.
|
||||
name: str, default "gemini-embedding-001"
|
||||
The name of the model to use. Supported models include:
|
||||
- "gemini-embedding-001" (768 dimensions)
|
||||
|
||||
Note: The legacy "models/embedding-001" format is also supported but
|
||||
"gemini-embedding-001" is recommended.
|
||||
|
||||
query_task_type: str, default "retrieval_query"
|
||||
Sets the task type for the queries.
|
||||
@@ -77,7 +80,7 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
"""
|
||||
|
||||
name: str = "models/embedding-001"
|
||||
name: str = "gemini-embedding-001"
|
||||
query_task_type: str = "retrieval_query"
|
||||
source_task_type: str = "retrieval_document"
|
||||
|
||||
@@ -114,23 +117,48 @@ class GeminiText(TextEmbeddingFunction):
|
||||
texts: list[str] or np.ndarray (of str)
|
||||
The texts to embed
|
||||
"""
|
||||
if (
|
||||
kwargs.get("task_type") == "retrieval_document"
|
||||
): # Provide a title to use existing API design
|
||||
title = "Embedding of a document"
|
||||
kwargs["title"] = title
|
||||
from google.genai import types
|
||||
|
||||
return [
|
||||
self.client.embed_content(model=self.name, content=text, **kwargs)[
|
||||
"embedding"
|
||||
]
|
||||
for text in texts
|
||||
]
|
||||
task_type = kwargs.get("task_type")
|
||||
|
||||
# Build content objects for embed_content
|
||||
contents = []
|
||||
for text in texts:
|
||||
if task_type == "retrieval_document":
|
||||
# Provide a title for retrieval_document task
|
||||
contents.append(
|
||||
{"parts": [{"text": "Embedding of a document"}, {"text": text}]}
|
||||
)
|
||||
else:
|
||||
contents.append({"parts": [{"text": text}]})
|
||||
|
||||
# Build config
|
||||
config_kwargs = {}
|
||||
if task_type:
|
||||
config_kwargs["task_type"] = task_type.upper() # API expects uppercase
|
||||
|
||||
# Call embed_content for each content
|
||||
embeddings = []
|
||||
for content in contents:
|
||||
config = (
|
||||
types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
|
||||
)
|
||||
response = self.client.models.embed_content(
|
||||
model=self.name,
|
||||
contents=content,
|
||||
config=config,
|
||||
)
|
||||
embeddings.append(response.embeddings[0].values)
|
||||
|
||||
return embeddings
|
||||
|
||||
@cached_property
|
||||
def client(self):
|
||||
genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
|
||||
attempt_import_or_raise("google.genai", "google-genai")
|
||||
|
||||
if not os.environ.get("GOOGLE_API_KEY"):
|
||||
api_key_not_found_help("google")
|
||||
return genai
|
||||
|
||||
from google import genai as genai_module
|
||||
|
||||
return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
|
||||
|
||||
@@ -10,7 +10,6 @@ through a namespace abstraction.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
|
||||
|
||||
@@ -25,7 +24,24 @@ if TYPE_CHECKING:
|
||||
from datetime import timedelta
|
||||
import pyarrow as pa
|
||||
|
||||
from lancedb.db import DBConnection, LanceDBConnection
|
||||
from lance_namespace_urllib3_client.models.json_arrow_data_type import JsonArrowDataType
|
||||
from lance_namespace_urllib3_client.models.json_arrow_field import JsonArrowField
|
||||
from lance_namespace_urllib3_client.models.json_arrow_schema import JsonArrowSchema
|
||||
from lance_namespace_urllib3_client.models.query_table_request import QueryTableRequest
|
||||
from lance_namespace_urllib3_client.models.query_table_request_columns import (
|
||||
QueryTableRequestColumns,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.query_table_request_full_text_query import (
|
||||
QueryTableRequestFullTextQuery,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.query_table_request_vector import (
|
||||
QueryTableRequestVector,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.string_fts_query import StringFtsQuery
|
||||
from lance_namespace.errors import TableNotFoundError
|
||||
from lancedb._lancedb import connect_namespace_client as _connect_namespace_client
|
||||
from lancedb.background_loop import LOOP
|
||||
from lancedb.db import AsyncConnection, DBConnection
|
||||
from lancedb.namespace_utils import (
|
||||
_normalize_create_namespace_mode,
|
||||
_normalize_drop_namespace_mode,
|
||||
@@ -40,14 +56,11 @@ from lance_namespace import (
|
||||
ListNamespacesResponse,
|
||||
ListTablesResponse,
|
||||
ListTablesRequest,
|
||||
DescribeTableRequest,
|
||||
DescribeNamespaceRequest,
|
||||
DropTableRequest,
|
||||
ListNamespacesRequest,
|
||||
CreateNamespaceRequest,
|
||||
DropNamespaceRequest,
|
||||
DeclareTableRequest,
|
||||
CreateTableRequest,
|
||||
)
|
||||
from lancedb.table import AsyncTable, LanceTable, Table
|
||||
from lancedb.util import validate_table_name
|
||||
@@ -56,21 +69,6 @@ from lancedb.pydantic import LanceModel
|
||||
from lancedb.embeddings import EmbeddingFunctionConfig
|
||||
from ._lancedb import Session
|
||||
|
||||
from lance_namespace_urllib3_client.models.json_arrow_schema import JsonArrowSchema
|
||||
from lance_namespace_urllib3_client.models.json_arrow_field import JsonArrowField
|
||||
from lance_namespace_urllib3_client.models.json_arrow_data_type import JsonArrowDataType
|
||||
from lance_namespace_urllib3_client.models.query_table_request import QueryTableRequest
|
||||
from lance_namespace_urllib3_client.models.query_table_request_vector import (
|
||||
QueryTableRequestVector,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.query_table_request_columns import (
|
||||
QueryTableRequestColumns,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.query_table_request_full_text_query import (
|
||||
QueryTableRequestFullTextQuery,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.string_fts_query import StringFtsQuery
|
||||
|
||||
|
||||
def _query_to_namespace_request(
|
||||
table_id: List[str],
|
||||
@@ -381,6 +379,8 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize a namespace-based LanceDB connection.
|
||||
@@ -406,12 +406,60 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace.create_table() instead of using declare_table + local write.
|
||||
|
||||
Default is None (no pushdown, all operations run locally).
|
||||
namespace_client_impl : Optional[str]
|
||||
The namespace implementation name used to create this connection.
|
||||
Stored for serialization purposes.
|
||||
namespace_client_properties : Optional[Dict[str, str]]
|
||||
The namespace properties used to create this connection.
|
||||
Stored for serialization purposes.
|
||||
"""
|
||||
self._namespace_client = namespace_client
|
||||
self.read_consistency_interval = read_consistency_interval
|
||||
self.storage_options = storage_options or {}
|
||||
self.session = session
|
||||
self._pushdown_operations = set(namespace_client_pushdown_operations or [])
|
||||
self._namespace_client_pushdown_operations = set(
|
||||
namespace_client_pushdown_operations or []
|
||||
)
|
||||
self._namespace_client_impl = namespace_client_impl
|
||||
self._namespace_client_properties = namespace_client_properties
|
||||
self._inner = AsyncConnection(
|
||||
_connect_namespace_client(
|
||||
namespace_client,
|
||||
read_consistency_interval=(
|
||||
read_consistency_interval.total_seconds()
|
||||
if read_consistency_interval is not None
|
||||
else None
|
||||
),
|
||||
storage_options=self.storage_options or None,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=(
|
||||
list(self._namespace_client_pushdown_operations)
|
||||
),
|
||||
namespace_client_impl=namespace_client_impl,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
)
|
||||
|
||||
@override
def serialize(self) -> str:
    """Serialize this connection's configuration to a JSON string.

    The payload is built only from attributes stored on the instance:
    the namespace implementation name and properties, the pushdown
    operations (sorted so the output is deterministic), the storage
    options, and the read consistency interval in seconds.

    Returns
    -------
    str
        A JSON object with the keys ``connection_type`` (always
        ``"namespace"``), ``namespace_client_impl``,
        ``namespace_client_properties``,
        ``namespace_client_pushdown_operations``, ``storage_options``
        (``None`` when empty) and ``read_consistency_interval_seconds``
        (``None`` only when no interval is configured).
    """
    import json

    interval = self.read_consistency_interval
    # Compare against None rather than relying on truthiness:
    # ``timedelta(0)`` is falsy, so a truthiness test would serialize a
    # zero-second interval as "no interval". The constructor makes the
    # same ``is not None`` distinction when forwarding the interval.
    interval_seconds = interval.total_seconds() if interval is not None else None

    payload = {
        "connection_type": "namespace",
        "namespace_client_impl": self._namespace_client_impl,
        "namespace_client_properties": self._namespace_client_properties,
        "namespace_client_pushdown_operations": sorted(
            self._namespace_client_pushdown_operations
        ),
        "storage_options": self.storage_options or None,
        "read_consistency_interval_seconds": interval_seconds,
    }
    return json.dumps(payload)
|
||||
|
||||
@override
|
||||
def table_names(
|
||||
@@ -464,13 +512,10 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
if "CreateTable" in self._pushdown_operations:
|
||||
return self._create_table_server_side(
|
||||
name=name,
|
||||
data=data,
|
||||
async_table = LOOP.run(
|
||||
self._inner.create_table(
|
||||
name,
|
||||
data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
@@ -480,127 +525,15 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
|
||||
# Local create path: declare_table + local write
|
||||
# Step 1: Get the table location and storage options from namespace
|
||||
# In overwrite mode, if table exists, use describe_table to get
|
||||
# existing location. Otherwise, call create_empty_table to reserve
|
||||
# a new location
|
||||
location = None
|
||||
namespace_storage_options = None
|
||||
if mode.lower() == "overwrite":
|
||||
# Try to describe the table first to see if it exists
|
||||
try:
|
||||
describe_request = DescribeTableRequest(id=table_id)
|
||||
describe_response = self._namespace_client.describe_table(
|
||||
describe_request
|
||||
)
|
||||
location = describe_response.location
|
||||
namespace_storage_options = describe_response.storage_options
|
||||
except Exception:
|
||||
# Table doesn't exist, will create a new one below
|
||||
pass
|
||||
|
||||
if location is None:
|
||||
# Table doesn't exist or mode is "create", reserve a new location
|
||||
declare_request = DeclareTableRequest(
|
||||
id=table_id,
|
||||
location=None,
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
declare_response = self._namespace_client.declare_table(declare_request)
|
||||
|
||||
if not declare_response.location:
|
||||
raise ValueError(
|
||||
"Table location is missing from declare_table response"
|
||||
)
|
||||
|
||||
location = declare_response.location
|
||||
namespace_storage_options = declare_response.storage_options
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if namespace_storage_options:
|
||||
merged_storage_options.update(namespace_storage_options)
|
||||
|
||||
# Step 2: Create table using LanceTable.create with the location
|
||||
# We need a temporary connection for the LanceTable.create method
|
||||
temp_conn = LanceDBConnection(
|
||||
location, # Use the actual location as the connection URI
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=merged_storage_options,
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
# Note: storage_options_provider is auto-created in Rust from namespace_client
|
||||
tbl = LanceTable.create(
|
||||
temp_conn,
|
||||
return LanceTable(
|
||||
self,
|
||||
name,
|
||||
data,
|
||||
schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=merged_storage_options,
|
||||
location=location,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
)
|
||||
|
||||
return tbl
|
||||
|
||||
def _create_table_server_side(
|
||||
self,
|
||||
name: str,
|
||||
data: Optional[DATA],
|
||||
schema: Optional[Union[pa.Schema, LanceModel]],
|
||||
mode: str,
|
||||
exist_ok: bool,
|
||||
on_bad_vectors: str,
|
||||
fill_value: float,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]],
|
||||
namespace_path: Optional[List[str]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
) -> Table:
|
||||
"""Create a table using server-side namespace.create_table()."""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
arrow_ipc_bytes = _data_to_arrow_ipc(
|
||||
data=data,
|
||||
schema=schema,
|
||||
embedding_functions=embedding_functions,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
)
|
||||
|
||||
request = CreateTableRequest(
|
||||
id=table_id,
|
||||
mode=_normalize_create_table_mode(mode),
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
|
||||
try:
|
||||
self._namespace_client.create_table(request, arrow_ipc_bytes)
|
||||
except Exception as e:
|
||||
if exist_ok and "already exists" in str(e).lower():
|
||||
return self.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
raise
|
||||
|
||||
return self.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
_async=async_table,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -614,30 +547,28 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
) -> Table:
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
table_id = namespace_path + [name]
|
||||
request = DescribeTableRequest(id=table_id)
|
||||
response = self._namespace_client.describe_table(request)
|
||||
try:
|
||||
async_table = LOOP.run(
|
||||
self._inner.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
)
|
||||
except RuntimeError as e:
|
||||
if "Table not found" in str(e):
|
||||
table_id = namespace_path + [name]
|
||||
raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
|
||||
raise
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if response.storage_options:
|
||||
merged_storage_options.update(response.storage_options)
|
||||
|
||||
# Pass managed_versioning to avoid redundant describe_table call in Rust.
|
||||
# Convert None to False since we already have the answer from describe_table.
|
||||
managed_versioning = response.managed_versioning is True
|
||||
|
||||
# Note: storage_options_provider is auto-created in Rust from namespace_client
|
||||
return self._lance_table_from_uri(
|
||||
return LanceTable(
|
||||
self,
|
||||
name,
|
||||
response.location,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=merged_storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
namespace_client=self._namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
_async=async_table,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -861,35 +792,50 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace_client: Optional[Any] = None,
|
||||
managed_versioning: Optional[bool] = None,
|
||||
) -> LanceTable:
|
||||
# Open a table directly from a URI using the location parameter
|
||||
# Note: storage_options should already be merged by the caller
|
||||
# Note: storage_options_provider is auto-created in Rust from namespace_client
|
||||
# Open a table directly from the namespace-resolved physical location.
|
||||
#
|
||||
# Open the table through the Rust namespace-backed connection. The Rust
|
||||
# layer keeps the logical namespace path and namespace client intact.
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
temp_conn = LanceDBConnection(
|
||||
table_uri, # Use the table location as the connection URI
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=storage_options if storage_options is not None else {},
|
||||
session=self.session,
|
||||
|
||||
async_table = LOOP.run(
|
||||
self._inner.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
location=None,
|
||||
namespace_client=namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
)
|
||||
)
|
||||
|
||||
# Open the table using the temporary connection with the location parameter
|
||||
# Pass namespace_client to enable managed versioning support and auto-create
|
||||
# storage options provider
|
||||
# Pass managed_versioning to avoid redundant describe_table call
|
||||
# Pass pushdown_operations if configured on this connection
|
||||
return LanceTable.open(
|
||||
temp_conn,
|
||||
return LanceTable(
|
||||
self,
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
location=table_uri,
|
||||
namespace_client=namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
_async=async_table,
|
||||
)
|
||||
|
||||
@override
def namespace_client(self) -> LanceNamespace:
    """Return the namespace client backing this connection.

    The value is the object held in ``self._namespace_client``, which the
    constructor assigns from its ``namespace_client`` argument.

    Returns
    -------
    LanceNamespace
        The stored namespace client, returned as-is (no copy is made).
    """
    backing_client = self._namespace_client
    return backing_client
|
||||
|
||||
|
||||
class AsyncLanceNamespaceDBConnection:
|
||||
"""
|
||||
@@ -937,7 +883,26 @@ class AsyncLanceNamespaceDBConnection:
|
||||
self.read_consistency_interval = read_consistency_interval
|
||||
self.storage_options = storage_options or {}
|
||||
self.session = session
|
||||
self._pushdown_operations = set(namespace_client_pushdown_operations or [])
|
||||
self._namespace_client_pushdown_operations = set(
|
||||
namespace_client_pushdown_operations or []
|
||||
)
|
||||
self._inner = AsyncConnection(
|
||||
_connect_namespace_client(
|
||||
namespace_client,
|
||||
read_consistency_interval=(
|
||||
read_consistency_interval.total_seconds()
|
||||
if read_consistency_interval is not None
|
||||
else None
|
||||
),
|
||||
storage_options=self.storage_options or None,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=(
|
||||
list(self._namespace_client_pushdown_operations)
|
||||
),
|
||||
namespace_client_impl=None,
|
||||
namespace_client_properties=None,
|
||||
)
|
||||
)
|
||||
|
||||
async def table_names(
|
||||
self,
|
||||
@@ -989,145 +954,16 @@ class AsyncLanceNamespaceDBConnection:
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
if "CreateTable" in self._pushdown_operations:
|
||||
return await self._create_table_server_side(
|
||||
name=name,
|
||||
data=data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
|
||||
# Local create path: declare_table + local write
|
||||
# Step 1: Get the table location and storage options from namespace
|
||||
location = None
|
||||
namespace_storage_options = None
|
||||
if mode.lower() == "overwrite":
|
||||
# Try to describe the table first to see if it exists
|
||||
try:
|
||||
describe_request = DescribeTableRequest(id=table_id)
|
||||
describe_response = self._namespace_client.describe_table(
|
||||
describe_request
|
||||
)
|
||||
location = describe_response.location
|
||||
namespace_storage_options = describe_response.storage_options
|
||||
except Exception:
|
||||
# Table doesn't exist, will create a new one below
|
||||
pass
|
||||
|
||||
if location is None:
|
||||
# Table doesn't exist or mode is "create", reserve a new location
|
||||
declare_request = DeclareTableRequest(
|
||||
id=table_id,
|
||||
location=None,
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
declare_response = self._namespace_client.declare_table(declare_request)
|
||||
|
||||
if not declare_response.location:
|
||||
raise ValueError(
|
||||
"Table location is missing from declare_table response"
|
||||
)
|
||||
|
||||
location = declare_response.location
|
||||
namespace_storage_options = declare_response.storage_options
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if namespace_storage_options:
|
||||
merged_storage_options.update(namespace_storage_options)
|
||||
|
||||
# Step 2: Create table using LanceTable.create with the location
|
||||
# Run the sync operation in a thread
|
||||
def _create_table():
|
||||
temp_conn = LanceDBConnection(
|
||||
location,
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=merged_storage_options,
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
# storage_options_provider is auto-created in Rust from namespace_client
|
||||
return LanceTable.create(
|
||||
temp_conn,
|
||||
name,
|
||||
data,
|
||||
schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=merged_storage_options,
|
||||
location=location,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
)
|
||||
|
||||
lance_table = await asyncio.to_thread(_create_table)
|
||||
# Get the underlying async table from LanceTable
|
||||
return lance_table._table
|
||||
|
||||
async def _create_table_server_side(
|
||||
self,
|
||||
name: str,
|
||||
data: Optional[DATA],
|
||||
schema: Optional[Union[pa.Schema, LanceModel]],
|
||||
mode: str,
|
||||
exist_ok: bool,
|
||||
on_bad_vectors: str,
|
||||
fill_value: float,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]],
|
||||
namespace_path: Optional[List[str]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
) -> AsyncTable:
|
||||
"""Create a table using server-side namespace.create_table()."""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
def _prepare_and_create():
|
||||
arrow_ipc_bytes = _data_to_arrow_ipc(
|
||||
data=data,
|
||||
schema=schema,
|
||||
embedding_functions=embedding_functions,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
)
|
||||
|
||||
request = CreateTableRequest(
|
||||
id=table_id,
|
||||
mode=_normalize_create_table_mode(mode),
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
|
||||
self._namespace_client.create_table(request, arrow_ipc_bytes)
|
||||
|
||||
try:
|
||||
await asyncio.to_thread(_prepare_and_create)
|
||||
except Exception as e:
|
||||
if exist_ok and "already exists" in str(e).lower():
|
||||
return await self.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
raise
|
||||
|
||||
return await self.open_table(
|
||||
return await self._inner.create_table(
|
||||
name,
|
||||
data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
namespace_path=namespace_path,
|
||||
embedding_functions=embedding_functions,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
|
||||
@@ -1142,45 +978,18 @@ class AsyncLanceNamespaceDBConnection:
|
||||
"""Open an existing table from the namespace."""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
table_id = namespace_path + [name]
|
||||
request = DescribeTableRequest(id=table_id)
|
||||
response = self._namespace_client.describe_table(request)
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if response.storage_options:
|
||||
merged_storage_options.update(response.storage_options)
|
||||
|
||||
# Capture managed_versioning from describe response.
|
||||
# Convert None to False since we already have the answer from describe_table.
|
||||
managed_versioning = response.managed_versioning is True
|
||||
|
||||
# Open table in a thread
|
||||
# Note: storage_options_provider is auto-created in Rust from namespace_client
|
||||
def _open_table():
|
||||
temp_conn = LanceDBConnection(
|
||||
response.location,
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=merged_storage_options,
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
return LanceTable.open(
|
||||
temp_conn,
|
||||
try:
|
||||
return await self._inner.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=merged_storage_options,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
location=response.location,
|
||||
namespace_client=self._namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
)
|
||||
|
||||
lance_table = await asyncio.to_thread(_open_table)
|
||||
return lance_table._table
|
||||
except RuntimeError as e:
|
||||
if "Table not found" in str(e):
|
||||
table_id = namespace_path + [name]
|
||||
raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
|
||||
raise
|
||||
|
||||
async def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||
"""Drop a table from the namespace."""
|
||||
@@ -1387,6 +1196,19 @@ class AsyncLanceNamespaceDBConnection:
|
||||
page_token=response.page_token,
|
||||
)
|
||||
|
||||
async def namespace_client(self) -> LanceNamespace:
    """Return the namespace client backing this async connection.

    This is a coroutine for API symmetry; it performs no awaiting and
    simply hands back the object held in ``self._namespace_client``.

    Returns
    -------
    LanceNamespace
        The stored namespace client, returned as-is (no copy is made).
    """
    backing_client = self._namespace_client
    return backing_client
|
||||
|
||||
|
||||
def connect_namespace(
|
||||
namespace_client_impl: str,
|
||||
@@ -1445,6 +1267,8 @@ def connect_namespace(
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
namespace_client_impl=namespace_client_impl,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -284,9 +284,8 @@ class Permutations:
|
||||
self.permutation_table = permutation_table
|
||||
|
||||
if permutation_table.schema.metadata is not None:
|
||||
split_names = permutation_table.schema.metadata.get(
|
||||
b"split_names", None
|
||||
).decode("utf-8")
|
||||
raw = permutation_table.schema.metadata.get(b"split_names")
|
||||
split_names = raw.decode("utf-8") if raw is not None else None
|
||||
if split_names is not None:
|
||||
self.split_names = json.loads(split_names)
|
||||
self.split_dict = {
|
||||
@@ -460,9 +459,8 @@ class Permutation:
|
||||
f"Cannot create a permutation on split `{split}`"
|
||||
" because no split names are defined in the permutation table"
|
||||
)
|
||||
split_names = permutation_table.schema.metadata.get(
|
||||
b"split_names", None
|
||||
).decode("utf-8")
|
||||
raw = permutation_table.schema.metadata.get(b"split_names")
|
||||
split_names = raw.decode("utf-8") if raw is not None else None
|
||||
if split_names is None:
|
||||
raise ValueError(
|
||||
f"Cannot create a permutation on split `{split}`"
|
||||
|
||||
@@ -10,6 +10,7 @@ import sys
|
||||
import types
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
@@ -314,6 +315,19 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
|
||||
return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
|
||||
# For regular Vector
|
||||
return pa.list_(tp.value_arrow_type(), tp.dim())
|
||||
if _safe_issubclass(tp, Enum):
|
||||
# Map Enum to the Arrow type of its value.
|
||||
# For string-valued enums, use dictionary encoding for efficiency.
|
||||
# For integer enums, use the native type.
|
||||
# Fall back to utf8 for mixed-type or empty enums.
|
||||
value_types = {type(m.value) for m in tp}
|
||||
if len(value_types) == 1:
|
||||
value_type = value_types.pop()
|
||||
if value_type is str:
|
||||
# Use dictionary encoding for string enums
|
||||
return pa.dictionary(pa.int32(), pa.utf8())
|
||||
return _py_type_to_arrow_type(value_type, field)
|
||||
return pa.utf8()
|
||||
return _py_type_to_arrow_type(tp, field)
|
||||
|
||||
|
||||
|
||||
@@ -145,6 +145,33 @@ class TlsConfig:
|
||||
|
||||
@dataclass
|
||||
class ClientConfig:
|
||||
"""Configuration for the LanceDB Cloud HTTP client.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
user_agent: str
|
||||
User agent string sent with requests.
|
||||
retry_config: RetryConfig
|
||||
Configuration for retrying failed requests.
|
||||
timeout_config: Optional[TimeoutConfig]
|
||||
Configuration for request timeouts.
|
||||
extra_headers: Optional[dict]
|
||||
Additional headers to include in requests.
|
||||
id_delimiter: Optional[str]
|
||||
The delimiter to use when constructing object identifiers.
|
||||
tls_config: Optional[TlsConfig]
|
||||
TLS/mTLS configuration for secure connections.
|
||||
header_provider: Optional[HeaderProvider]
|
||||
Provider for dynamic headers to be added to each request.
|
||||
user_id: Optional[str]
|
||||
User identifier for tracking purposes. This is sent as the
|
||||
`x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
|
||||
This can also be set via the `LANCEDB_USER_ID` environment variable.
|
||||
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another
|
||||
environment variable that contains the user ID value.
|
||||
"""
|
||||
|
||||
user_agent: str = f"LanceDB-Python-Client/{__version__}"
|
||||
retry_config: RetryConfig = field(default_factory=RetryConfig)
|
||||
timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
|
||||
@@ -152,6 +179,7 @@ class ClientConfig:
|
||||
id_delimiter: Optional[str] = None
|
||||
tls_config: Optional[TlsConfig] = None
|
||||
header_provider: Optional["HeaderProvider"] = None
|
||||
user_id: Optional[str] = None
|
||||
|
||||
def __post_init__(self):
|
||||
if isinstance(self.retry_config, dict):
|
||||
|
||||
@@ -24,6 +24,7 @@ from ..common import DATA
|
||||
from ..db import DBConnection, LOOP
|
||||
from ..embeddings import EmbeddingFunctionConfig
|
||||
from lance_namespace import (
|
||||
LanceNamespace,
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
@@ -570,6 +571,19 @@ class RemoteDBConnection(DBConnection):
|
||||
)
|
||||
)
|
||||
|
||||
@override
def namespace_client(self) -> LanceNamespace:
    """Return the namespace client equivalent to this remote connection.

    The request is delegated to the wrapped connection's own
    ``namespace_client()`` call, and its result is obtained synchronously
    by running it on the shared ``LOOP``.

    NOTE(review): the upstream description says the result is a
    RestNamespace sharing this connection's URI and authentication
    headers; that cannot be confirmed from this method alone.

    Returns
    -------
    LanceNamespace
        Whatever the wrapped connection's ``namespace_client()`` yields.
    """
    pending = self._conn.namespace_client()
    return LOOP.run(pending)
|
||||
|
||||
async def close(self):
|
||||
"""Close the connection to the database."""
|
||||
self._conn.close()
|
||||
|
||||
@@ -191,7 +191,7 @@ def _into_pyarrow_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
"See https://docs.lancedb.com/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
@@ -270,15 +270,17 @@ def _sanitize_data(
|
||||
reader,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
target_schema=target_schema,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
if target_schema is None:
|
||||
target_schema, reader = _infer_target_schema(reader)
|
||||
|
||||
if metadata:
|
||||
new_metadata = target_schema.metadata or {}
|
||||
new_metadata.update(metadata)
|
||||
target_schema = target_schema.with_metadata(new_metadata)
|
||||
target_schema = target_schema.with_metadata(
|
||||
_merge_metadata(target_schema.metadata, metadata)
|
||||
)
|
||||
|
||||
_validate_schema(target_schema)
|
||||
reader = _cast_to_target_schema(reader, target_schema, allow_subschema)
|
||||
@@ -294,7 +296,7 @@ def _cast_to_target_schema(
|
||||
# pa.Table.cast expects field order not to be changed.
|
||||
# Lance doesn't care about field order, so we don't need to rearrange fields
|
||||
# to match the target schema. We just need to correctly cast the fields.
|
||||
if reader.schema == target_schema:
|
||||
if reader.schema.equals(target_schema, check_metadata=True):
|
||||
# Fast path when the schemas are already the same
|
||||
return reader
|
||||
|
||||
@@ -314,7 +316,13 @@ def _cast_to_target_schema(
|
||||
def gen():
|
||||
for batch in reader:
|
||||
# Table but not RecordBatch has cast.
|
||||
yield pa.Table.from_batches([batch]).cast(reordered_schema).to_batches()[0]
|
||||
cast_batches = (
|
||||
pa.Table.from_batches([batch]).cast(reordered_schema).to_batches()
|
||||
)
|
||||
if cast_batches:
|
||||
yield pa.RecordBatch.from_arrays(
|
||||
cast_batches[0].columns, schema=reordered_schema
|
||||
)
|
||||
|
||||
return pa.RecordBatchReader.from_batches(reordered_schema, gen())
|
||||
|
||||
@@ -332,37 +340,51 @@ def _align_field_types(
|
||||
if target_field is None:
|
||||
raise ValueError(f"Field '{field.name}' not found in target schema")
|
||||
if pa.types.is_struct(target_field.type):
|
||||
new_type = pa.struct(
|
||||
_align_field_types(
|
||||
field.type.fields,
|
||||
target_field.type.fields,
|
||||
if pa.types.is_struct(field.type):
|
||||
new_type = pa.struct(
|
||||
_align_field_types(
|
||||
field.type.fields,
|
||||
target_field.type.fields,
|
||||
)
|
||||
)
|
||||
)
|
||||
else:
|
||||
new_type = target_field.type
|
||||
elif pa.types.is_list(target_field.type):
|
||||
new_type = pa.list_(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0]
|
||||
)
|
||||
if _is_list_like(field.type):
|
||||
new_type = pa.list_(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0]
|
||||
)
|
||||
else:
|
||||
new_type = target_field.type
|
||||
elif pa.types.is_large_list(target_field.type):
|
||||
new_type = pa.large_list(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0]
|
||||
)
|
||||
if _is_list_like(field.type):
|
||||
new_type = pa.large_list(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0]
|
||||
)
|
||||
else:
|
||||
new_type = target_field.type
|
||||
elif pa.types.is_fixed_size_list(target_field.type):
|
||||
new_type = pa.list_(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0],
|
||||
target_field.type.list_size,
|
||||
)
|
||||
if _is_list_like(field.type):
|
||||
new_type = pa.list_(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0],
|
||||
target_field.type.list_size,
|
||||
)
|
||||
else:
|
||||
new_type = target_field.type
|
||||
else:
|
||||
new_type = target_field.type
|
||||
new_fields.append(pa.field(field.name, new_type, field.nullable))
|
||||
new_fields.append(
|
||||
pa.field(field.name, new_type, field.nullable, target_field.metadata)
|
||||
)
|
||||
return new_fields
|
||||
|
||||
|
||||
@@ -440,6 +462,7 @@ def sanitize_create_table(
|
||||
schema = data.schema
|
||||
|
||||
if metadata:
|
||||
metadata = _merge_metadata(schema.metadata, metadata)
|
||||
schema = schema.with_metadata(metadata)
|
||||
# Need to apply metadata to the data as well
|
||||
if isinstance(data, pa.Table):
|
||||
@@ -492,9 +515,9 @@ def _append_vector_columns(
|
||||
vector columns to the table.
|
||||
"""
|
||||
if schema is None:
|
||||
metadata = metadata or {}
|
||||
metadata = _merge_metadata(metadata)
|
||||
else:
|
||||
metadata = schema.metadata or metadata or {}
|
||||
metadata = _merge_metadata(schema.metadata, metadata)
|
||||
functions = EmbeddingFunctionRegistry.get_instance().parse_functions(metadata)
|
||||
|
||||
if not functions:
|
||||
@@ -2906,6 +2929,7 @@ class LanceTable(Table):
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
)
|
||||
)
|
||||
return self
|
||||
@@ -3211,43 +3235,157 @@ def _handle_bad_vectors(
|
||||
reader: pa.RecordBatchReader,
|
||||
on_bad_vectors: Literal["error", "drop", "fill", "null"] = "error",
|
||||
fill_value: float = 0.0,
|
||||
target_schema: Optional[pa.Schema] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
) -> pa.RecordBatchReader:
|
||||
vector_columns = []
|
||||
vector_columns = _find_vector_columns(reader.schema, target_schema, metadata)
|
||||
if not vector_columns:
|
||||
return reader
|
||||
|
||||
for field in reader.schema:
|
||||
# They can provide a 'vector' column that isn't yet a FSL
|
||||
named_vector_col = (
|
||||
(
|
||||
pa.types.is_list(field.type)
|
||||
or pa.types.is_large_list(field.type)
|
||||
or pa.types.is_fixed_size_list(field.type)
|
||||
)
|
||||
and pa.types.is_floating(field.type.value_type)
|
||||
and field.name == VECTOR_COLUMN_NAME
|
||||
)
|
||||
# TODO: we're making an assumption that fixed size list of 10 or more
|
||||
# is a vector column. This is definitely a bit hacky.
|
||||
likely_vector_col = (
|
||||
pa.types.is_fixed_size_list(field.type)
|
||||
and pa.types.is_floating(field.type.value_type)
|
||||
and (field.type.list_size >= 10)
|
||||
)
|
||||
|
||||
if named_vector_col or likely_vector_col:
|
||||
vector_columns.append(field.name)
|
||||
output_schema = _vector_output_schema(reader.schema, vector_columns)
|
||||
|
||||
def gen():
|
||||
for batch in reader:
|
||||
for name in vector_columns:
|
||||
pending_dims = []
|
||||
for vector_column in vector_columns:
|
||||
dim = vector_column["expected_dim"]
|
||||
if target_schema is not None and dim is None:
|
||||
dim = _infer_vector_dim(batch[vector_column["name"]])
|
||||
pending_dims.append(vector_column)
|
||||
batch = _handle_bad_vector_column(
|
||||
batch,
|
||||
vector_column_name=name,
|
||||
vector_column_name=vector_column["name"],
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
expected_dim=dim,
|
||||
expected_value_type=vector_column["expected_value_type"],
|
||||
)
|
||||
yield batch
|
||||
for vector_column in pending_dims:
|
||||
if vector_column["expected_dim"] is None:
|
||||
vector_column["expected_dim"] = _infer_vector_dim(
|
||||
batch[vector_column["name"]]
|
||||
)
|
||||
if batch.schema.equals(output_schema, check_metadata=True):
|
||||
yield batch
|
||||
continue
|
||||
|
||||
return pa.RecordBatchReader.from_batches(reader.schema, gen())
|
||||
cast_batches = (
|
||||
pa.Table.from_batches([batch]).cast(output_schema).to_batches()
|
||||
)
|
||||
if cast_batches:
|
||||
yield pa.RecordBatch.from_arrays(
|
||||
cast_batches[0].columns,
|
||||
schema=output_schema,
|
||||
)
|
||||
|
||||
return pa.RecordBatchReader.from_batches(output_schema, gen())
|
||||
|
||||
|
||||
def _find_vector_columns(
|
||||
reader_schema: pa.Schema,
|
||||
target_schema: Optional[pa.Schema],
|
||||
metadata: Optional[dict],
|
||||
) -> List[dict]:
|
||||
if target_schema is None:
|
||||
vector_columns = []
|
||||
for field in reader_schema:
|
||||
named_vector_col = (
|
||||
_is_list_like(field.type)
|
||||
and pa.types.is_floating(field.type.value_type)
|
||||
and field.name == VECTOR_COLUMN_NAME
|
||||
)
|
||||
likely_vector_col = (
|
||||
pa.types.is_fixed_size_list(field.type)
|
||||
and pa.types.is_floating(field.type.value_type)
|
||||
and (field.type.list_size >= 10)
|
||||
)
|
||||
if named_vector_col or likely_vector_col:
|
||||
vector_columns.append(
|
||||
{
|
||||
"name": field.name,
|
||||
"expected_dim": None,
|
||||
"expected_value_type": None,
|
||||
}
|
||||
)
|
||||
return vector_columns
|
||||
|
||||
reader_column_names = set(reader_schema.names)
|
||||
active_metadata = _merge_metadata(target_schema.metadata, metadata)
|
||||
embedding_function_columns = set(
|
||||
EmbeddingFunctionRegistry.get_instance().parse_functions(active_metadata).keys()
|
||||
)
|
||||
vector_columns = []
|
||||
for field in target_schema:
|
||||
if field.name not in reader_column_names:
|
||||
continue
|
||||
if not _is_list_like(field.type) or not pa.types.is_floating(
|
||||
field.type.value_type
|
||||
):
|
||||
continue
|
||||
|
||||
reader_field = reader_schema.field(field.name)
|
||||
named_vector_col = (
|
||||
field.name in embedding_function_columns
|
||||
or field.name == VECTOR_COLUMN_NAME
|
||||
or (field.name == "embedding" and pa.types.is_fixed_size_list(field.type))
|
||||
)
|
||||
typed_fixed_vector_col = (
|
||||
pa.types.is_fixed_size_list(reader_field.type)
|
||||
and pa.types.is_floating(reader_field.type.value_type)
|
||||
and reader_field.type.list_size >= 10
|
||||
)
|
||||
|
||||
if named_vector_col or typed_fixed_vector_col:
|
||||
vector_columns.append(
|
||||
{
|
||||
"name": field.name,
|
||||
"expected_dim": (
|
||||
field.type.list_size
|
||||
if pa.types.is_fixed_size_list(field.type)
|
||||
else None
|
||||
),
|
||||
"expected_value_type": field.type.value_type,
|
||||
}
|
||||
)
|
||||
|
||||
return vector_columns
|
||||
|
||||
|
||||
def _vector_output_schema(
|
||||
reader_schema: pa.Schema,
|
||||
vector_columns: List[dict],
|
||||
) -> pa.Schema:
|
||||
columns_by_name = {column["name"]: column for column in vector_columns}
|
||||
fields = []
|
||||
for field in reader_schema:
|
||||
column = columns_by_name.get(field.name)
|
||||
if column is None:
|
||||
output_type = field.type
|
||||
else:
|
||||
output_type = _vector_output_type(field, column)
|
||||
fields.append(pa.field(field.name, output_type, field.nullable, field.metadata))
|
||||
return pa.schema(fields, metadata=reader_schema.metadata)
|
||||
|
||||
|
||||
def _vector_output_type(field: pa.Field, vector_column: dict) -> pa.DataType:
|
||||
if not _is_list_like(field.type):
|
||||
return field.type
|
||||
|
||||
if vector_column["expected_value_type"] is not None and (
|
||||
pa.types.is_null(field.type.value_type)
|
||||
or pa.types.is_integer(field.type.value_type)
|
||||
or pa.types.is_unsigned_integer(field.type.value_type)
|
||||
):
|
||||
return pa.list_(vector_column["expected_value_type"])
|
||||
|
||||
if (
|
||||
vector_column["expected_dim"] is not None
|
||||
and pa.types.is_fixed_size_list(field.type)
|
||||
and field.type.list_size != vector_column["expected_dim"]
|
||||
):
|
||||
return pa.list_(field.type.value_type)
|
||||
|
||||
return field.type
|
||||
|
||||
|
||||
def _handle_bad_vector_column(
|
||||
@@ -3255,6 +3393,8 @@ def _handle_bad_vector_column(
|
||||
vector_column_name: str,
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
expected_dim: Optional[int] = None,
|
||||
expected_value_type: Optional[pa.DataType] = None,
|
||||
) -> pa.RecordBatch:
|
||||
"""
|
||||
Ensure that the vector column exists and has type fixed_size_list(float)
|
||||
@@ -3271,14 +3411,39 @@ def _handle_bad_vector_column(
|
||||
fill_value: float, default 0.0
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
"""
|
||||
position = data.column_names.index(vector_column_name)
|
||||
vec_arr = data[vector_column_name]
|
||||
if not _is_list_like(vec_arr.type):
|
||||
return data
|
||||
|
||||
has_nan = has_nan_values(vec_arr)
|
||||
if (
|
||||
expected_dim is not None
|
||||
and pa.types.is_fixed_size_list(vec_arr.type)
|
||||
and vec_arr.type.list_size != expected_dim
|
||||
):
|
||||
vec_arr = pa.array(vec_arr.to_pylist(), type=pa.list_(vec_arr.type.value_type))
|
||||
data = data.set_column(position, vector_column_name, vec_arr)
|
||||
|
||||
if pa.types.is_fixed_size_list(vec_arr.type):
|
||||
if expected_value_type is not None and (
|
||||
pa.types.is_integer(vec_arr.type.value_type)
|
||||
or pa.types.is_unsigned_integer(vec_arr.type.value_type)
|
||||
):
|
||||
vec_arr = pa.array(vec_arr.to_pylist(), type=pa.list_(expected_value_type))
|
||||
data = data.set_column(position, vector_column_name, vec_arr)
|
||||
|
||||
if pa.types.is_floating(vec_arr.type.value_type):
|
||||
has_nan = has_nan_values(vec_arr)
|
||||
else:
|
||||
has_nan = pa.array([False] * len(vec_arr))
|
||||
|
||||
if expected_dim is not None:
|
||||
dim = expected_dim
|
||||
elif pa.types.is_fixed_size_list(vec_arr.type):
|
||||
dim = vec_arr.type.list_size
|
||||
else:
|
||||
dim = _modal_list_size(vec_arr)
|
||||
dim = _infer_vector_dim(vec_arr)
|
||||
if dim is None:
|
||||
return data
|
||||
has_wrong_dim = pc.not_equal(pc.list_value_length(vec_arr), dim)
|
||||
|
||||
has_bad_vectors = pc.any(has_nan).as_py() or pc.any(has_wrong_dim).as_py()
|
||||
@@ -3316,13 +3481,12 @@ def _handle_bad_vector_column(
|
||||
)
|
||||
vec_arr = pc.if_else(
|
||||
is_bad,
|
||||
pa.scalar([fill_value] * dim),
|
||||
pa.scalar([fill_value] * dim, type=vec_arr.type),
|
||||
vec_arr,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Invalid value for on_bad_vectors: {on_bad_vectors}")
|
||||
|
||||
position = data.column_names.index(vector_column_name)
|
||||
return data.set_column(position, vector_column_name, vec_arr)
|
||||
|
||||
|
||||
@@ -3343,6 +3507,28 @@ def has_nan_values(arr: Union[pa.ListArray, pa.ChunkedArray]) -> pa.BooleanArray
|
||||
return pc.is_in(indices, has_nan_indices)
|
||||
|
||||
|
||||
def _is_list_like(data_type: pa.DataType) -> bool:
|
||||
return (
|
||||
pa.types.is_list(data_type)
|
||||
or pa.types.is_large_list(data_type)
|
||||
or pa.types.is_fixed_size_list(data_type)
|
||||
)
|
||||
|
||||
|
||||
def _merge_metadata(*metadata_dicts: Optional[dict]) -> dict:
|
||||
merged = {}
|
||||
for metadata in metadata_dicts:
|
||||
if metadata is None:
|
||||
continue
|
||||
for key, value in metadata.items():
|
||||
if isinstance(key, str):
|
||||
key = key.encode("utf-8")
|
||||
if isinstance(value, str):
|
||||
value = value.encode("utf-8")
|
||||
merged[key] = value
|
||||
return merged
|
||||
|
||||
|
||||
def _name_suggests_vector_column(field_name: str) -> bool:
|
||||
"""Check if a field name indicates a vector column."""
|
||||
name_lower = field_name.lower()
|
||||
@@ -3410,6 +3596,16 @@ def _modal_list_size(arr: Union[pa.ListArray, pa.ChunkedArray]) -> int:
|
||||
return pc.mode(pc.list_value_length(arr))[0].as_py()["mode"]
|
||||
|
||||
|
||||
def _infer_vector_dim(arr: Union[pa.Array, pa.ChunkedArray]) -> Optional[int]:
|
||||
if not _is_list_like(arr.type):
|
||||
return None
|
||||
lengths = pc.list_value_length(arr)
|
||||
lengths = pc.filter(lengths, pc.greater(lengths, 0))
|
||||
if len(lengths) == 0:
|
||||
return None
|
||||
return pc.mode(lengths)[0].as_py()["mode"]
|
||||
|
||||
|
||||
def _validate_schema(schema: pa.Schema):
|
||||
"""
|
||||
Make sure the metadata is valid utf8
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
|
||||
import re
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
import os
|
||||
|
||||
@@ -896,42 +897,22 @@ def test_bypass_vector_index_sync(tmp_db: lancedb.DBConnection):
|
||||
|
||||
|
||||
def test_local_namespace_operations(tmp_path):
|
||||
"""Test that local mode namespace operations behave as expected."""
|
||||
# Create a local database connection
|
||||
"""Test that local mode namespace operations work via directory namespace."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
# Test list_namespaces returns empty list for root namespace
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
assert namespaces == []
|
||||
# Root namespace starts empty
|
||||
assert db.list_namespaces().namespaces == []
|
||||
|
||||
# Test list_namespaces with non-empty namespace raises NotImplementedError
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.list_namespaces(namespace_path=["test"])
|
||||
# Create and list child namespace
|
||||
db.create_namespace(["child"])
|
||||
assert "child" in db.list_namespaces().namespaces
|
||||
|
||||
# List namespaces under child
|
||||
assert db.list_namespaces(namespace_path=["child"]).namespaces == []
|
||||
|
||||
def test_local_create_namespace_not_supported(tmp_path):
|
||||
"""Test that create_namespace is not supported in local mode."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.create_namespace(["test_namespace"])
|
||||
|
||||
|
||||
def test_local_drop_namespace_not_supported(tmp_path):
|
||||
"""Test that drop_namespace is not supported in local mode."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.drop_namespace(["test_namespace"])
|
||||
# Drop namespace
|
||||
db.drop_namespace(["child"])
|
||||
assert db.list_namespaces().namespaces == []
|
||||
|
||||
|
||||
def test_clone_table_latest_version(tmp_path):
|
||||
@@ -1048,3 +1029,59 @@ def test_clone_table_deep_clone_fails(tmp_path):
|
||||
source_uri = os.path.join(tmp_path, "source.lance")
|
||||
with pytest.raises(Exception, match="Deep clone is not yet implemented"):
|
||||
db.clone_table("cloned", source_uri, is_shallow=False)
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
|
||||
def test_namespace_client_native_storage(tmp_path):
|
||||
"""Test namespace_client() returns DirectoryNamespace for native storage."""
|
||||
from lance.namespace import DirectoryNamespace
|
||||
|
||||
db = lancedb.connect(tmp_path)
|
||||
ns_client = db.namespace_client()
|
||||
|
||||
assert isinstance(ns_client, DirectoryNamespace)
|
||||
assert str(tmp_path) in ns_client.namespace_id()
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
|
||||
def test_namespace_client_with_storage_options(tmp_path):
|
||||
"""Test namespace_client() preserves storage options."""
|
||||
from lance.namespace import DirectoryNamespace
|
||||
|
||||
storage_options = {"timeout": "10s"}
|
||||
db = lancedb.connect(tmp_path, storage_options=storage_options)
|
||||
ns_client = db.namespace_client()
|
||||
|
||||
assert isinstance(ns_client, DirectoryNamespace)
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
|
||||
def test_namespace_client_operations(tmp_path):
|
||||
"""Test that namespace_client() returns a functional namespace client."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
ns_client = db.namespace_client()
|
||||
|
||||
# Create a table through the main db connection
|
||||
data = [{"id": 1, "text": "hello", "vector": [1.0, 2.0]}]
|
||||
db.create_table("test_table", data=data)
|
||||
|
||||
# Verify the namespace client can see the table
|
||||
from lance_namespace import ListTablesRequest
|
||||
|
||||
# id=[] means root namespace
|
||||
response = ns_client.list_tables(ListTablesRequest(id=[]))
|
||||
# Tables can be strings or objects with name attribute
|
||||
table_names = [t.name if hasattr(t, "name") else t for t in response.tables]
|
||||
assert "test_table" in table_names
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
|
||||
def test_namespace_client_namespace_connection(tmp_path):
|
||||
"""Test namespace_client() returns the backing client for namespace connections."""
|
||||
from lance.namespace import DirectoryNamespace
|
||||
|
||||
db = lancedb.connect_namespace("dir", {"root": str(tmp_path)})
|
||||
ns_client = db.namespace_client()
|
||||
|
||||
assert isinstance(ns_client, DirectoryNamespace)
|
||||
assert str(tmp_path) in ns_client.namespace_id()
|
||||
|
||||
@@ -681,7 +681,7 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
def test_create_table_pushdown_stored(self):
|
||||
"""Test that CreateTable pushdown is stored on sync connection."""
|
||||
@@ -690,7 +690,7 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["CreateTable"],
|
||||
)
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
def test_both_pushdowns_stored(self):
|
||||
"""Test that both pushdown operations can be set together."""
|
||||
@@ -699,13 +699,13 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable", "CreateTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
def test_pushdown_defaults_to_empty(self):
|
||||
"""Test that pushdown operations default to empty."""
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
assert len(db._pushdown_operations) == 0
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -727,7 +727,7 @@ class TestAsyncPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
async def test_async_create_table_pushdown_stored(self):
|
||||
"""Test that CreateTable pushdown is stored on async connection."""
|
||||
@@ -736,9 +736,9 @@ class TestAsyncPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["CreateTable"],
|
||||
)
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
async def test_async_pushdown_defaults_to_empty(self):
|
||||
"""Test that pushdown operations default to empty on async connection."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
assert len(db._pushdown_operations) == 0
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
|
||||
@@ -18,6 +18,9 @@ Tests verify:
|
||||
"""
|
||||
|
||||
import copy
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
from typing import Dict, Optional
|
||||
@@ -387,6 +390,66 @@ def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
|
||||
assert get_describe_call_count(inner_ns_client) == describe_count_after_open
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.platform == "win32",
|
||||
reason="TODO: fix schema-only namespace metrics test on Windows",
|
||||
)
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_create_schema_only_with_provider(use_custom: bool):
|
||||
"""
|
||||
Test creating a schema-only table through namespace with storage options provider.
|
||||
|
||||
Verifies:
|
||||
- declare_table is called once to reserve the location
|
||||
- describe_table is not needed during create in create mode
|
||||
- the table can be reopened successfully afterward
|
||||
- opening the table triggers exactly one describe_table call
|
||||
"""
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
try:
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=temp_dir,
|
||||
storage_options={},
|
||||
credential_expires_in_seconds=3600,
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
db.create_namespace([namespace_name])
|
||||
|
||||
table_name = f"test_table_{uuid.uuid4().hex}"
|
||||
namespace_path = [namespace_name]
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
pa.field("text", pa.string()),
|
||||
]
|
||||
)
|
||||
|
||||
assert get_declare_call_count(inner_ns_client) == 0
|
||||
assert get_describe_call_count(inner_ns_client) == 0
|
||||
|
||||
table = db.create_table(
|
||||
table_name, schema=schema, namespace_path=namespace_path
|
||||
)
|
||||
|
||||
assert table.name == table_name
|
||||
assert table.namespace == namespace_path
|
||||
assert get_declare_call_count(inner_ns_client) == 1
|
||||
assert get_describe_call_count(inner_ns_client) == 0
|
||||
|
||||
reopened_table = db.open_table(table_name, namespace_path=namespace_path)
|
||||
|
||||
assert reopened_table.schema == schema
|
||||
assert get_declare_call_count(inner_ns_client) == 1
|
||||
assert get_describe_call_count(inner_ns_client) == 1
|
||||
finally:
|
||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
|
||||
|
||||
@@ -522,6 +522,50 @@ def test_no_split_names(some_table: Table):
|
||||
assert permutations[1].num_rows == 500
|
||||
|
||||
|
||||
def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
|
||||
"""Regression: schema metadata present but missing split_names key must not crash.
|
||||
|
||||
Previously, `.get(b"split_names", None).decode()` was called unconditionally,
|
||||
so any permutation table whose metadata dict had other keys but no split_names
|
||||
raised AttributeError: 'NoneType' has no attribute 'decode'.
|
||||
"""
|
||||
base = mem_db.create_table("base_nosplit", pa.table({"x": range(10)}))
|
||||
|
||||
# Build a permutation-like table that carries some metadata but NOT split_names.
|
||||
raw = pa.table(
|
||||
{
|
||||
"row_id": pa.array(range(10), type=pa.uint64()),
|
||||
"split_id": pa.array([0] * 10, type=pa.uint32()),
|
||||
}
|
||||
).replace_schema_metadata({b"other_key": b"other_value"})
|
||||
perm_tbl = mem_db.create_table("perm_nosplit", raw)
|
||||
|
||||
permutations = Permutations(base, perm_tbl)
|
||||
assert permutations.split_names == []
|
||||
assert permutations.split_dict == {}
|
||||
|
||||
|
||||
def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
|
||||
"""Regression: from_tables() with a string split must raise ValueError, not
|
||||
AttributeError.
|
||||
|
||||
Previously, `.get(b"split_names", None).decode()` crashed with AttributeError
|
||||
when the metadata dict existed but had no split_names key.
|
||||
"""
|
||||
base = mem_db.create_table("base_strsplit", pa.table({"x": range(10)}))
|
||||
|
||||
raw = pa.table(
|
||||
{
|
||||
"row_id": pa.array(range(10), type=pa.uint64()),
|
||||
"split_id": pa.array([0] * 10, type=pa.uint32()),
|
||||
}
|
||||
).replace_schema_metadata({b"other_key": b"other_value"})
|
||||
perm_tbl = mem_db.create_table("perm_strsplit", raw)
|
||||
|
||||
with pytest.raises(ValueError, match="no split names are defined"):
|
||||
Permutation.from_tables(base, perm_tbl, split="train")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def some_perm_table(some_table: Table) -> Table:
|
||||
return (
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
import json
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import pyarrow as pa
|
||||
@@ -673,3 +674,29 @@ async def test_aliases_in_lance_model_async(mem_db_async):
|
||||
assert hasattr(model, "name")
|
||||
assert hasattr(model, "distance")
|
||||
assert model.distance < 0.01
|
||||
|
||||
|
||||
def test_enum_types():
|
||||
"""Enum fields should map to the Arrow type of their value (issue #1846)."""
|
||||
|
||||
class StrStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
DONE = "done"
|
||||
|
||||
class IntPriority(int, Enum):
|
||||
LOW = 1
|
||||
MEDIUM = 2
|
||||
HIGH = 3
|
||||
|
||||
class TestModel(pydantic.BaseModel):
|
||||
status: StrStatus
|
||||
priority: IntPriority
|
||||
opt_status: Optional[StrStatus] = None
|
||||
|
||||
schema = pydantic_to_schema(TestModel)
|
||||
|
||||
assert schema.field("status").type == pa.dictionary(pa.int32(), pa.utf8())
|
||||
assert schema.field("priority").type == pa.int64()
|
||||
assert schema.field("opt_status").type == pa.dictionary(pa.int32(), pa.utf8())
|
||||
assert schema.field("opt_status").nullable
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import date, datetime, timedelta
|
||||
from time import sleep
|
||||
from typing import List
|
||||
@@ -1049,6 +1050,231 @@ def test_add_with_nans(mem_db: DBConnection):
|
||||
assert np.allclose(v, np.array([0.0, 0.0]))
|
||||
|
||||
|
||||
def test_add_with_empty_fixed_size_list_drops_bad_rows(mem_db: DBConnection):
|
||||
class Schema(LanceModel):
|
||||
text: str
|
||||
embedding: Vector(16)
|
||||
|
||||
table = mem_db.create_table("test_empty_embeddings", schema=Schema)
|
||||
table.add(
|
||||
[
|
||||
{"text": "hello", "embedding": []},
|
||||
{"text": "bar", "embedding": [0.1] * 16},
|
||||
],
|
||||
on_bad_vectors="drop",
|
||||
)
|
||||
|
||||
data = table.to_arrow()
|
||||
assert data["text"].to_pylist() == ["bar"]
|
||||
assert np.allclose(data["embedding"].to_pylist()[0], np.array([0.1] * 16))
|
||||
|
||||
|
||||
def test_add_with_integer_embeddings_preserves_casting(mem_db: DBConnection):
|
||||
class Schema(LanceModel):
|
||||
text: str
|
||||
embedding: Vector(4)
|
||||
|
||||
table = mem_db.create_table("test_integer_embeddings", schema=Schema)
|
||||
table.add(
|
||||
[{"text": "foo", "embedding": [1, 2, 3, 4]}],
|
||||
on_bad_vectors="drop",
|
||||
)
|
||||
|
||||
assert table.to_arrow()["embedding"].to_pylist() == [[1.0, 2.0, 3.0, 4.0]]
|
||||
|
||||
|
||||
def test_on_bad_vectors_does_not_handle_non_vector_fixed_size_lists(
|
||||
mem_db: DBConnection,
|
||||
):
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), 4)),
|
||||
pa.field("bbox", pa.list_(pa.float32(), 4)),
|
||||
]
|
||||
)
|
||||
table = mem_db.create_table("test_bbox_schema", schema=schema)
|
||||
|
||||
with pytest.raises(RuntimeError, match="FixedSizeListType"):
|
||||
table.add(
|
||||
[{"vector": [1.0, 2.0, 3.0, 4.0], "bbox": [0.0, 1.0]}],
|
||||
on_bad_vectors="drop",
|
||||
)
|
||||
|
||||
|
||||
def test_on_bad_vectors_does_not_handle_custom_named_fixed_size_lists(
|
||||
mem_db: DBConnection,
|
||||
):
|
||||
schema = pa.schema([pa.field("features", pa.list_(pa.float32(), 16))])
|
||||
table = mem_db.create_table("test_custom_named_fixed_size_vector", schema=schema)
|
||||
|
||||
with pytest.raises(RuntimeError, match="FixedSizeListType"):
|
||||
table.add(
|
||||
[
|
||||
{"features": []},
|
||||
{"features": [0.1] * 16},
|
||||
],
|
||||
on_bad_vectors="drop",
|
||||
)
|
||||
|
||||
|
||||
def test_on_bad_vectors_with_schema_list_vector_still_sanitizes(mem_db: DBConnection):
|
||||
schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
|
||||
table = mem_db.create_table("test_schema_list_vector", schema=schema)
|
||||
table.add(
|
||||
[
|
||||
{"vector": [1.0, 2.0]},
|
||||
{"vector": [3.0]},
|
||||
{"vector": [4.0, 5.0]},
|
||||
],
|
||||
on_bad_vectors="drop",
|
||||
)
|
||||
|
||||
assert table.to_arrow()["vector"].to_pylist() == [[1.0, 2.0], [4.0, 5.0]]
|
||||
|
||||
|
||||
def test_on_bad_vectors_handles_typed_custom_fixed_vectors_for_list_schema(
|
||||
mem_db: DBConnection,
|
||||
):
|
||||
schema = pa.schema([pa.field("vec", pa.list_(pa.float32()))])
|
||||
table = mem_db.create_table("test_typed_custom_fixed_vector", schema=schema)
|
||||
data = pa.table(
|
||||
{
|
||||
"vec": pa.array(
|
||||
[[float("nan")] * 16, [1.0] * 16],
|
||||
type=pa.list_(pa.float32(), 16),
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
table.add(data, on_bad_vectors="drop")
|
||||
|
||||
assert table.to_arrow()["vec"].to_pylist() == [[1.0] * 16]
|
||||
|
||||
|
||||
def test_on_bad_vectors_fill_preserves_arrow_nested_vector_type(mem_db: DBConnection):
|
||||
schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
|
||||
table = mem_db.create_table("test_fill_arrow_nested_type", schema=schema)
|
||||
data = pa.table(
|
||||
{
|
||||
"vector": pa.array(
|
||||
[[1.0, 2.0], [float("nan"), 3.0]],
|
||||
type=pa.list_(pa.float32(), 2),
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
table.add(
|
||||
data,
|
||||
on_bad_vectors="fill",
|
||||
fill_value=0.0,
|
||||
)
|
||||
|
||||
assert table.to_arrow()["vector"].to_pylist() == [[1.0, 2.0], [0.0, 0.0]]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("table_name", "batch1", "expected"),
    [
        (
            "test_schema_list_vector_empty_prefix",
            pa.record_batch({"vector": [[], []]}),
            [[], [], [1.0, 2.0], [3.0, 4.0]],
        ),
        (
            "test_schema_list_vector_all_bad_prefix",
            pa.record_batch({"vector": [[float("nan")] * 3, [float("nan")] * 3]}),
            [[1.0, 2.0], [3.0, 4.0]],
        ),
    ],
)
def test_on_bad_vectors_with_schema_list_vector_ignores_invalid_prefix_batches(
    mem_db: DBConnection,
    table_name: str,
    batch1: pa.RecordBatch,
    expected: list,
):
    """A leading batch without usable vectors must not break later batches.

    ``batch1`` (all-empty or all-NaN vectors, per the parametrization) is
    streamed ahead of a batch of valid 2-element vectors. After adding with
    ``on_bad_vectors="drop"`` the table contents must equal ``expected``.
    """
    schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
    table = mem_db.create_table(table_name, schema=schema)
    batch2 = pa.record_batch({"vector": [[1.0, 2.0], [3.0, 4.0]]})
    reader = pa.RecordBatchReader.from_batches(batch1.schema, [batch1, batch2])

    table.add(reader, on_bad_vectors="drop")

    assert table.to_arrow()["vector"].to_pylist() == expected
|
||||
|
||||
|
||||
def test_on_bad_vectors_with_multiple_vectors_locks_dim_after_final_drop(
    mem_db: DBConnection,
):
    """Rows are dropped consistently across two embedding vector columns.

    ``vec1`` and ``vec2`` are declared as vector columns through embedding
    function metadata. The first streamed row has a 3-element ``vec1`` and a
    NaN in ``vec2``; it is expected to be dropped, leaving the three rows whose
    vectors are all 2 elements long.
    """
    registry = EmbeddingFunctionRegistry.get_instance()
    func = MockTextEmbeddingFunction.create()
    metadata = registry.get_table_metadata(
        [
            EmbeddingFunctionConfig(
                source_column="text1", vector_column="vec1", function=func
            ),
            EmbeddingFunctionConfig(
                source_column="text2", vector_column="vec2", function=func
            ),
        ]
    )
    schema = pa.schema(
        [
            pa.field("vec1", pa.list_(pa.float32())),
            pa.field("vec2", pa.list_(pa.float32())),
        ],
        metadata=metadata,
    )
    table = mem_db.create_table("test_multi_vector_dim_lock", schema=schema)
    batch1 = pa.record_batch(
        {
            "vec1": [[1.0, 2.0, 3.0], [10.0, 11.0]],
            "vec2": [[float("nan"), 0.0], [5.0, 6.0]],
        }
    )
    batch2 = pa.record_batch(
        {
            "vec1": [[20.0, 21.0], [30.0, 31.0]],
            "vec2": [[7.0, 8.0], [9.0, 10.0]],
        }
    )
    reader = pa.RecordBatchReader.from_batches(batch1.schema, [batch1, batch2])

    table.add(reader, on_bad_vectors="drop")

    data = table.to_arrow()
    assert data["vec1"].to_pylist() == [[10.0, 11.0], [20.0, 21.0], [30.0, 31.0]]
    assert data["vec2"].to_pylist() == [[5.0, 6.0], [7.0, 8.0], [9.0, 10.0]]
|
||||
|
||||
|
||||
def test_on_bad_vectors_does_not_handle_non_vector_list_columns(mem_db: DBConnection):
    """A float-list column named ``embedding_history`` is left untouched.

    Both rows, although of different lengths, are expected to survive an add
    with ``on_bad_vectors="drop"``.
    """
    schema = pa.schema([pa.field("embedding_history", pa.list_(pa.float32()))])
    table = mem_db.create_table("test_non_vector_list_schema", schema=schema)
    table.add(
        [
            {"embedding_history": [1.0, 2.0]},
            {"embedding_history": [3.0]},
        ],
        on_bad_vectors="drop",
    )

    assert table.to_arrow()["embedding_history"].to_pylist() == [
        [1.0, 2.0],
        [3.0],
    ]
|
||||
|
||||
|
||||
def test_on_bad_vectors_all_null_schema_vector_batches_do_not_crash(
    mem_db: DBConnection,
):
    """Adding a single null vector to a nullable fixed-size column succeeds.

    The row is expected to be stored as ``None`` rather than dropped or
    raising.
    """
    schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2), nullable=True)])
    table = mem_db.create_table("test_all_null_vector_batch", schema=schema)

    table.add([{"vector": None}], on_bad_vectors="drop")

    assert table.to_arrow()["vector"].to_pylist() == [None]
|
||||
|
||||
|
||||
def test_restore(mem_db: DBConnection):
|
||||
table = mem_db.create_table(
|
||||
"my_table",
|
||||
@@ -1815,6 +2041,13 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection):
|
||||
@pytest.mark.parametrize(
|
||||
"consistency_interval", [None, timedelta(seconds=0), timedelta(seconds=0.1)]
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
sys.platform == "win32",
|
||||
reason=(
|
||||
"TODO: directory namespace is not supported on Windows yet; "
|
||||
"re-enable after that is fixed."
|
||||
),
|
||||
)
|
||||
def test_consistency(tmp_path, consistency_interval):
|
||||
db = lancedb.connect(tmp_path)
|
||||
table = db.create_table("my_table", data=[{"id": 0}])
|
||||
@@ -1835,7 +2068,6 @@ def test_consistency(tmp_path, consistency_interval):
|
||||
elif consistency_interval == timedelta(seconds=0):
|
||||
assert table2.version == table.version
|
||||
else:
|
||||
# (consistency_interval == timedelta(seconds=0.1)
|
||||
assert table2.version == table.version - 1
|
||||
sleep(0.1)
|
||||
assert table2.version == table.version
|
||||
|
||||
@@ -15,8 +15,10 @@ from lancedb.table import (
|
||||
_cast_to_target_schema,
|
||||
_handle_bad_vectors,
|
||||
_into_pyarrow_reader,
|
||||
_sanitize_data,
|
||||
_infer_target_schema,
|
||||
_merge_metadata,
|
||||
_sanitize_data,
|
||||
sanitize_create_table,
|
||||
)
|
||||
import pyarrow as pa
|
||||
import pandas as pd
|
||||
@@ -304,6 +306,117 @@ def test_handle_bad_vectors_noop():
|
||||
assert output["vector"] == vector
|
||||
|
||||
|
||||
def test_handle_bad_vectors_updates_reader_schema_for_target_schema():
    """The reader returned by ``_handle_bad_vectors`` reports the new type.

    Integer input vectors are processed against a fixed-size float32 target
    schema; the output reader's schema is expected to be a variable-length
    ``list<float32>`` and the values to be read back as floats.
    """
    data = pa.table({"vector": [[1, 2, 3, 4]]})
    target_schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 4))])

    output = _handle_bad_vectors(
        data.to_reader(),
        on_bad_vectors="drop",
        target_schema=target_schema,
    )

    assert output.schema == pa.schema([pa.field("vector", pa.list_(pa.float32()))])
    assert output.read_all()["vector"].to_pylist() == [[1.0, 2.0, 3.0, 4.0]]
|
||||
|
||||
|
||||
def test_sanitize_data_keeps_target_field_metadata():
    """Field metadata on the sanitized output comes from the target schema.

    The source field carries ``{b"source": b"drop-me"}`` and the target field
    ``{b"target": b"keep-me"}``; only the latter is expected on the result.
    """
    source_field = pa.field(
        "vector",
        pa.list_(pa.float32(), 2),
        metadata={b"source": b"drop-me"},
    )
    target_field = pa.field(
        "vector",
        pa.list_(pa.float32(), 2),
        metadata={b"target": b"keep-me"},
    )
    data = pa.table(
        {"vector": pa.array([[1.0, 2.0]], type=pa.list_(pa.float32(), 2))},
        schema=pa.schema([source_field]),
    )

    output = _sanitize_data(
        data,
        target_schema=pa.schema([target_field]),
        on_bad_vectors="drop",
    ).read_all()

    assert output.schema.field("vector").metadata == {b"target": b"keep-me"}
|
||||
|
||||
|
||||
def test_sanitize_data_uses_separate_embedding_metadata_for_bad_vectors():
    """Embedding metadata passed via ``metadata=`` drives bad-vector handling.

    The target schema only carries an unrelated ``note`` entry; the embedding
    configuration for ``custom_vector`` is supplied separately. The 9-element
    row is expected to be dropped, and the output schema metadata is expected
    to contain both the ``note`` entry and ``embedding_functions``.
    """
    registry = EmbeddingFunctionRegistry.get_instance()
    conf = EmbeddingFunctionConfig(
        source_column="text",
        vector_column="custom_vector",
        function=MockTextEmbeddingFunction.create(),
    )
    metadata = registry.get_table_metadata([conf])
    schema = pa.schema(
        {
            "text": pa.string(),
            "custom_vector": pa.list_(pa.float32(), 10),
        },
        metadata={b"note": b"keep-me"},
    )
    data = pa.table(
        {
            "text": ["bad", "good"],
            "custom_vector": [[1.0] * 9, [2.0] * 10],
        }
    )

    output = _sanitize_data(
        data,
        target_schema=schema,
        metadata=metadata,
        on_bad_vectors="drop",
    ).read_all()

    assert output["text"].to_pylist() == ["good"]
    assert output.schema.metadata[b"note"] == b"keep-me"
    assert b"embedding_functions" in output.schema.metadata
|
||||
|
||||
|
||||
def test_sanitize_create_table_merges_and_overrides_embedding_metadata():
    """Explicit embedding metadata replaces the schema's, other keys survive.

    The schema metadata is built from a ``note`` entry merged with an
    embedding config for ``old_vector``; the ``metadata=`` argument carries a
    config for ``custom_vector``. After ``sanitize_create_table`` the ``note``
    entry must remain on both the returned schema and data, and the parsed
    embedding functions must reference only ``custom_vector``.
    """
    registry = EmbeddingFunctionRegistry.get_instance()
    old_conf = EmbeddingFunctionConfig(
        source_column="text",
        vector_column="old_vector",
        function=MockTextEmbeddingFunction.create(),
    )
    new_conf = EmbeddingFunctionConfig(
        source_column="text",
        vector_column="custom_vector",
        function=MockTextEmbeddingFunction.create(),
    )
    metadata = registry.get_table_metadata([new_conf])
    schema = pa.schema(
        {
            "text": pa.string(),
            "custom_vector": pa.list_(pa.float32(), 10),
        },
        metadata=_merge_metadata(
            {b"note": b"keep-me"},
            registry.get_table_metadata([old_conf]),
        ),
    )

    data, schema = sanitize_create_table(
        pa.table({"text": ["good"]}),
        schema,
        metadata=metadata,
        on_bad_vectors="drop",
    )

    assert schema.metadata[b"note"] == b"keep-me"
    assert b"embedding_functions" in schema.metadata
    assert data.schema.metadata[b"note"] == b"keep-me"
    funcs = EmbeddingFunctionRegistry.get_instance().parse_functions(schema.metadata)
    assert set(funcs.keys()) == {"custom_vector"}
|
||||
|
||||
|
||||
class TestModel(lancedb.pydantic.LanceModel):
|
||||
a: Optional[int]
|
||||
b: Optional[int]
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
|
||||
use lancedb::{
|
||||
connection::Connection as LanceConnection,
|
||||
connection::NamespaceClientPushdownOperation,
|
||||
database::namespace::LanceNamespaceDatabase,
|
||||
database::{CreateTableMode, Database, ReadConsistency},
|
||||
};
|
||||
use pyo3::{
|
||||
@@ -39,6 +45,29 @@ impl Connection {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_namespace_client_pushdown_operations(
|
||||
operations: Option<Vec<String>>,
|
||||
) -> PyResult<HashSet<NamespaceClientPushdownOperation>> {
|
||||
let mut parsed = HashSet::new();
|
||||
for operation in operations.unwrap_or_default() {
|
||||
match operation.as_str() {
|
||||
"QueryTable" => {
|
||||
parsed.insert(NamespaceClientPushdownOperation::QueryTable);
|
||||
}
|
||||
"CreateTable" => {
|
||||
parsed.insert(NamespaceClientPushdownOperation::CreateTable);
|
||||
}
|
||||
_ => {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Invalid pushdown operation: {}",
|
||||
operation
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(parsed)
|
||||
}
|
||||
|
||||
impl Connection {
|
||||
fn parse_create_mode_str(mode: &str) -> PyResult<CreateTableMode> {
|
||||
match mode {
|
||||
@@ -474,6 +503,25 @@ impl Connection {
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the configuration for constructing an equivalent namespace client.
|
||||
/// Returns a dict with:
|
||||
/// - "impl": "dir" for DirectoryNamespace, "rest" for RestNamespace
|
||||
/// - "properties": configuration properties for the namespace
|
||||
#[pyo3(signature = ())]
|
||||
pub fn namespace_client_config(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
let (impl_type, properties) = inner.namespace_client_config().await.infer_error()?;
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("impl", impl_type)?;
|
||||
dict.set_item("properties", properties)?;
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
@@ -519,6 +567,52 @@ pub fn connect(
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (
|
||||
namespace_client,
|
||||
read_consistency_interval=None,
|
||||
storage_options=None,
|
||||
session=None,
|
||||
namespace_client_pushdown_operations=None,
|
||||
namespace_client_impl=None,
|
||||
namespace_client_properties=None,
|
||||
))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn connect_namespace_client(
|
||||
py: Python<'_>,
|
||||
namespace_client: Py<PyAny>,
|
||||
read_consistency_interval: Option<f64>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
session: Option<crate::session::Session>,
|
||||
namespace_client_pushdown_operations: Option<Vec<String>>,
|
||||
namespace_client_impl: Option<String>,
|
||||
namespace_client_properties: Option<HashMap<String, String>>,
|
||||
) -> PyResult<Connection> {
|
||||
let namespace_client = extract_namespace_arc(py, namespace_client)?;
|
||||
let read_consistency_interval = read_consistency_interval.map(Duration::from_secs_f64);
|
||||
let namespace_client_pushdown_operations =
|
||||
parse_namespace_client_pushdown_operations(namespace_client_pushdown_operations)?;
|
||||
let ns_impl = namespace_client_impl.unwrap_or_else(|| "python".to_string());
|
||||
let ns_properties = namespace_client_properties.unwrap_or_default();
|
||||
let storage_options = storage_options.unwrap_or_default();
|
||||
let session = session.map(|s| s.inner.clone());
|
||||
|
||||
let database = LanceNamespaceDatabase::from_namespace_client(
|
||||
namespace_client,
|
||||
ns_impl,
|
||||
ns_properties,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
session,
|
||||
namespace_client_pushdown_operations,
|
||||
);
|
||||
|
||||
Ok(Connection::new(LanceConnection::new(
|
||||
Arc::new(database),
|
||||
Arc::new(lancedb::embeddings::MemoryRegistry::new()),
|
||||
)))
|
||||
}
|
||||
|
||||
#[derive(FromPyObject)]
|
||||
pub struct PyClientConfig {
|
||||
user_agent: String,
|
||||
@@ -528,6 +622,7 @@ pub struct PyClientConfig {
|
||||
id_delimiter: Option<String>,
|
||||
tls_config: Option<PyClientTlsConfig>,
|
||||
header_provider: Option<Py<PyAny>>,
|
||||
user_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(FromPyObject)]
|
||||
@@ -612,6 +707,7 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
|
||||
id_delimiter: value.id_delimiter,
|
||||
tls_config: value.tls_config.map(Into::into),
|
||||
header_provider,
|
||||
user_id: value.user_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use arrow::RecordBatchStream;
|
||||
use connection::{Connection, connect};
|
||||
use connection::{Connection, connect, connect_namespace_client};
|
||||
use env_logger::Env;
|
||||
use expr::{PyExpr, expr_col, expr_func, expr_lit};
|
||||
use index::IndexConfig;
|
||||
@@ -58,6 +58,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_class::<PyPermutationReader>()?;
|
||||
m.add_class::<PyExpr>()?;
|
||||
m.add_function(wrap_pyfunction!(connect, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(connect_namespace_client, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(query::fts_query_to_json, m)?)?;
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "1.91.0"
|
||||
channel = "1.94.0"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.27.2"
|
||||
version = "0.28.0-beta.7"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -171,7 +171,7 @@ impl OpenTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
@@ -188,7 +188,7 @@ impl OpenTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -541,6 +541,16 @@ impl Connection {
|
||||
self.internal.namespace_client().await
|
||||
}
|
||||
|
||||
/// Get the configuration for constructing an equivalent namespace client.
|
||||
/// Returns (impl_type, properties) where:
|
||||
/// - impl_type: "dir" for DirectoryNamespace, "rest" for RestNamespace
|
||||
/// - properties: configuration properties for the namespace
|
||||
pub async fn namespace_client_config(
|
||||
&self,
|
||||
) -> Result<(String, std::collections::HashMap<String, String>)> {
|
||||
self.internal.namespace_client_config().await
|
||||
}
|
||||
|
||||
/// List tables with pagination support
|
||||
pub async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
self.internal.list_tables(request).await
|
||||
@@ -572,6 +582,14 @@ pub struct ConnectRequest {
|
||||
/// Database specific options
|
||||
pub options: HashMap<String, String>,
|
||||
|
||||
/// Extra properties for the equivalent namespace client.
|
||||
///
|
||||
/// For a local [`ListingDatabase`], these are merged into the backing
|
||||
/// `DirectoryNamespace` properties. This is useful for namespace-specific
|
||||
/// settings such as `table_version_tracking_enabled` that are distinct from
|
||||
/// storage options.
|
||||
pub namespace_client_properties: HashMap<String, String>,
|
||||
|
||||
/// The interval at which to check for updates from other processes.
|
||||
///
|
||||
/// If None, then consistency is not checked. For performance
|
||||
@@ -611,6 +629,7 @@ impl ConnectBuilder {
|
||||
client_config: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
options: HashMap::new(),
|
||||
namespace_client_properties: HashMap::new(),
|
||||
session: None,
|
||||
},
|
||||
embedding_registry: None,
|
||||
@@ -728,7 +747,7 @@ impl ConnectBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.request.options.insert(key.into(), value.into());
|
||||
self
|
||||
@@ -736,7 +755,7 @@ impl ConnectBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -747,6 +766,31 @@ impl ConnectBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an additional property for the equivalent namespace client.
|
||||
pub fn namespace_client_property(
|
||||
mut self,
|
||||
key: impl Into<String>,
|
||||
value: impl Into<String>,
|
||||
) -> Self {
|
||||
self.request
|
||||
.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple additional properties for the equivalent namespace client.
|
||||
pub fn namespace_client_properties(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
for (key, value) in pairs {
|
||||
self.request
|
||||
.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// The interval at which to check for updates from other processes. This
|
||||
/// only affects LanceDB OSS.
|
||||
///
|
||||
@@ -871,7 +915,7 @@ use std::collections::HashSet;
|
||||
/// These operations will be executed on the namespace server instead of locally
|
||||
/// when enabled via [`ConnectNamespaceBuilder::pushdown_operations`].
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum PushdownOperation {
|
||||
pub enum NamespaceClientPushdownOperation {
|
||||
/// Execute queries on the namespace server via `query_table()` instead of locally.
|
||||
QueryTable,
|
||||
/// Execute table creation on the namespace server via `create_table()`
|
||||
@@ -883,10 +927,11 @@ pub struct ConnectNamespaceBuilder {
|
||||
ns_impl: String,
|
||||
properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
}
|
||||
|
||||
impl ConnectNamespaceBuilder {
|
||||
@@ -895,6 +940,7 @@ impl ConnectNamespaceBuilder {
|
||||
ns_impl: ns_impl.to_string(),
|
||||
properties,
|
||||
storage_options: HashMap::new(),
|
||||
namespace_client_properties: HashMap::new(),
|
||||
read_consistency_interval: None,
|
||||
embedding_registry: None,
|
||||
session: None,
|
||||
@@ -904,7 +950,7 @@ impl ConnectNamespaceBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.storage_options.insert(key.into(), value.into());
|
||||
self
|
||||
@@ -912,7 +958,7 @@ impl ConnectNamespaceBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -923,6 +969,29 @@ impl ConnectNamespaceBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an additional namespace client property.
|
||||
pub fn namespace_client_property(
|
||||
mut self,
|
||||
key: impl Into<String>,
|
||||
value: impl Into<String>,
|
||||
) -> Self {
|
||||
self.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple additional namespace client properties.
|
||||
pub fn namespace_client_properties(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
for (key, value) in pairs {
|
||||
self.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// The interval at which to check for updates from other processes.
|
||||
///
|
||||
/// If left unset, consistency is not checked. For maximum read
|
||||
@@ -960,11 +1029,11 @@ impl ConnectNamespaceBuilder {
|
||||
/// and leveraging server-side compute resources.
|
||||
///
|
||||
/// Available operations:
|
||||
/// - [`PushdownOperation::QueryTable`]: Execute queries via `namespace.query_table()`
|
||||
/// - [`PushdownOperation::CreateTable`]: Execute table creation via `namespace.create_table()`
|
||||
/// - [`NamespaceClientPushdownOperation::QueryTable`]: Execute queries via `namespace.query_table()`
|
||||
/// - [`NamespaceClientPushdownOperation::CreateTable`]: Execute table creation via `namespace.create_table()`
|
||||
///
|
||||
/// By default, no operations are pushed down (all executed locally).
|
||||
pub fn pushdown_operation(mut self, operation: PushdownOperation) -> Self {
|
||||
pub fn pushdown_operation(mut self, operation: NamespaceClientPushdownOperation) -> Self {
|
||||
self.pushdown_operations.insert(operation);
|
||||
self
|
||||
}
|
||||
@@ -974,7 +1043,7 @@ impl ConnectNamespaceBuilder {
|
||||
/// See [`Self::pushdown_operation`] for details.
|
||||
pub fn pushdown_operations(
|
||||
mut self,
|
||||
operations: impl IntoIterator<Item = PushdownOperation>,
|
||||
operations: impl IntoIterator<Item = NamespaceClientPushdownOperation>,
|
||||
) -> Self {
|
||||
self.pushdown_operations.extend(operations);
|
||||
self
|
||||
@@ -984,10 +1053,13 @@ impl ConnectNamespaceBuilder {
|
||||
pub async fn execute(self) -> Result<Connection> {
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
|
||||
let mut properties = self.properties;
|
||||
properties.extend(self.namespace_client_properties);
|
||||
|
||||
let internal = Arc::new(
|
||||
LanceNamespaceDatabase::connect(
|
||||
&self.ns_impl,
|
||||
self.properties,
|
||||
properties,
|
||||
self.storage_options,
|
||||
self.read_consistency_interval,
|
||||
self.session,
|
||||
@@ -1107,6 +1179,31 @@ mod tests {
|
||||
assert_eq!(db.uri(), relative_uri.to_str().unwrap().to_string());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect_with_namespace_client_properties() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let db = connect(uri)
|
||||
.namespace_client_property("table_version_tracking_enabled", "true")
|
||||
.namespace_client_property("manifest_enabled", "true")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (ns_impl, properties) = db.namespace_client_config().await.unwrap();
|
||||
assert_eq!(ns_impl, "dir");
|
||||
assert_eq!(properties.get("root"), Some(&uri.to_string()));
|
||||
assert_eq!(
|
||||
properties.get("table_version_tracking_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
properties.get("manifest_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_names() {
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
|
||||
@@ -55,7 +55,7 @@ impl CreateTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
@@ -73,7 +73,7 @@ impl CreateTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
|
||||
@@ -265,4 +265,13 @@ pub trait Database:
|
||||
/// For ListingDatabase, it is the equivalent DirectoryNamespace.
|
||||
/// For RemoteDatabase, it is the equivalent RestNamespace.
|
||||
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>>;
|
||||
|
||||
/// Get the configuration for constructing an equivalent namespace client.
|
||||
/// Returns (impl_type, properties) where:
|
||||
/// - impl_type: "dir" for DirectoryNamespace, "rest" for RestNamespace
|
||||
/// - properties: configuration properties for the namespace
|
||||
///
|
||||
/// This is useful for Python bindings where we want to return a Python
|
||||
/// namespace object rather than a Rust trait object.
|
||||
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)>;
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ use snafu::ResultExt;
|
||||
|
||||
use crate::connection::ConnectRequest;
|
||||
use crate::database::ReadConsistency;
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
use crate::error::{CreateDirSnafu, Error, Result};
|
||||
use crate::io::object_store::MirroringObjectStoreWrapper;
|
||||
use crate::table::NativeTable;
|
||||
@@ -73,7 +74,7 @@ pub struct ListingDatabaseOptions {
|
||||
/// These are used to create/list tables and they are inherited by all tables
|
||||
/// opened by this database.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub storage_options: HashMap<String, String>,
|
||||
}
|
||||
|
||||
@@ -185,7 +186,7 @@ impl ListingDatabaseOptionsBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.options
|
||||
.storage_options
|
||||
@@ -195,7 +196,7 @@ impl ListingDatabaseOptionsBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -255,6 +256,9 @@ pub struct ListingDatabase {
|
||||
|
||||
// Session for object stores and caching
|
||||
session: Arc<lance::session::Session>,
|
||||
|
||||
// Namespace-backed database for child namespace operations
|
||||
namespace_database: Arc<LanceNamespaceDatabase>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ListingDatabase {
|
||||
@@ -281,6 +285,44 @@ const MIRRORED_STORE: &str = "mirroredStore";
|
||||
|
||||
/// A connection to LanceDB
|
||||
impl ListingDatabase {
|
||||
/// Assemble the property map for the namespace client backing this database.
///
/// Starts from `namespace_client_properties`, then sets `root` to `uri` and
/// adds every storage option under a `storage.`-prefixed key. Because those
/// entries are written last, they replace caller-supplied properties that use
/// the same keys.
fn build_namespace_client_properties(
    uri: &str,
    storage_options: &HashMap<String, String>,
    namespace_client_properties: HashMap<String, String>,
) -> HashMap<String, String> {
    let mut properties = namespace_client_properties;
    properties.insert("root".to_string(), uri.to_string());
    properties.extend(
        storage_options
            .iter()
            .map(|(key, value)| (format!("storage.{}", key), value.clone())),
    );
    properties
}
|
||||
|
||||
async fn connect_namespace_database(
|
||||
uri: &str,
|
||||
storage_options: HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Arc<lance::session::Session>,
|
||||
) -> Result<Arc<LanceNamespaceDatabase>> {
|
||||
let ns_properties = Self::build_namespace_client_properties(
|
||||
uri,
|
||||
&storage_options,
|
||||
namespace_client_properties,
|
||||
);
|
||||
Ok(Arc::new(
|
||||
LanceNamespaceDatabase::connect(
|
||||
"dir",
|
||||
ns_properties,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
Some(session),
|
||||
HashSet::new(),
|
||||
)
|
||||
.await?,
|
||||
))
|
||||
}
|
||||
|
||||
/// Connect to a listing database
|
||||
///
|
||||
/// The URI should be a path to a directory where the tables are stored.
|
||||
@@ -300,6 +342,7 @@ impl ListingDatabase {
|
||||
uri,
|
||||
request.read_consistency_interval,
|
||||
options.new_table_config,
|
||||
request.namespace_client_properties.clone(),
|
||||
request.session.clone(),
|
||||
)
|
||||
.await
|
||||
@@ -387,6 +430,15 @@ impl ListingDatabase {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let namespace_database = Self::connect_namespace_database(
|
||||
&table_base_uri,
|
||||
options.storage_options.clone(),
|
||||
request.namespace_client_properties.clone(),
|
||||
request.read_consistency_interval,
|
||||
session.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
uri: table_base_uri,
|
||||
query_string,
|
||||
@@ -398,6 +450,7 @@ impl ListingDatabase {
|
||||
storage_options_provider: None,
|
||||
new_table_config: options.new_table_config,
|
||||
session,
|
||||
namespace_database,
|
||||
})
|
||||
}
|
||||
Err(_) => {
|
||||
@@ -405,6 +458,7 @@ impl ListingDatabase {
|
||||
uri,
|
||||
request.read_consistency_interval,
|
||||
options.new_table_config,
|
||||
request.namespace_client_properties.clone(),
|
||||
request.session.clone(),
|
||||
)
|
||||
.await
|
||||
@@ -416,6 +470,7 @@ impl ListingDatabase {
|
||||
path: &str,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
new_table_config: NewTableConfig,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
) -> Result<Self> {
|
||||
let session = session.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
|
||||
@@ -429,6 +484,15 @@ impl ListingDatabase {
|
||||
Self::try_create_dir(path).context(CreateDirSnafu { path })?;
|
||||
}
|
||||
|
||||
let namespace_database = Self::connect_namespace_database(
|
||||
path,
|
||||
HashMap::new(),
|
||||
namespace_client_properties,
|
||||
read_consistency_interval,
|
||||
session.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
uri: path.to_string(),
|
||||
query_string: None,
|
||||
@@ -440,6 +504,7 @@ impl ListingDatabase {
|
||||
storage_options_provider: None,
|
||||
new_table_config,
|
||||
session,
|
||||
namespace_database,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -497,6 +562,10 @@ impl ListingDatabase {
|
||||
Ok(uri)
|
||||
}
|
||||
|
||||
/// Returns another shared handle to the namespace database held by this listing database.
fn namespace_database(&self) -> Arc<LanceNamespaceDatabase> {
    Arc::clone(&self.namespace_database)
}
|
||||
|
||||
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
|
||||
let object_store_params = ObjectStoreParams {
|
||||
storage_options_accessor: if self.storage_options.is_empty() {
|
||||
@@ -696,16 +765,7 @@ impl Database for ListingDatabase {
|
||||
&self,
|
||||
request: ListNamespacesRequest,
|
||||
) -> Result<ListNamespacesResponse> {
|
||||
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(ListNamespacesResponse {
|
||||
namespaces: Vec::new(),
|
||||
page_token: None,
|
||||
})
|
||||
self.namespace_database().list_namespaces(request).await
|
||||
}
|
||||
|
||||
fn uri(&self) -> &str {
|
||||
@@ -726,36 +786,26 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn create_namespace(
|
||||
&self,
|
||||
_request: CreateNamespaceRequest,
|
||||
request: CreateNamespaceRequest,
|
||||
) -> Result<CreateNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
self.namespace_database().create_namespace(request).await
|
||||
}
|
||||
|
||||
async fn drop_namespace(
|
||||
&self,
|
||||
_request: DropNamespaceRequest,
|
||||
) -> Result<DropNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
|
||||
self.namespace_database().drop_namespace(request).await
|
||||
}
|
||||
|
||||
async fn describe_namespace(
|
||||
&self,
|
||||
_request: DescribeNamespaceRequest,
|
||||
request: DescribeNamespaceRequest,
|
||||
) -> Result<DescribeNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
self.namespace_database().describe_namespace(request).await
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
|
||||
if !request.namespace_path.is_empty() {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
|
||||
});
|
||||
return self.namespace_database().table_names(request).await;
|
||||
}
|
||||
let mut f = self
|
||||
.object_store
|
||||
@@ -788,9 +838,7 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
|
||||
});
|
||||
return self.namespace_database().list_tables(request).await;
|
||||
}
|
||||
let mut f = self
|
||||
.object_store
|
||||
@@ -838,11 +886,8 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
// When namespace is not empty, location must be provided
|
||||
if !request.namespace_path.is_empty() && request.location.is_none() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "Location must be provided when namespace is not empty".into(),
|
||||
});
|
||||
if !request.namespace_path.is_empty() {
|
||||
return self.namespace_database().create_table(request).await;
|
||||
}
|
||||
// Use provided location if available, otherwise derive from table name
|
||||
let table_uri = request
|
||||
@@ -959,11 +1004,8 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
// When namespace is not empty, location must be provided
|
||||
if !request.namespace_path.is_empty() && request.location.is_none() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "Location must be provided when namespace is not empty".into(),
|
||||
});
|
||||
if !request.namespace_path.is_empty() {
|
||||
return self.namespace_database().open_table(request).await;
|
||||
}
|
||||
// Use provided location if available, otherwise derive from table name
|
||||
let table_uri = request
|
||||
@@ -1059,9 +1101,10 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn drop_table(&self, name: &str, namespace_path: &[String]) -> Result<()> {
|
||||
if !namespace_path.is_empty() {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database.".into(),
|
||||
});
|
||||
return self
|
||||
.namespace_database()
|
||||
.drop_table(name, namespace_path)
|
||||
.await;
|
||||
}
|
||||
self.drop_tables(vec![name.to_string()]).await
|
||||
}
|
||||
@@ -1070,9 +1113,10 @@ impl Database for ListingDatabase {
|
||||
async fn drop_all_tables(&self, namespace_path: &[String]) -> Result<()> {
|
||||
// Check if namespace parameter is provided
|
||||
if !namespace_path.is_empty() {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database.".into(),
|
||||
});
|
||||
return self
|
||||
.namespace_database()
|
||||
.drop_all_tables(namespace_path)
|
||||
.await;
|
||||
}
|
||||
let tables = self.table_names(TableNamesRequest::default()).await?;
|
||||
self.drop_tables(tables).await
|
||||
@@ -1083,21 +1127,11 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
|
||||
// Create a DirectoryNamespace pointing to the same root with the same storage options
|
||||
let mut builder = lance_namespace_impls::DirectoryNamespaceBuilder::new(&self.uri);
|
||||
self.namespace_database.namespace_client().await
|
||||
}
|
||||
|
||||
// Add storage options
|
||||
if !self.storage_options.is_empty() {
|
||||
builder = builder.storage_options(self.storage_options.clone());
|
||||
}
|
||||
|
||||
// Use the same session
|
||||
builder = builder.session(self.session.clone());
|
||||
|
||||
let namespace = builder.build().await.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create namespace client: {}", e),
|
||||
})?;
|
||||
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
|
||||
/// Reports the namespace client configuration by delegating to the backing
/// namespace database and returning its result unchanged.
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let backing = &self.namespace_database;
    backing.namespace_client_config().await
}
|
||||
}
|
||||
|
||||
@@ -1123,6 +1157,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: Default::default(),
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1256,6 +1291,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: options.clone(),
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1790,6 +1826,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1895,6 +1932,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1966,6 +2004,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -2099,4 +2138,209 @@ mod tests {
|
||||
assert!(tables.contains(&"table1".to_string()));
|
||||
assert!(tables.contains(&"table2".to_string()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_listing_database_namespace_operations() {
|
||||
let (_tempdir, db) = setup_database().await;
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string(), "child".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let root_namespaces = db
|
||||
.list_namespaces(ListNamespacesRequest {
|
||||
id: Some(vec![]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(root_namespaces.namespaces.contains(&"parent".to_string()));
|
||||
|
||||
let child_namespaces = db
|
||||
.list_namespaces(ListNamespacesRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(child_namespaces.namespaces.contains(&"child".to_string()));
|
||||
|
||||
db.describe_namespace(DescribeNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string(), "child".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[cfg(not(windows))] // TODO: support Windows once directory namespace-backed listing DB tests are supported.
|
||||
async fn test_listing_database_with_namespace_client_properties() {
|
||||
let tempdir = tempdir().unwrap();
|
||||
let uri = tempdir.path().to_str().unwrap();
|
||||
|
||||
let mut namespace_client_properties = HashMap::new();
|
||||
namespace_client_properties.insert(
|
||||
"table_version_tracking_enabled".to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
namespace_client_properties.insert("manifest_enabled".to_string(), "true".to_string());
|
||||
|
||||
let request = ConnectRequest {
|
||||
uri: uri.to_string(),
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: Default::default(),
|
||||
namespace_client_properties,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
|
||||
let db = ListingDatabase::connect_with_options(&request)
|
||||
.await
|
||||
.unwrap();
|
||||
let namespace_path = vec!["test_ns".to_string()];
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
]));
|
||||
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "managed_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let namespace_client = db.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(lance_namespace::models::DescribeTableRequest {
|
||||
id: Some(vec!["test_ns".to_string(), "managed_table".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(describe.managed_versioning, Some(true));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_listing_database_nested_namespace_table_ops() {
|
||||
let (_tempdir, db) = setup_database().await;
|
||||
let namespace_path = vec!["parent".to_string(), "child".to_string()];
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
]));
|
||||
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "nested_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let namespace_client = db.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(lance_namespace::models::DescribeTableRequest {
|
||||
id: Some(vec![
|
||||
"parent".to_string(),
|
||||
"child".to_string(),
|
||||
"nested_table".to_string(),
|
||||
]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(describe.location.is_some());
|
||||
|
||||
let table = db
|
||||
.open_table(OpenTableRequest {
|
||||
name: "nested_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
index_cache_size: None,
|
||||
lance_read_params: None,
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
managed_versioning: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table.name(), "nested_table");
|
||||
|
||||
#[allow(deprecated)]
|
||||
let table_names = db
|
||||
.table_names(TableNamesRequest {
|
||||
namespace_path: namespace_path.clone(),
|
||||
start_after: None,
|
||||
limit: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table_names, vec!["nested_table".to_string()]);
|
||||
|
||||
let list_tables = db
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(list_tables.tables, vec!["nested_table".to_string()]);
|
||||
|
||||
db.drop_table("nested_table", &namespace_path)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let post_drop = db
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace_path),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(post_drop.tables.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ use lance_namespace_impls::ConnectBuilder;
|
||||
use lance_table::io::commit::CommitHandler;
|
||||
use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler;
|
||||
|
||||
use crate::connection::PushdownOperation;
|
||||
use crate::connection::NamespaceClientPushdownOperation;
|
||||
use crate::database::ReadConsistency;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::table::NativeTable;
|
||||
@@ -44,17 +44,42 @@ pub struct LanceNamespaceDatabase {
|
||||
// database URI
|
||||
uri: String,
|
||||
// Operations to push down to the namespace server
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
// Namespace implementation type (e.g., "dir", "rest")
|
||||
ns_impl: String,
|
||||
// Namespace properties used to construct the namespace client
|
||||
ns_properties: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl LanceNamespaceDatabase {
|
||||
pub fn from_namespace_client(
|
||||
namespace_client: Arc<dyn LanceNamespace>,
|
||||
namespace_client_impl: String,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
namespace_client_pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
) -> Self {
|
||||
Self {
|
||||
namespace: namespace_client,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
session,
|
||||
uri: format!("namespace://{}", namespace_client_impl),
|
||||
pushdown_operations: namespace_client_pushdown_operations,
|
||||
ns_impl: namespace_client_impl,
|
||||
ns_properties: namespace_client_properties,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn connect(
|
||||
ns_impl: &str,
|
||||
ns_properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
) -> Result<Self> {
|
||||
let mut builder = ConnectBuilder::new(ns_impl);
|
||||
for (key, value) in ns_properties.clone() {
|
||||
@@ -74,6 +99,8 @@ impl LanceNamespaceDatabase {
|
||||
session,
|
||||
uri: format!("namespace://{}", ns_impl),
|
||||
pushdown_operations,
|
||||
ns_impl: ns_impl.to_string(),
|
||||
ns_properties,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -157,22 +184,15 @@ impl Database for LanceNamespaceDatabase {
|
||||
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
let mut table_id = request.namespace_path.clone();
|
||||
table_id.push(request.name.clone());
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let describe_result = self.namespace.describe_table(describe_request).await;
|
||||
|
||||
match request.mode {
|
||||
CreateTableMode::Create => {
|
||||
if describe_result.is_ok() {
|
||||
return Err(Error::TableAlreadyExists {
|
||||
name: request.name.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
CreateTableMode::Create => {}
|
||||
CreateTableMode::Overwrite => {
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
let describe_result = self.namespace.describe_table(describe_request).await;
|
||||
if describe_result.is_ok() {
|
||||
// Drop the existing table - must succeed
|
||||
let drop_request = DropTableRequest {
|
||||
@@ -188,6 +208,11 @@ impl Database for LanceNamespaceDatabase {
|
||||
}
|
||||
}
|
||||
CreateTableMode::ExistOk(_) => {
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
let describe_result = self.namespace.describe_table(describe_request).await;
|
||||
if describe_result.is_ok() {
|
||||
let native_table = NativeTable::open_from_namespace(
|
||||
self.namespace.clone(),
|
||||
@@ -215,7 +240,26 @@ impl Database for LanceNamespaceDatabase {
|
||||
};
|
||||
|
||||
let (location, initial_storage_options, managed_versioning) = {
|
||||
let response = self.namespace.declare_table(declare_request).await?;
|
||||
let response = self
|
||||
.namespace
|
||||
.declare_table(declare_request)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
let err_str = e.to_string();
|
||||
if matches!(request.mode, CreateTableMode::Create)
|
||||
&& (err_str.contains("already exists")
|
||||
|| err_str.contains("TableAlreadyExists")
|
||||
|| err_str.contains("table already exists"))
|
||||
{
|
||||
Error::TableAlreadyExists {
|
||||
name: request.name.clone(),
|
||||
}
|
||||
} else {
|
||||
Error::Runtime {
|
||||
message: format!("Failed to declare table: {}", e),
|
||||
}
|
||||
}
|
||||
})?;
|
||||
let loc = response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from declare_table response".to_string(),
|
||||
})?;
|
||||
@@ -345,6 +389,10 @@ impl Database for LanceNamespaceDatabase {
|
||||
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>> {
|
||||
Ok(self.namespace.clone())
|
||||
}
|
||||
|
||||
/// Returns owned copies of the namespace implementation name and the properties
/// stored on this database, as an `(implementation, properties)` pair.
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let implementation = self.ns_impl.clone();
    let properties = self.ns_properties.clone();
    Ok((implementation, properties))
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -440,6 +488,47 @@ mod tests {
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_connection_with_namespace_client_properties() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.namespace_client_property("table_version_tracking_enabled", "true")
|
||||
.namespace_client_property("manifest_enabled", "true")
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
|
||||
let test_data = create_test_data();
|
||||
conn.create_table("test_table", test_data)
|
||||
.namespace(vec!["test_ns".into()])
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
|
||||
let namespace_client = conn.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(DescribeTableRequest {
|
||||
id: Some(vec!["test_ns".into(), "test_table".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to describe table");
|
||||
|
||||
assert_eq!(describe.managed_versioning, Some(true));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_table_basic() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
|
||||
@@ -177,6 +177,7 @@ impl BedrockEmbeddingFunction {
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.map_err(Box::new)
|
||||
})
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@@ -69,7 +69,7 @@
|
||||
//! It treats [`FixedSizeList<Float16/Float32>`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html)
|
||||
//! columns as vector columns.
|
||||
//!
|
||||
//! For more details, please refer to the [LanceDB documentation](https://lancedb.com/docs).
|
||||
//! For more details, please refer to the [LanceDB documentation](https://docs.lancedb.com).
|
||||
//!
|
||||
//! #### Create a table
|
||||
//!
|
||||
|
||||
@@ -52,6 +52,13 @@ pub struct ClientConfig {
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
/// Provider for custom headers to be added to each request
|
||||
pub header_provider: Option<Arc<dyn HeaderProvider>>,
|
||||
/// User identifier for tracking purposes.
|
||||
///
|
||||
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
/// variable that contains the user ID value.
|
||||
pub user_id: Option<String>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ClientConfig {
|
||||
@@ -67,6 +74,7 @@ impl std::fmt::Debug for ClientConfig {
|
||||
"header_provider",
|
||||
&self.header_provider.as_ref().map(|_| "Some(...)"),
|
||||
)
|
||||
.field("user_id", &self.user_id)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
@@ -81,10 +89,41 @@ impl Default for ClientConfig {
|
||||
id_delimiter: None,
|
||||
tls_config: None,
|
||||
header_provider: None,
|
||||
user_id: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ClientConfig {
|
||||
/// Resolve the user ID from the config or environment variables.
|
||||
///
|
||||
/// Resolution order:
|
||||
/// 1. If `user_id` is set in the config, use that value
|
||||
/// 2. If `LANCEDB_USER_ID` environment variable is set, use that value
|
||||
/// 3. If `LANCEDB_USER_ID_ENV_KEY` is set, read the env var it points to
|
||||
/// 4. Otherwise, return None
|
||||
pub fn resolve_user_id(&self) -> Option<String> {
|
||||
if self.user_id.is_some() {
|
||||
return self.user_id.clone();
|
||||
}
|
||||
|
||||
if let Ok(user_id) = std::env::var("LANCEDB_USER_ID")
|
||||
&& !user_id.is_empty()
|
||||
{
|
||||
return Some(user_id);
|
||||
}
|
||||
|
||||
if let Ok(env_key) = std::env::var("LANCEDB_USER_ID_ENV_KEY")
|
||||
&& let Ok(user_id) = std::env::var(&env_key)
|
||||
&& !user_id.is_empty()
|
||||
{
|
||||
return Some(user_id);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// How to handle timeouts for HTTP requests.
|
||||
#[derive(Clone, Default, Debug)]
|
||||
pub struct TimeoutConfig {
|
||||
@@ -464,6 +503,15 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(user_id) = config.resolve_user_id() {
|
||||
headers.insert(
|
||||
HeaderName::from_static("x-lancedb-user-id"),
|
||||
HeaderValue::from_str(&user_id).map_err(|_| Error::InvalidInput {
|
||||
message: format!("non-ascii user_id '{}' provided", user_id),
|
||||
})?,
|
||||
);
|
||||
}
|
||||
|
||||
Ok(headers)
|
||||
}
|
||||
|
||||
@@ -1072,4 +1120,91 @@ mod tests {
|
||||
_ => panic!("Expected Runtime error"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes the `resolve_user_id` tests that read or mutate process-wide
/// environment variables. The default test harness runs tests on parallel threads,
/// so without this lock one test could remove `LANCEDB_USER_ID` while another has
/// just set it and is about to assert on it.
static USER_ID_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Acquires [`USER_ID_ENV_LOCK`]. If an earlier test panicked while holding it, the
/// guard is recovered from the poisoned mutex so that one failure does not cascade
/// into every other environment-dependent test.
fn lock_user_id_env() -> std::sync::MutexGuard<'static, ()> {
    USER_ID_ENV_LOCK
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}

/// An explicitly configured `user_id` is returned as-is.
#[test]
fn test_resolve_user_id_direct_value() {
    // No lock needed: `user_id` is set, so the environment is never consulted.
    let config = ClientConfig {
        user_id: Some("direct-user-id".to_string()),
        ..Default::default()
    };
    assert_eq!(config.resolve_user_id(), Some("direct-user-id".to_string()));
}

/// With no configured value and no environment variables, nothing is resolved.
#[test]
fn test_resolve_user_id_none() {
    let _env_guard = lock_user_id_env();
    let config = ClientConfig::default();
    // Clear env vars that might be set from other tests
    // SAFETY: `USER_ID_ENV_LOCK` is held, so no other test in this group touches the
    // environment concurrently.
    // NOTE(review): threads outside this group that read the environment are not covered.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
        std::env::remove_var("LANCEDB_USER_ID_ENV_KEY");
    }
    assert_eq!(config.resolve_user_id(), None);
}

/// `LANCEDB_USER_ID` is used when the config carries no `user_id`.
#[test]
fn test_resolve_user_id_from_env() {
    let _env_guard = lock_user_id_env();
    // SAFETY: `USER_ID_ENV_LOCK` is held; see `test_resolve_user_id_none`.
    unsafe {
        std::env::set_var("LANCEDB_USER_ID", "env-user-id");
    }
    let config = ClientConfig::default();
    assert_eq!(config.resolve_user_id(), Some("env-user-id".to_string()));
    // SAFETY: `USER_ID_ENV_LOCK` is still held.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
    }
}

/// `LANCEDB_USER_ID_ENV_KEY` names another variable that holds the user ID.
#[test]
fn test_resolve_user_id_from_env_key() {
    let _env_guard = lock_user_id_env();
    // SAFETY: `USER_ID_ENV_LOCK` is held; see `test_resolve_user_id_none`.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
        std::env::set_var("LANCEDB_USER_ID_ENV_KEY", "MY_CUSTOM_USER_ID");
        std::env::set_var("MY_CUSTOM_USER_ID", "custom-env-user-id");
    }
    let config = ClientConfig::default();
    assert_eq!(
        config.resolve_user_id(),
        Some("custom-env-user-id".to_string())
    );
    // SAFETY: `USER_ID_ENV_LOCK` is still held.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID_ENV_KEY");
        std::env::remove_var("MY_CUSTOM_USER_ID");
    }
}

/// A configured `user_id` wins over `LANCEDB_USER_ID`.
#[test]
fn test_resolve_user_id_direct_takes_precedence() {
    let _env_guard = lock_user_id_env();
    // SAFETY: `USER_ID_ENV_LOCK` is held; see `test_resolve_user_id_none`.
    unsafe {
        std::env::set_var("LANCEDB_USER_ID", "env-user-id");
    }
    let config = ClientConfig {
        user_id: Some("direct-user-id".to_string()),
        ..Default::default()
    };
    assert_eq!(config.resolve_user_id(), Some("direct-user-id".to_string()));
    // SAFETY: `USER_ID_ENV_LOCK` is still held.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
    }
}

/// An empty `LANCEDB_USER_ID` is treated as if it were not set.
#[test]
fn test_resolve_user_id_empty_env_ignored() {
    let _env_guard = lock_user_id_env();
    // SAFETY: `USER_ID_ENV_LOCK` is held; see `test_resolve_user_id_none`.
    unsafe {
        std::env::set_var("LANCEDB_USER_ID", "");
        std::env::remove_var("LANCEDB_USER_ID_ENV_KEY");
    }
    let config = ClientConfig::default();
    assert_eq!(config.resolve_user_id(), None);
    // SAFETY: `USER_ID_ENV_LOCK` is still held.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
    }
}
|
||||
}
|
||||
|
||||
@@ -97,7 +97,7 @@ pub struct RemoteDatabaseOptions {
|
||||
pub host_override: Option<String>,
|
||||
/// Storage options configure the storage layer (e.g. S3, GCS, Azure, etc.)
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
///
|
||||
/// These options are only used for LanceDB Enterprise and only a subset of options
|
||||
/// are supported.
|
||||
@@ -777,6 +777,32 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
let namespace = builder.build();
|
||||
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
|
||||
}
|
||||
|
||||
/// Describes how to construct a `"rest"` namespace client equivalent to this
/// remote database, as an `(implementation, properties)` pair.
///
/// The properties contain:
/// - `uri`: the client's host
/// - `delimiter`: the client's id delimiter
/// - `header.<name>`: one entry per namespace header
/// - `tls.cert_file`, `tls.key_file`, `tls.ssl_ca_cert`: only for TLS settings that are set
/// - `tls.assert_hostname`: present whenever a TLS configuration exists
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let mut config: HashMap<String, String> = HashMap::new();
    config.insert("uri".to_string(), self.client.host().to_string());
    config.insert("delimiter".to_string(), self.client.id_delimiter.clone());

    config.extend(
        self.namespace_headers
            .iter()
            .map(|(name, value)| (format!("header.{}", name), value.clone())),
    );

    // TLS settings are emitted only when a TLS configuration is present.
    if let Some(tls) = &self.tls_config {
        let optional_settings = [
            ("tls.cert_file", &tls.cert_file),
            ("tls.key_file", &tls.key_file),
            ("tls.ssl_ca_cert", &tls.ssl_ca_cert),
        ];
        for (property, setting) in optional_settings {
            if let Some(value) = setting {
                config.insert(property.to_string(), value.clone());
            }
        }
        config.insert(
            "tls.assert_hostname".to_string(),
            tls.assert_hostname.to_string(),
        );
    }

    Ok(("rest".to_string(), config))
}
|
||||
}
|
||||
|
||||
/// RemoteOptions contains a subset of StorageOptions that are compatible with Remote LanceDB connections
|
||||
|
||||
@@ -47,7 +47,7 @@ use std::format;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::connection::PushdownOperation;
|
||||
use crate::connection::NamespaceClientPushdownOperation;
|
||||
|
||||
use crate::data::scannable::{PeekedScannable, Scannable, estimate_write_partitions};
|
||||
use crate::database::Database;
|
||||
@@ -1272,7 +1272,7 @@ pub struct NativeTable {
|
||||
pub(crate) namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
// Operations to push down to the namespace server.
|
||||
// pub(crate) so query.rs can access the field for server-side query execution.
|
||||
pub(crate) pushdown_operations: HashSet<PushdownOperation>,
|
||||
pub(crate) pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for NativeTable {
|
||||
@@ -1359,7 +1359,7 @@ impl NativeTable {
|
||||
params: Option<ReadParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
managed_versioning: Option<bool>,
|
||||
) -> Result<Self> {
|
||||
let params = params.unwrap_or_default();
|
||||
@@ -1470,7 +1470,7 @@ impl NativeTable {
|
||||
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
params: Option<ReadParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
) -> Result<Self> {
|
||||
let mut params = params.unwrap_or_default();
|
||||
@@ -1518,7 +1518,7 @@ impl NativeTable {
|
||||
let id = Self::build_id(&namespace, name);
|
||||
|
||||
let stored_namespace_client =
|
||||
if pushdown_operations.contains(&PushdownOperation::QueryTable) {
|
||||
if pushdown_operations.contains(&NamespaceClientPushdownOperation::QueryTable) {
|
||||
Some(namespace_client)
|
||||
} else {
|
||||
None
|
||||
@@ -1588,7 +1588,7 @@ impl NativeTable {
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
) -> Result<Self> {
|
||||
// Default params uses format v1.
|
||||
let params = params.unwrap_or(WriteParams {
|
||||
@@ -1635,7 +1635,7 @@ impl NativeTable {
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
) -> Result<Self> {
|
||||
let data: Box<dyn Scannable> = Box::new(RecordBatch::new_empty(schema));
|
||||
Self::create(
|
||||
@@ -1685,7 +1685,7 @@ impl NativeTable {
|
||||
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
) -> Result<Self> {
|
||||
// Build table_id from namespace + name for the storage options provider
|
||||
@@ -1738,7 +1738,7 @@ impl NativeTable {
|
||||
let id = Self::build_id(&namespace, name);
|
||||
|
||||
let stored_namespace_client =
|
||||
if pushdown_operations.contains(&PushdownOperation::QueryTable) {
|
||||
if pushdown_operations.contains(&NamespaceClientPushdownOperation::QueryTable) {
|
||||
Some(namespace_client)
|
||||
} else {
|
||||
None
|
||||
|
||||
@@ -285,7 +285,10 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::{connect, io::object_store::io_tracking::IoStatsHolder, table::WriteOptions};
|
||||
use crate::{
|
||||
connect, io::object_store::io_tracking::IoStatsHolder, table::WriteOptions,
|
||||
utils::background_cache::clock,
|
||||
};
|
||||
|
||||
async fn create_test_dataset(uri: &str) -> Dataset {
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
|
||||
@@ -463,6 +466,10 @@ mod tests {
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let ds = create_test_dataset(uri).await;
|
||||
|
||||
// Other tests use a thread-local mock clock. Simulate leaked state from a
|
||||
// previous test to ensure this wrapper starts from real time.
|
||||
clock::advance_by(Duration::from_secs(60));
|
||||
|
||||
let wrapper = DatasetConsistencyWrapper::new_latest(ds, Some(Duration::from_millis(200)));
|
||||
|
||||
// Populate the cache
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::NativeTable;
|
||||
use crate::connection::PushdownOperation;
|
||||
use crate::connection::NamespaceClientPushdownOperation;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::expr::expr_to_sql_string;
|
||||
use crate::query::{
|
||||
@@ -44,7 +44,7 @@ pub async fn execute_query(
|
||||
// If QueryTable pushdown is enabled and namespace client is configured, use server-side query execution
|
||||
if table
|
||||
.pushdown_operations
|
||||
.contains(&PushdownOperation::QueryTable)
|
||||
.contains(&NamespaceClientPushdownOperation::QueryTable)
|
||||
&& let Some(ref namespace_client) = table.namespace_client
|
||||
{
|
||||
return execute_namespace_query(table, namespace_client.clone(), query, options).await;
|
||||
|
||||
@@ -107,6 +107,14 @@ where
|
||||
refresh_window < ttl,
|
||||
"refresh_window ({refresh_window:?}) must be less than ttl ({ttl:?})"
|
||||
);
|
||||
#[cfg(test)]
|
||||
{
|
||||
// Tests may advance the thread-local mock clock and leave it behind for
|
||||
// the next test that happens to run on the same worker thread. Each new
|
||||
// cache should start from a clean clock state instead of inheriting
|
||||
// unrelated mock time from a previous test.
|
||||
clock::clear_mock();
|
||||
}
|
||||
Self {
|
||||
inner: Arc::new(Mutex::new(CacheInner {
|
||||
state: State::Empty,
|
||||
|
||||
Reference in New Issue
Block a user