mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 13:29:57 +00:00
Compare commits
18 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f79295c697 | ||
|
|
381fad9b65 | ||
|
|
055bf91d3e | ||
|
|
050f0086b8 | ||
|
|
10fa23e0d6 | ||
|
|
43d9fc28b0 | ||
|
|
f45f0d0431 | ||
|
|
b9e3c36d82 | ||
|
|
3cd7dd3375 | ||
|
|
12d4ce4cfe | ||
|
|
3d1f102087 | ||
|
|
81afd8a42f | ||
|
|
c2aa03615a | ||
|
|
d2c6759e7f | ||
|
|
94fb9f364a | ||
|
|
fbff244ed8 | ||
|
|
7e7466d224 | ||
|
|
cceaf27d79 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.21.2-beta.0"
|
current_version = "0.21.2-beta.1"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
10
.github/workflows/cargo-publish.yml
vendored
10
.github/workflows/cargo-publish.yml
vendored
@@ -5,8 +5,8 @@ on:
|
|||||||
tags-ignore:
|
tags-ignore:
|
||||||
# We don't publish pre-releases for Rust. Crates.io is just a source
|
# We don't publish pre-releases for Rust. Crates.io is just a source
|
||||||
# distribution, so we don't need to publish pre-releases.
|
# distribution, so we don't need to publish pre-releases.
|
||||||
- 'v*-beta*'
|
- "v*-beta*"
|
||||||
- '*-v*' # for example, python-vX.Y.Z
|
- "*-v*" # for example, python-vX.Y.Z
|
||||||
|
|
||||||
env:
|
env:
|
||||||
# This env var is used by Swatinem/rust-cache@v2 for the cache
|
# This env var is used by Swatinem/rust-cache@v2 for the cache
|
||||||
@@ -19,6 +19,8 @@ env:
|
|||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
|
permissions:
|
||||||
|
id-token: write
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
# Only runs on tags that match the make-release action
|
# Only runs on tags that match the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -31,6 +33,8 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt update
|
sudo apt update
|
||||||
sudo apt install -y protobuf-compiler libssl-dev
|
sudo apt install -y protobuf-compiler libssl-dev
|
||||||
|
- uses: rust-lang/crates-io-auth-action@v1
|
||||||
|
id: auth
|
||||||
- name: Publish the package
|
- name: Publish the package
|
||||||
run: |
|
run: |
|
||||||
cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
|
||||||
|
|||||||
24
CLAUDE.md
Normal file
24
CLAUDE.md
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
|
||||||
|
It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
|
||||||
|
remote (against LanceDB Cloud).
|
||||||
|
|
||||||
|
The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.
|
||||||
|
|
||||||
|
Project layout:
|
||||||
|
|
||||||
|
* `rust/lancedb`: The LanceDB core Rust implementation.
|
||||||
|
* `python`: The Python bindings, using PyO3.
|
||||||
|
* `nodejs`: The TypeScript bindings, using napi-rs
|
||||||
|
* `java`: The Java bindings
|
||||||
|
|
||||||
|
(`rust/ffi` and `node/` are for a deprecated package. You can ignore them.)
|
||||||
|
|
||||||
|
Common commands:
|
||||||
|
|
||||||
|
* Check for compiler errors: `cargo check --features remote --tests --examples`
|
||||||
|
* Run tests: `cargo test --features remote --tests`
|
||||||
|
* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
|
||||||
|
* Lint: `cargo clippy --features remote --tests --examples`
|
||||||
|
* Format: `cargo fmt --all`
|
||||||
|
|
||||||
|
Before committing changes, run formatting.
|
||||||
187
Cargo.lock
generated
187
Cargo.lock
generated
@@ -1039,6 +1039,17 @@ dependencies = [
|
|||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "backon"
|
||||||
|
version = "1.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "302eaff5357a264a2c42f127ecb8bac761cf99749fc3dc95677e2743991f99e7"
|
||||||
|
dependencies = [
|
||||||
|
"fastrand",
|
||||||
|
"gloo-timers",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "backtrace"
|
name = "backtrace"
|
||||||
version = "0.3.75"
|
version = "0.3.75"
|
||||||
@@ -2477,6 +2488,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"block-buffer",
|
"block-buffer",
|
||||||
|
"const-oid",
|
||||||
"crypto-common",
|
"crypto-common",
|
||||||
"subtle",
|
"subtle",
|
||||||
]
|
]
|
||||||
@@ -2840,9 +2852,11 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fsst"
|
name = "fsst"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "99b0ce83d91fe637d97c127ac8df19f57e6012a5472c339154e5100cb107df4c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"arrow-array",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -3256,6 +3270,18 @@ version = "0.3.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "gloo-timers"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994"
|
||||||
|
dependencies = [
|
||||||
|
"futures-channel",
|
||||||
|
"futures-core",
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "group"
|
name = "group"
|
||||||
version = "0.12.1"
|
version = "0.12.1"
|
||||||
@@ -3792,6 +3818,17 @@ dependencies = [
|
|||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "io-uring"
|
||||||
|
version = "0.7.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.9.1",
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ipnet"
|
name = "ipnet"
|
||||||
version = "2.11.0"
|
version = "2.11.0"
|
||||||
@@ -3930,8 +3967,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance"
|
name = "lance"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7484555bbe6f7898d6a283f89ecd3e2ba85a0f28d9a9e6f15f3018d8adaebdd9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
@@ -3993,8 +4031,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-arrow"
|
name = "lance-arrow"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8674ce4b27d131ac98692dbc0b28f43690defa6ca63303b3cab21e6beaf43868"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-buffer",
|
"arrow-buffer",
|
||||||
@@ -4011,8 +4050,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-core"
|
name = "lance-core"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a1dd99bf06d5e322e81ff84cc2ce12b463836b4fba2bc1e0223085e1c8d7b71a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-buffer",
|
"arrow-buffer",
|
||||||
@@ -4047,8 +4087,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-datafusion"
|
name = "lance-datafusion"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "29e78724715c1cb255ea3ac749b617406d91db6565ea77d531c1aba46716efc4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4076,8 +4117,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-datagen"
|
name = "lance-datagen"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0cc5fa5f59bf65d02118fcc05615b511c03222f5240c4a18218f1297f97bcdf7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4093,8 +4135,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-encoding"
|
name = "lance-encoding"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a550fe9d4d931c48177691b9c085baf158bfde4ed7b6055eb27fed54174e5767"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrayref",
|
"arrayref",
|
||||||
"arrow",
|
"arrow",
|
||||||
@@ -4133,8 +4176,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-file"
|
name = "lance-file"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2d338a50e09bc5af5773cdc5d269680288847d1d34a4622063cce8ad4b5375b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4168,8 +4212,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-index"
|
name = "lance-index"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "14cbcb44403ee477ab4e53194e4c322295959785a7056b33043a2f9f01fa0f8a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4203,7 +4248,6 @@ dependencies = [
|
|||||||
"lance-linalg",
|
"lance-linalg",
|
||||||
"lance-table",
|
"lance-table",
|
||||||
"log",
|
"log",
|
||||||
"moka",
|
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"object_store",
|
"object_store",
|
||||||
"prost",
|
"prost",
|
||||||
@@ -4223,8 +4267,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-io"
|
name = "lance-io"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "933c8dad56aa3048c421f336b20f23f507cc47271fcc18bea8b4052c247a170e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
@@ -4248,6 +4293,8 @@ dependencies = [
|
|||||||
"lance-core",
|
"lance-core",
|
||||||
"log",
|
"log",
|
||||||
"object_store",
|
"object_store",
|
||||||
|
"object_store_opendal",
|
||||||
|
"opendal",
|
||||||
"path_abs",
|
"path_abs",
|
||||||
"pin-project",
|
"pin-project",
|
||||||
"prost",
|
"prost",
|
||||||
@@ -4262,8 +4309,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-linalg"
|
name = "lance-linalg"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2540ae40b7c35901be13541437c947aadb5a6afb2110f7275e90884aeee4cc07"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-buffer",
|
"arrow-buffer",
|
||||||
@@ -4286,8 +4334,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-table"
|
name = "lance-table"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "31e1cfa3e031b5795330eec7808baa1c2e105a067adf0790e5bb9a51aa7256ff"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4325,8 +4374,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-testing"
|
name = "lance-testing"
|
||||||
version = "0.31.2"
|
version = "0.32.0"
|
||||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2847faaa98fdb2facc75ae515e553ea67e68d0b05de41ac577b8038e1bbafac8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-schema",
|
"arrow-schema",
|
||||||
@@ -4337,7 +4387,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.21.2-beta.0"
|
version = "0.21.2-beta.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4424,7 +4474,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.21.2-beta.0"
|
version = "0.21.2-beta.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ipc",
|
"arrow-ipc",
|
||||||
@@ -4449,7 +4499,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
version = "0.21.2-beta.0"
|
version = "0.21.2-beta.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ipc",
|
"arrow-ipc",
|
||||||
@@ -4469,7 +4519,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.24.2-beta.0"
|
version = "0.24.2-beta.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
@@ -5215,6 +5265,21 @@ dependencies = [
|
|||||||
"web-time",
|
"web-time",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "object_store_opendal"
|
||||||
|
version = "0.54.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5ce697ee723fdc3eaf6c457abf4059034be15167022b18b619993802cd1443d5"
|
||||||
|
dependencies = [
|
||||||
|
"async-trait",
|
||||||
|
"bytes",
|
||||||
|
"futures",
|
||||||
|
"object_store",
|
||||||
|
"opendal",
|
||||||
|
"pin-project",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "once_cell"
|
name = "once_cell"
|
||||||
version = "1.21.3"
|
version = "1.21.3"
|
||||||
@@ -5255,6 +5320,33 @@ dependencies = [
|
|||||||
"pkg-config",
|
"pkg-config",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opendal"
|
||||||
|
version = "0.54.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"backon",
|
||||||
|
"base64 0.22.1",
|
||||||
|
"bytes",
|
||||||
|
"chrono",
|
||||||
|
"futures",
|
||||||
|
"getrandom 0.2.16",
|
||||||
|
"http 1.3.1",
|
||||||
|
"http-body 1.0.1",
|
||||||
|
"log",
|
||||||
|
"md-5",
|
||||||
|
"percent-encoding",
|
||||||
|
"quick-xml",
|
||||||
|
"reqsign",
|
||||||
|
"reqwest",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"tokio",
|
||||||
|
"uuid",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl-probe"
|
name = "openssl-probe"
|
||||||
version = "0.1.6"
|
version = "0.1.6"
|
||||||
@@ -6460,6 +6552,33 @@ version = "1.9.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
|
checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "reqsign"
|
||||||
|
version = "0.16.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"async-trait",
|
||||||
|
"base64 0.22.1",
|
||||||
|
"chrono",
|
||||||
|
"form_urlencoded",
|
||||||
|
"getrandom 0.2.16",
|
||||||
|
"hex",
|
||||||
|
"hmac",
|
||||||
|
"home",
|
||||||
|
"http 1.3.1",
|
||||||
|
"log",
|
||||||
|
"once_cell",
|
||||||
|
"percent-encoding",
|
||||||
|
"rand 0.8.5",
|
||||||
|
"reqwest",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"sha1",
|
||||||
|
"sha2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqwest"
|
name = "reqwest"
|
||||||
version = "0.12.20"
|
version = "0.12.20"
|
||||||
@@ -7732,16 +7851,18 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio"
|
name = "tokio"
|
||||||
version = "1.45.1"
|
version = "1.46.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
|
checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"backtrace",
|
"backtrace",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
"io-uring",
|
||||||
"libc",
|
"libc",
|
||||||
"mio",
|
"mio",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"signal-hook-registry",
|
"signal-hook-registry",
|
||||||
|
"slab",
|
||||||
"socket2",
|
"socket2",
|
||||||
"tokio-macros",
|
"tokio-macros",
|
||||||
"windows-sys 0.52.0",
|
"windows-sys 0.52.0",
|
||||||
|
|||||||
18
Cargo.toml
18
Cargo.toml
@@ -21,16 +21,14 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.78.0"
|
rust-version = "1.78.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.31.2", "features" = [
|
lance = { "version" = "=0.32.0", "features" = ["dynamodb"] }
|
||||||
"dynamodb",
|
lance-io = "=0.32.0"
|
||||||
], "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
|
lance-index = "=0.32.0"
|
||||||
lance-io = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
|
lance-linalg = "=0.32.0"
|
||||||
lance-index = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
|
lance-table = "=0.32.0"
|
||||||
lance-linalg = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
|
lance-testing = "=0.32.0"
|
||||||
lance-table = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
|
lance-datafusion = "=0.32.0"
|
||||||
lance-testing = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
|
lance-encoding = "=0.32.0"
|
||||||
lance-datafusion = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
|
|
||||||
lance-encoding = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
|
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "55.1", optional = false }
|
arrow = { version = "55.1", optional = false }
|
||||||
arrow-array = "55.1"
|
arrow-array = "55.1"
|
||||||
|
|||||||
84
docs/src/js/classes/Session.md
Normal file
84
docs/src/js/classes/Session.md
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / Session
|
||||||
|
|
||||||
|
# Class: Session
|
||||||
|
|
||||||
|
A session for managing caches and object stores across LanceDB operations.
|
||||||
|
|
||||||
|
Sessions allow you to configure cache sizes for index and metadata caches,
|
||||||
|
which can significantly impact performance for large datasets.
|
||||||
|
|
||||||
|
## Constructors
|
||||||
|
|
||||||
|
### new Session()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a new session with custom cache sizes.
|
||||||
|
|
||||||
|
# Parameters
|
||||||
|
|
||||||
|
- `indexCacheSizeBytes`: The size of the index cache in bytes.
|
||||||
|
Defaults to 6GB if not specified.
|
||||||
|
- `metadataCacheSizeBytes`: The size of the metadata cache in bytes.
|
||||||
|
Defaults to 1GB if not specified.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **indexCacheSizeBytes?**: `null` \| `bigint`
|
||||||
|
|
||||||
|
* **metadataCacheSizeBytes?**: `null` \| `bigint`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Session`](Session.md)
|
||||||
|
|
||||||
|
## Methods
|
||||||
|
|
||||||
|
### approxNumItems()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
approxNumItems(): number
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the approximate number of items cached in the session.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`number`
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### sizeBytes()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
sizeBytes(): bigint
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the current size of the session caches in bytes.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`bigint`
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### default()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
static default(): Session
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a session with default cache sizes.
|
||||||
|
|
||||||
|
This is equivalent to creating a session with 6GB index cache
|
||||||
|
and 1GB metadata cache.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Session`](Session.md)
|
||||||
@@ -6,10 +6,13 @@
|
|||||||
|
|
||||||
# Function: connect()
|
# Function: connect()
|
||||||
|
|
||||||
## connect(uri, options)
|
## connect(uri, options, session)
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
function connect(uri, options?): Promise<Connection>
|
function connect(
|
||||||
|
uri,
|
||||||
|
options?,
|
||||||
|
session?): Promise<Connection>
|
||||||
```
|
```
|
||||||
|
|
||||||
Connect to a LanceDB instance at the given URI.
|
Connect to a LanceDB instance at the given URI.
|
||||||
@@ -29,6 +32,8 @@ Accepted formats:
|
|||||||
* **options?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)>
|
* **options?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)>
|
||||||
The options to use when connecting to the database
|
The options to use when connecting to the database
|
||||||
|
|
||||||
|
* **session?**: [`Session`](../classes/Session.md)
|
||||||
|
|
||||||
### Returns
|
### Returns
|
||||||
|
|
||||||
`Promise`<[`Connection`](../classes/Connection.md)>
|
`Promise`<[`Connection`](../classes/Connection.md)>
|
||||||
@@ -77,7 +82,7 @@ Accepted formats:
|
|||||||
|
|
||||||
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
|
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
|
||||||
|
|
||||||
### Example
|
### Examples
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
const conn = await connect({
|
const conn = await connect({
|
||||||
@@ -85,3 +90,11 @@ const conn = await connect({
|
|||||||
storageOptions: {timeout: "60s"}
|
storageOptions: {timeout: "60s"}
|
||||||
});
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const session = Session.default();
|
||||||
|
const conn = await connect({
|
||||||
|
uri: "/path/to/database",
|
||||||
|
session: session
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|||||||
@@ -29,6 +29,7 @@
|
|||||||
- [Query](classes/Query.md)
|
- [Query](classes/Query.md)
|
||||||
- [QueryBase](classes/QueryBase.md)
|
- [QueryBase](classes/QueryBase.md)
|
||||||
- [RecordBatchIterator](classes/RecordBatchIterator.md)
|
- [RecordBatchIterator](classes/RecordBatchIterator.md)
|
||||||
|
- [Session](classes/Session.md)
|
||||||
- [Table](classes/Table.md)
|
- [Table](classes/Table.md)
|
||||||
- [TagContents](classes/TagContents.md)
|
- [TagContents](classes/TagContents.md)
|
||||||
- [Tags](classes/Tags.md)
|
- [Tags](classes/Tags.md)
|
||||||
|
|||||||
@@ -70,6 +70,17 @@ Defaults to 'us-east-1'.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### session?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional session: Session;
|
||||||
|
```
|
||||||
|
|
||||||
|
(For LanceDB OSS only): the session to use for this connection. Holds
|
||||||
|
shared caches and other session-specific state.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### storageOptions?
|
### storageOptions?
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
### indexCacheSize?
|
### ~~indexCacheSize?~~
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
optional indexCacheSize: number;
|
optional indexCacheSize: number;
|
||||||
@@ -16,6 +16,11 @@ optional indexCacheSize: number;
|
|||||||
|
|
||||||
Set the size of the index cache, specified as a number of entries
|
Set the size of the index cache, specified as a number of entries
|
||||||
|
|
||||||
|
#### Deprecated
|
||||||
|
|
||||||
|
Use session-level cache configuration instead.
|
||||||
|
Create a Session with custom cache sizes and pass it to the connect() function.
|
||||||
|
|
||||||
The exact meaning of an "entry" will depend on the type of index:
|
The exact meaning of an "entry" will depend on the type of index:
|
||||||
- IVF: there is one entry for each IVF partition
|
- IVF: there is one entry for each IVF partition
|
||||||
- BTREE: there is one entry for the entire index
|
- BTREE: there is one entry for the entire index
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ lancedb = { path = "../../../rust/lancedb" }
|
|||||||
lance = { workspace = true }
|
lance = { workspace = true }
|
||||||
arrow = { workspace = true, features = ["ffi"] }
|
arrow = { workspace = true, features = ["ffi"] }
|
||||||
arrow-schema.workspace = true
|
arrow-schema.workspace = true
|
||||||
tokio = "1.23"
|
tokio = "1.46"
|
||||||
jni = "0.21.1"
|
jni = "0.21.1"
|
||||||
snafu.workspace = true
|
snafu.workspace = true
|
||||||
lazy_static.workspace = true
|
lazy_static.workspace = true
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.21.2-beta.0</version>
|
<version>0.21.2-beta.1</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.21.2-beta.0</version>
|
<version>0.21.2-beta.1</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.21.2-beta.0</version>
|
<version>0.21.2-beta.1</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>${project.artifactId}</name>
|
<name>${project.artifactId}</name>
|
||||||
<description>LanceDB Java SDK Parent POM</description>
|
<description>LanceDB Java SDK Parent POM</description>
|
||||||
|
|||||||
49
node/package-lock.json
generated
49
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,11 +52,11 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.1",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.1",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.1"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
@@ -327,65 +327,60 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2-beta.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2-beta.1.tgz",
|
||||||
"integrity": "sha512-RiYqpKuq9v8A4wFuHt1iPNFYjWJ1KgGFLJwQO4ajp9Hee84sDHq8mP0ATgMcc24hiaOUQ1lRRTULjGbHn4NIYw==",
|
"integrity": "sha512-7QXVJNTei7PMuXRyyc+F3WGiudRNq9HfeOaMmMOJJpuCAO0zLq1pM9DCl5aPF5MddrodPHJxi+IWV+iAFH7zcg==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
"license": "Apache-2.0",
|
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"os": [
|
"os": [
|
||||||
"darwin"
|
"darwin"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2-beta.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2-beta.1.tgz",
|
||||||
"integrity": "sha512-togdP0YIjMYg/hBRMMxW434i5VB789JWU5o3hWrodbX8olEc0Txqw5Dg9CgIOldBIiCti6uTSQiTo6uldZon1w==",
|
"integrity": "sha512-M/TWcJ3WVc6DNFgG/lWI7L5tQ05IF3WoWuZfRfbbimGhRvY7xf1O3uOt+jMcNJCa5mHFGCg2SZDA8mebd/mL7g==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
"license": "Apache-2.0",
|
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"os": [
|
"os": [
|
||||||
"darwin"
|
"darwin"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2-beta.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2-beta.1.tgz",
|
||||||
"integrity": "sha512-ErS4IQDQVTYVATPeOj/dZXQR34eZQ5rAXm3vJdQi5K6X4zCDaIjOhpmnwzPBGT9W1idaBAoDJhtNfsFaJ6/PQQ==",
|
"integrity": "sha512-OEsM9znf9DDmdwGuTg2EVu+ebwuWQ1lCx0cYy4+hNy3ntolwMC39ePg2H9WD9SsEnQ2vcGJgBJTQLPKgXww+iQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
"license": "Apache-2.0",
|
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"os": [
|
"os": [
|
||||||
"linux"
|
"linux"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2-beta.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2-beta.1.tgz",
|
||||||
"integrity": "sha512-ycDpyBGbfxtnGGa/RQo5+So6dHALiem1pbYc/LDKKluUJpadtXtEwC61o6hZTcejoYjhEE8ET7vA3OCEJfMFaw==",
|
"integrity": "sha512-7FTq/O1zNzD71rgX2PEVmkct4jk2wc+ADU3rss+0VqoBSO9XeMqZEVD2WgZWuSTg6bYai//FHGDHSaknHBNsdw==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
"license": "Apache-2.0",
|
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"os": [
|
"os": [
|
||||||
"linux"
|
"linux"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2-beta.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2-beta.1.tgz",
|
||||||
"integrity": "sha512-IgVkAP/LiNIQD5P6n/9x3bgQOt5pGJarjtSF8r+ialD95QHmo6tcxrwTy/DlA+H1uI6B6h+sbN0c1KXTh1rYcg==",
|
"integrity": "sha512-mN1p/J0kdqy6MrlKtmA8set/PibqFPyytQJFAuxSLXC/rwD7vgqUCt0SI0zVWPGG7J5Y65kvdc99l7Yl7lJtwQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
"license": "Apache-2.0",
|
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"private": false,
|
"private": false,
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
@@ -89,10 +89,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.1",
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.1",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ describe('LanceDB Mirrored Store Integration test', function () {
|
|||||||
it('s3://...?mirroredStore=... param is processed correctly', async function () {
|
it('s3://...?mirroredStore=... param is processed correctly', async function () {
|
||||||
this.timeout(600000)
|
this.timeout(600000)
|
||||||
|
|
||||||
const dir = tmpdir()
|
const dir = await fs.promises.mkdtemp(path.join(tmpdir(), 'lancedb-mirror-'))
|
||||||
console.log(dir)
|
console.log(dir)
|
||||||
const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
|
const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
|
||||||
const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
|
const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
|
||||||
@@ -63,118 +63,93 @@ describe('LanceDB Mirrored Store Integration test', function () {
|
|||||||
const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
|
const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
|
||||||
|
|
||||||
const mirroredPath = path.join(dir, `${tableName}.lance`)
|
const mirroredPath = path.join(dir, `${tableName}.lance`)
|
||||||
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
|
|
||||||
if (err != null) throw err
|
|
||||||
// there should be three dirs
|
|
||||||
assert.equal(files.length, 3)
|
|
||||||
assert.isTrue(files[0].isDirectory())
|
|
||||||
assert.isTrue(files[1].isDirectory())
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
|
const files = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
|
||||||
if (err != null) throw err
|
// there should be three dirs
|
||||||
assert.equal(files.length, 1)
|
assert.equal(files.length, 3, 'files after table creation')
|
||||||
assert.isTrue(files[0].name.endsWith('.txn'))
|
assert.isTrue(files[0].isDirectory())
|
||||||
})
|
assert.isTrue(files[1].isDirectory())
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => {
|
const transactionFiles = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
assert.equal(transactionFiles.length, 1, 'transactionFiles after table creation')
|
||||||
assert.equal(files.length, 1)
|
assert.isTrue(transactionFiles[0].name.endsWith('.txn'))
|
||||||
assert.isTrue(files[0].name.endsWith('.manifest'))
|
|
||||||
})
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
|
const versionFiles = await fs.promises.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
assert.equal(versionFiles.length, 1, 'versionFiles after table creation')
|
||||||
assert.equal(files.length, 1)
|
assert.isTrue(versionFiles[0].name.endsWith('.manifest'))
|
||||||
assert.isTrue(files[0].name.endsWith('.lance'))
|
|
||||||
})
|
const dataFiles = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
|
||||||
})
|
assert.equal(dataFiles.length, 1, 'dataFiles after table creation')
|
||||||
|
assert.isTrue(dataFiles[0].name.endsWith('.lance'))
|
||||||
|
|
||||||
// try create index and check if it's mirrored
|
// try create index and check if it's mirrored
|
||||||
await t.createIndex({ column: 'vector', type: 'ivf_pq' })
|
await t.createIndex({ column: 'vector', type: 'ivf_pq' })
|
||||||
|
|
||||||
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
|
const filesAfterIndex = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
|
||||||
if (err != null) throw err
|
// there should be four dirs
|
||||||
// there should be four dirs
|
assert.equal(filesAfterIndex.length, 4, 'filesAfterIndex')
|
||||||
assert.equal(files.length, 4)
|
assert.isTrue(filesAfterIndex[0].isDirectory())
|
||||||
assert.isTrue(files[0].isDirectory())
|
assert.isTrue(filesAfterIndex[1].isDirectory())
|
||||||
assert.isTrue(files[1].isDirectory())
|
assert.isTrue(filesAfterIndex[2].isDirectory())
|
||||||
assert.isTrue(files[2].isDirectory())
|
|
||||||
|
|
||||||
// Two TXs now
|
// Two TXs now
|
||||||
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
|
const transactionFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
assert.equal(transactionFilesAfterIndex.length, 2, 'transactionFilesAfterIndex')
|
||||||
assert.equal(files.length, 2)
|
assert.isTrue(transactionFilesAfterIndex[0].name.endsWith('.txn'))
|
||||||
assert.isTrue(files[0].name.endsWith('.txn'))
|
assert.isTrue(transactionFilesAfterIndex[1].name.endsWith('.txn'))
|
||||||
assert.isTrue(files[1].name.endsWith('.txn'))
|
|
||||||
})
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
|
const dataFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
assert.equal(dataFilesAfterIndex.length, 1, 'dataFilesAfterIndex')
|
||||||
assert.equal(files.length, 1)
|
assert.isTrue(dataFilesAfterIndex[0].name.endsWith('.lance'))
|
||||||
assert.isTrue(files[0].name.endsWith('.lance'))
|
|
||||||
})
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
|
const indicesFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
assert.equal(indicesFiles.length, 1, 'indicesFiles')
|
||||||
assert.equal(files.length, 1)
|
assert.isTrue(indicesFiles[0].isDirectory())
|
||||||
assert.isTrue(files[0].isDirectory())
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
|
const indexFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFiles[0].name), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
console.log(`DEBUG indexFiles in ${indicesFiles[0].name}:`, indexFiles.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
|
||||||
|
assert.equal(indexFiles.length, 2, 'indexFiles')
|
||||||
assert.equal(files.length, 1)
|
const fileNames = indexFiles.map(f => f.name).sort()
|
||||||
assert.isTrue(files[0].isFile())
|
assert.isTrue(fileNames.includes('auxiliary.idx'), 'auxiliary.idx should be present')
|
||||||
assert.isTrue(files[0].name.endsWith('.idx'))
|
assert.isTrue(fileNames.includes('index.idx'), 'index.idx should be present')
|
||||||
})
|
assert.isTrue(indexFiles.every(f => f.isFile()), 'all index files should be files')
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// try delete and check if it's mirrored
|
// try delete and check if it's mirrored
|
||||||
await t.delete('id = 0')
|
await t.delete('id = 0')
|
||||||
|
|
||||||
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
|
const filesAfterDelete = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
|
||||||
if (err != null) throw err
|
// there should be five dirs
|
||||||
// there should be five dirs
|
assert.equal(filesAfterDelete.length, 5, 'filesAfterDelete')
|
||||||
assert.equal(files.length, 5)
|
assert.isTrue(filesAfterDelete[0].isDirectory())
|
||||||
assert.isTrue(files[0].isDirectory())
|
assert.isTrue(filesAfterDelete[1].isDirectory())
|
||||||
assert.isTrue(files[1].isDirectory())
|
assert.isTrue(filesAfterDelete[2].isDirectory())
|
||||||
assert.isTrue(files[2].isDirectory())
|
assert.isTrue(filesAfterDelete[3].isDirectory())
|
||||||
assert.isTrue(files[3].isDirectory())
|
assert.isTrue(filesAfterDelete[4].isDirectory())
|
||||||
assert.isTrue(files[4].isDirectory())
|
|
||||||
|
|
||||||
// Three TXs now
|
// Three TXs now
|
||||||
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
|
const transactionFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
assert.equal(transactionFilesAfterDelete.length, 3, 'transactionFilesAfterDelete')
|
||||||
assert.equal(files.length, 3)
|
assert.isTrue(transactionFilesAfterDelete[0].name.endsWith('.txn'))
|
||||||
assert.isTrue(files[0].name.endsWith('.txn'))
|
assert.isTrue(transactionFilesAfterDelete[1].name.endsWith('.txn'))
|
||||||
assert.isTrue(files[1].name.endsWith('.txn'))
|
|
||||||
})
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
|
const dataFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
assert.equal(dataFilesAfterDelete.length, 1, 'dataFilesAfterDelete')
|
||||||
assert.equal(files.length, 1)
|
assert.isTrue(dataFilesAfterDelete[0].name.endsWith('.lance'))
|
||||||
assert.isTrue(files[0].name.endsWith('.lance'))
|
|
||||||
})
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
|
const indicesFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
assert.equal(indicesFilesAfterDelete.length, 1, 'indicesFilesAfterDelete')
|
||||||
assert.equal(files.length, 1)
|
assert.isTrue(indicesFilesAfterDelete[0].isDirectory())
|
||||||
assert.isTrue(files[0].isDirectory())
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
|
const indexFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFilesAfterDelete[0].name), { withFileTypes: true })
|
||||||
if (err != null) throw err
|
console.log(`DEBUG indexFilesAfterDelete in ${indicesFilesAfterDelete[0].name}:`, indexFilesAfterDelete.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
|
||||||
|
assert.equal(indexFilesAfterDelete.length, 2, 'indexFilesAfterDelete')
|
||||||
|
const fileNamesAfterDelete = indexFilesAfterDelete.map(f => f.name).sort()
|
||||||
|
assert.isTrue(fileNamesAfterDelete.includes('auxiliary.idx'), 'auxiliary.idx should be present after delete')
|
||||||
|
assert.isTrue(fileNamesAfterDelete.includes('index.idx'), 'index.idx should be present after delete')
|
||||||
|
assert.isTrue(indexFilesAfterDelete.every(f => f.isFile()), 'all index files should be files after delete')
|
||||||
|
|
||||||
assert.equal(files.length, 1)
|
const deletionFiles = await fs.promises.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true })
|
||||||
assert.isTrue(files[0].isFile())
|
assert.equal(deletionFiles.length, 1, 'deletionFiles')
|
||||||
assert.isTrue(files[0].name.endsWith('.idx'))
|
assert.isTrue(deletionFiles[0].name.endsWith('.arrow'))
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
fs.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true }, (err, files) => {
|
|
||||||
if (err != null) throw err
|
|
||||||
assert.equal(files.length, 1)
|
|
||||||
assert.isTrue(files[0].name.endsWith('.arrow'))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
13
nodejs/CLAUDE.md
Normal file
13
nodejs/CLAUDE.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
These are the typescript bindings of LanceDB.
|
||||||
|
The core Rust library is in the `../rust/lancedb` directory, the rust binding
|
||||||
|
code is in the `src/` directory and the typescript bindings are in
|
||||||
|
the `lancedb/` directory.
|
||||||
|
|
||||||
|
Whenever you change the Rust code, you will need to recompile: `npm run build`.
|
||||||
|
|
||||||
|
Common commands:
|
||||||
|
* Build: `npm run build`
|
||||||
|
* Lint: `npm run lint`
|
||||||
|
* Fix lints: `npm run lint-fix`
|
||||||
|
* Test: `npm test`
|
||||||
|
* Run single test file: `npm test __test__/arrow.test.ts`
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.21.2-beta.0"
|
version = "0.21.2-beta.1"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
46
nodejs/__test__/session.test.ts
Normal file
46
nodejs/__test__/session.test.ts
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
import * as tmp from "tmp";
|
||||||
|
import { Session, connect } from "../lancedb";
|
||||||
|
|
||||||
|
describe("Session", () => {
|
||||||
|
let tmpDir: tmp.DirResult;
|
||||||
|
beforeEach(() => {
|
||||||
|
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||||
|
});
|
||||||
|
afterEach(() => tmpDir.removeCallback());
|
||||||
|
|
||||||
|
it("should configure cache sizes and work with database operations", async () => {
|
||||||
|
// Create session with small cache limits for testing
|
||||||
|
const indexCacheSize = BigInt(1024 * 1024); // 1MB
|
||||||
|
const metadataCacheSize = BigInt(512 * 1024); // 512KB
|
||||||
|
|
||||||
|
const session = new Session(indexCacheSize, metadataCacheSize);
|
||||||
|
|
||||||
|
// Record initial cache state
|
||||||
|
const initialCacheSize = session.sizeBytes();
|
||||||
|
const initialCacheItems = session.approxNumItems();
|
||||||
|
|
||||||
|
// Test session works with database connection
|
||||||
|
const db = await connect({ uri: tmpDir.name, session: session });
|
||||||
|
|
||||||
|
// Create and use a table to exercise the session
|
||||||
|
const data = Array.from({ length: 100 }, (_, i) => ({
|
||||||
|
id: i,
|
||||||
|
text: `item ${i}`,
|
||||||
|
}));
|
||||||
|
const table = await db.createTable("test", data);
|
||||||
|
const results = await table.query().limit(5).toArray();
|
||||||
|
|
||||||
|
expect(results).toHaveLength(5);
|
||||||
|
|
||||||
|
// Verify cache usage increased after operations
|
||||||
|
const finalCacheSize = session.sizeBytes();
|
||||||
|
const finalCacheItems = session.approxNumItems();
|
||||||
|
|
||||||
|
expect(finalCacheSize).toBeGreaterThan(initialCacheSize); // Cache should have grown
|
||||||
|
expect(finalCacheItems).toBeGreaterThanOrEqual(initialCacheItems); // Items should not decrease
|
||||||
|
expect(initialCacheSize).toBeLessThan(indexCacheSize + metadataCacheSize); // Within limits
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -85,6 +85,9 @@ export interface OpenTableOptions {
|
|||||||
/**
|
/**
|
||||||
* Set the size of the index cache, specified as a number of entries
|
* Set the size of the index cache, specified as a number of entries
|
||||||
*
|
*
|
||||||
|
* @deprecated Use session-level cache configuration instead.
|
||||||
|
* Create a Session with custom cache sizes and pass it to the connect() function.
|
||||||
|
*
|
||||||
* The exact meaning of an "entry" will depend on the type of index:
|
* The exact meaning of an "entry" will depend on the type of index:
|
||||||
* - IVF: there is one entry for each IVF partition
|
* - IVF: there is one entry for each IVF partition
|
||||||
* - BTREE: there is one entry for the entire index
|
* - BTREE: there is one entry for the entire index
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import {
|
|||||||
import {
|
import {
|
||||||
ConnectionOptions,
|
ConnectionOptions,
|
||||||
Connection as LanceDbConnection,
|
Connection as LanceDbConnection,
|
||||||
|
Session,
|
||||||
} from "./native.js";
|
} from "./native.js";
|
||||||
|
|
||||||
export {
|
export {
|
||||||
@@ -51,6 +52,8 @@ export {
|
|||||||
OpenTableOptions,
|
OpenTableOptions,
|
||||||
} from "./connection";
|
} from "./connection";
|
||||||
|
|
||||||
|
export { Session } from "./native.js";
|
||||||
|
|
||||||
export {
|
export {
|
||||||
ExecutableQuery,
|
ExecutableQuery,
|
||||||
Query,
|
Query,
|
||||||
@@ -131,6 +134,7 @@ export { IntoSql, packBits } from "./util";
|
|||||||
export async function connect(
|
export async function connect(
|
||||||
uri: string,
|
uri: string,
|
||||||
options?: Partial<ConnectionOptions>,
|
options?: Partial<ConnectionOptions>,
|
||||||
|
session?: Session,
|
||||||
): Promise<Connection>;
|
): Promise<Connection>;
|
||||||
/**
|
/**
|
||||||
* Connect to a LanceDB instance at the given URI.
|
* Connect to a LanceDB instance at the given URI.
|
||||||
@@ -149,31 +153,43 @@ export async function connect(
|
|||||||
* storageOptions: {timeout: "60s"}
|
* storageOptions: {timeout: "60s"}
|
||||||
* });
|
* });
|
||||||
* ```
|
* ```
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* ```ts
|
||||||
|
* const session = Session.default();
|
||||||
|
* const conn = await connect({
|
||||||
|
* uri: "/path/to/database",
|
||||||
|
* session: session
|
||||||
|
* });
|
||||||
|
* ```
|
||||||
*/
|
*/
|
||||||
export async function connect(
|
export async function connect(
|
||||||
options: Partial<ConnectionOptions> & { uri: string },
|
options: Partial<ConnectionOptions> & { uri: string },
|
||||||
): Promise<Connection>;
|
): Promise<Connection>;
|
||||||
export async function connect(
|
export async function connect(
|
||||||
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
|
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
|
||||||
options: Partial<ConnectionOptions> = {},
|
options?: Partial<ConnectionOptions>,
|
||||||
): Promise<Connection> {
|
): Promise<Connection> {
|
||||||
let uri: string | undefined;
|
let uri: string | undefined;
|
||||||
|
let finalOptions: Partial<ConnectionOptions> = {};
|
||||||
|
|
||||||
if (typeof uriOrOptions !== "string") {
|
if (typeof uriOrOptions !== "string") {
|
||||||
const { uri: uri_, ...opts } = uriOrOptions;
|
const { uri: uri_, ...opts } = uriOrOptions;
|
||||||
uri = uri_;
|
uri = uri_;
|
||||||
options = opts;
|
finalOptions = opts;
|
||||||
} else {
|
} else {
|
||||||
uri = uriOrOptions;
|
uri = uriOrOptions;
|
||||||
|
finalOptions = options || {};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!uri) {
|
if (!uri) {
|
||||||
throw new Error("uri is required");
|
throw new Error("uri is required");
|
||||||
}
|
}
|
||||||
|
|
||||||
options = (options as ConnectionOptions) ?? {};
|
finalOptions = (finalOptions as ConnectionOptions) ?? {};
|
||||||
(<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
|
(<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions(
|
||||||
(<ConnectionOptions>options).storageOptions,
|
(<ConnectionOptions>finalOptions).storageOptions,
|
||||||
);
|
);
|
||||||
const nativeConn = await LanceDbConnection.new(uri, options);
|
const nativeConn = await LanceDbConnection.new(uri, finalOptions);
|
||||||
return new LocalConnection(nativeConn);
|
return new LocalConnection(nativeConn);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"private": false,
|
"private": false,
|
||||||
"version": "0.21.2-beta.0",
|
"version": "0.21.2-beta.1",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -74,6 +74,10 @@ impl Connection {
|
|||||||
builder = builder.host_override(&host_override);
|
builder = builder.host_override(&host_override);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(session) = options.session {
|
||||||
|
builder = builder.session(session.inner.clone());
|
||||||
|
}
|
||||||
|
|
||||||
Ok(Self::inner_new(builder.execute().await.default_error()?))
|
Ok(Self::inner_new(builder.execute().await.default_error()?))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ pub mod merge;
|
|||||||
mod query;
|
mod query;
|
||||||
pub mod remote;
|
pub mod remote;
|
||||||
mod rerankers;
|
mod rerankers;
|
||||||
|
mod session;
|
||||||
mod table;
|
mod table;
|
||||||
mod util;
|
mod util;
|
||||||
|
|
||||||
@@ -34,6 +35,9 @@ pub struct ConnectionOptions {
|
|||||||
///
|
///
|
||||||
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||||
pub storage_options: Option<HashMap<String, String>>,
|
pub storage_options: Option<HashMap<String, String>>,
|
||||||
|
/// (For LanceDB OSS only): the session to use for this connection. Holds
|
||||||
|
/// shared caches and other session-specific state.
|
||||||
|
pub session: Option<session::Session>,
|
||||||
|
|
||||||
/// (For LanceDB cloud only): configuration for the remote HTTP client.
|
/// (For LanceDB cloud only): configuration for the remote HTTP client.
|
||||||
pub client_config: Option<remote::ClientConfig>,
|
pub client_config: Option<remote::ClientConfig>,
|
||||||
|
|||||||
102
nodejs/src/session.rs
Normal file
102
nodejs/src/session.rs
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
|
||||||
|
use napi::bindgen_prelude::*;
|
||||||
|
use napi_derive::*;
|
||||||
|
|
||||||
|
/// A session for managing caches and object stores across LanceDB operations.
|
||||||
|
///
|
||||||
|
/// Sessions allow you to configure cache sizes for index and metadata caches,
|
||||||
|
/// which can significantly impact memory use and performance. They can
|
||||||
|
/// also be re-used across multiple connections to share the same cache state.
|
||||||
|
#[napi]
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Session {
|
||||||
|
pub(crate) inner: Arc<LanceSession>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for Session {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("Session")
|
||||||
|
.field("size_bytes", &self.inner.size_bytes())
|
||||||
|
.field("approx_num_items", &self.inner.approx_num_items())
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
impl Session {
|
||||||
|
/// Create a new session with custom cache sizes.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// - `index_cache_size_bytes`: The size of the index cache in bytes.
|
||||||
|
/// Index data is stored in memory in this cache to speed up queries.
|
||||||
|
/// Defaults to 6GB if not specified.
|
||||||
|
/// - `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
|
||||||
|
/// The metadata cache stores file metadata and schema information in memory.
|
||||||
|
/// This cache improves scan and write performance.
|
||||||
|
/// Defaults to 1GB if not specified.
|
||||||
|
#[napi(constructor)]
|
||||||
|
pub fn new(
|
||||||
|
index_cache_size_bytes: Option<BigInt>,
|
||||||
|
metadata_cache_size_bytes: Option<BigInt>,
|
||||||
|
) -> napi::Result<Self> {
|
||||||
|
let index_cache_size = index_cache_size_bytes
|
||||||
|
.map(|size| size.get_u64().1 as usize)
|
||||||
|
.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
|
||||||
|
|
||||||
|
let metadata_cache_size = metadata_cache_size_bytes
|
||||||
|
.map(|size| size.get_u64().1 as usize)
|
||||||
|
.unwrap_or(1024 * 1024 * 1024); // 1GB default
|
||||||
|
|
||||||
|
let session = LanceSession::new(
|
||||||
|
index_cache_size,
|
||||||
|
metadata_cache_size,
|
||||||
|
Arc::new(ObjectStoreRegistry::default()),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
inner: Arc::new(session),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a session with default cache sizes.
|
||||||
|
///
|
||||||
|
/// This is equivalent to creating a session with 6GB index cache
|
||||||
|
/// and 1GB metadata cache.
|
||||||
|
#[napi(factory)]
|
||||||
|
pub fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
inner: Arc::new(LanceSession::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the current size of the session caches in bytes.
|
||||||
|
#[napi]
|
||||||
|
pub fn size_bytes(&self) -> BigInt {
|
||||||
|
BigInt::from(self.inner.size_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the approximate number of items cached in the session.
|
||||||
|
#[napi]
|
||||||
|
pub fn approx_num_items(&self) -> u32 {
|
||||||
|
self.inner.approx_num_items() as u32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Implement FromNapiValue for Session to work with napi(object)
|
||||||
|
impl napi::bindgen_prelude::FromNapiValue for Session {
|
||||||
|
unsafe fn from_napi_value(
|
||||||
|
env: napi::sys::napi_env,
|
||||||
|
napi_val: napi::sys::napi_value,
|
||||||
|
) -> napi::Result<Self> {
|
||||||
|
let object: napi::bindgen_prelude::ClassInstance<Session> =
|
||||||
|
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
|
||||||
|
let copy = object.clone();
|
||||||
|
Ok(copy)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.24.2-beta.1"
|
current_version = "0.24.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
19
python/CLAUDE.md
Normal file
19
python/CLAUDE.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
These are the Python bindings of LanceDB.
|
||||||
|
The core Rust library is in the `../rust/lancedb` directory, the rust binding
|
||||||
|
code is in the `src/` directory and the Python bindings are in the `lancedb/` directory.
|
||||||
|
|
||||||
|
Common commands:
|
||||||
|
|
||||||
|
* Build: `make develop`
|
||||||
|
* Format: `make format`
|
||||||
|
* Lint: `make check`
|
||||||
|
* Fix lints: `make fix`
|
||||||
|
* Test: `make test`
|
||||||
|
* Doc test: `make doctest`
|
||||||
|
|
||||||
|
Before committing changes, run lints and then formatting.
|
||||||
|
|
||||||
|
When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
|
||||||
|
|
||||||
|
When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
|
||||||
|
with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.24.2-beta.1"
|
version = "0.24.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -85,8 +85,8 @@ embeddings = [
|
|||||||
"boto3>=1.28.57",
|
"boto3>=1.28.57",
|
||||||
"awscli>=1.29.57",
|
"awscli>=1.29.57",
|
||||||
"botocore>=1.31.57",
|
"botocore>=1.31.57",
|
||||||
|
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
|
||||||
"ollama>=0.3.0",
|
"ollama>=0.3.0",
|
||||||
"ibm-watsonx-ai>=1.1.2",
|
|
||||||
]
|
]
|
||||||
azure = ["adlfs>=2024.2.0"]
|
azure = ["adlfs>=2024.2.0"]
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ from .remote import ClientConfig
|
|||||||
from .remote.db import RemoteDBConnection
|
from .remote.db import RemoteDBConnection
|
||||||
from .schema import vector
|
from .schema import vector
|
||||||
from .table import AsyncTable
|
from .table import AsyncTable
|
||||||
|
from ._lancedb import Session
|
||||||
|
|
||||||
|
|
||||||
def connect(
|
def connect(
|
||||||
@@ -30,6 +31,7 @@ def connect(
|
|||||||
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
||||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
|
session: Optional[Session] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> DBConnection:
|
) -> DBConnection:
|
||||||
"""Connect to a LanceDB database.
|
"""Connect to a LanceDB database.
|
||||||
@@ -64,6 +66,12 @@ def connect(
|
|||||||
storage_options: dict, optional
|
storage_options: dict, optional
|
||||||
Additional options for the storage backend. See available options at
|
Additional options for the storage backend. See available options at
|
||||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||||
|
session: Session, optional
|
||||||
|
(For LanceDB OSS only)
|
||||||
|
A session to use for this connection. Sessions allow you to configure
|
||||||
|
cache sizes for index and metadata caches, which can significantly
|
||||||
|
impact memory use and performance. They can also be re-used across
|
||||||
|
multiple connections to share the same cache state.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -92,7 +100,7 @@ def connect(
|
|||||||
if api_key is None:
|
if api_key is None:
|
||||||
api_key = os.environ.get("LANCEDB_API_KEY")
|
api_key = os.environ.get("LANCEDB_API_KEY")
|
||||||
if api_key is None:
|
if api_key is None:
|
||||||
raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
|
raise ValueError(f"api_key is required to connect to LanceDB cloud: {uri}")
|
||||||
if isinstance(request_thread_pool, int):
|
if isinstance(request_thread_pool, int):
|
||||||
request_thread_pool = ThreadPoolExecutor(request_thread_pool)
|
request_thread_pool = ThreadPoolExecutor(request_thread_pool)
|
||||||
return RemoteDBConnection(
|
return RemoteDBConnection(
|
||||||
@@ -113,6 +121,7 @@ def connect(
|
|||||||
uri,
|
uri,
|
||||||
read_consistency_interval=read_consistency_interval,
|
read_consistency_interval=read_consistency_interval,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
|
session=session,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -125,6 +134,7 @@ async def connect_async(
|
|||||||
read_consistency_interval: Optional[timedelta] = None,
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
|
session: Optional[Session] = None,
|
||||||
) -> AsyncConnection:
|
) -> AsyncConnection:
|
||||||
"""Connect to a LanceDB database.
|
"""Connect to a LanceDB database.
|
||||||
|
|
||||||
@@ -158,6 +168,12 @@ async def connect_async(
|
|||||||
storage_options: dict, optional
|
storage_options: dict, optional
|
||||||
Additional options for the storage backend. See available options at
|
Additional options for the storage backend. See available options at
|
||||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||||
|
session: Session, optional
|
||||||
|
(For LanceDB OSS only)
|
||||||
|
A session to use for this connection. Sessions allow you to configure
|
||||||
|
cache sizes for index and metadata caches, which can significantly
|
||||||
|
impact memory use and performance. They can also be re-used across
|
||||||
|
multiple connections to share the same cache state.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -197,6 +213,7 @@ async def connect_async(
|
|||||||
read_consistency_interval_secs,
|
read_consistency_interval_secs,
|
||||||
client_config,
|
client_config,
|
||||||
storage_options,
|
storage_options,
|
||||||
|
session,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -212,6 +229,7 @@ __all__ = [
|
|||||||
"DBConnection",
|
"DBConnection",
|
||||||
"LanceDBConnection",
|
"LanceDBConnection",
|
||||||
"RemoteDBConnection",
|
"RemoteDBConnection",
|
||||||
|
"Session",
|
||||||
"__version__",
|
"__version__",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,19 @@ import pyarrow as pa
|
|||||||
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
||||||
from .remote import ClientConfig
|
from .remote import ClientConfig
|
||||||
|
|
||||||
|
class Session:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
index_cache_size_bytes: Optional[int] = None,
|
||||||
|
metadata_cache_size_bytes: Optional[int] = None,
|
||||||
|
): ...
|
||||||
|
@staticmethod
|
||||||
|
def default() -> "Session": ...
|
||||||
|
@property
|
||||||
|
def size_bytes(self) -> int: ...
|
||||||
|
@property
|
||||||
|
def approx_num_items(self) -> int: ...
|
||||||
|
|
||||||
class Connection(object):
|
class Connection(object):
|
||||||
uri: str
|
uri: str
|
||||||
async def table_names(
|
async def table_names(
|
||||||
@@ -89,6 +102,7 @@ async def connect(
|
|||||||
read_consistency_interval: Optional[float],
|
read_consistency_interval: Optional[float],
|
||||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
||||||
storage_options: Optional[Dict[str, str]],
|
storage_options: Optional[Dict[str, str]],
|
||||||
|
session: Optional[Session],
|
||||||
) -> Connection: ...
|
) -> Connection: ...
|
||||||
|
|
||||||
class RecordBatchStream:
|
class RecordBatchStream:
|
||||||
|
|||||||
@@ -94,9 +94,9 @@ def data_to_reader(
|
|||||||
else:
|
else:
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
f"Unknown data type {type(data)}. "
|
f"Unknown data type {type(data)}. "
|
||||||
"Please check "
|
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||||
"https://lancedb.github.io/lance/read_and_write.html "
|
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||||
"to see supported types."
|
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ if TYPE_CHECKING:
|
|||||||
from ._lancedb import Connection as LanceDbConnection
|
from ._lancedb import Connection as LanceDbConnection
|
||||||
from .common import DATA, URI
|
from .common import DATA, URI
|
||||||
from .embeddings import EmbeddingFunctionConfig
|
from .embeddings import EmbeddingFunctionConfig
|
||||||
|
from ._lancedb import Session
|
||||||
|
|
||||||
|
|
||||||
class DBConnection(EnforceOverrides):
|
class DBConnection(EnforceOverrides):
|
||||||
@@ -247,6 +248,9 @@ class DBConnection(EnforceOverrides):
|
|||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
index_cache_size: int, default 256
|
index_cache_size: int, default 256
|
||||||
|
**Deprecated**: Use session-level cache configuration instead.
|
||||||
|
Create a Session with custom cache sizes and pass it to lancedb.connect().
|
||||||
|
|
||||||
Set the size of the index cache, specified as a number of entries
|
Set the size of the index cache, specified as a number of entries
|
||||||
|
|
||||||
The exact meaning of an "entry" will depend on the type of index:
|
The exact meaning of an "entry" will depend on the type of index:
|
||||||
@@ -354,6 +358,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
*,
|
*,
|
||||||
read_consistency_interval: Optional[timedelta] = None,
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
|
session: Optional[Session] = None,
|
||||||
):
|
):
|
||||||
if not isinstance(uri, Path):
|
if not isinstance(uri, Path):
|
||||||
scheme = get_uri_scheme(uri)
|
scheme = get_uri_scheme(uri)
|
||||||
@@ -367,6 +372,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
self._entered = False
|
self._entered = False
|
||||||
self.read_consistency_interval = read_consistency_interval
|
self.read_consistency_interval = read_consistency_interval
|
||||||
self.storage_options = storage_options
|
self.storage_options = storage_options
|
||||||
|
self.session = session
|
||||||
|
|
||||||
if read_consistency_interval is not None:
|
if read_consistency_interval is not None:
|
||||||
read_consistency_interval_secs = read_consistency_interval.total_seconds()
|
read_consistency_interval_secs = read_consistency_interval.total_seconds()
|
||||||
@@ -382,6 +388,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
read_consistency_interval_secs,
|
read_consistency_interval_secs,
|
||||||
None,
|
None,
|
||||||
storage_options,
|
storage_options,
|
||||||
|
session,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||||
@@ -475,6 +482,17 @@ class LanceDBConnection(DBConnection):
|
|||||||
-------
|
-------
|
||||||
A LanceTable object representing the table.
|
A LanceTable object representing the table.
|
||||||
"""
|
"""
|
||||||
|
if index_cache_size is not None:
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
warnings.warn(
|
||||||
|
"index_cache_size is deprecated. Use session-level cache "
|
||||||
|
"configuration instead. Create a Session with custom cache sizes "
|
||||||
|
"and pass it to lancedb.connect().",
|
||||||
|
DeprecationWarning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
return LanceTable.open(
|
return LanceTable.open(
|
||||||
self,
|
self,
|
||||||
name,
|
name,
|
||||||
@@ -820,6 +838,9 @@ class AsyncConnection(object):
|
|||||||
See available options at
|
See available options at
|
||||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||||
index_cache_size: int, default 256
|
index_cache_size: int, default 256
|
||||||
|
**Deprecated**: Use session-level cache configuration instead.
|
||||||
|
Create a Session with custom cache sizes and pass it to lancedb.connect().
|
||||||
|
|
||||||
Set the size of the index cache, specified as a number of entries
|
Set the size of the index cache, specified as a number of entries
|
||||||
|
|
||||||
The exact meaning of an "entry" will depend on the type of index:
|
The exact meaning of an "entry" will depend on the type of index:
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from .instructor import InstructorEmbeddingFunction
|
|||||||
from .ollama import OllamaEmbeddings
|
from .ollama import OllamaEmbeddings
|
||||||
from .open_clip import OpenClipEmbeddings
|
from .open_clip import OpenClipEmbeddings
|
||||||
from .openai import OpenAIEmbeddings
|
from .openai import OpenAIEmbeddings
|
||||||
from .registry import EmbeddingFunctionRegistry, get_registry
|
from .registry import EmbeddingFunctionRegistry, get_registry, register
|
||||||
from .sentence_transformers import SentenceTransformerEmbeddings
|
from .sentence_transformers import SentenceTransformerEmbeddings
|
||||||
from .gte import GteEmbeddings
|
from .gte import GteEmbeddings
|
||||||
from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings
|
from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings
|
||||||
|
|||||||
@@ -9,11 +9,14 @@ from huggingface_hub import snapshot_download
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from transformers import BertTokenizer
|
from transformers import BertTokenizer
|
||||||
|
|
||||||
|
from .utils import create_import_stub
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import mlx.core as mx
|
import mlx.core as mx
|
||||||
import mlx.nn as nn
|
import mlx.nn as nn
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError("You need to install MLX to use this model use - pip install mlx")
|
mx = create_import_stub("mlx.core", "mlx")
|
||||||
|
nn = create_import_stub("mlx.nn", "mlx")
|
||||||
|
|
||||||
|
|
||||||
def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array:
|
def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array:
|
||||||
@@ -72,7 +75,7 @@ class TransformerEncoder(nn.Module):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.layers = [
|
self.layers = [
|
||||||
TransformerEncoderLayer(dims, num_heads, mlp_dims)
|
TransformerEncoderLayer(dims, num_heads, mlp_dims)
|
||||||
for i in range(num_layers)
|
for _ in range(num_layers)
|
||||||
]
|
]
|
||||||
|
|
||||||
def __call__(self, x, mask):
|
def __call__(self, x, mask):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Optional
|
from typing import Dict, Optional, Type
|
||||||
|
|
||||||
from .base import EmbeddingFunction, EmbeddingFunctionConfig
|
from .base import EmbeddingFunction, EmbeddingFunctionConfig
|
||||||
|
|
||||||
@@ -43,7 +43,7 @@ class EmbeddingFunctionRegistry:
|
|||||||
self._functions = {}
|
self._functions = {}
|
||||||
self._variables = {}
|
self._variables = {}
|
||||||
|
|
||||||
def register(self, alias: str = None):
|
def register(self, alias: Optional[str] = None):
|
||||||
"""
|
"""
|
||||||
This creates a decorator that can be used to register
|
This creates a decorator that can be used to register
|
||||||
an EmbeddingFunction.
|
an EmbeddingFunction.
|
||||||
@@ -75,7 +75,7 @@ class EmbeddingFunctionRegistry:
|
|||||||
"""
|
"""
|
||||||
self._functions = {}
|
self._functions = {}
|
||||||
|
|
||||||
def get(self, name: str):
|
def get(self, name: str) -> Type[EmbeddingFunction]:
|
||||||
"""
|
"""
|
||||||
Fetch an embedding function class by name
|
Fetch an embedding function class by name
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,36 @@ from ..dependencies import pandas as pd
|
|||||||
from ..util import attempt_import_or_raise
|
from ..util import attempt_import_or_raise
|
||||||
|
|
||||||
|
|
||||||
|
def create_import_stub(module_name: str, package_name: str = None):
|
||||||
|
"""
|
||||||
|
Create a stub module that allows class definition but fails when used.
|
||||||
|
This allows modules to be imported for doctest collection even when
|
||||||
|
optional dependencies are not available.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
module_name : str
|
||||||
|
The name of the module to create a stub for
|
||||||
|
package_name : str, optional
|
||||||
|
The package name to suggest in the error message
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
object
|
||||||
|
A stub object that can be used in place of the module
|
||||||
|
"""
|
||||||
|
|
||||||
|
class _ImportStub:
|
||||||
|
def __getattr__(self, name):
|
||||||
|
return _ImportStub # Return stub for chained access like nn.Module
|
||||||
|
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
pkg = package_name or module_name
|
||||||
|
raise ImportError(f"You need to install {pkg} to use this functionality")
|
||||||
|
|
||||||
|
return _ImportStub()
|
||||||
|
|
||||||
|
|
||||||
# ruff: noqa: PERF203
|
# ruff: noqa: PERF203
|
||||||
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
|
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
|
||||||
def wrapper(fn):
|
def wrapper(fn):
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from typing import (
|
|||||||
Literal,
|
Literal,
|
||||||
Optional,
|
Optional,
|
||||||
Tuple,
|
Tuple,
|
||||||
Type,
|
TypeVar,
|
||||||
Union,
|
Union,
|
||||||
Any,
|
Any,
|
||||||
)
|
)
|
||||||
@@ -58,6 +58,8 @@ if TYPE_CHECKING:
|
|||||||
else:
|
else:
|
||||||
from typing_extensions import Self
|
from typing_extensions import Self
|
||||||
|
|
||||||
|
T = TypeVar("T", bound="LanceModel")
|
||||||
|
|
||||||
|
|
||||||
# Pydantic validation function for vector queries
|
# Pydantic validation function for vector queries
|
||||||
def ensure_vector_query(
|
def ensure_vector_query(
|
||||||
@@ -746,8 +748,8 @@ class LanceQueryBuilder(ABC):
|
|||||||
return self.to_arrow(timeout=timeout).to_pylist()
|
return self.to_arrow(timeout=timeout).to_pylist()
|
||||||
|
|
||||||
def to_pydantic(
|
def to_pydantic(
|
||||||
self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
|
self, model: type[T], *, timeout: Optional[timedelta] = None
|
||||||
) -> List[LanceModel]:
|
) -> list[T]:
|
||||||
"""Return the table as a list of pydantic models.
|
"""Return the table as a list of pydantic models.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
@@ -906,11 +908,11 @@ class LanceQueryBuilder(ABC):
|
|||||||
>>> plan = table.search(query).explain_plan(True)
|
>>> plan = table.search(query).explain_plan(True)
|
||||||
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||||
GlobalLimitExec: skip=0, fetch=10
|
GlobalLimitExec: skip=0, fetch=10
|
||||||
FilterExec: _distance@2 IS NOT NULL
|
FilterExec: _distance@2 IS NOT NULL
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
KNNVectorDistance: metric=l2
|
KNNVectorDistance: metric=l2
|
||||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
LanceRead: uri=..., projection=[vector], ...
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -940,19 +942,19 @@ class LanceQueryBuilder(ABC):
|
|||||||
>>> plan = table.search(query).analyze_plan()
|
>>> plan = table.search(query).analyze_plan()
|
||||||
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||||
AnalyzeExec verbose=true, metrics=[]
|
AnalyzeExec verbose=true, metrics=[]
|
||||||
ProjectionExec: expr=[...], metrics=[...]
|
TracedExec, metrics=[]
|
||||||
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
|
ProjectionExec: expr=[...], metrics=[...]
|
||||||
FilterExec: _distance@2 IS NOT NULL,
|
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
|
||||||
metrics=[output_rows=..., elapsed_compute=...]
|
FilterExec: _distance@2 IS NOT NULL,
|
||||||
SortExec: TopK(fetch=10), expr=[...],
|
metrics=[output_rows=..., elapsed_compute=...]
|
||||||
preserve_partitioning=[...],
|
SortExec: TopK(fetch=10), expr=[...],
|
||||||
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
|
preserve_partitioning=[...],
|
||||||
KNNVectorDistance: metric=l2,
|
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
|
||||||
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
|
KNNVectorDistance: metric=l2,
|
||||||
LanceScan: uri=..., projection=[vector], row_id=true,
|
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
|
||||||
row_addr=false, ordered=false,
|
LanceRead: uri=..., projection=[vector], ...
|
||||||
metrics=[output_rows=..., elapsed_compute=...,
|
metrics=[output_rows=..., elapsed_compute=...,
|
||||||
bytes_read=..., iops=..., requests=...]
|
bytes_read=..., iops=..., requests=...]
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
@@ -2043,7 +2045,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
|||||||
FilterExec: _distance@2 IS NOT NULL
|
FilterExec: _distance@2 IS NOT NULL
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
KNNVectorDistance: metric=l2
|
KNNVectorDistance: metric=l2
|
||||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
LanceRead: uri=..., projection=[vector], ...
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -2429,7 +2431,7 @@ class AsyncQueryBase(object):
|
|||||||
FilterExec: _distance@2 IS NOT NULL
|
FilterExec: _distance@2 IS NOT NULL
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
KNNVectorDistance: metric=l2
|
KNNVectorDistance: metric=l2
|
||||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
LanceRead: uri=..., projection=[vector], ...
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -3054,7 +3056,7 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
|
|||||||
FilterExec: _distance@2 IS NOT NULL
|
FilterExec: _distance@2 IS NOT NULL
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
KNNVectorDistance: metric=l2
|
KNNVectorDistance: metric=l2
|
||||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
LanceRead: uri=..., projection=[vector], ...
|
||||||
<BLANKLINE>
|
<BLANKLINE>
|
||||||
FTS Search Plan:
|
FTS Search Plan:
|
||||||
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
|
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
|
||||||
|
|||||||
@@ -102,7 +102,9 @@ if TYPE_CHECKING:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
|
def _into_pyarrow_reader(
|
||||||
|
data, schema: Optional[pa.Schema] = None
|
||||||
|
) -> pa.RecordBatchReader:
|
||||||
from lancedb.dependencies import datasets
|
from lancedb.dependencies import datasets
|
||||||
|
|
||||||
if _check_for_hugging_face(data):
|
if _check_for_hugging_face(data):
|
||||||
@@ -123,6 +125,12 @@ def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
|
|||||||
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
|
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
|
||||||
|
|
||||||
if isinstance(data, list):
|
if isinstance(data, list):
|
||||||
|
# Handle empty list case
|
||||||
|
if not data:
|
||||||
|
if schema is None:
|
||||||
|
raise ValueError("Cannot create table from empty list without a schema")
|
||||||
|
return pa.Table.from_pylist(data, schema=schema).to_reader()
|
||||||
|
|
||||||
# convert to list of dict if data is a bunch of LanceModels
|
# convert to list of dict if data is a bunch of LanceModels
|
||||||
if isinstance(data[0], LanceModel):
|
if isinstance(data[0], LanceModel):
|
||||||
schema = data[0].__class__.to_arrow_schema()
|
schema = data[0].__class__.to_arrow_schema()
|
||||||
@@ -165,9 +173,9 @@ def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
|
|||||||
else:
|
else:
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
f"Unknown data type {type(data)}. "
|
f"Unknown data type {type(data)}. "
|
||||||
"Please check "
|
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||||
"https://lancedb.github.io/lancedb/python/python/ "
|
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||||
"to see supported types."
|
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -236,7 +244,7 @@ def _sanitize_data(
|
|||||||
# 1. There might be embedding columns missing that will be added
|
# 1. There might be embedding columns missing that will be added
|
||||||
# in the add_embeddings step.
|
# in the add_embeddings step.
|
||||||
# 2. If `allow_subschemas` is True, there might be columns missing.
|
# 2. If `allow_subschemas` is True, there might be columns missing.
|
||||||
reader = _into_pyarrow_reader(data)
|
reader = _into_pyarrow_reader(data, target_schema)
|
||||||
|
|
||||||
reader = _append_vector_columns(reader, target_schema, metadata=metadata)
|
reader = _append_vector_columns(reader, target_schema, metadata=metadata)
|
||||||
|
|
||||||
|
|||||||
@@ -33,8 +33,11 @@ tantivy = pytest.importorskip("tantivy")
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def table(tmp_path) -> ldb.table.LanceTable:
|
def table(tmp_path) -> ldb.table.LanceTable:
|
||||||
|
# Use local random state to avoid affecting other tests
|
||||||
|
rng = np.random.RandomState(42)
|
||||||
|
local_random = random.Random(42)
|
||||||
db = ldb.connect(tmp_path)
|
db = ldb.connect(tmp_path)
|
||||||
vectors = [np.random.randn(128) for _ in range(100)]
|
vectors = [rng.randn(128) for _ in range(100)]
|
||||||
|
|
||||||
text_nouns = ("puppy", "car")
|
text_nouns = ("puppy", "car")
|
||||||
text2_nouns = ("rabbit", "girl", "monkey")
|
text2_nouns = ("rabbit", "girl", "monkey")
|
||||||
@@ -44,10 +47,10 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
|||||||
text = [
|
text = [
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
text_nouns[random.randrange(0, len(text_nouns))],
|
text_nouns[local_random.randrange(0, len(text_nouns))],
|
||||||
verbs[random.randrange(0, 5)],
|
verbs[local_random.randrange(0, 5)],
|
||||||
adv[random.randrange(0, 5)],
|
adv[local_random.randrange(0, 5)],
|
||||||
adj[random.randrange(0, 5)],
|
adj[local_random.randrange(0, 5)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for _ in range(100)
|
for _ in range(100)
|
||||||
@@ -55,15 +58,15 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
|||||||
text2 = [
|
text2 = [
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
text2_nouns[random.randrange(0, len(text2_nouns))],
|
text2_nouns[local_random.randrange(0, len(text2_nouns))],
|
||||||
verbs[random.randrange(0, 5)],
|
verbs[local_random.randrange(0, 5)],
|
||||||
adv[random.randrange(0, 5)],
|
adv[local_random.randrange(0, 5)],
|
||||||
adj[random.randrange(0, 5)],
|
adj[local_random.randrange(0, 5)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for _ in range(100)
|
for _ in range(100)
|
||||||
]
|
]
|
||||||
count = [random.randint(1, 10000) for _ in range(100)]
|
count = [local_random.randint(1, 10000) for _ in range(100)]
|
||||||
table = db.create_table(
|
table = db.create_table(
|
||||||
"test",
|
"test",
|
||||||
data=pd.DataFrame(
|
data=pd.DataFrame(
|
||||||
@@ -82,8 +85,11 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
||||||
|
# Use local random state to avoid affecting other tests
|
||||||
|
rng = np.random.RandomState(42)
|
||||||
|
local_random = random.Random(42)
|
||||||
db = await ldb.connect_async(tmp_path)
|
db = await ldb.connect_async(tmp_path)
|
||||||
vectors = [np.random.randn(128) for _ in range(100)]
|
vectors = [rng.randn(128) for _ in range(100)]
|
||||||
|
|
||||||
text_nouns = ("puppy", "car")
|
text_nouns = ("puppy", "car")
|
||||||
text2_nouns = ("rabbit", "girl", "monkey")
|
text2_nouns = ("rabbit", "girl", "monkey")
|
||||||
@@ -93,10 +99,10 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
|||||||
text = [
|
text = [
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
text_nouns[random.randrange(0, len(text_nouns))],
|
text_nouns[local_random.randrange(0, len(text_nouns))],
|
||||||
verbs[random.randrange(0, 5)],
|
verbs[local_random.randrange(0, 5)],
|
||||||
adv[random.randrange(0, 5)],
|
adv[local_random.randrange(0, 5)],
|
||||||
adj[random.randrange(0, 5)],
|
adj[local_random.randrange(0, 5)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for _ in range(100)
|
for _ in range(100)
|
||||||
@@ -104,15 +110,15 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
|||||||
text2 = [
|
text2 = [
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
text2_nouns[random.randrange(0, len(text2_nouns))],
|
text2_nouns[local_random.randrange(0, len(text2_nouns))],
|
||||||
verbs[random.randrange(0, 5)],
|
verbs[local_random.randrange(0, 5)],
|
||||||
adv[random.randrange(0, 5)],
|
adv[local_random.randrange(0, 5)],
|
||||||
adj[random.randrange(0, 5)],
|
adj[local_random.randrange(0, 5)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for _ in range(100)
|
for _ in range(100)
|
||||||
]
|
]
|
||||||
count = [random.randint(1, 10000) for _ in range(100)]
|
count = [local_random.randint(1, 10000) for _ in range(100)]
|
||||||
table = await db.create_table(
|
table = await db.create_table(
|
||||||
"test",
|
"test",
|
||||||
data=pd.DataFrame(
|
data=pd.DataFrame(
|
||||||
|
|||||||
@@ -166,7 +166,7 @@ async def test_explain_plan(table: AsyncTable):
|
|||||||
assert "Vector Search Plan" in plan
|
assert "Vector Search Plan" in plan
|
||||||
assert "KNNVectorDistance" in plan
|
assert "KNNVectorDistance" in plan
|
||||||
assert "FTS Search Plan" in plan
|
assert "FTS Search Plan" in plan
|
||||||
assert "LanceScan" in plan
|
assert "LanceRead" in plan
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|||||||
@@ -839,7 +839,7 @@ async def test_explain_plan_with_filters(table_async: AsyncTable):
|
|||||||
table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
|
table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
|
||||||
)
|
)
|
||||||
assert "KNN" in plan_with_filter
|
assert "KNN" in plan_with_filter
|
||||||
assert "FilterExec" in plan_with_filter
|
assert "LanceRead" in plan_with_filter
|
||||||
|
|
||||||
# Test FTS query with filter
|
# Test FTS query with filter
|
||||||
from lancedb.index import FTS
|
from lancedb.index import FTS
|
||||||
@@ -850,7 +850,8 @@ async def test_explain_plan_with_filters(table_async: AsyncTable):
|
|||||||
)
|
)
|
||||||
plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
|
plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
|
||||||
assert "MatchQuery: query=dog" in plan_fts_filter
|
assert "MatchQuery: query=dog" in plan_fts_filter
|
||||||
assert "FilterExec: id@" in plan_fts_filter # Should show filter details
|
assert "LanceRead" in plan_fts_filter
|
||||||
|
assert "full_filter=id = Int64(1)" in plan_fts_filter # Should show filter details
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -1338,3 +1339,20 @@ async def test_query_timeout_async(tmp_path):
|
|||||||
.nearest_to([0.0, 0.0])
|
.nearest_to([0.0, 0.0])
|
||||||
.to_list(timeout=timedelta(0))
|
.to_list(timeout=timedelta(0))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_empty_table(mem_db):
|
||||||
|
"""Test searching on empty table should not crash
|
||||||
|
|
||||||
|
Regression test for issue #303:
|
||||||
|
https://github.com/lancedb/lancedb/issues/303
|
||||||
|
Searching on empty table produces scary error message
|
||||||
|
"""
|
||||||
|
schema = pa.schema(
|
||||||
|
[pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("id", pa.int64())]
|
||||||
|
)
|
||||||
|
table = mem_db.create_table("test_empty_search", schema=schema)
|
||||||
|
|
||||||
|
# Search on empty table should return empty results, not crash
|
||||||
|
results = table.search([1.0, 2.0]).limit(5).to_list()
|
||||||
|
assert results == []
|
||||||
|
|||||||
38
python/python/tests/test_session.py
Normal file
38
python/python/tests/test_session.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
|
||||||
|
|
||||||
|
def test_session_cache_configuration(tmp_path):
|
||||||
|
"""Test Session cache configuration and basic functionality."""
|
||||||
|
# Create session with small cache limits for testing
|
||||||
|
index_cache_size = 1024 * 1024 # 1MB
|
||||||
|
metadata_cache_size = 512 * 1024 # 512KB
|
||||||
|
|
||||||
|
session = lancedb.Session(
|
||||||
|
index_cache_size_bytes=index_cache_size,
|
||||||
|
metadata_cache_size_bytes=metadata_cache_size,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Record initial cache state
|
||||||
|
initial_cache_size = session.size_bytes
|
||||||
|
initial_cache_items = session.approx_num_items
|
||||||
|
|
||||||
|
# Test session works with database connection
|
||||||
|
db = lancedb.connect(tmp_path, session=session)
|
||||||
|
|
||||||
|
# Create and use a table to exercise the session
|
||||||
|
data = [{"id": i, "text": f"item {i}"} for i in range(100)]
|
||||||
|
table = db.create_table("test", data)
|
||||||
|
results = list(table.to_arrow().to_pylist())
|
||||||
|
|
||||||
|
assert len(results) == 100
|
||||||
|
|
||||||
|
# Verify cache usage increased after operations
|
||||||
|
final_cache_size = session.size_bytes
|
||||||
|
final_cache_items = session.approx_num_items
|
||||||
|
|
||||||
|
assert final_cache_size > initial_cache_size # Cache should have grown
|
||||||
|
assert final_cache_items >= initial_cache_items # Items should not decrease
|
||||||
|
assert initial_cache_size < index_cache_size + metadata_cache_size
|
||||||
@@ -1804,3 +1804,45 @@ def test_stats(mem_db: DBConnection):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_table_empty_list_with_schema(mem_db: DBConnection):
|
||||||
|
"""Test creating table with empty list data and schema
|
||||||
|
|
||||||
|
Regression test for IndexError: list index out of range
|
||||||
|
when calling create_table(name, data=[], schema=schema)
|
||||||
|
"""
|
||||||
|
schema = pa.schema(
|
||||||
|
[pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("id", pa.int64())]
|
||||||
|
)
|
||||||
|
table = mem_db.create_table("test_empty_list", data=[], schema=schema)
|
||||||
|
assert table.count_rows() == 0
|
||||||
|
assert table.schema == schema
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_table_empty_list_no_schema_error(mem_db: DBConnection):
|
||||||
|
"""Test that creating table with empty list and no schema raises error"""
|
||||||
|
with pytest.raises(
|
||||||
|
ValueError, match="Cannot create table from empty list without a schema"
|
||||||
|
):
|
||||||
|
mem_db.create_table("test_empty_no_schema", data=[])
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_table_with_empty_embeddings(tmp_path):
|
||||||
|
"""Test exact scenario from issue #1968
|
||||||
|
|
||||||
|
Regression test for issue #1968:
|
||||||
|
https://github.com/lancedb/lancedb/issues/1968
|
||||||
|
"""
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
|
class MySchema(LanceModel):
|
||||||
|
text: str
|
||||||
|
embedding: Vector(16)
|
||||||
|
|
||||||
|
table = db.create_table("test", schema=MySchema)
|
||||||
|
table.add(
|
||||||
|
[{"text": "bar", "embedding": [0.1] * 16}],
|
||||||
|
on_bad_vectors="drop",
|
||||||
|
)
|
||||||
|
assert table.count_rows() == 1
|
||||||
|
|||||||
@@ -179,7 +179,7 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None))]
|
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn connect(
|
pub fn connect(
|
||||||
py: Python,
|
py: Python,
|
||||||
@@ -190,6 +190,7 @@ pub fn connect(
|
|||||||
read_consistency_interval: Option<f64>,
|
read_consistency_interval: Option<f64>,
|
||||||
client_config: Option<PyClientConfig>,
|
client_config: Option<PyClientConfig>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
|
session: Option<crate::session::Session>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
future_into_py(py, async move {
|
future_into_py(py, async move {
|
||||||
let mut builder = lancedb::connect(&uri);
|
let mut builder = lancedb::connect(&uri);
|
||||||
@@ -213,6 +214,9 @@ pub fn connect(
|
|||||||
if let Some(client_config) = client_config {
|
if let Some(client_config) = client_config {
|
||||||
builder = builder.client_config(client_config.into());
|
builder = builder.client_config(client_config.into());
|
||||||
}
|
}
|
||||||
|
if let Some(session) = session {
|
||||||
|
builder = builder.session(session.inner.clone());
|
||||||
|
}
|
||||||
Ok(Connection::new(builder.execute().await.infer_error()?))
|
Ok(Connection::new(builder.execute().await.infer_error()?))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ use pyo3::{
|
|||||||
wrap_pyfunction, Bound, PyResult, Python,
|
wrap_pyfunction, Bound, PyResult, Python,
|
||||||
};
|
};
|
||||||
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
||||||
|
use session::Session;
|
||||||
use table::{
|
use table::{
|
||||||
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
|
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
|
||||||
Table, UpdateResult,
|
Table, UpdateResult,
|
||||||
@@ -21,6 +22,7 @@ pub mod connection;
|
|||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod query;
|
pub mod query;
|
||||||
|
pub mod session;
|
||||||
pub mod table;
|
pub mod table;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
||||||
@@ -31,6 +33,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|||||||
.write_style("LANCEDB_LOG_STYLE");
|
.write_style("LANCEDB_LOG_STYLE");
|
||||||
env_logger::init_from_env(env);
|
env_logger::init_from_env(env);
|
||||||
m.add_class::<Connection>()?;
|
m.add_class::<Connection>()?;
|
||||||
|
m.add_class::<Session>()?;
|
||||||
m.add_class::<Table>()?;
|
m.add_class::<Table>()?;
|
||||||
m.add_class::<IndexConfig>()?;
|
m.add_class::<IndexConfig>()?;
|
||||||
m.add_class::<Query>()?;
|
m.add_class::<Query>()?;
|
||||||
|
|||||||
107
python/src/session.rs
Normal file
107
python/src/session.rs
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
|
||||||
|
use pyo3::{pyclass, pymethods, PyResult};
|
||||||
|
|
||||||
|
/// A session for managing caches and object stores across LanceDB operations.
|
||||||
|
///
|
||||||
|
/// Sessions allow you to configure cache sizes for index and metadata caches,
|
||||||
|
/// which can significantly impact memory use and performance. They can
|
||||||
|
/// also be re-used across multiple connections to share the same cache state.
|
||||||
|
#[pyclass]
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Session {
|
||||||
|
pub(crate) inner: Arc<LanceSession>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Session {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
inner: Arc::new(LanceSession::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Session {
|
||||||
|
/// Create a new session with custom cache sizes.
|
||||||
|
///
|
||||||
|
/// Parameters
|
||||||
|
/// ----------
|
||||||
|
/// index_cache_size_bytes : int, optional
|
||||||
|
/// The size of the index cache in bytes.
|
||||||
|
/// Index data is stored in memory in this cache to speed up queries.
|
||||||
|
/// Default: 6GB (6 * 1024 * 1024 * 1024 bytes)
|
||||||
|
/// metadata_cache_size_bytes : int, optional
|
||||||
|
/// The size of the metadata cache in bytes.
|
||||||
|
/// The metadata cache stores file metadata and schema information in memory.
|
||||||
|
/// This cache improves scan and write performance.
|
||||||
|
/// Default: 1GB (1024 * 1024 * 1024 bytes)
|
||||||
|
#[new]
|
||||||
|
#[pyo3(signature = (index_cache_size_bytes=None, metadata_cache_size_bytes=None))]
|
||||||
|
pub fn new(
|
||||||
|
index_cache_size_bytes: Option<usize>,
|
||||||
|
metadata_cache_size_bytes: Option<usize>,
|
||||||
|
) -> PyResult<Self> {
|
||||||
|
let index_cache_size = index_cache_size_bytes.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
|
||||||
|
let metadata_cache_size = metadata_cache_size_bytes.unwrap_or(1024 * 1024 * 1024); // 1GB default
|
||||||
|
|
||||||
|
let session = LanceSession::new(
|
||||||
|
index_cache_size,
|
||||||
|
metadata_cache_size,
|
||||||
|
Arc::new(ObjectStoreRegistry::default()),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
inner: Arc::new(session),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a session with default cache sizes.
|
||||||
|
///
|
||||||
|
/// This is equivalent to creating a session with 6GB index cache
|
||||||
|
/// and 1GB metadata cache.
|
||||||
|
///
|
||||||
|
/// Returns
|
||||||
|
/// -------
|
||||||
|
/// Session
|
||||||
|
/// A new Session with default cache sizes
|
||||||
|
#[staticmethod]
|
||||||
|
#[allow(clippy::should_implement_trait)]
|
||||||
|
pub fn default() -> Self {
|
||||||
|
Default::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the current size of the session caches in bytes.
|
||||||
|
///
|
||||||
|
/// Returns
|
||||||
|
/// -------
|
||||||
|
/// int
|
||||||
|
/// The total size of all caches in the session
|
||||||
|
#[getter]
|
||||||
|
pub fn size_bytes(&self) -> u64 {
|
||||||
|
self.inner.size_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the approximate number of items cached in the session.
|
||||||
|
///
|
||||||
|
/// Returns
|
||||||
|
/// -------
|
||||||
|
/// int
|
||||||
|
/// The number of cached items across all caches
|
||||||
|
#[getter]
|
||||||
|
pub fn approx_num_items(&self) -> usize {
|
||||||
|
self.inner.approx_num_items()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(&self) -> String {
|
||||||
|
format!(
|
||||||
|
"Session(size_bytes={}, approx_num_items={})",
|
||||||
|
self.size_bytes(),
|
||||||
|
self.approx_num_items()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.21.2-beta.0"
|
version = "0.21.2-beta.1"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.21.2-beta.0"
|
version = "0.21.2-beta.1"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -678,7 +678,8 @@ impl Database for ListingDatabase {
|
|||||||
let mut read_params = request.lance_read_params.unwrap_or_else(|| {
|
let mut read_params = request.lance_read_params.unwrap_or_else(|| {
|
||||||
let mut default_params = ReadParams::default();
|
let mut default_params = ReadParams::default();
|
||||||
if let Some(index_cache_size) = request.index_cache_size {
|
if let Some(index_cache_size) = request.index_cache_size {
|
||||||
default_params.index_cache_size = index_cache_size as usize;
|
#[allow(deprecated)]
|
||||||
|
default_params.index_cache_size(index_cache_size as usize);
|
||||||
}
|
}
|
||||||
default_params
|
default_params
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -290,3 +290,7 @@ impl Display for DistanceType {
|
|||||||
|
|
||||||
/// Connect to a database
|
/// Connect to a database
|
||||||
pub use connection::connect;
|
pub use connection::connect;
|
||||||
|
|
||||||
|
/// Re-export Lance Session and ObjectStoreRegistry for custom session creation
|
||||||
|
pub use lance::session::Session;
|
||||||
|
pub use lance_io::object_store::ObjectStoreRegistry;
|
||||||
|
|||||||
@@ -486,11 +486,9 @@ pub mod tests {
|
|||||||
TestFixture::check_plan(
|
TestFixture::check_plan(
|
||||||
plan,
|
plan,
|
||||||
"MetadataEraserExec
|
"MetadataEraserExec
|
||||||
CoalesceBatchesExec:...
|
|
||||||
FilterExec: i@0 >= 5
|
|
||||||
RepartitionExec:...
|
RepartitionExec:...
|
||||||
ProjectionExec:...
|
ProjectionExec:...
|
||||||
LanceScan:...",
|
LanceRead:...",
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user