mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
Compare commits
15 Commits
v0.19.1-be
...
docs/quick
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e278fc5a6 | ||
|
|
09fed1f286 | ||
|
|
cee2b5ea42 | ||
|
|
f315f9665a | ||
|
|
5deb26bc8b | ||
|
|
3cc670ac38 | ||
|
|
4ade3e31e2 | ||
|
|
a222d2cd91 | ||
|
|
508e621f3d | ||
|
|
a1a0472f3f | ||
|
|
3425a6d339 | ||
|
|
af54e0ce06 | ||
|
|
089905fe8f | ||
|
|
554939e5d2 | ||
|
|
7a13814922 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.19.1-beta.0"
|
current_version = "0.19.1-beta.1"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
1
.github/workflows/python.yml
vendored
1
.github/workflows/python.yml
vendored
@@ -228,6 +228,7 @@ jobs:
|
|||||||
- name: Install lancedb
|
- name: Install lancedb
|
||||||
run: |
|
run: |
|
||||||
pip install "pydantic<2"
|
pip install "pydantic<2"
|
||||||
|
pip install pyarrow==16
|
||||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||||
pip install tantivy
|
pip install tantivy
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
|
|||||||
215
Cargo.lock
generated
215
Cargo.lock
generated
@@ -463,7 +463,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -474,7 +474,7 @@ checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -602,9 +602,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aws-sdk-bedrockruntime"
|
name = "aws-sdk-bedrockruntime"
|
||||||
version = "1.83.0"
|
version = "1.85.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b82a56e1a0c4b145031c3a99e68127eec0a4206ad34a5653ddf04afc18053376"
|
checksum = "6f6c003cd82739447a18d7616468b047341c125efff11fdafc77a5e777a861c9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-runtime",
|
"aws-runtime",
|
||||||
@@ -628,9 +628,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aws-sdk-dynamodb"
|
name = "aws-sdk-dynamodb"
|
||||||
version = "1.72.0"
|
version = "1.72.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "412cd587b03bacb2f7b94a5446cc77dee49a8fa848e636f9545df3aadbbfaf8b"
|
checksum = "b14d5b5d6849d1caa7b404ea57cbe25ed8ba25c3c7d47f45bcbd5b51e098ceac"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-runtime",
|
"aws-runtime",
|
||||||
@@ -1117,7 +1117,7 @@ dependencies = [
|
|||||||
"regex",
|
"regex",
|
||||||
"rustc-hash 1.1.0",
|
"rustc-hash 1.1.0",
|
||||||
"shlex",
|
"shlex",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
"which",
|
"which",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -1229,9 +1229,9 @@ checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytemuck"
|
name = "bytemuck"
|
||||||
version = "1.22.0"
|
version = "1.23.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540"
|
checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck_derive",
|
"bytemuck_derive",
|
||||||
]
|
]
|
||||||
@@ -1244,7 +1244,7 @@ checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1753,7 +1753,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501"
|
checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1783,7 +1783,7 @@ dependencies = [
|
|||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"strsim",
|
"strsim",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1794,7 +1794,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"darling_core",
|
"darling_core",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2152,7 +2152,7 @@ checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"datafusion-expr",
|
"datafusion-expr",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2321,7 +2321,7 @@ checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2342,7 +2342,7 @@ dependencies = [
|
|||||||
"darling",
|
"darling",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2352,7 +2352,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
|
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"derive_builder_core",
|
"derive_builder_core",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2416,7 +2416,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2509,7 +2509,7 @@ dependencies = [
|
|||||||
"heck 0.5.0",
|
"heck 0.5.0",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2521,7 +2521,7 @@ dependencies = [
|
|||||||
"once_cell",
|
"once_cell",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2737,9 +2737,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fsst"
|
name = "fsst"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "cbbb5d86fdf9f56c54cf7ec48f4471c0e901af458ee9821677dc8ba0c38bc0be"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
]
|
]
|
||||||
@@ -2815,7 +2814,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3517,7 +3516,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3681,7 +3680,7 @@ checksum = "199b7932d97e325aff3a7030e141eafe7f2c6268e1d1b24859b753a627f45254"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3728,9 +3727,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance"
|
name = "lance"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "285c3f98a7182e2f35eabc9be67927bb9167b236c6d9c45d894928cbe330067c"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
@@ -3792,9 +3790,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-arrow"
|
name = "lance-arrow"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "0f877f217d6f93b24b54a2390a988a32f99d6608fe8af7766d93bd515e77dd2a"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-buffer",
|
"arrow-buffer",
|
||||||
@@ -3811,9 +3808,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-core"
|
name = "lance-core"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "52fa4661c532db5b53102e2b394c9735bf6e707c337dfa5b9d98baba5c0cba13"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-buffer",
|
"arrow-buffer",
|
||||||
@@ -3849,9 +3845,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-datafusion"
|
name = "lance-datafusion"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "3efa610cc1168aaf96734f2f7911fb874609304716aab3318a86193da883f700"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -3880,9 +3875,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-datagen"
|
name = "lance-datagen"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "4c61affd39495caa923f6a49a7cb0a9f36fea2d7231a039e557f908e0b3b59cf"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -3897,9 +3891,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-encoding"
|
name = "lance-encoding"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "051d65ab02790552c8790fe22915fbdd1629f3e1fa2a6ef69946e77c9d2b6f8e"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrayref",
|
"arrayref",
|
||||||
"arrow",
|
"arrow",
|
||||||
@@ -3938,9 +3931,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-file"
|
name = "lance-file"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "9e4aff9ff0801c82d8fcb88cacec4880b6aaf53c6480291d50a4fcc12e6853c4"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -3974,9 +3966,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-index"
|
name = "lance-index"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "3823b5147002a3115456c4dd1e2b16c645c08f4653e6e9dc624b9381ba29c87f"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4029,9 +4020,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-io"
|
name = "lance-io"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "a401202f6a1997db4ea5e9eb1a73a352736b320808a2e8497686c44fe6badf01"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
@@ -4069,9 +4059,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-linalg"
|
name = "lance-linalg"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "82cc5333ed9d12f1745e849ad161746da0b12ae3d4c9897d1937411e6533f504"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ord",
|
"arrow-ord",
|
||||||
@@ -4094,9 +4083,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-table"
|
name = "lance-table"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "41a43c76277808c452135f33a6b46ca8ec6ba38167534ff5240b46098ed81e73"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4135,9 +4123,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-testing"
|
name = "lance-testing"
|
||||||
version = "0.26.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/lancedb/lance.git?tag=v0.27.0-beta.2#cf903b470be1aaff2998830bd0358226f27f4185"
|
||||||
checksum = "f697b2c273e4629c782205e563282c08a74fe237ca8dd36cf10f862951887a70"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-schema",
|
"arrow-schema",
|
||||||
@@ -4148,7 +4135,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.19.0-beta.11"
|
version = "0.19.1-beta.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4235,7 +4222,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.19.0-beta.11"
|
version = "0.19.1-beta.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ipc",
|
"arrow-ipc",
|
||||||
@@ -4260,7 +4247,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
version = "0.19.0-beta.11"
|
version = "0.19.1-beta.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ipc",
|
"arrow-ipc",
|
||||||
@@ -4279,7 +4266,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.22.0-beta.11"
|
version = "0.22.1-beta.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
@@ -4681,7 +4668,7 @@ checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -4749,7 +4736,7 @@ dependencies = [
|
|||||||
"napi-derive-backend",
|
"napi-derive-backend",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -4764,7 +4751,7 @@ dependencies = [
|
|||||||
"quote",
|
"quote",
|
||||||
"regex",
|
"regex",
|
||||||
"semver 1.0.26",
|
"semver 1.0.26",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -4964,7 +4951,7 @@ dependencies = [
|
|||||||
"proc-macro-crate",
|
"proc-macro-crate",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5231,7 +5218,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5657,7 +5644,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6"
|
checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5704,7 +5691,7 @@ dependencies = [
|
|||||||
"prost",
|
"prost",
|
||||||
"prost-types",
|
"prost-types",
|
||||||
"regex",
|
"regex",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -5718,7 +5705,7 @@ dependencies = [
|
|||||||
"itertools 0.14.0",
|
"itertools 0.14.0",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5732,9 +5719,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "psm"
|
name = "psm"
|
||||||
version = "0.1.25"
|
version = "0.1.26"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88"
|
checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
]
|
]
|
||||||
@@ -5791,7 +5778,7 @@ checksum = "b2df2884957d2476731f987673befac5d521dff10abb0a7cbe12015bc7702fe9"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5823,7 +5810,7 @@ dependencies = [
|
|||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"pyo3-macros-backend",
|
"pyo3-macros-backend",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5836,14 +5823,14 @@ dependencies = [
|
|||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"pyo3-build-config",
|
"pyo3-build-config",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quick-xml"
|
name = "quick-xml"
|
||||||
version = "0.37.4"
|
version = "0.37.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a4ce8c88de324ff838700f36fb6ab86c96df0e3c4ab6ef3a9b2044465cce1369"
|
checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -6094,7 +6081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
|
checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -6312,7 +6299,7 @@ dependencies = [
|
|||||||
"regex",
|
"regex",
|
||||||
"relative-path",
|
"relative-path",
|
||||||
"rustc_version",
|
"rustc_version",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -6646,7 +6633,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -6709,7 +6696,7 @@ dependencies = [
|
|||||||
"darling",
|
"darling",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -6842,7 +6829,7 @@ dependencies = [
|
|||||||
"heck 0.5.0",
|
"heck 0.5.0",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -6916,7 +6903,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -6927,9 +6914,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stacker"
|
name = "stacker"
|
||||||
version = "0.1.20"
|
version = "0.1.21"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "601f9201feb9b09c00266478bf459952b9ef9a6b94edb2f21eba14ab681a60a9"
|
checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
@@ -6993,7 +6980,7 @@ dependencies = [
|
|||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"rustversion",
|
"rustversion",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -7015,9 +7002,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "2.0.100"
|
version = "2.0.101"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
|
checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@@ -7052,7 +7039,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -7308,7 +7295,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -7319,7 +7306,7 @@ checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -7454,7 +7441,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -7503,15 +7490,15 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml_datetime"
|
name = "toml_datetime"
|
||||||
version = "0.6.8"
|
version = "0.6.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
|
checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml_edit"
|
name = "toml_edit"
|
||||||
version = "0.22.24"
|
version = "0.22.26"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474"
|
checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"indexmap 2.9.0",
|
"indexmap 2.9.0",
|
||||||
"toml_datetime",
|
"toml_datetime",
|
||||||
@@ -7564,7 +7551,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -7834,7 +7821,7 @@ dependencies = [
|
|||||||
"log",
|
"log",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -7869,7 +7856,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
"wasm-bindgen-backend",
|
"wasm-bindgen-backend",
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
@@ -7918,9 +7905,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "webpki-roots"
|
name = "webpki-roots"
|
||||||
version = "0.26.8"
|
version = "0.26.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9"
|
checksum = "29aad86cec885cafd03e8305fd727c418e970a521322c91688414d5b8efba16b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"rustls-pki-types",
|
"rustls-pki-types",
|
||||||
]
|
]
|
||||||
@@ -8031,7 +8018,7 @@ checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -8042,7 +8029,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -8053,7 +8040,7 @@ checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -8064,7 +8051,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -8479,7 +8466,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
"synstructure",
|
"synstructure",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -8509,7 +8496,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -8520,7 +8507,7 @@ checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -8540,7 +8527,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
"synstructure",
|
"synstructure",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -8569,7 +8556,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.100",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
16
Cargo.toml
16
Cargo.toml
@@ -21,14 +21,14 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.78.0"
|
rust-version = "1.78.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.26.0", "features" = ["dynamodb"] }
|
lance = { "version" = "=0.27.0", "features" = ["dynamodb"], tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
||||||
lance-io = "=0.26.0"
|
lance-io = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
||||||
lance-index = "=0.26.0"
|
lance-index = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
||||||
lance-linalg = "=0.26.0"
|
lance-linalg = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
||||||
lance-table = "=0.26.0"
|
lance-table = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
||||||
lance-testing = "=0.26.0"
|
lance-testing = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
||||||
lance-datafusion = "=0.26.0"
|
lance-datafusion = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
||||||
lance-encoding = "=0.26.0"
|
lance-encoding = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "54.1", optional = false }
|
arrow = { version = "54.1", optional = false }
|
||||||
arrow-array = "54.1"
|
arrow-array = "54.1"
|
||||||
|
|||||||
@@ -105,7 +105,8 @@ markdown_extensions:
|
|||||||
nav:
|
nav:
|
||||||
- Home:
|
- Home:
|
||||||
- LanceDB: index.md
|
- LanceDB: index.md
|
||||||
- 🏃🏼♂️ Quick start: basic.md
|
- 👉 Quickstart: quickstart.md
|
||||||
|
- 🏃🏼♂️ Basic Usage: basic.md
|
||||||
- 📚 Concepts:
|
- 📚 Concepts:
|
||||||
- Vector search: concepts/vector_search.md
|
- Vector search: concepts/vector_search.md
|
||||||
- Indexing:
|
- Indexing:
|
||||||
@@ -237,7 +238,9 @@ nav:
|
|||||||
- 👾 JavaScript (lancedb): js/globals.md
|
- 👾 JavaScript (lancedb): js/globals.md
|
||||||
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
|
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
|
||||||
|
|
||||||
- Quick start: basic.md
|
- Getting Started:
|
||||||
|
- Quickstart: quickstart.md
|
||||||
|
- Basic Usage: basic.md
|
||||||
- Concepts:
|
- Concepts:
|
||||||
- Vector search: concepts/vector_search.md
|
- Vector search: concepts/vector_search.md
|
||||||
- Indexing:
|
- Indexing:
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# Quick start
|
# Basic Usage
|
||||||
|
|
||||||
!!! info "LanceDB can be run in a number of ways:"
|
!!! info "LanceDB can be run in a number of ways:"
|
||||||
|
|
||||||
|
|||||||
@@ -33,20 +33,20 @@ Construct a MergeInsertBuilder. __Internal use only.__
|
|||||||
### execute()
|
### execute()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
execute(data): Promise<void>
|
execute(data): Promise<MergeStats>
|
||||||
```
|
```
|
||||||
|
|
||||||
Executes the merge insert operation
|
Executes the merge insert operation
|
||||||
|
|
||||||
Nothing is returned but the `Table` is updated
|
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **data**: [`Data`](../type-aliases/Data.md)
|
* **data**: [`Data`](../type-aliases/Data.md)
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Promise`<`void`>
|
`Promise`<[`MergeStats`](../interfaces/MergeStats.md)>
|
||||||
|
|
||||||
|
Statistics about the merge operation: counts of inserted, updated, and deleted rows
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
|||||||
@@ -615,6 +615,22 @@ of the given query
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### stats()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
abstract stats(): Promise<TableStatistics>
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns table and fragment statistics
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`TableStatistics`](../interfaces/TableStatistics.md)>
|
||||||
|
|
||||||
|
The table and fragment statistics
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### tags()
|
### tags()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
|
|||||||
@@ -42,6 +42,8 @@
|
|||||||
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||||
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
||||||
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
||||||
|
- [FragmentStatistics](interfaces/FragmentStatistics.md)
|
||||||
|
- [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
|
||||||
- [FtsOptions](interfaces/FtsOptions.md)
|
- [FtsOptions](interfaces/FtsOptions.md)
|
||||||
- [FullTextQuery](interfaces/FullTextQuery.md)
|
- [FullTextQuery](interfaces/FullTextQuery.md)
|
||||||
- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
|
- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
|
||||||
@@ -52,6 +54,7 @@
|
|||||||
- [IndexStatistics](interfaces/IndexStatistics.md)
|
- [IndexStatistics](interfaces/IndexStatistics.md)
|
||||||
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
|
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
|
||||||
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
||||||
|
- [MergeStats](interfaces/MergeStats.md)
|
||||||
- [OpenTableOptions](interfaces/OpenTableOptions.md)
|
- [OpenTableOptions](interfaces/OpenTableOptions.md)
|
||||||
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
||||||
- [OptimizeStats](interfaces/OptimizeStats.md)
|
- [OptimizeStats](interfaces/OptimizeStats.md)
|
||||||
@@ -59,6 +62,7 @@
|
|||||||
- [RemovalStats](interfaces/RemovalStats.md)
|
- [RemovalStats](interfaces/RemovalStats.md)
|
||||||
- [RetryConfig](interfaces/RetryConfig.md)
|
- [RetryConfig](interfaces/RetryConfig.md)
|
||||||
- [TableNamesOptions](interfaces/TableNamesOptions.md)
|
- [TableNamesOptions](interfaces/TableNamesOptions.md)
|
||||||
|
- [TableStatistics](interfaces/TableStatistics.md)
|
||||||
- [TimeoutConfig](interfaces/TimeoutConfig.md)
|
- [TimeoutConfig](interfaces/TimeoutConfig.md)
|
||||||
- [UpdateOptions](interfaces/UpdateOptions.md)
|
- [UpdateOptions](interfaces/UpdateOptions.md)
|
||||||
- [Version](interfaces/Version.md)
|
- [Version](interfaces/Version.md)
|
||||||
|
|||||||
37
docs/src/js/interfaces/FragmentStatistics.md
Normal file
37
docs/src/js/interfaces/FragmentStatistics.md
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / FragmentStatistics
|
||||||
|
|
||||||
|
# Interface: FragmentStatistics
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### lengths
|
||||||
|
|
||||||
|
```ts
|
||||||
|
lengths: FragmentSummaryStats;
|
||||||
|
```
|
||||||
|
|
||||||
|
Statistics on the number of rows in the table fragments
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### numFragments
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numFragments: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The number of fragments in the table
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### numSmallFragments
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numSmallFragments: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The number of uncompacted fragments in the table
|
||||||
77
docs/src/js/interfaces/FragmentSummaryStats.md
Normal file
77
docs/src/js/interfaces/FragmentSummaryStats.md
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / FragmentSummaryStats
|
||||||
|
|
||||||
|
# Interface: FragmentSummaryStats
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### max
|
||||||
|
|
||||||
|
```ts
|
||||||
|
max: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The number of rows in the fragment with the most rows
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### mean
|
||||||
|
|
||||||
|
```ts
|
||||||
|
mean: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The mean number of rows in the fragments
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### min
|
||||||
|
|
||||||
|
```ts
|
||||||
|
min: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The number of rows in the fragment with the fewest rows
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### p25
|
||||||
|
|
||||||
|
```ts
|
||||||
|
p25: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The 25th percentile of number of rows in the fragments
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### p50
|
||||||
|
|
||||||
|
```ts
|
||||||
|
p50: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The 50th percentile of number of rows in the fragments
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### p75
|
||||||
|
|
||||||
|
```ts
|
||||||
|
p75: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The 75th percentile of number of rows in the fragments
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### p99
|
||||||
|
|
||||||
|
```ts
|
||||||
|
p99: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The 99th percentile of number of rows in the fragments
|
||||||
31
docs/src/js/interfaces/MergeStats.md
Normal file
31
docs/src/js/interfaces/MergeStats.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / MergeStats
|
||||||
|
|
||||||
|
# Interface: MergeStats
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### numDeletedRows
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numDeletedRows: bigint;
|
||||||
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### numInsertedRows
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numInsertedRows: bigint;
|
||||||
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### numUpdatedRows
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numUpdatedRows: bigint;
|
||||||
|
```
|
||||||
47
docs/src/js/interfaces/TableStatistics.md
Normal file
47
docs/src/js/interfaces/TableStatistics.md
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / TableStatistics
|
||||||
|
|
||||||
|
# Interface: TableStatistics
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### fragmentStats
|
||||||
|
|
||||||
|
```ts
|
||||||
|
fragmentStats: FragmentStatistics;
|
||||||
|
```
|
||||||
|
|
||||||
|
Statistics on table fragments
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### numIndices
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numIndices: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The number of indices in the table
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### numRows
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numRows: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The number of rows in the table
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### totalBytes
|
||||||
|
|
||||||
|
```ts
|
||||||
|
totalBytes: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The total number of bytes in the table
|
||||||
101
docs/src/quickstart.md
Normal file
101
docs/src/quickstart.md
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
|
||||||
|
# Getting Started with LanceDB: A Minimal Vector Search Tutorial
|
||||||
|
|
||||||
|
Let's set up a LanceDB database, insert vector data, and perform a simple vector search. We'll use simple character classes like "knight" and "rogue" to illustrate semantic relevance.
|
||||||
|
|
||||||
|
## 1. Install Dependencies
|
||||||
|
|
||||||
|
Before starting, make sure you have the necessary packages:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install lancedb pandas numpy
|
||||||
|
```
|
||||||
|
|
||||||
|
## 2. Import Required Libraries
|
||||||
|
|
||||||
|
```python
|
||||||
|
import lancedb
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Connect to LanceDB
|
||||||
|
|
||||||
|
You can use a local directory to store your database:
|
||||||
|
|
||||||
|
```python
|
||||||
|
db = lancedb.connect("./lancedb")
|
||||||
|
```
|
||||||
|
|
||||||
|
## 4. Create Sample Data
|
||||||
|
|
||||||
|
Add sample text data and corresponding 4D vectors:
|
||||||
|
|
||||||
|
```python
|
||||||
|
data = pd.DataFrame([
|
||||||
|
{"id": "1", "vector": [1.0, 0.0, 0.0, 0.0], "text": "knight"},
|
||||||
|
{"id": "2", "vector": [0.9, 0.1, 0.0, 0.0], "text": "warrior"},
|
||||||
|
{"id": "3", "vector": [0.0, 1.0, 0.0, 0.0], "text": "rogue"},
|
||||||
|
{"id": "4", "vector": [0.0, 0.9, 0.1, 0.0], "text": "thief"},
|
||||||
|
{"id": "5", "vector": [0.5, 0.5, 0.0, 0.0], "text": "ranger"},
|
||||||
|
])
|
||||||
|
```
|
||||||
|
|
||||||
|
## 5. Create a Table in LanceDB
|
||||||
|
|
||||||
|
```python
|
||||||
|
table = db.create_table("rpg_classes", data=data, mode="overwrite")
|
||||||
|
```
|
||||||
|
|
||||||
|
Let's see how the table looks:
|
||||||
|
```python
|
||||||
|
print(data)
|
||||||
|
```
|
||||||
|
|
||||||
|
| id | vector | text |
|
||||||
|
|----|--------|------|
|
||||||
|
| 1 | [1.0, 0.0, 0.0, 0.0] | knight |
|
||||||
|
| 2 | [0.9, 0.1, 0.0, 0.0] | warrior |
|
||||||
|
| 3 | [0.0, 1.0, 0.0, 0.0] | rogue |
|
||||||
|
| 4 | [0.0, 0.9, 0.1, 0.0] | thief |
|
||||||
|
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## 6. Perform a Vector Search
|
||||||
|
|
||||||
|
Search for the most similar character classes to our query vector:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Query as if we are searching for "rogue"
|
||||||
|
results = table.search([0.95, 0.05, 0.0, 0.0]).limit(3).to_df()
|
||||||
|
print(results)
|
||||||
|
```
|
||||||
|
|
||||||
|
This will return the top 3 closest classes to the vector, effectively showing how LanceDB can be used for semantic search.
|
||||||
|
|
||||||
|
| id | vector | text | _distance |
|
||||||
|
|------|------------------------|----------|-----------|
|
||||||
|
| 3 | [0.0, 1.0, 0.0, 0.0] | rogue | 0.00 |
|
||||||
|
| 4 | [0.0, 0.9, 0.1, 0.0] | thief | 0.02 |
|
||||||
|
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger | 0.50 |
|
||||||
|
|
||||||
|
Let's try searching for "knight"
|
||||||
|
|
||||||
|
```python
|
||||||
|
query_vector = [1.0, 0.0, 0.0, 0.0]
|
||||||
|
results = table.search(query_vector).limit(3).to_pandas()
|
||||||
|
print(results)
|
||||||
|
```
|
||||||
|
|
||||||
|
| id | vector | text | _distance |
|
||||||
|
|------|------------------------|----------|-----------|
|
||||||
|
| 1 | [1.0, 0.0, 0.0, 0.0] | knight | 0.00 |
|
||||||
|
| 2 | [0.9, 0.1, 0.0, 0.0] | warrior | 0.02 |
|
||||||
|
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger | 0.50 |
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
|
||||||
|
That's it - you just conducted vector search!
|
||||||
|
|
||||||
|
For more beginner tips, check out the [Basic Usage](basic.md) guide.
|
||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.19.1-beta.0</version>
|
<version>0.19.1-beta.1</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.19.1-beta.0</version>
|
<version>0.19.1-beta.1</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<name>LanceDB Parent</name>
|
<name>LanceDB Parent</name>
|
||||||
|
|||||||
44
node/package-lock.json
generated
44
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,11 +52,11 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.19.0",
|
"@lancedb/vectordb-darwin-arm64": "0.19.1-beta.1",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.19.0",
|
"@lancedb/vectordb-darwin-x64": "0.19.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.19.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.19.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.19.1-beta.1",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.19.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.19.1-beta.1"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
@@ -327,9 +327,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.19.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.19.1-beta.1.tgz",
|
||||||
"integrity": "sha512-cR04V8azbrEfJ3FX5WJjwvkmKySI+dS4laBWqtXaMyLDSX034E3P3Ve8jKfYdP4NaBSGlGZlySpGawEEBLH92A==",
|
"integrity": "sha512-Epvel0pF5TM6MtIWQ2KhqezqSSHTL3Wr7a2rGAwz6X/XY23i6DbMPpPs0HyeIDzDrhxNfE3cz3S+SiCA6xpR0g==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -340,9 +340,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.19.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.19.1-beta.1.tgz",
|
||||||
"integrity": "sha512-qDrui0LR4f2QqFovDx8VcbVY5So5gi0HgHWeh6kypl4R4SS+pYfW3jTPVDz1YpxxlB9GHACM5qBdul6KFpnoug==",
|
"integrity": "sha512-hOiUSlIoISbiXytp46hToi/r6sF5pImAsfbzCsIq8ExDV4TPa8fjbhcIT80vxxOwc2mpSSK4HsVJYod95RSbEQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -353,9 +353,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.19.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.19.1-beta.1.tgz",
|
||||||
"integrity": "sha512-peoq/Mh9ml2h6xSngbfVt0yeuIO3ln4/dG9mfubXPJyNlM7tANzD+IY0Xs+B03m+fXbJ7LFZ8de4aIP9pWh4iQ==",
|
"integrity": "sha512-/1JhGVDEngwrlM8o2TNW8G6nJ9U/VgHKAORmj/cTA7O30helJIoo9jfvUAUy+vZ4VoEwRXQbMI+gaYTg0l3MTg==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -366,9 +366,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.19.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.19.1-beta.1.tgz",
|
||||||
"integrity": "sha512-MUsOXk+InI0ywuygcHvYG8+awrJUnsbrUstTPETN2+QAV7QOX+TlafupLUUrfp1/pUOPt/ZraHEaqFRw1Vdxqg==",
|
"integrity": "sha512-zNRGSSUt8nTJMmll4NdxhQjwxR8Rezq3T4dsRoiDts5ienMam5HFjYiZ3FkDZQo16rgq2BcbFuH1G8u1chywlg==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -379,9 +379,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.19.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.19.1-beta.1.tgz",
|
||||||
"integrity": "sha512-stk3uqMAbHxTodmzqMPKUl54GBfVKNDMR3EIo3d299QcXyOdSuEeHgeZa+iy0hHeIFL0TqHi4o8tStNzFLBAHg==",
|
"integrity": "sha512-yV550AJGlsIFdm1KoHQPJ1TZx121ZXCIdebBtBZj3wOObIhyB/i0kZAtGvwjkmr7EYyfzt1EHZzbjSGVdehIAA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"private": false,
|
"private": false,
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
@@ -89,10 +89,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-x64": "0.19.1-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.19.1-beta.1",
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.19.1-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.19.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.19.1-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.19.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.19.1-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.19.1-beta.1",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.19.1-beta.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.19.1-beta.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.19.1-beta.0"
|
version = "0.19.1-beta.1"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -374,6 +374,71 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
expect(table2.numRows).toBe(4);
|
expect(table2.numRows).toBe(4);
|
||||||
expect(table2.schema).toEqual(schema);
|
expect(table2.schema).toEqual(schema);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should correctly retain values in nested struct fields", async function () {
|
||||||
|
// Define test data with nested struct
|
||||||
|
const testData = [
|
||||||
|
{
|
||||||
|
id: "doc1",
|
||||||
|
vector: [1, 2, 3],
|
||||||
|
metadata: {
|
||||||
|
filePath: "/path/to/file1.ts",
|
||||||
|
startLine: 10,
|
||||||
|
endLine: 20,
|
||||||
|
text: "function test() { return true; }",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: "doc2",
|
||||||
|
vector: [4, 5, 6],
|
||||||
|
metadata: {
|
||||||
|
filePath: "/path/to/file2.ts",
|
||||||
|
startLine: 30,
|
||||||
|
endLine: 40,
|
||||||
|
text: "function test2() { return false; }",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
// Create Arrow table from the data
|
||||||
|
const table = makeArrowTable(testData);
|
||||||
|
|
||||||
|
// Verify schema has the nested struct fields
|
||||||
|
const metadataField = table.schema.fields.find(
|
||||||
|
(f) => f.name === "metadata",
|
||||||
|
);
|
||||||
|
expect(metadataField).toBeDefined();
|
||||||
|
// biome-ignore lint/suspicious/noExplicitAny: accessing fields in different Arrow versions
|
||||||
|
const childNames = metadataField?.type.children.map((c: any) => c.name);
|
||||||
|
expect(childNames).toEqual([
|
||||||
|
"filePath",
|
||||||
|
"startLine",
|
||||||
|
"endLine",
|
||||||
|
"text",
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Convert to buffer and back (simulating storage and retrieval)
|
||||||
|
const buf = await fromTableToBuffer(table);
|
||||||
|
const retrievedTable = tableFromIPC(buf);
|
||||||
|
|
||||||
|
// Verify the retrieved table has the same structure
|
||||||
|
const rows = [];
|
||||||
|
for (let i = 0; i < retrievedTable.numRows; i++) {
|
||||||
|
rows.push(retrievedTable.get(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check values in the first row
|
||||||
|
const firstRow = rows[0];
|
||||||
|
expect(firstRow.id).toBe("doc1");
|
||||||
|
expect(firstRow.vector.toJSON()).toEqual([1, 2, 3]);
|
||||||
|
|
||||||
|
// Verify metadata values are preserved (this is where the bug is)
|
||||||
|
expect(firstRow.metadata).toBeDefined();
|
||||||
|
expect(firstRow.metadata.filePath).toBe("/path/to/file1.ts");
|
||||||
|
expect(firstRow.metadata.startLine).toBe(10);
|
||||||
|
expect(firstRow.metadata.endLine).toBe(20);
|
||||||
|
expect(firstRow.metadata.text).toBe("function test() { return true; }");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
class DummyEmbedding extends EmbeddingFunction<string> {
|
class DummyEmbedding extends EmbeddingFunction<string> {
|
||||||
|
|||||||
@@ -71,6 +71,29 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
await expect(table.countRows()).resolves.toBe(3);
|
await expect(table.countRows()).resolves.toBe(3);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should show table stats", async () => {
|
||||||
|
await table.add([{ id: 1 }, { id: 2 }]);
|
||||||
|
await table.add([{ id: 1 }]);
|
||||||
|
await expect(table.stats()).resolves.toEqual({
|
||||||
|
fragmentStats: {
|
||||||
|
lengths: {
|
||||||
|
max: 2,
|
||||||
|
mean: 1,
|
||||||
|
min: 1,
|
||||||
|
p25: 1,
|
||||||
|
p50: 2,
|
||||||
|
p75: 2,
|
||||||
|
p99: 2,
|
||||||
|
},
|
||||||
|
numFragments: 2,
|
||||||
|
numSmallFragments: 2,
|
||||||
|
},
|
||||||
|
numIndices: 0,
|
||||||
|
numRows: 3,
|
||||||
|
totalBytes: 24,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it("should overwrite data if asked", async () => {
|
it("should overwrite data if asked", async () => {
|
||||||
await table.add([{ id: 1 }, { id: 2 }]);
|
await table.add([{ id: 1 }, { id: 2 }]);
|
||||||
await table.add([{ id: 1 }], { mode: "overwrite" });
|
await table.add([{ id: 1 }], { mode: "overwrite" });
|
||||||
@@ -315,11 +338,16 @@ describe("merge insert", () => {
|
|||||||
{ a: 3, b: "y" },
|
{ a: 3, b: "y" },
|
||||||
{ a: 4, b: "z" },
|
{ a: 4, b: "z" },
|
||||||
];
|
];
|
||||||
await table
|
const stats = await table
|
||||||
.mergeInsert("a")
|
.mergeInsert("a")
|
||||||
.whenMatchedUpdateAll()
|
.whenMatchedUpdateAll()
|
||||||
.whenNotMatchedInsertAll()
|
.whenNotMatchedInsertAll()
|
||||||
.execute(newData);
|
.execute(newData);
|
||||||
|
|
||||||
|
expect(stats.numInsertedRows).toBe(1n);
|
||||||
|
expect(stats.numUpdatedRows).toBe(2n);
|
||||||
|
expect(stats.numDeletedRows).toBe(0n);
|
||||||
|
|
||||||
const expected = [
|
const expected = [
|
||||||
{ a: 1, b: "a" },
|
{ a: 1, b: "a" },
|
||||||
{ a: 2, b: "x" },
|
{ a: 2, b: "x" },
|
||||||
|
|||||||
@@ -639,8 +639,9 @@ function transposeData(
|
|||||||
): Vector {
|
): Vector {
|
||||||
if (field.type instanceof Struct) {
|
if (field.type instanceof Struct) {
|
||||||
const childFields = field.type.children;
|
const childFields = field.type.children;
|
||||||
|
const fullPath = [...path, field.name];
|
||||||
const childVectors = childFields.map((child) => {
|
const childVectors = childFields.map((child) => {
|
||||||
return transposeData(data, child, [...path, child.name]);
|
return transposeData(data, child, fullPath);
|
||||||
});
|
});
|
||||||
const structData = makeData({
|
const structData = makeData({
|
||||||
type: field.type,
|
type: field.type,
|
||||||
@@ -652,7 +653,14 @@ function transposeData(
|
|||||||
const values = data.map((datum) => {
|
const values = data.map((datum) => {
|
||||||
let current: unknown = datum;
|
let current: unknown = datum;
|
||||||
for (const key of valuesPath) {
|
for (const key of valuesPath) {
|
||||||
if (isObject(current) && Object.hasOwn(current, key)) {
|
if (current == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
isObject(current) &&
|
||||||
|
(Object.hasOwn(current, key) || key in current)
|
||||||
|
) {
|
||||||
current = current[key];
|
current = current[key];
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
@@ -23,8 +23,12 @@ export {
|
|||||||
OptimizeStats,
|
OptimizeStats,
|
||||||
CompactionStats,
|
CompactionStats,
|
||||||
RemovalStats,
|
RemovalStats,
|
||||||
|
TableStatistics,
|
||||||
|
FragmentStatistics,
|
||||||
|
FragmentSummaryStats,
|
||||||
Tags,
|
Tags,
|
||||||
TagContents,
|
TagContents,
|
||||||
|
MergeStats,
|
||||||
} from "./native.js";
|
} from "./native.js";
|
||||||
|
|
||||||
export {
|
export {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
import { Data, Schema, fromDataToBuffer } from "./arrow";
|
import { Data, Schema, fromDataToBuffer } from "./arrow";
|
||||||
import { NativeMergeInsertBuilder } from "./native";
|
import { MergeStats, NativeMergeInsertBuilder } from "./native";
|
||||||
|
|
||||||
/** A builder used to create and run a merge insert operation */
|
/** A builder used to create and run a merge insert operation */
|
||||||
export class MergeInsertBuilder {
|
export class MergeInsertBuilder {
|
||||||
@@ -73,9 +73,9 @@ export class MergeInsertBuilder {
|
|||||||
/**
|
/**
|
||||||
* Executes the merge insert operation
|
* Executes the merge insert operation
|
||||||
*
|
*
|
||||||
* Nothing is returned but the `Table` is updated
|
* @returns Statistics about the merge operation: counts of inserted, updated, and deleted rows
|
||||||
*/
|
*/
|
||||||
async execute(data: Data): Promise<void> {
|
async execute(data: Data): Promise<MergeStats> {
|
||||||
let schema: Schema;
|
let schema: Schema;
|
||||||
if (this.#schema instanceof Promise) {
|
if (this.#schema instanceof Promise) {
|
||||||
schema = await this.#schema;
|
schema = await this.#schema;
|
||||||
@@ -84,6 +84,6 @@ export class MergeInsertBuilder {
|
|||||||
schema = this.#schema;
|
schema = this.#schema;
|
||||||
}
|
}
|
||||||
const buffer = await fromDataToBuffer(data, undefined, schema);
|
const buffer = await fromDataToBuffer(data, undefined, schema);
|
||||||
await this.#native.execute(buffer);
|
return await this.#native.execute(buffer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import {
|
|||||||
IndexConfig,
|
IndexConfig,
|
||||||
IndexStatistics,
|
IndexStatistics,
|
||||||
OptimizeStats,
|
OptimizeStats,
|
||||||
|
TableStatistics,
|
||||||
Tags,
|
Tags,
|
||||||
Table as _NativeTable,
|
Table as _NativeTable,
|
||||||
} from "./native";
|
} from "./native";
|
||||||
@@ -482,6 +483,13 @@ export abstract class Table {
|
|||||||
* Use {@link Table.listIndices} to find the names of the indices.
|
* Use {@link Table.listIndices} to find the names of the indices.
|
||||||
*/
|
*/
|
||||||
abstract indexStats(name: string): Promise<IndexStatistics | undefined>;
|
abstract indexStats(name: string): Promise<IndexStatistics | undefined>;
|
||||||
|
|
||||||
|
/** Returns table and fragment statistics
|
||||||
|
*
|
||||||
|
* @returns {TableStatistics} The table and fragment statistics
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
abstract stats(): Promise<TableStatistics>;
|
||||||
}
|
}
|
||||||
|
|
||||||
export class LocalTable extends Table {
|
export class LocalTable extends Table {
|
||||||
@@ -775,6 +783,11 @@ export class LocalTable extends Table {
|
|||||||
}
|
}
|
||||||
return stats;
|
return stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async stats(): Promise<TableStatistics> {
|
||||||
|
return await this.inner.stats();
|
||||||
|
}
|
||||||
|
|
||||||
mergeInsert(on: string | string[]): MergeInsertBuilder {
|
mergeInsert(on: string | string[]): MergeInsertBuilder {
|
||||||
on = Array.isArray(on) ? on : [on];
|
on = Array.isArray(on) ? on : [on];
|
||||||
return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
|
return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.19.0",
|
"version": "0.19.1-beta.1",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"private": false,
|
"private": false,
|
||||||
"version": "0.19.1-beta.0",
|
"version": "0.19.1-beta.1",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ impl NativeMergeInsertBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn execute(&self, buf: Buffer) -> napi::Result<()> {
|
pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeStats> {
|
||||||
let data = ipc_file_to_batches(buf.to_vec())
|
let data = ipc_file_to_batches(buf.to_vec())
|
||||||
.and_then(IntoArrow::into_arrow)
|
.and_then(IntoArrow::into_arrow)
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
@@ -46,12 +46,14 @@ impl NativeMergeInsertBuilder {
|
|||||||
|
|
||||||
let this = self.clone();
|
let this = self.clone();
|
||||||
|
|
||||||
this.inner.execute(data).await.map_err(|e| {
|
let stats = this.inner.execute(data).await.map_err(|e| {
|
||||||
napi::Error::from_reason(format!(
|
napi::Error::from_reason(format!(
|
||||||
"Failed to execute merge insert: {}",
|
"Failed to execute merge insert: {}",
|
||||||
convert_error(&e)
|
convert_error(&e)
|
||||||
))
|
))
|
||||||
})
|
})?;
|
||||||
|
|
||||||
|
Ok(stats.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -60,3 +62,20 @@ impl From<MergeInsertBuilder> for NativeMergeInsertBuilder {
|
|||||||
Self { inner }
|
Self { inner }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi(object)]
|
||||||
|
pub struct MergeStats {
|
||||||
|
pub num_inserted_rows: BigInt,
|
||||||
|
pub num_updated_rows: BigInt,
|
||||||
|
pub num_deleted_rows: BigInt,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<lancedb::table::MergeStats> for MergeStats {
|
||||||
|
fn from(stats: lancedb::table::MergeStats) -> Self {
|
||||||
|
Self {
|
||||||
|
num_inserted_rows: stats.num_inserted_rows.into(),
|
||||||
|
num_updated_rows: stats.num_updated_rows.into(),
|
||||||
|
num_deleted_rows: stats.num_deleted_rows.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -157,6 +157,12 @@ impl Table {
|
|||||||
.default_error()
|
.default_error()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi(catch_unwind)]
|
||||||
|
pub async fn stats(&self) -> Result<TableStatistics> {
|
||||||
|
let stats = self.inner_ref()?.stats().await.default_error()?;
|
||||||
|
Ok(stats.into())
|
||||||
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn update(
|
pub async fn update(
|
||||||
&self,
|
&self,
|
||||||
@@ -555,6 +561,80 @@ impl From<lancedb::index::IndexStatistics> for IndexStatistics {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi(object)]
|
||||||
|
pub struct TableStatistics {
|
||||||
|
/// The total number of bytes in the table
|
||||||
|
pub total_bytes: i64,
|
||||||
|
|
||||||
|
/// The number of rows in the table
|
||||||
|
pub num_rows: i64,
|
||||||
|
|
||||||
|
/// The number of indices in the table
|
||||||
|
pub num_indices: i64,
|
||||||
|
|
||||||
|
/// Statistics on table fragments
|
||||||
|
pub fragment_stats: FragmentStatistics,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi(object)]
|
||||||
|
pub struct FragmentStatistics {
|
||||||
|
/// The number of fragments in the table
|
||||||
|
pub num_fragments: i64,
|
||||||
|
|
||||||
|
/// The number of uncompacted fragments in the table
|
||||||
|
pub num_small_fragments: i64,
|
||||||
|
|
||||||
|
/// Statistics on the number of rows in the table fragments
|
||||||
|
pub lengths: FragmentSummaryStats,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi(object)]
|
||||||
|
pub struct FragmentSummaryStats {
|
||||||
|
/// The number of rows in the fragment with the fewest rows
|
||||||
|
pub min: i64,
|
||||||
|
|
||||||
|
/// The number of rows in the fragment with the most rows
|
||||||
|
pub max: i64,
|
||||||
|
|
||||||
|
/// The mean number of rows in the fragments
|
||||||
|
pub mean: i64,
|
||||||
|
|
||||||
|
/// The 25th percentile of number of rows in the fragments
|
||||||
|
pub p25: i64,
|
||||||
|
|
||||||
|
/// The 50th percentile of number of rows in the fragments
|
||||||
|
pub p50: i64,
|
||||||
|
|
||||||
|
/// The 75th percentile of number of rows in the fragments
|
||||||
|
pub p75: i64,
|
||||||
|
|
||||||
|
/// The 99th percentile of number of rows in the fragments
|
||||||
|
pub p99: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<lancedb::table::TableStatistics> for TableStatistics {
|
||||||
|
fn from(v: lancedb::table::TableStatistics) -> Self {
|
||||||
|
Self {
|
||||||
|
total_bytes: v.total_bytes as i64,
|
||||||
|
num_rows: v.num_rows as i64,
|
||||||
|
num_indices: v.num_indices as i64,
|
||||||
|
fragment_stats: FragmentStatistics {
|
||||||
|
num_fragments: v.fragment_stats.num_fragments as i64,
|
||||||
|
num_small_fragments: v.fragment_stats.num_small_fragments as i64,
|
||||||
|
lengths: FragmentSummaryStats {
|
||||||
|
min: v.fragment_stats.lengths.min as i64,
|
||||||
|
max: v.fragment_stats.lengths.max as i64,
|
||||||
|
mean: v.fragment_stats.lengths.mean as i64,
|
||||||
|
p25: v.fragment_stats.lengths.p25 as i64,
|
||||||
|
p50: v.fragment_stats.lengths.p50 as i64,
|
||||||
|
p75: v.fragment_stats.lengths.p75 as i64,
|
||||||
|
p99: v.fragment_stats.lengths.p99 as i64,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[napi(object)]
|
#[napi(object)]
|
||||||
pub struct Version {
|
pub struct Version {
|
||||||
pub version: i64,
|
pub version: i64,
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.22.1-beta.0"
|
current_version = "0.22.1-beta.1"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.22.1-beta.0"
|
version = "0.22.1-beta.1"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ dependencies = [
|
|||||||
"numpy",
|
"numpy",
|
||||||
"overrides>=0.7",
|
"overrides>=0.7",
|
||||||
"packaging",
|
"packaging",
|
||||||
"pyarrow>=14",
|
"pyarrow>=16",
|
||||||
"pydantic>=1.10",
|
"pydantic>=1.10",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -578,6 +578,9 @@ class RemoteTable(Table):
|
|||||||
):
|
):
|
||||||
return LOOP.run(self._table.wait_for_index(index_names, timeout))
|
return LOOP.run(self._table.wait_for_index(index_names, timeout))
|
||||||
|
|
||||||
|
def stats(self):
|
||||||
|
return LOOP.run(self._table.stats())
|
||||||
|
|
||||||
def uses_v2_manifest_paths(self) -> bool:
|
def uses_v2_manifest_paths(self) -> bool:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"uses_v2_manifest_paths() is not supported on the LanceDB Cloud"
|
"uses_v2_manifest_paths() is not supported on the LanceDB Cloud"
|
||||||
|
|||||||
@@ -739,6 +739,13 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def stats(self) -> TableStatistics:
|
||||||
|
"""
|
||||||
|
Retrieve table and fragment statistics.
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def create_scalar_index(
|
def create_scalar_index(
|
||||||
self,
|
self,
|
||||||
@@ -955,10 +962,12 @@ class Table(ABC):
|
|||||||
>>> table = db.create_table("my_table", data)
|
>>> table = db.create_table("my_table", data)
|
||||||
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
||||||
>>> # Perform a "upsert" operation
|
>>> # Perform a "upsert" operation
|
||||||
>>> table.merge_insert("a") \\
|
>>> stats = table.merge_insert("a") \\
|
||||||
... .when_matched_update_all() \\
|
... .when_matched_update_all() \\
|
||||||
... .when_not_matched_insert_all() \\
|
... .when_not_matched_insert_all() \\
|
||||||
... .execute(new_data)
|
... .execute(new_data)
|
||||||
|
>>> stats
|
||||||
|
{'num_inserted_rows': 1, 'num_updated_rows': 2, 'num_deleted_rows': 0}
|
||||||
>>> # The order of new rows is non-deterministic since we use
|
>>> # The order of new rows is non-deterministic since we use
|
||||||
>>> # a hash-join as part of this operation and so we sort here
|
>>> # a hash-join as part of this operation and so we sort here
|
||||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||||
@@ -1876,6 +1885,9 @@ class LanceTable(Table):
|
|||||||
) -> None:
|
) -> None:
|
||||||
return LOOP.run(self._table.wait_for_index(index_names, timeout))
|
return LOOP.run(self._table.wait_for_index(index_names, timeout))
|
||||||
|
|
||||||
|
def stats(self) -> TableStatistics:
|
||||||
|
return LOOP.run(self._table.stats())
|
||||||
|
|
||||||
def create_scalar_index(
|
def create_scalar_index(
|
||||||
self,
|
self,
|
||||||
column: str,
|
column: str,
|
||||||
@@ -2479,7 +2491,9 @@ class LanceTable(Table):
|
|||||||
on_bad_vectors: OnBadVectorsType,
|
on_bad_vectors: OnBadVectorsType,
|
||||||
fill_value: float,
|
fill_value: float,
|
||||||
):
|
):
|
||||||
LOOP.run(self._table._do_merge(merge, new_data, on_bad_vectors, fill_value))
|
return LOOP.run(
|
||||||
|
self._table._do_merge(merge, new_data, on_bad_vectors, fill_value)
|
||||||
|
)
|
||||||
|
|
||||||
@deprecation.deprecated(
|
@deprecation.deprecated(
|
||||||
deprecated_in="0.21.0",
|
deprecated_in="0.21.0",
|
||||||
@@ -3170,6 +3184,12 @@ class AsyncTable:
|
|||||||
"""
|
"""
|
||||||
await self._inner.wait_for_index(index_names, timeout)
|
await self._inner.wait_for_index(index_names, timeout)
|
||||||
|
|
||||||
|
async def stats(self) -> TableStatistics:
|
||||||
|
"""
|
||||||
|
Retrieve table and fragment statistics.
|
||||||
|
"""
|
||||||
|
return await self._inner.stats()
|
||||||
|
|
||||||
async def add(
|
async def add(
|
||||||
self,
|
self,
|
||||||
data: DATA,
|
data: DATA,
|
||||||
@@ -3261,10 +3281,12 @@ class AsyncTable:
|
|||||||
>>> table = db.create_table("my_table", data)
|
>>> table = db.create_table("my_table", data)
|
||||||
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
||||||
>>> # Perform a "upsert" operation
|
>>> # Perform a "upsert" operation
|
||||||
>>> table.merge_insert("a") \\
|
>>> stats = table.merge_insert("a") \\
|
||||||
... .when_matched_update_all() \\
|
... .when_matched_update_all() \\
|
||||||
... .when_not_matched_insert_all() \\
|
... .when_not_matched_insert_all() \\
|
||||||
... .execute(new_data)
|
... .execute(new_data)
|
||||||
|
>>> stats
|
||||||
|
{'num_inserted_rows': 1, 'num_updated_rows': 2, 'num_deleted_rows': 0}
|
||||||
>>> # The order of new rows is non-deterministic since we use
|
>>> # The order of new rows is non-deterministic since we use
|
||||||
>>> # a hash-join as part of this operation and so we sort here
|
>>> # a hash-join as part of this operation and so we sort here
|
||||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||||
@@ -3620,7 +3642,7 @@ class AsyncTable:
|
|||||||
)
|
)
|
||||||
if isinstance(data, pa.Table):
|
if isinstance(data, pa.Table):
|
||||||
data = pa.RecordBatchReader.from_batches(data.schema, data.to_batches())
|
data = pa.RecordBatchReader.from_batches(data.schema, data.to_batches())
|
||||||
await self._inner.execute_merge_insert(
|
return await self._inner.execute_merge_insert(
|
||||||
data,
|
data,
|
||||||
dict(
|
dict(
|
||||||
on=merge._on,
|
on=merge._on,
|
||||||
@@ -4068,6 +4090,82 @@ class IndexStatistics:
|
|||||||
return getattr(self, key)
|
return getattr(self, key)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TableStatistics:
|
||||||
|
"""
|
||||||
|
Statistics about a table and fragments.
|
||||||
|
|
||||||
|
Attributes
|
||||||
|
----------
|
||||||
|
total_bytes: int
|
||||||
|
The total number of bytes in the table.
|
||||||
|
num_rows: int
|
||||||
|
The total number of rows in the table.
|
||||||
|
num_indices: int
|
||||||
|
The total number of indices in the table.
|
||||||
|
fragment_stats: FragmentStatistics
|
||||||
|
Statistics about fragments in the table.
|
||||||
|
"""
|
||||||
|
|
||||||
|
total_bytes: int
|
||||||
|
num_rows: int
|
||||||
|
num_indices: int
|
||||||
|
fragment_stats: FragmentStatistics
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class FragmentStatistics:
|
||||||
|
"""
|
||||||
|
Statistics about fragments.
|
||||||
|
|
||||||
|
Attributes
|
||||||
|
----------
|
||||||
|
num_fragments: int
|
||||||
|
The total number of fragments in the table.
|
||||||
|
num_small_fragments: int
|
||||||
|
The total number of small fragments in the table.
|
||||||
|
Small fragments have low row counts and may need to be compacted.
|
||||||
|
lengths: FragmentSummaryStats
|
||||||
|
Statistics about the number of rows in the table fragments.
|
||||||
|
"""
|
||||||
|
|
||||||
|
num_fragments: int
|
||||||
|
num_small_fragments: int
|
||||||
|
lengths: FragmentSummaryStats
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class FragmentSummaryStats:
|
||||||
|
"""
|
||||||
|
Statistics about fragments sizes
|
||||||
|
|
||||||
|
Attributes
|
||||||
|
----------
|
||||||
|
min: int
|
||||||
|
The number of rows in the fragment with the fewest rows.
|
||||||
|
max: int
|
||||||
|
The number of rows in the fragment with the most rows.
|
||||||
|
mean: int
|
||||||
|
The mean number of rows in the fragments.
|
||||||
|
p25: int
|
||||||
|
The 25th percentile of number of rows in the fragments.
|
||||||
|
p50: int
|
||||||
|
The 50th percentile of number of rows in the fragments.
|
||||||
|
p75: int
|
||||||
|
The 75th percentile of number of rows in the fragments.
|
||||||
|
p99: int
|
||||||
|
The 99th percentile of number of rows in the fragments.
|
||||||
|
"""
|
||||||
|
|
||||||
|
min: int
|
||||||
|
max: int
|
||||||
|
mean: int
|
||||||
|
p25: int
|
||||||
|
p50: int
|
||||||
|
p75: int
|
||||||
|
p99: int
|
||||||
|
|
||||||
|
|
||||||
class Tags:
|
class Tags:
|
||||||
"""
|
"""
|
||||||
Table tag manager.
|
Table tag manager.
|
||||||
|
|||||||
@@ -18,15 +18,19 @@ def test_upsert(mem_db):
|
|||||||
{"id": 1, "name": "Bobby"},
|
{"id": 1, "name": "Bobby"},
|
||||||
{"id": 2, "name": "Charlie"},
|
{"id": 2, "name": "Charlie"},
|
||||||
]
|
]
|
||||||
(
|
stats = (
|
||||||
table.merge_insert("id")
|
table.merge_insert("id")
|
||||||
.when_matched_update_all()
|
.when_matched_update_all()
|
||||||
.when_not_matched_insert_all()
|
.when_not_matched_insert_all()
|
||||||
.execute(new_users)
|
.execute(new_users)
|
||||||
)
|
)
|
||||||
table.count_rows() # 3
|
table.count_rows() # 3
|
||||||
|
stats # {'num_inserted_rows': 1, 'num_updated_rows': 1, 'num_deleted_rows': 0}
|
||||||
# --8<-- [end:upsert_basic]
|
# --8<-- [end:upsert_basic]
|
||||||
assert table.count_rows() == 3
|
assert table.count_rows() == 3
|
||||||
|
assert stats["num_inserted_rows"] == 1
|
||||||
|
assert stats["num_updated_rows"] == 1
|
||||||
|
assert stats["num_deleted_rows"] == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -44,15 +48,19 @@ async def test_upsert_async(mem_db_async):
|
|||||||
{"id": 1, "name": "Bobby"},
|
{"id": 1, "name": "Bobby"},
|
||||||
{"id": 2, "name": "Charlie"},
|
{"id": 2, "name": "Charlie"},
|
||||||
]
|
]
|
||||||
await (
|
stats = await (
|
||||||
table.merge_insert("id")
|
table.merge_insert("id")
|
||||||
.when_matched_update_all()
|
.when_matched_update_all()
|
||||||
.when_not_matched_insert_all()
|
.when_not_matched_insert_all()
|
||||||
.execute(new_users)
|
.execute(new_users)
|
||||||
)
|
)
|
||||||
await table.count_rows() # 3
|
await table.count_rows() # 3
|
||||||
|
stats # {'num_inserted_rows': 1, 'num_updated_rows': 1, 'num_deleted_rows': 0}
|
||||||
# --8<-- [end:upsert_basic_async]
|
# --8<-- [end:upsert_basic_async]
|
||||||
assert await table.count_rows() == 3
|
assert await table.count_rows() == 3
|
||||||
|
assert stats["num_inserted_rows"] == 1
|
||||||
|
assert stats["num_updated_rows"] == 1
|
||||||
|
assert stats["num_deleted_rows"] == 0
|
||||||
|
|
||||||
|
|
||||||
def test_insert_if_not_exists(mem_db):
|
def test_insert_if_not_exists(mem_db):
|
||||||
@@ -69,10 +77,16 @@ def test_insert_if_not_exists(mem_db):
|
|||||||
{"domain": "google.com", "name": "Google"},
|
{"domain": "google.com", "name": "Google"},
|
||||||
{"domain": "facebook.com", "name": "Facebook"},
|
{"domain": "facebook.com", "name": "Facebook"},
|
||||||
]
|
]
|
||||||
(table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains))
|
stats = (
|
||||||
|
table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
|
||||||
|
)
|
||||||
table.count_rows() # 3
|
table.count_rows() # 3
|
||||||
|
stats # {'num_inserted_rows': 1, 'num_updated_rows': 0, 'num_deleted_rows': 0}
|
||||||
# --8<-- [end:insert_if_not_exists]
|
# --8<-- [end:insert_if_not_exists]
|
||||||
assert table.count_rows() == 3
|
assert table.count_rows() == 3
|
||||||
|
assert stats["num_inserted_rows"] == 1
|
||||||
|
assert stats["num_updated_rows"] == 0
|
||||||
|
assert stats["num_deleted_rows"] == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -90,12 +104,16 @@ async def test_insert_if_not_exists_async(mem_db_async):
|
|||||||
{"domain": "google.com", "name": "Google"},
|
{"domain": "google.com", "name": "Google"},
|
||||||
{"domain": "facebook.com", "name": "Facebook"},
|
{"domain": "facebook.com", "name": "Facebook"},
|
||||||
]
|
]
|
||||||
await (
|
stats = await (
|
||||||
table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
|
table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
|
||||||
)
|
)
|
||||||
await table.count_rows() # 3
|
await table.count_rows() # 3
|
||||||
|
stats # {'num_inserted_rows': 1, 'num_updated_rows': 0, 'num_deleted_rows': 0}
|
||||||
# --8<-- [end:insert_if_not_exists_async]
|
# --8<-- [end:insert_if_not_exists_async]
|
||||||
assert await table.count_rows() == 3
|
assert await table.count_rows() == 3
|
||||||
|
assert stats["num_inserted_rows"] == 1
|
||||||
|
assert stats["num_updated_rows"] == 0
|
||||||
|
assert stats["num_deleted_rows"] == 0
|
||||||
|
|
||||||
|
|
||||||
def test_replace_range(mem_db):
|
def test_replace_range(mem_db):
|
||||||
@@ -113,7 +131,7 @@ def test_replace_range(mem_db):
|
|||||||
new_chunks = [
|
new_chunks = [
|
||||||
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
||||||
]
|
]
|
||||||
(
|
stats = (
|
||||||
table.merge_insert(["doc_id", "chunk_id"])
|
table.merge_insert(["doc_id", "chunk_id"])
|
||||||
.when_matched_update_all()
|
.when_matched_update_all()
|
||||||
.when_not_matched_insert_all()
|
.when_not_matched_insert_all()
|
||||||
@@ -121,8 +139,12 @@ def test_replace_range(mem_db):
|
|||||||
.execute(new_chunks)
|
.execute(new_chunks)
|
||||||
)
|
)
|
||||||
table.count_rows("doc_id = 1") # 1
|
table.count_rows("doc_id = 1") # 1
|
||||||
|
stats # {'num_inserted_rows': 0, 'num_updated_rows': 1, 'num_deleted_rows': 1}
|
||||||
# --8<-- [end:replace_range]
|
# --8<-- [end:replace_range]
|
||||||
assert table.count_rows("doc_id = 1") == 1
|
assert table.count_rows("doc_id = 1") == 1
|
||||||
|
assert stats["num_inserted_rows"] == 0
|
||||||
|
assert stats["num_updated_rows"] == 1
|
||||||
|
assert stats["num_deleted_rows"] == 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -141,7 +163,7 @@ async def test_replace_range_async(mem_db_async):
|
|||||||
new_chunks = [
|
new_chunks = [
|
||||||
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
||||||
]
|
]
|
||||||
await (
|
stats = await (
|
||||||
table.merge_insert(["doc_id", "chunk_id"])
|
table.merge_insert(["doc_id", "chunk_id"])
|
||||||
.when_matched_update_all()
|
.when_matched_update_all()
|
||||||
.when_not_matched_insert_all()
|
.when_not_matched_insert_all()
|
||||||
@@ -149,5 +171,9 @@ async def test_replace_range_async(mem_db_async):
|
|||||||
.execute(new_chunks)
|
.execute(new_chunks)
|
||||||
)
|
)
|
||||||
await table.count_rows("doc_id = 1") # 1
|
await table.count_rows("doc_id = 1") # 1
|
||||||
|
stats # {'num_inserted_rows': 0, 'num_updated_rows': 1, 'num_deleted_rows': 1}
|
||||||
# --8<-- [end:replace_range_async]
|
# --8<-- [end:replace_range_async]
|
||||||
assert await table.count_rows("doc_id = 1") == 1
|
assert await table.count_rows("doc_id = 1") == 1
|
||||||
|
assert stats["num_inserted_rows"] == 0
|
||||||
|
assert stats["num_updated_rows"] == 1
|
||||||
|
assert stats["num_deleted_rows"] == 1
|
||||||
|
|||||||
@@ -389,6 +389,50 @@ def test_table_wait_for_index_timeout():
|
|||||||
table.wait_for_index(["id_idx"], timedelta(seconds=1))
|
table.wait_for_index(["id_idx"], timedelta(seconds=1))
|
||||||
|
|
||||||
|
|
||||||
|
def test_stats():
|
||||||
|
stats = {
|
||||||
|
"total_bytes": 38,
|
||||||
|
"num_rows": 2,
|
||||||
|
"num_indices": 0,
|
||||||
|
"fragment_stats": {
|
||||||
|
"num_fragments": 1,
|
||||||
|
"num_small_fragments": 1,
|
||||||
|
"lengths": {
|
||||||
|
"min": 2,
|
||||||
|
"max": 2,
|
||||||
|
"mean": 2,
|
||||||
|
"p25": 2,
|
||||||
|
"p50": 2,
|
||||||
|
"p75": 2,
|
||||||
|
"p99": 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def handler(request):
|
||||||
|
if request.path == "/v1/table/test/create/?mode=create":
|
||||||
|
request.send_response(200)
|
||||||
|
request.send_header("Content-Type", "application/json")
|
||||||
|
request.end_headers()
|
||||||
|
request.wfile.write(b"{}")
|
||||||
|
elif request.path == "/v1/table/test/stats/":
|
||||||
|
request.send_response(200)
|
||||||
|
request.send_header("Content-Type", "application/json")
|
||||||
|
request.end_headers()
|
||||||
|
payload = json.dumps(stats)
|
||||||
|
request.wfile.write(payload.encode())
|
||||||
|
else:
|
||||||
|
print(request.path)
|
||||||
|
request.send_response(404)
|
||||||
|
request.end_headers()
|
||||||
|
|
||||||
|
with mock_lancedb_connection(handler) as db:
|
||||||
|
table = db.create_table("test", [{"id": 1}])
|
||||||
|
res = table.stats()
|
||||||
|
print(f"{res=}")
|
||||||
|
assert res == stats
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def query_test_table(query_handler, *, server_version=Version("0.1.0")):
|
def query_test_table(query_handler, *, server_version=Version("0.1.0")):
|
||||||
def handler(request):
|
def handler(request):
|
||||||
|
|||||||
@@ -1695,3 +1695,31 @@ def test_replace_field_metadata(tmp_path):
|
|||||||
schema = table.schema
|
schema = table.schema
|
||||||
field = schema[0].metadata
|
field = schema[0].metadata
|
||||||
assert field == {b"foo": b"bar"}
|
assert field == {b"foo": b"bar"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_stats(mem_db: DBConnection):
|
||||||
|
table = mem_db.create_table(
|
||||||
|
"my_table",
|
||||||
|
data=[{"text": "foo", "id": 0}, {"text": "bar", "id": 1}],
|
||||||
|
)
|
||||||
|
assert len(table) == 2
|
||||||
|
stats = table.stats()
|
||||||
|
print(f"{stats=}")
|
||||||
|
assert stats == {
|
||||||
|
"total_bytes": 38,
|
||||||
|
"num_rows": 2,
|
||||||
|
"num_indices": 0,
|
||||||
|
"fragment_stats": {
|
||||||
|
"num_fragments": 1,
|
||||||
|
"num_small_fragments": 1,
|
||||||
|
"lengths": {
|
||||||
|
"min": 2,
|
||||||
|
"max": 2,
|
||||||
|
"mean": 2,
|
||||||
|
"p25": 2,
|
||||||
|
"p50": 2,
|
||||||
|
"p75": 2,
|
||||||
|
"p99": 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|||||||
@@ -279,6 +279,40 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn stats(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
|
let inner = self_.inner_ref()?.clone();
|
||||||
|
future_into_py(self_.py(), async move {
|
||||||
|
let stats = inner.stats().await.infer_error()?;
|
||||||
|
Python::with_gil(|py| {
|
||||||
|
let dict = PyDict::new(py);
|
||||||
|
dict.set_item("total_bytes", stats.total_bytes)?;
|
||||||
|
dict.set_item("num_rows", stats.num_rows)?;
|
||||||
|
dict.set_item("num_indices", stats.num_indices)?;
|
||||||
|
|
||||||
|
let fragment_stats = PyDict::new(py);
|
||||||
|
fragment_stats.set_item("num_fragments", stats.fragment_stats.num_fragments)?;
|
||||||
|
fragment_stats.set_item(
|
||||||
|
"num_small_fragments",
|
||||||
|
stats.fragment_stats.num_small_fragments,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let fragment_lengths = PyDict::new(py);
|
||||||
|
fragment_lengths.set_item("min", stats.fragment_stats.lengths.min)?;
|
||||||
|
fragment_lengths.set_item("max", stats.fragment_stats.lengths.max)?;
|
||||||
|
fragment_lengths.set_item("mean", stats.fragment_stats.lengths.mean)?;
|
||||||
|
fragment_lengths.set_item("p25", stats.fragment_stats.lengths.p25)?;
|
||||||
|
fragment_lengths.set_item("p50", stats.fragment_stats.lengths.p50)?;
|
||||||
|
fragment_lengths.set_item("p75", stats.fragment_stats.lengths.p75)?;
|
||||||
|
fragment_lengths.set_item("p99", stats.fragment_stats.lengths.p99)?;
|
||||||
|
|
||||||
|
fragment_stats.set_item("lengths", fragment_lengths)?;
|
||||||
|
dict.set_item("fragment_stats", fragment_stats)?;
|
||||||
|
|
||||||
|
Ok(Some(dict.unbind()))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub fn __repr__(&self) -> String {
|
pub fn __repr__(&self) -> String {
|
||||||
match &self.inner {
|
match &self.inner {
|
||||||
None => format!("ClosedTable({})", self.name),
|
None => format!("ClosedTable({})", self.name),
|
||||||
@@ -455,8 +489,14 @@ impl Table {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
builder.execute(Box::new(batches)).await.infer_error()?;
|
let stats = builder.execute(Box::new(batches)).await.infer_error()?;
|
||||||
Ok(())
|
Python::with_gil(|py| {
|
||||||
|
let dict = PyDict::new(py);
|
||||||
|
dict.set_item("num_inserted_rows", stats.num_inserted_rows)?;
|
||||||
|
dict.set_item("num_updated_rows", stats.num_updated_rows)?;
|
||||||
|
dict.set_item("num_deleted_rows", stats.num_deleted_rows)?;
|
||||||
|
Ok(dict.unbind())
|
||||||
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.19.1-beta.0"
|
version = "0.19.1-beta.1"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.19.1-beta.0"
|
version = "0.19.1-beta.1"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use crate::index::Index;
|
|||||||
use crate::index::IndexStatistics;
|
use crate::index::IndexStatistics;
|
||||||
use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
|
use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
|
||||||
use crate::table::Tags;
|
use crate::table::Tags;
|
||||||
use crate::table::{AddDataMode, AnyQuery, Filter};
|
use crate::table::{AddDataMode, AnyQuery, Filter, TableStatistics};
|
||||||
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
|
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
|
||||||
use crate::{DistanceType, Error, Table};
|
use crate::{DistanceType, Error, Table};
|
||||||
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
|
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
|
||||||
@@ -47,6 +47,7 @@ use crate::{
|
|||||||
TableDefinition, UpdateBuilder,
|
TableDefinition, UpdateBuilder,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
use lance::dataset::MergeStats;
|
||||||
|
|
||||||
const REQUEST_TIMEOUT_HEADER: HeaderName = HeaderName::from_static("x-request-timeout-ms");
|
const REQUEST_TIMEOUT_HEADER: HeaderName = HeaderName::from_static("x-request-timeout-ms");
|
||||||
|
|
||||||
@@ -1022,7 +1023,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
|||||||
&self,
|
&self,
|
||||||
params: MergeInsertBuilder,
|
params: MergeInsertBuilder,
|
||||||
new_data: Box<dyn RecordBatchReader + Send>,
|
new_data: Box<dyn RecordBatchReader + Send>,
|
||||||
) -> Result<()> {
|
) -> Result<MergeStats> {
|
||||||
self.check_mutable().await?;
|
self.check_mutable().await?;
|
||||||
|
|
||||||
let query = MergeInsertRequest::try_from(params)?;
|
let query = MergeInsertRequest::try_from(params)?;
|
||||||
@@ -1034,9 +1035,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
|||||||
|
|
||||||
let (request_id, response) = self.send_streaming(request, new_data, true).await?;
|
let (request_id, response) = self.send_streaming(request, new_data, true).await?;
|
||||||
|
|
||||||
|
// TODO: server can response with these stats in response body.
|
||||||
|
// We should test that we can handle both empty response from old server
|
||||||
|
// and response with stats from new server.
|
||||||
self.check_table_response(&request_id, response).await?;
|
self.check_table_response(&request_id, response).await?;
|
||||||
|
Ok(MergeStats::default())
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
|
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
|
||||||
@@ -1242,6 +1245,20 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
|||||||
fn dataset_uri(&self) -> &str {
|
fn dataset_uri(&self) -> &str {
|
||||||
"NOT_SUPPORTED"
|
"NOT_SUPPORTED"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn stats(&self) -> Result<TableStatistics> {
|
||||||
|
let request = self.client.post(&format!("/v1/table/{}/stats/", self.name));
|
||||||
|
let (request_id, response) = self.send(request, true).await?;
|
||||||
|
let response = self.check_table_response(&request_id, response).await?;
|
||||||
|
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||||
|
|
||||||
|
let stats = serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||||
|
source: format!("Failed to parse table statistics: {}", e).into(),
|
||||||
|
request_id,
|
||||||
|
status_code: None,
|
||||||
|
})?;
|
||||||
|
Ok(stats)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
@@ -1334,7 +1351,12 @@ mod tests {
|
|||||||
Box::pin(table.count_rows(None).map_ok(|_| ())),
|
Box::pin(table.count_rows(None).map_ok(|_| ())),
|
||||||
Box::pin(table.update().column("a", "a + 1").execute().map_ok(|_| ())),
|
Box::pin(table.update().column("a", "a + 1").execute().map_ok(|_| ())),
|
||||||
Box::pin(table.add(example_data()).execute().map_ok(|_| ())),
|
Box::pin(table.add(example_data()).execute().map_ok(|_| ())),
|
||||||
Box::pin(table.merge_insert(&["test"]).execute(example_data())),
|
Box::pin(
|
||||||
|
table
|
||||||
|
.merge_insert(&["test"])
|
||||||
|
.execute(example_data())
|
||||||
|
.map_ok(|_| ()),
|
||||||
|
),
|
||||||
Box::pin(table.delete("false")),
|
Box::pin(table.delete("false")),
|
||||||
Box::pin(table.add_columns(
|
Box::pin(table.add_columns(
|
||||||
NewColumnTransform::SqlExpressions(vec![("x".into(), "y".into())]),
|
NewColumnTransform::SqlExpressions(vec![("x".into(), "y".into())]),
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ use lance::dataset::cleanup::RemovalStats;
|
|||||||
use lance::dataset::optimize::{compact_files, CompactionMetrics, IndexRemapperOptions};
|
use lance::dataset::optimize::{compact_files, CompactionMetrics, IndexRemapperOptions};
|
||||||
use lance::dataset::scanner::Scanner;
|
use lance::dataset::scanner::Scanner;
|
||||||
pub use lance::dataset::ColumnAlteration;
|
pub use lance::dataset::ColumnAlteration;
|
||||||
|
pub use lance::dataset::MergeStats;
|
||||||
pub use lance::dataset::NewColumnTransform;
|
pub use lance::dataset::NewColumnTransform;
|
||||||
pub use lance::dataset::ReadParams;
|
pub use lance::dataset::ReadParams;
|
||||||
pub use lance::dataset::Version;
|
pub use lance::dataset::Version;
|
||||||
@@ -80,10 +81,13 @@ pub mod merge;
|
|||||||
|
|
||||||
use crate::index::waiter::wait_for_index;
|
use crate::index::waiter::wait_for_index;
|
||||||
pub use chrono::Duration;
|
pub use chrono::Duration;
|
||||||
|
use futures::future::join_all;
|
||||||
pub use lance::dataset::optimize::CompactionOptions;
|
pub use lance::dataset::optimize::CompactionOptions;
|
||||||
pub use lance::dataset::refs::{TagContents, Tags as LanceTags};
|
pub use lance::dataset::refs::{TagContents, Tags as LanceTags};
|
||||||
pub use lance::dataset::scanner::DatasetRecordBatchStream;
|
pub use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||||
|
use lance::dataset::statistics::DatasetStatisticsExt;
|
||||||
pub use lance_index::optimize::OptimizeOptions;
|
pub use lance_index::optimize::OptimizeOptions;
|
||||||
|
use serde_with::skip_serializing_none;
|
||||||
|
|
||||||
/// Defines the type of column
|
/// Defines the type of column
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@@ -484,7 +488,7 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
|||||||
&self,
|
&self,
|
||||||
params: MergeInsertBuilder,
|
params: MergeInsertBuilder,
|
||||||
new_data: Box<dyn RecordBatchReader + Send>,
|
new_data: Box<dyn RecordBatchReader + Send>,
|
||||||
) -> Result<()>;
|
) -> Result<MergeStats>;
|
||||||
/// Gets the table tag manager.
|
/// Gets the table tag manager.
|
||||||
async fn tags(&self) -> Result<Box<dyn Tags + '_>>;
|
async fn tags(&self) -> Result<Box<dyn Tags + '_>>;
|
||||||
/// Optimize the dataset.
|
/// Optimize the dataset.
|
||||||
@@ -523,6 +527,8 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
|||||||
index_names: &[&str],
|
index_names: &[&str],
|
||||||
timeout: std::time::Duration,
|
timeout: std::time::Duration,
|
||||||
) -> Result<()>;
|
) -> Result<()>;
|
||||||
|
/// Get statistics on the table
|
||||||
|
async fn stats(&self) -> Result<TableStatistics>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A Table is a collection of strong typed Rows.
|
/// A Table is a collection of strong typed Rows.
|
||||||
@@ -1241,6 +1247,11 @@ impl Table {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
Ok(Arc::new(repartitioned))
|
Ok(Arc::new(repartitioned))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retrieve statistics on the table
|
||||||
|
pub async fn stats(&self) -> Result<TableStatistics> {
|
||||||
|
self.inner.stats().await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct NativeTags {
|
pub struct NativeTags {
|
||||||
@@ -2357,7 +2368,7 @@ impl BaseTable for NativeTable {
|
|||||||
&self,
|
&self,
|
||||||
params: MergeInsertBuilder,
|
params: MergeInsertBuilder,
|
||||||
new_data: Box<dyn RecordBatchReader + Send>,
|
new_data: Box<dyn RecordBatchReader + Send>,
|
||||||
) -> Result<()> {
|
) -> Result<MergeStats> {
|
||||||
let dataset = Arc::new(self.dataset.get().await?.clone());
|
let dataset = Arc::new(self.dataset.get().await?.clone());
|
||||||
let mut builder = LanceMergeInsertBuilder::try_new(dataset.clone(), params.on)?;
|
let mut builder = LanceMergeInsertBuilder::try_new(dataset.clone(), params.on)?;
|
||||||
match (
|
match (
|
||||||
@@ -2384,9 +2395,9 @@ impl BaseTable for NativeTable {
|
|||||||
builder.when_not_matched_by_source(WhenNotMatchedBySource::Keep);
|
builder.when_not_matched_by_source(WhenNotMatchedBySource::Keep);
|
||||||
}
|
}
|
||||||
let job = builder.try_build()?;
|
let job = builder.try_build()?;
|
||||||
let (new_dataset, _stats) = job.execute_reader(new_data).await?;
|
let (new_dataset, stats) = job.execute_reader(new_data).await?;
|
||||||
self.dataset.set_latest(new_dataset.as_ref().clone()).await;
|
self.dataset.set_latest(new_dataset.as_ref().clone()).await;
|
||||||
Ok(())
|
Ok(stats)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Delete rows from the table
|
/// Delete rows from the table
|
||||||
@@ -2568,6 +2579,108 @@ impl BaseTable for NativeTable {
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
wait_for_index(self, index_names, timeout).await
|
wait_for_index(self, index_names, timeout).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn stats(&self) -> Result<TableStatistics> {
|
||||||
|
let num_rows = self.count_rows(None).await?;
|
||||||
|
let num_indices = self.list_indices().await?.len();
|
||||||
|
let ds = self.dataset.get().await?;
|
||||||
|
let ds_clone = (*ds).clone();
|
||||||
|
let ds_stats = Arc::new(ds_clone).calculate_data_stats().await?;
|
||||||
|
let total_bytes = ds_stats.fields.iter().map(|f| f.bytes_on_disk).sum::<u64>() as usize;
|
||||||
|
|
||||||
|
let frags = ds.get_fragments();
|
||||||
|
let mut sorted_sizes = join_all(
|
||||||
|
frags
|
||||||
|
.iter()
|
||||||
|
.map(|frag| async move { frag.physical_rows().await.unwrap_or(0) }),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
sorted_sizes.sort();
|
||||||
|
|
||||||
|
let small_frag_threshold = 100000;
|
||||||
|
let num_fragments = sorted_sizes.len();
|
||||||
|
let num_small_fragments = sorted_sizes
|
||||||
|
.iter()
|
||||||
|
.filter(|&&size| size < small_frag_threshold)
|
||||||
|
.count();
|
||||||
|
|
||||||
|
let p25 = *sorted_sizes.get(num_fragments / 4).unwrap_or(&0);
|
||||||
|
let p50 = *sorted_sizes.get(num_fragments / 2).unwrap_or(&0);
|
||||||
|
let p75 = *sorted_sizes.get(num_fragments * 3 / 4).unwrap_or(&0);
|
||||||
|
let p99 = *sorted_sizes.get(num_fragments * 99 / 100).unwrap_or(&0);
|
||||||
|
let min = sorted_sizes.first().copied().unwrap_or(0);
|
||||||
|
let max = sorted_sizes.last().copied().unwrap_or(0);
|
||||||
|
let mean = if num_fragments == 0 {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
sorted_sizes.iter().copied().sum::<usize>() / num_fragments
|
||||||
|
};
|
||||||
|
|
||||||
|
let frag_stats = FragmentStatistics {
|
||||||
|
num_fragments,
|
||||||
|
num_small_fragments,
|
||||||
|
lengths: FragmentSummaryStats {
|
||||||
|
min,
|
||||||
|
max,
|
||||||
|
mean,
|
||||||
|
p25,
|
||||||
|
p50,
|
||||||
|
p75,
|
||||||
|
p99,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
let stats = TableStatistics {
|
||||||
|
total_bytes,
|
||||||
|
num_rows,
|
||||||
|
num_indices,
|
||||||
|
fragment_stats: frag_stats,
|
||||||
|
};
|
||||||
|
Ok(stats)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[skip_serializing_none]
|
||||||
|
#[derive(Debug, Deserialize, PartialEq)]
|
||||||
|
pub struct TableStatistics {
|
||||||
|
/// The total number of bytes in the table
|
||||||
|
pub total_bytes: usize,
|
||||||
|
|
||||||
|
/// The number of rows in the table
|
||||||
|
pub num_rows: usize,
|
||||||
|
|
||||||
|
/// The number of indices in the table
|
||||||
|
pub num_indices: usize,
|
||||||
|
|
||||||
|
/// Statistics on table fragments
|
||||||
|
pub fragment_stats: FragmentStatistics,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[skip_serializing_none]
|
||||||
|
#[derive(Debug, Deserialize, PartialEq)]
|
||||||
|
pub struct FragmentStatistics {
|
||||||
|
/// The number of fragments in the table
|
||||||
|
pub num_fragments: usize,
|
||||||
|
|
||||||
|
/// The number of uncompacted fragments in the table
|
||||||
|
pub num_small_fragments: usize,
|
||||||
|
|
||||||
|
/// Statistics on the number of rows in the table fragments
|
||||||
|
pub lengths: FragmentSummaryStats,
|
||||||
|
// todo: add size statistics
|
||||||
|
// /// Statistics on the number of bytes in the table fragments
|
||||||
|
// sizes: FragmentStats,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[skip_serializing_none]
|
||||||
|
#[derive(Debug, Deserialize, PartialEq)]
|
||||||
|
pub struct FragmentSummaryStats {
|
||||||
|
pub min: usize,
|
||||||
|
pub max: usize,
|
||||||
|
pub mean: usize,
|
||||||
|
pub p25: usize,
|
||||||
|
pub p50: usize,
|
||||||
|
pub p75: usize,
|
||||||
|
pub p99: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -3945,4 +4058,108 @@ mod tests {
|
|||||||
Some(&"test_field_val1".to_string())
|
Some(&"test_field_val1".to_string())
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
pub async fn test_stats() {
|
||||||
|
let tmp_dir = tempdir().unwrap();
|
||||||
|
let uri = tmp_dir.path().to_str().unwrap();
|
||||||
|
|
||||||
|
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
|
||||||
|
|
||||||
|
let schema = Arc::new(Schema::new(vec![
|
||||||
|
Field::new("id", DataType::Int32, false),
|
||||||
|
Field::new("foo", DataType::Int32, true),
|
||||||
|
]));
|
||||||
|
let batch = RecordBatch::try_new(
|
||||||
|
schema.clone(),
|
||||||
|
vec![
|
||||||
|
Arc::new(Int32Array::from_iter_values(0..100)),
|
||||||
|
Arc::new(Int32Array::from_iter_values(0..100)),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_stats",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch.clone())], batch.schema()),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
for _ in 0..10 {
|
||||||
|
let batch = RecordBatch::try_new(
|
||||||
|
schema.clone(),
|
||||||
|
vec![
|
||||||
|
Arc::new(Int32Array::from_iter_values(0..15)),
|
||||||
|
Arc::new(Int32Array::from_iter_values(0..15)),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
table
|
||||||
|
.add(RecordBatchIterator::new(
|
||||||
|
vec![Ok(batch.clone())],
|
||||||
|
batch.schema(),
|
||||||
|
))
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let empty_table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_stats_empty",
|
||||||
|
RecordBatchIterator::new(vec![], batch.schema()),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let res = table.stats().await.unwrap();
|
||||||
|
println!("{:#?}", res);
|
||||||
|
assert_eq!(
|
||||||
|
res,
|
||||||
|
TableStatistics {
|
||||||
|
num_rows: 250,
|
||||||
|
num_indices: 0,
|
||||||
|
total_bytes: 2000,
|
||||||
|
fragment_stats: FragmentStatistics {
|
||||||
|
num_fragments: 11,
|
||||||
|
num_small_fragments: 11,
|
||||||
|
lengths: FragmentSummaryStats {
|
||||||
|
min: 15,
|
||||||
|
max: 100,
|
||||||
|
mean: 22,
|
||||||
|
p25: 15,
|
||||||
|
p50: 15,
|
||||||
|
p75: 15,
|
||||||
|
p99: 100,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
);
|
||||||
|
let res = empty_table.stats().await.unwrap();
|
||||||
|
println!("{:#?}", res);
|
||||||
|
assert_eq!(
|
||||||
|
res,
|
||||||
|
TableStatistics {
|
||||||
|
num_rows: 0,
|
||||||
|
num_indices: 0,
|
||||||
|
total_bytes: 0,
|
||||||
|
fragment_stats: FragmentStatistics {
|
||||||
|
num_fragments: 0,
|
||||||
|
num_small_fragments: 0,
|
||||||
|
lengths: FragmentSummaryStats {
|
||||||
|
min: 0,
|
||||||
|
max: 0,
|
||||||
|
mean: 0,
|
||||||
|
p25: 0,
|
||||||
|
p50: 0,
|
||||||
|
p75: 0,
|
||||||
|
p99: 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::RecordBatchReader;
|
use arrow_array::RecordBatchReader;
|
||||||
|
use lance::dataset::MergeStats;
|
||||||
|
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
@@ -86,8 +87,9 @@ impl MergeInsertBuilder {
|
|||||||
|
|
||||||
/// Executes the merge insert operation
|
/// Executes the merge insert operation
|
||||||
///
|
///
|
||||||
/// Nothing is returned but the [`super::Table`] is updated
|
/// Returns statistics about the merge operation including the number of rows
|
||||||
pub async fn execute(self, new_data: Box<dyn RecordBatchReader + Send>) -> Result<()> {
|
/// inserted, updated, and deleted.
|
||||||
|
pub async fn execute(self, new_data: Box<dyn RecordBatchReader + Send>) -> Result<MergeStats> {
|
||||||
self.table.clone().merge_insert(self, new_data).await
|
self.table.clone().merge_insert(self, new_data).await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user