Mirror of https://github.com/lancedb/lancedb.git, synced 2026-01-08 12:52:58 +00:00

Compare commits (1 commit): python-v0.… ... v0.22.1-be…
Commit SHA1: eea8f28d78

The comparison below covers CI workflow tweaks, Rust workspace dependency pinning (crates.io 0.37.0 on one side, the v0.35.0-beta.4 git tag of lance on the other), version bumps across the Node.js, Python, and Java packages, and the presence or absence of the table-clone API, the MRR reranker, and the target_partition_size index option. Each file's changes are reproduced as unified-diff hunks.
Hunk from a bump-version configuration (file path not captured):

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.1-beta.3"
+current_version = "0.22.1-beta.1"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
.github/workflows/docs_test.yml (vendored): 4 changed lines

@@ -24,8 +24,7 @@ env:
 jobs:
   test-python:
     name: Test doc python code
-    runs-on: warp-ubuntu-2204-x64-8x
-    timeout-minutes: 60
+    runs-on: ubuntu-24.04
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -49,6 +48,7 @@ jobs:
         uses: swatinem/rust-cache@v2
       - name: Build Python
         working-directory: docs/test
+        timeout-minutes: 60
         run:
           python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r requirements.txt
       - name: Create test files
.github/workflows/pypi-publish.yml (vendored): 4 changed lines

@@ -56,7 +56,7 @@ jobs:
     pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
     fury_token: ${{ secrets.FURY_TOKEN }}
   mac:
-    timeout-minutes: 90
+    timeout-minutes: 60
     runs-on: ${{ matrix.config.runner }}
     strategy:
       matrix:
@@ -64,7 +64,7 @@ jobs:
         - target: x86_64-apple-darwin
           runner: macos-13
         - target: aarch64-apple-darwin
-          runner: warp-macos-14-arm64-6x
+          runner: macos-14
     env:
       MACOSX_DEPLOYMENT_TARGET: 10.15
     steps:
Cargo.lock (generated): 112 changed lines

@@ -713,9 +713,9 @@ dependencies = [

 [[package]]
 name = "aws-sdk-s3"
-version = "1.105.0"
+version = "1.104.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c99789e929b5e1d9a5aa3fa1d81317f3a789afc796141d11b0eaafd9d9f47e38"
+checksum = "38c488cd6abb0ec9811c401894191932e941c5f91dc226043edacd0afa1634bc"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -963,9 +963,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-runtime"
-version = "1.9.2"
+version = "1.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185"
+checksum = "d3946acbe1ead1301ba6862e712c7903ca9bb230bdf1fbd1b5ac54158ef2ab1f"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-http",
@@ -2929,18 +2929,6 @@ version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55"

-[[package]]
-name = "fastbloom"
-version = "0.14.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18c1ddb9231d8554c2d6bdf4cfaabf0c59251658c68b6c95cd52dd0c513a912a"
-dependencies = [
- "getrandom 0.3.3",
- "libm",
- "rand 0.9.2",
- "siphasher",
-]
-
 [[package]]
 name = "fastdivide"
 version = "0.4.2"
@@ -3040,9 +3028,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"

 [[package]]
 name = "fsst"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe0a0b1d16ce6b863be8ab766004d89ebf0779fd6ce31b0ef3bbc7fedaaad373"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow-array",
  "rand 0.9.2",
@@ -4219,9 +4206,8 @@ dependencies = [

 [[package]]
 name = "lance"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42171f2af5d377e6bbcc8a8572144ee15b73a8f78ceb6160f1adeabf0d0f3e3c"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow",
  "arrow-arith",
@@ -4284,9 +4270,8 @@ dependencies = [

 [[package]]
 name = "lance-arrow"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25ef9499a1e581112f45fbf743fdc8e24830cda0bd13396b11c71aa6e6cba083"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -4304,9 +4289,8 @@ dependencies = [

 [[package]]
 name = "lance-bitpacking"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1101fffd5b161bbdc6e932d6c0a7f94cb1752b0f8cd6d18ef9064052ab901a84"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrayref",
  "paste",
@@ -4315,9 +4299,8 @@ dependencies = [

 [[package]]
 name = "lance-core"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "527ee5e6472d058d8c66c702fbe318a3f60f971e652e60dcfc6349bdbc9b0733"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -4352,9 +4335,8 @@ dependencies = [

 [[package]]
 name = "lance-datafusion"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65a80f7f15f2d941ec7b8253625cbb8e12081ea27584dd1fbc657fb9fb377f7a"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4383,9 +4365,8 @@ dependencies = [

 [[package]]
 name = "lance-datagen"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0495c8afa18f246ac4b337c47d7827560283783963dd2177862d91161478fd79"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4402,9 +4383,8 @@ dependencies = [

 [[package]]
 name = "lance-encoding"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e80e9ae49d68b95d58e77d9177f68983dce4f0803ef42840e1631b38dd66adc"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -4432,7 +4412,6 @@ dependencies = [
  "prost-types",
  "rand 0.9.2",
  "snafu",
- "strum",
  "tokio",
  "tracing",
  "xxhash-rust",
@@ -4441,9 +4420,8 @@ dependencies = [

 [[package]]
 name = "lance-file"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1707f9f5097b36c82d3a8524bb41c762c80d5dfa5e32aa7bfc6a1c0847a1cce"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -4477,9 +4455,8 @@ dependencies = [

 [[package]]
 name = "lance-index"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28ab52586a5a7f5371a5abf4862968231f8c0232ce0780bc456f1ec16e9370f9"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4500,7 +4477,6 @@ dependencies = [
  "datafusion-sql",
  "deepsize",
  "dirs",
- "fastbloom",
  "fst",
  "futures",
  "half",
@@ -4515,7 +4491,6 @@ dependencies = [
  "lance-io",
  "lance-linalg",
  "lance-table",
- "libm",
  "log",
  "num-traits",
  "object_store",
@@ -4532,15 +4507,13 @@ dependencies = [
  "tempfile",
  "tokio",
  "tracing",
- "twox-hash",
  "uuid",
 ]

 [[package]]
 name = "lance-io"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d606f9f6a7f8ec2cacf28dfce7b2fc39e7db9f0ec77f907b8e47c756e3dd163b"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow",
  "arrow-arith",
@@ -4580,9 +4553,8 @@ dependencies = [

 [[package]]
 name = "lance-linalg"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9f1a94a5d966ff1eae817a835e3a57b34f73300f83a43bb28e7e2806695b8ba"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -4605,9 +4577,8 @@ dependencies = [

 [[package]]
 name = "lance-table"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fac5c0ca6e5c285645465b95fb99fc464a1fd22a6d4b32ae0e0760f06b4b8a7f"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4645,9 +4616,8 @@ dependencies = [

 [[package]]
 name = "lance-testing"
-version = "0.37.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "384acc1dd13379a2ae24f3e3635d9c1f4fb4dc1534f7ffd2740c268f2eb73455"
+version = "0.35.0"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2"
 dependencies = [
  "arrow-array",
  "arrow-schema",
@@ -4658,7 +4628,7 @@ dependencies = [

 [[package]]
 name = "lancedb"
-version = "0.22.1-beta.3"
+version = "0.22.1-beta.0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4745,7 +4715,7 @@ dependencies = [

 [[package]]
 name = "lancedb-nodejs"
-version = "0.22.1-beta.3"
+version = "0.22.1-beta.0"
 dependencies = [
  "arrow-array",
  "arrow-ipc",
@@ -4765,7 +4735,7 @@ dependencies = [

 [[package]]
 name = "lancedb-python"
-version = "0.25.1-beta.3"
+version = "0.25.1-beta.0"
 dependencies = [
  "arrow",
  "async-trait",
@@ -7811,15 +7781,6 @@ version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"

-[[package]]
-name = "strum"
-version = "0.25.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
-dependencies = [
- "strum_macros",
-]
-
 [[package]]
 name = "strum_macros"
 version = "0.25.3"
@@ -8480,9 +8441,6 @@ name = "twox-hash"
 version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
-dependencies = [
- "rand 0.9.2",
-]

 [[package]]
 name = "typenum"
Cargo.toml: 16 changed lines

@@ -15,14 +15,14 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"] }
-lance-io = { "version" = "=0.37.0", default-features = false }
-lance-index = "=0.37.0"
-lance-linalg = "=0.37.0"
-lance-table = "=0.37.0"
-lance-testing = "=0.37.0"
-lance-datafusion = "=0.37.0"
-lance-encoding = "=0.37.0"
+lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
Hunks from a Python release-helper script (file path not captured):

@@ -1,5 +1,4 @@
 import argparse
-import re
 import sys
 import json

@@ -19,12 +18,8 @@ def run_command(command: str) -> str:

 def get_latest_stable_version() -> str:
     version_line = run_command("cargo info lance | grep '^version:'")
-    # Example output: "version: 0.35.0 (latest 0.37.0)"
-    match = re.search(r'\(latest ([0-9.]+)\)', version_line)
-    if match:
-        return match.group(1)
-    # Fallback: use the first version after 'version:'
-    return version_line.split("version:")[1].split()[0].strip()
+    version = version_line.split(" ")[1].strip()
+    return version


 def get_latest_preview_version() -> str:
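The two sides of this hunk extract different versions from the same `cargo info lance | grep '^version:'` output: the left side prefers the "(latest X)" suffix and falls back to the first token after "version:", while the right side simply takes the second whitespace-separated token. A minimal sketch of both strategies, using the example string quoted in the removed comment (the sample string is an assumption based on that comment, not captured cargo output):

    # Sketch only: contrasts the two parsing strategies shown in the hunk above.
    import re

    version_line = "version: 0.35.0 (latest 0.37.0)"  # hypothetical sample from the removed comment

    # Left-hand side: prefer the "(latest X)" suffix when present.
    match = re.search(r"\(latest ([0-9.]+)\)", version_line)
    if match:
        latest = match.group(1)
    else:
        latest = version_line.split("version:")[1].split()[0].strip()

    # Right-hand side: take the token immediately after "version:".
    current = version_line.split(" ")[1].strip()

    print(latest, current)  # prints: 0.37.0 0.35.0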
Hunk from the Java bindings' Rust JNI helper trait (file path not captured):

@@ -16,7 +16,6 @@ pub trait JNIEnvExt {
     fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;

     /// Get strings from Java List<String> object.
-    #[allow(dead_code)]
     fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;

     /// Get strings from Java String[] object.
Hunks from the Java bindings' Rust conversion helpers (file path not captured):

@@ -6,7 +6,6 @@ use jni::JNIEnv;

 use crate::Result;

-#[allow(dead_code)]
 pub trait FromJObject<T> {
     fn extract(&self) -> Result<T>;
 }
@@ -40,7 +39,6 @@ impl FromJObject<f64> for JObject<'_> {
     }
 }

-#[allow(dead_code)]
 pub trait FromJString {
     fn extract(&self, env: &mut JNIEnv) -> Result<String>;
 }
@@ -68,7 +66,6 @@ pub trait JMapExt {
     fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
 }

-#[allow(dead_code)]
 fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
 where
     for<'a> JObject<'a>: FromJObject<T>,
Hunk from a Java module pom.xml (file path not captured):

@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.3</version>
+    <version>0.22.1-beta.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.22.1-beta.3</version>
|
<version>0.22.1-beta.1</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
Hunk from the Java parent pom.xml (file path not captured):

@@ -6,7 +6,7 @@

   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.22.1-beta.3</version>
+  <version>0.22.1-beta.1</version>
   <packaging>pom</packaging>
   <name>${project.artifactId}</name>
   <description>LanceDB Java SDK Parent POM</description>
Hunk from the Node.js bindings Cargo.toml (file path not captured):

@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.1-beta.3"
+version = "0.22.1-beta.1"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
Hunk from a Node.js Arrow test file (file path not captured):

@@ -1008,64 +1008,5 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
       expect(result).toEqual(null);
     });
   });
-
-  describe("boolean null handling", function () {
-    it("should handle null values in nullable boolean fields", () => {
-      const { makeArrowTable } = require("../lancedb/arrow");
-      const schema = new Schema([new Field("test", new arrow.Bool(), true)]);
-
-      // Test with all null values
-      const data = [{ test: null }];
-      const table = makeArrowTable(data, { schema });
-
-      expect(table.numRows).toBe(1);
-      expect(table.schema.names).toEqual(["test"]);
-      expect(table.getChild("test")!.get(0)).toBeNull();
-    });
-
-    it("should handle mixed null and non-null boolean values", () => {
-      const { makeArrowTable } = require("../lancedb/arrow");
-      const schema = new Schema([new Field("test", new Bool(), true)]);
-
-      // Test with mixed values
-      const data = [{ test: true }, { test: null }, { test: false }];
-      const table = makeArrowTable(data, { schema });
-
-      expect(table.numRows).toBe(3);
-      expect(table.getChild("test")!.get(0)).toBe(true);
-      expect(table.getChild("test")!.get(1)).toBeNull();
-      expect(table.getChild("test")!.get(2)).toBe(false);
-    });
-  });
   },
 );
-
-// Test for the undefined values bug fix
-describe("undefined values handling", () => {
-  it("should handle mixed undefined and actual values", () => {
-    const schema = new Schema([
-      new Field("text", new Utf8(), true), // nullable
-      new Field("number", new Int32(), true), // nullable
-      new Field("bool", new Bool(), true), // nullable
-    ]);
-
-    const data = [
-      { text: undefined, number: 42, bool: true },
-      { text: "hello", number: undefined, bool: false },
-      { text: "world", number: 123, bool: undefined },
-    ];
-    const table = makeArrowTable(data, { schema });
-
-    const result = table.toArray();
-    expect(result).toHaveLength(3);
-    expect(result[0].text).toBe(null);
-    expect(result[0].number).toBe(42);
-    expect(result[0].bool).toBe(true);
-    expect(result[1].text).toBe("hello");
-    expect(result[1].number).toBe(null);
-    expect(result[1].bool).toBe(false);
-    expect(result[2].text).toBe("world");
-    expect(result[2].number).toBe(123);
-    expect(result[2].bool).toBe(null);
-  });
-});
Hunk from a Node.js connection test file (file path not captured):

@@ -203,106 +203,3 @@ describe("given a connection", () => {
     });
   });
 });
-
-describe("clone table functionality", () => {
-  let tmpDir: tmp.DirResult;
-  let db: Connection;
-  beforeEach(async () => {
-    tmpDir = tmp.dirSync({ unsafeCleanup: true });
-    db = await connect(tmpDir.name);
-  });
-  afterEach(() => tmpDir.removeCallback());
-
-  it("should clone a table with latest version (default behavior)", async () => {
-    // Create source table with some data
-    const data = [
-      { id: 1, text: "hello", vector: [1.0, 2.0] },
-      { id: 2, text: "world", vector: [3.0, 4.0] },
-    ];
-    const sourceTable = await db.createTable("source", data);
-
-    // Add more data to create a new version
-    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
-    await sourceTable.add(moreData);
-
-    // Clone the table (should get latest version with 3 rows)
-    const sourceUri = `${tmpDir.name}/source.lance`;
-    const clonedTable = await db.cloneTable("cloned", sourceUri);
-
-    // Verify cloned table has all 3 rows
-    expect(await clonedTable.countRows()).toBe(3);
-    expect((await db.tableNames()).includes("cloned")).toBe(true);
-  });
-
-  it("should clone a table from a specific version", async () => {
-    // Create source table with initial data
-    const data = [
-      { id: 1, text: "hello", vector: [1.0, 2.0] },
-      { id: 2, text: "world", vector: [3.0, 4.0] },
-    ];
-    const sourceTable = await db.createTable("source", data);
-
-    // Get the initial version
-    const initialVersion = await sourceTable.version();
-
-    // Add more data to create a new version
-    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
-    await sourceTable.add(moreData);
-
-    // Verify source now has 3 rows
-    expect(await sourceTable.countRows()).toBe(3);
-
-    // Clone from the initial version (should have only 2 rows)
-    const sourceUri = `${tmpDir.name}/source.lance`;
-    const clonedTable = await db.cloneTable("cloned", sourceUri, {
-      sourceVersion: initialVersion,
-    });
-
-    // Verify cloned table has only the initial 2 rows
-    expect(await clonedTable.countRows()).toBe(2);
-  });
-
-  it("should clone a table from a tagged version", async () => {
-    // Create source table with initial data
-    const data = [
-      { id: 1, text: "hello", vector: [1.0, 2.0] },
-      { id: 2, text: "world", vector: [3.0, 4.0] },
-    ];
-    const sourceTable = await db.createTable("source", data);
-
-    // Create a tag for the current version
-    const tags = await sourceTable.tags();
-    await tags.create("v1.0", await sourceTable.version());
-
-    // Add more data after the tag
-    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
-    await sourceTable.add(moreData);
-
-    // Verify source now has 3 rows
-    expect(await sourceTable.countRows()).toBe(3);
-
-    // Clone from the tagged version (should have only 2 rows)
-    const sourceUri = `${tmpDir.name}/source.lance`;
-    const clonedTable = await db.cloneTable("cloned", sourceUri, {
-      sourceTag: "v1.0",
-    });
-
-    // Verify cloned table has only the tagged version's 2 rows
-    expect(await clonedTable.countRows()).toBe(2);
-  });
-
-  it("should fail when attempting deep clone", async () => {
-    // Create source table with some data
-    const data = [
-      { id: 1, text: "hello", vector: [1.0, 2.0] },
-      { id: 2, text: "world", vector: [3.0, 4.0] },
-    ];
-    await db.createTable("source", data);
-
-    // Try to create a deep clone (should fail)
-    const sourceUri = `${tmpDir.name}/source.lance`;
-    await expect(
-      db.cloneTable("cloned", sourceUri, { isShallow: false }),
-    ).rejects.toThrow("Deep clone is not yet implemented");
-  });
-});
Hunk from a Node.js embedding-function test file (file path not captured):

@@ -256,60 +256,6 @@ describe("embedding functions", () => {
     expect(actual).toHaveProperty("text");
   });
-
-  it("should handle undefined vector field with embedding function correctly", async () => {
-    @register("undefined_test")
-    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      ndims() {
-        return 3;
-      }
-      embeddingDataType(): Float {
-        return new Float32();
-      }
-      async computeQueryEmbeddings(_data: string) {
-        return [1, 2, 3];
-      }
-      async computeSourceEmbeddings(data: string[]) {
-        return Array.from({ length: data.length }).fill([
-          1, 2, 3,
-        ]) as number[][];
-      }
-    }
-    const func = getRegistry()
-      .get<MockEmbeddingFunction>("undefined_test")!
-      .create();
-    const schema = new Schema([
-      new Field("text", new Utf8(), true),
-      new Field(
-        "vector",
-        new FixedSizeList(3, new Field("item", new Float32(), true)),
-        true,
-      ),
-    ]);
-
-    const db = await connect(tmpDir.name);
-    const table = await db.createEmptyTable("test_undefined", schema, {
-      embeddingFunction: {
-        function: func,
-        sourceColumn: "text",
-        vectorColumn: "vector",
-      },
-    });
-
-    // Test that undefined, null, and omitted vector fields all work
-    await table.add([{ text: "test1", vector: undefined }]);
-    await table.add([{ text: "test2", vector: null }]);
-    await table.add([{ text: "test3" }]);
-
-    const rows = await table.query().toArray();
-    expect(rows.length).toBe(3);
-
-    // All rows should have vectors computed by the embedding function
-    for (const row of rows) {
-      expect(row.vector).toBeDefined();
-      expect(JSON.parse(JSON.stringify(row.vector))).toEqual([1, 2, 3]);
-    }
-  });
-
 test.each([new Float16(), new Float32(), new Float64()])(
   "should be able to provide manual embeddings with multiple float datatype",
   async (floatType) => {
Hunks from the Node.js Arrow conversion module (file path not captured):

@@ -512,11 +512,7 @@ function* rowPathsAndValues(
     if (isObject(value)) {
       yield* rowPathsAndValues(value, [...basePath, key]);
     } else {
-      // Skip undefined values - they should be treated the same as missing fields
-      // for embedding function purposes
-      if (value !== undefined) {
-        yield [[...basePath, key], value];
-      }
+      yield [[...basePath, key], value];
     }
   }
 }
@@ -705,7 +701,7 @@ function transposeData(
       }
       return current;
     });
-    return makeVector(values, field.type, undefined, field.nullable);
+    return makeVector(values, field.type);
   }
 }

@@ -752,30 +748,9 @@ function makeVector(
   values: unknown[],
   type?: DataType,
   stringAsDictionary?: boolean,
-  nullable?: boolean,
   // biome-ignore lint/suspicious/noExplicitAny: skip
 ): Vector<any> {
   if (type !== undefined) {
-    // Convert undefined values to null for nullable fields
-    if (nullable) {
-      values = values.map((v) => (v === undefined ? null : v));
-    }
-
-    // workaround for: https://github.com/apache/arrow-js/issues/68
-    if (DataType.isBool(type)) {
-      const hasNonNullValue = values.some((v) => v !== null && v !== undefined);
-      if (!hasNonNullValue) {
-        const nullBitmap = new Uint8Array(Math.ceil(values.length / 8));
-        const data = makeData({
-          type: type,
-          length: values.length,
-          nullCount: values.length,
-          nullBitmap,
-        });
-        return arrowMakeVector(data);
-      }
-    }
-
     // No need for inference, let Arrow create it
     if (type instanceof Int) {
       if (DataType.isInt(type) && type.bitWidth === 64) {
@@ -900,12 +875,7 @@ async function applyEmbeddingsFromMetadata(
   for (const field of schema.fields) {
     if (!(field.name in columns)) {
       const nullValues = new Array(table.numRows).fill(null);
-      columns[field.name] = makeVector(
-        nullValues,
-        field.type,
-        undefined,
-        field.nullable,
-      );
+      columns[field.name] = makeVector(nullValues, field.type);
     }
   }

@@ -969,12 +939,7 @@ async function applyEmbeddings<T>(
   } else if (schema != null) {
     const destField = schema.fields.find((f) => f.name === destColumn);
     if (destField != null) {
-      newColumns[destColumn] = makeVector(
-        [],
-        destField.type,
-        undefined,
-        destField.nullable,
-      );
+      newColumns[destColumn] = makeVector([], destField.type);
     } else {
       throw new Error(
         `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`,
Hunks from the Node.js connection module (file path not captured):

@@ -268,33 +268,6 @@ export abstract class Connection {
   * @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
   */
  abstract dropAllTables(namespace?: string[]): Promise<void>;
-
-  /**
-   * Clone a table from a source table.
-   *
-   * A shallow clone creates a new table that shares the underlying data files
-   * with the source table but has its own independent manifest. This allows
-   * both the source and cloned tables to evolve independently while initially
-   * sharing the same data, deletion, and index files.
-   *
-   * @param {string} targetTableName - The name of the target table to create.
-   * @param {string} sourceUri - The URI of the source table to clone from.
-   * @param {object} options - Clone options.
-   * @param {string[]} options.targetNamespace - The namespace for the target table (defaults to root namespace).
-   * @param {number} options.sourceVersion - The version of the source table to clone.
-   * @param {string} options.sourceTag - The tag of the source table to clone.
-   * @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
-   */
-  abstract cloneTable(
-    targetTableName: string,
-    sourceUri: string,
-    options?: {
-      targetNamespace?: string[];
-      sourceVersion?: number;
-      sourceTag?: string;
-      isShallow?: boolean;
-    },
-  ): Promise<Table>;
 }

 /** @hideconstructor */
@@ -359,28 +332,6 @@ export class LocalConnection extends Connection {
     return new LocalTable(innerTable);
   }
-
-  async cloneTable(
-    targetTableName: string,
-    sourceUri: string,
-    options?: {
-      targetNamespace?: string[];
-      sourceVersion?: number;
-      sourceTag?: string;
-      isShallow?: boolean;
-    },
-  ): Promise<Table> {
-    const innerTable = await this.inner.cloneTable(
-      targetTableName,
-      sourceUri,
-      options?.targetNamespace ?? [],
-      options?.sourceVersion ?? null,
-      options?.sourceTag ?? null,
-      options?.isShallow ?? true,
-    );
-
-    return new LocalTable(innerTable);
-  }
-
   private getStorageOptions(
     options?: Partial<CreateTableOptions>,
   ): Record<string, string> | undefined {
Platform package.json for @lancedb/lancedb-darwin-arm64:

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",
Platform package.json for @lancedb/lancedb-darwin-x64:

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "os": ["darwin"],
   "cpu": ["x64"],
   "main": "lancedb.darwin-x64.node",
Platform package.json for @lancedb/lancedb-linux-arm64-gnu:

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-gnu.node",
Platform package.json for @lancedb/lancedb-linux-arm64-musl:

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-musl",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-musl.node",
Platform package.json for @lancedb/lancedb-linux-x64-gnu:

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-gnu.node",
Platform package.json for @lancedb/lancedb-linux-x64-musl:

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-musl",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-musl.node",
Platform package.json for @lancedb/lancedb-win32-arm64-msvc:

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "os": [
     "win32"
   ],
Platform package.json for @lancedb/lancedb-win32-x64-msvc:

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-x64-msvc",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "os": ["win32"],
   "cpu": ["x64"],
   "main": "lancedb.win32-x64-msvc.node",
nodejs/package-lock.json (generated): 4 changed lines

@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.22.1-beta.3",
+      "version": "0.22.1-beta.0",
       "cpu": [
         "x64",
         "arm64"
Hunk from the main Node.js package manifest (file path not captured):

@@ -11,7 +11,7 @@
     "ann"
   ],
   "private": false,
-  "version": "0.22.1-beta.3",
+  "version": "0.22.1-beta.1",
   "main": "dist/index.js",
   "exports": {
     ".": "./dist/index.js",
Hunk from the Node.js bindings' Rust connection code (file path not captured):

@@ -213,36 +213,6 @@ impl Connection {
         Ok(Table::new(tbl))
     }

-    #[napi(catch_unwind)]
-    pub async fn clone_table(
-        &self,
-        target_table_name: String,
-        source_uri: String,
-        target_namespace: Vec<String>,
-        source_version: Option<i64>,
-        source_tag: Option<String>,
-        is_shallow: bool,
-    ) -> napi::Result<Table> {
-        let mut builder = self
-            .get_inner()?
-            .clone_table(&target_table_name, &source_uri);
-
-        builder = builder.target_namespace(target_namespace);
-
-        if let Some(version) = source_version {
-            builder = builder.source_version(version as u64);
-        }
-
-        if let Some(tag) = source_tag {
-            builder = builder.source_tag(tag);
-        }
-
-        builder = builder.is_shallow(is_shallow);
-
-        let tbl = builder.execute().await.default_error()?;
-        Ok(Table::new(tbl))
-    }
-
     /// Drop table with the name. Or raise an error if the table does not exist.
     #[napi(catch_unwind)]
     pub async fn drop_table(&self, name: String, namespace: Vec<String>) -> napi::Result<()> {
Hunk from the Python package's bump-version configuration (file path not captured):

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.1"
+current_version = "0.25.1-beta.1"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
Hunk from the Python bindings Cargo.toml (file path not captured):

@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.1"
+version = "0.25.1-beta.1"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
Hunk from the Python type stubs for the native module (file path not captured):

@@ -60,15 +60,6 @@ class Connection(object):
         storage_options: Optional[Dict[str, str]] = None,
         index_cache_size: Optional[int] = None,
     ) -> Table: ...
-    async def clone_table(
-        self,
-        target_table_name: str,
-        source_uri: str,
-        target_namespace: List[str] = [],
-        source_version: Optional[int] = None,
-        source_tag: Optional[str] = None,
-        is_shallow: bool = True,
-    ) -> Table: ...
     async def rename_table(
         self,
         cur_name: str,
Hunks from the Python connection module (file path not captured):

@@ -665,60 +665,6 @@ class LanceDBConnection(DBConnection):
             index_cache_size=index_cache_size,
         )
-
-    def clone_table(
-        self,
-        target_table_name: str,
-        source_uri: str,
-        *,
-        target_namespace: List[str] = [],
-        source_version: Optional[int] = None,
-        source_tag: Optional[str] = None,
-        is_shallow: bool = True,
-    ) -> LanceTable:
-        """Clone a table from a source table.
-
-        A shallow clone creates a new table that shares the underlying data files
-        with the source table but has its own independent manifest. This allows
-        both the source and cloned tables to evolve independently while initially
-        sharing the same data, deletion, and index files.
-
-        Parameters
-        ----------
-        target_table_name: str
-            The name of the target table to create.
-        source_uri: str
-            The URI of the source table to clone from.
-        target_namespace: List[str], optional
-            The namespace for the target table.
-            None or empty list represents root namespace.
-        source_version: int, optional
-            The version of the source table to clone.
-        source_tag: str, optional
-            The tag of the source table to clone.
-        is_shallow: bool, default True
-            Whether to perform a shallow clone (True) or deep clone (False).
-            Currently only shallow clone is supported.
-
-        Returns
-        -------
-        A LanceTable object representing the cloned table.
-        """
-        LOOP.run(
-            self._conn.clone_table(
-                target_table_name,
-                source_uri,
-                target_namespace=target_namespace,
-                source_version=source_version,
-                source_tag=source_tag,
-                is_shallow=is_shallow,
-            )
-        )
-        return LanceTable.open(
-            self,
-            target_table_name,
-            namespace=target_namespace,
-        )
-
     @override
     def drop_table(
         self,
@@ -1190,54 +1136,6 @@ class AsyncConnection(object):
         )
         return AsyncTable(table)
-
-    async def clone_table(
-        self,
-        target_table_name: str,
-        source_uri: str,
-        *,
-        target_namespace: List[str] = [],
-        source_version: Optional[int] = None,
-        source_tag: Optional[str] = None,
-        is_shallow: bool = True,
-    ) -> AsyncTable:
-        """Clone a table from a source table.
-
-        A shallow clone creates a new table that shares the underlying data files
-        with the source table but has its own independent manifest. This allows
-        both the source and cloned tables to evolve independently while initially
-        sharing the same data, deletion, and index files.
-
-        Parameters
-        ----------
-        target_table_name: str
-            The name of the target table to create.
-        source_uri: str
-            The URI of the source table to clone from.
-        target_namespace: List[str], optional
-            The namespace for the target table.
-            None or empty list represents root namespace.
-        source_version: int, optional
-            The version of the source table to clone.
-        source_tag: str, optional
-            The tag of the source table to clone.
-        is_shallow: bool, default True
-            Whether to perform a shallow clone (True) or deep clone (False).
-            Currently only shallow clone is supported.
-
-        Returns
-        -------
-        An AsyncTable object representing the cloned table.
-        """
-        table = await self._inner.clone_table(
-            target_table_name,
-            source_uri,
-            target_namespace=target_namespace,
-            source_version=source_version,
-            source_tag=source_tag,
-            is_shallow=is_shallow,
-        )
-        return AsyncTable(table)
-
     async def rename_table(
         self,
         cur_name: str,
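The removed docstrings above fully describe the clone_table signature. A minimal usage sketch based only on that docstring; it applies to builds that still ship the method, and `lancedb.connect`, the paths, and the table names here are illustrative assumptions:

    # Hypothetical usage of the clone_table API described in the docstring above.
    # Only valid on versions that include the method; paths and names are made up.
    import lancedb

    db = lancedb.connect("/tmp/lancedb")

    # Shallow clone of the latest version of a source table.
    cloned = db.clone_table("events_copy", "/tmp/lancedb/events.lance")

    # Shallow clone pinned to an older version (or to a tag) of the source table.
    cloned_v1 = db.clone_table(
        "events_v1",
        "/tmp/lancedb/events.lance",
        source_version=1,   # or source_tag="v1.0"
        is_shallow=True,    # the docstring states deep clones are not yet supported
    )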
Hunk from the Python embedding-function registry (file path not captured):

@@ -122,7 +122,7 @@ class EmbeddingFunctionRegistry:
             obj["vector_column"]: EmbeddingFunctionConfig(
                 vector_column=obj["vector_column"],
                 source_column=obj["source_column"],
-                function=self.get(obj["name"]).create(**obj["model"]),
+                function=self.get(obj["name"])(**obj["model"]),
             )
             for obj in raw_list
         }
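The only change in that hunk is how the registered embedding function class is instantiated: one side calls a `create` classmethod, the other calls the class directly. A small self-contained sketch of why those two spellings can behave differently (the toy class and its `create` behavior are assumptions for illustration, not lancedb internals):

    # Toy illustration: a classmethod constructor can apply extra defaulting or
    # validation that a plain call to the class would skip.
    class ToyEmbeddingFunction:
        def __init__(self, model: str, dim: int = 384):
            self.model = model
            self.dim = dim

        @classmethod
        def create(cls, **kwargs):
            # Hypothetical hook that __init__ alone would not run.
            kwargs.setdefault("dim", 512)
            return cls(**kwargs)

    direct = ToyEmbeddingFunction(model="x")          # cls(**model_kwargs)
    via_create = ToyEmbeddingFunction.create(model="x")  # cls.create(**model_kwargs)
    print(direct.dim, via_create.dim)  # prints: 384 512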
Hunks from the Python index configuration module (file path not captured):

@@ -251,13 +251,6 @@ class HnswPq:
         results. In most cases, there is no benefit to setting this higher than 500.
         This value should be set to a value that is not less than `ef` in the
         search phase.
-
-    target_partition_size, default is 1,048,576
-
-        The target size of each partition.
-
-        This value controls the tradeoff between search performance and accuracy.
-        faster search but less accurate results as higher value.
     """

     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -268,7 +261,6 @@ class HnswPq:
     sample_rate: int = 256
     m: int = 20
     ef_construction: int = 300
-    target_partition_size: Optional[int] = None


 @dataclass
@@ -359,12 +351,6 @@ class HnswSq:
         This value should be set to a value that is not less than `ef` in the search
         phase.
-
-    target_partition_size, default is 1,048,576
-
-        The target size of each partition.
-
-        This value controls the tradeoff between search performance and accuracy.
-        faster search but less accurate results as higher value.
     """

     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -373,7 +359,6 @@ class HnswSq:
     sample_rate: int = 256
     m: int = 20
     ef_construction: int = 300
-    target_partition_size: Optional[int] = None


 @dataclass
@@ -459,20 +444,12 @@ class IvfFlat:
         cases the default should be sufficient.

         The default value is 256.
-
-    target_partition_size, default is 8192
-
-        The target size of each partition.
-
-        This value controls the tradeoff between search performance and accuracy.
-        faster search but less accurate results as higher value.
     """

     distance_type: Literal["l2", "cosine", "dot", "hamming"] = "l2"
     num_partitions: Optional[int] = None
     max_iterations: int = 50
     sample_rate: int = 256
-    target_partition_size: Optional[int] = None


 @dataclass
@@ -587,13 +564,6 @@ class IvfPq:
         cases the default should be sufficient.

         The default value is 256.
-
-    target_partition_size, default is 8192
-
-        The target size of each partition.
-
-        This value controls the tradeoff between search performance and accuracy.
-        faster search but less accurate results as higher value.
     """

     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -602,7 +572,6 @@ class IvfPq:
     num_bits: int = 8
     max_iterations: int = 50
     sample_rate: int = 256
-    target_partition_size: Optional[int] = None


 __all__ = [
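The removed docstring passages and the removed target_partition_size field describe an optional partition-size hint on each index config dataclass. A minimal sketch of constructing one of these configs, using only fields visible in the hunks above; the import path is an assumption, and whether target_partition_size is accepted depends on which side of this diff you are running:

    # Illustrative only: field names come from the dataclass definitions shown above.
    from lancedb.index import IvfPq

    # Fields present on both sides of the diff.
    cfg = IvfPq(distance_type="cosine", num_bits=8, max_iterations=50, sample_rate=256)

    # The left-hand side of the diff additionally accepts the partition-size hint,
    # documented there with a default of 8192 for IVF-style indexes:
    # cfg = IvfPq(distance_type="cosine", target_partition_size=8192)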
@@ -212,53 +212,6 @@ class RemoteDBConnection(DBConnection):
|
|||||||
table = LOOP.run(self._conn.open_table(name, namespace=namespace))
|
table = LOOP.run(self._conn.open_table(name, namespace=namespace))
|
||||||
return RemoteTable(table, self.db_name)
|
return RemoteTable(table, self.db_name)
|
||||||
|
|
||||||
def clone_table(
|
|
||||||
self,
|
|
||||||
target_table_name: str,
|
|
||||||
source_uri: str,
|
|
||||||
*,
|
|
||||||
target_namespace: List[str] = [],
|
|
||||||
source_version: Optional[int] = None,
|
|
||||||
source_tag: Optional[str] = None,
|
|
||||||
is_shallow: bool = True,
|
|
||||||
) -> Table:
|
|
||||||
"""Clone a table from a source table.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
target_table_name: str
|
|
||||||
The name of the target table to create.
|
|
||||||
source_uri: str
|
|
||||||
The URI of the source table to clone from.
|
|
||||||
target_namespace: List[str], optional
|
|
||||||
The namespace for the target table.
|
|
||||||
None or empty list represents root namespace.
|
|
||||||
source_version: int, optional
|
|
||||||
The version of the source table to clone.
|
|
||||||
source_tag: str, optional
|
|
||||||
The tag of the source table to clone.
|
|
||||||
is_shallow: bool, default True
|
|
||||||
Whether to perform a shallow clone (True) or deep clone (False).
|
|
||||||
Currently only shallow clone is supported.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
A RemoteTable object representing the cloned table.
|
|
||||||
"""
|
|
||||||
from .table import RemoteTable
|
|
||||||
|
|
||||||
table = LOOP.run(
|
|
||||||
self._conn.clone_table(
|
|
||||||
target_table_name,
|
|
||||||
source_uri,
|
|
||||||
target_namespace=target_namespace,
|
|
||||||
source_version=source_version,
|
|
||||||
source_tag=source_tag,
|
|
||||||
is_shallow=is_shallow,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return RemoteTable(table, self.db_name)
|
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def create_table(
|
def create_table(
|
||||||
self,
|
self,
|
||||||
|
|||||||
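The method removed above is exercised by the local clone tests further down in this diff. A hedged sketch of the call, mirroring those tests, is shown here; the paths and table names are illustrative, and the API only exists on the side of the diff that still ships clone_table.

# Sketch based on the clone_table docstring and the local tests in this diff.
# Illustrative paths/names; only valid where clone_table is present.
import os
import lancedb

db = lancedb.connect("/tmp/lancedb-demo")
db.create_table("source", data=[{"id": 1, "vector": [1.0, 2.0]}])

source_uri = os.path.join("/tmp/lancedb-demo", "source.lance")
# Shallow clone of the latest version (the only supported mode per the docstring).
cloned = db.clone_table("cloned", source_uri)
# Pin the clone to an older version or a tag instead:
# db.clone_table("cloned_v1", source_uri, source_version=1)
# db.clone_table("cloned_tagged", source_uri, source_tag="v1.0")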
@@ -9,7 +9,6 @@ from .linear_combination import LinearCombinationReranker
 from .openai import OpenaiReranker
 from .jinaai import JinaReranker
 from .rrf import RRFReranker
-from .mrr import MRRReranker
 from .answerdotai import AnswerdotaiRerankers
 from .voyageai import VoyageAIReranker

@@ -24,5 +23,4 @@ __all__ = [
     "RRFReranker",
     "AnswerdotaiRerankers",
     "VoyageAIReranker",
-    "MRRReranker",
 ]
@@ -1,169 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-
-from typing import Union, List, TYPE_CHECKING
-import pyarrow as pa
-import numpy as np
-
-from collections import defaultdict
-from .base import Reranker
-
-if TYPE_CHECKING:
-    from ..table import LanceVectorQueryBuilder
-
-
-class MRRReranker(Reranker):
-    """
-    Reranks the results using Mean Reciprocal Rank (MRR) algorithm based
-    on the scores of vector and FTS search.
-    Algorithm reference - https://en.wikipedia.org/wiki/Mean_reciprocal_rank
-
-    MRR calculates the average of reciprocal ranks across different search results.
-    For each document, it computes the reciprocal of its rank in each system,
-    then takes the mean of these reciprocal ranks as the final score.
-
-    Parameters
-    ----------
-    weight_vector : float, default 0.5
-        Weight for vector search results (0.0 to 1.0)
-    weight_fts : float, default 0.5
-        Weight for FTS search results (0.0 to 1.0)
-        Note: weight_vector + weight_fts should equal 1.0
-    return_score : str, default "relevance"
-        Options are "relevance" or "all"
-        The type of score to return. If "relevance", will return only the relevance
-        score. If "all", will return all scores from the vector and FTS search along
-        with the relevance score.
-    """
-
-    def __init__(
-        self,
-        weight_vector: float = 0.5,
-        weight_fts: float = 0.5,
-        return_score="relevance",
-    ):
-        if not (0.0 <= weight_vector <= 1.0):
-            raise ValueError("weight_vector must be between 0.0 and 1.0")
-        if not (0.0 <= weight_fts <= 1.0):
-            raise ValueError("weight_fts must be between 0.0 and 1.0")
-        if abs(weight_vector + weight_fts - 1.0) > 1e-6:
-            raise ValueError("weight_vector + weight_fts must equal 1.0")
-
-        super().__init__(return_score)
-        self.weight_vector = weight_vector
-        self.weight_fts = weight_fts
-
-    def rerank_hybrid(
-        self,
-        query: str,  # noqa: F821
-        vector_results: pa.Table,
-        fts_results: pa.Table,
-    ):
-        vector_ids = vector_results["_rowid"].to_pylist() if vector_results else []
-        fts_ids = fts_results["_rowid"].to_pylist() if fts_results else []
-
-        # Maps result_id to list of (type, reciprocal_rank)
-        mrr_score_map = defaultdict(list)
-
-        if vector_ids:
-            for rank, result_id in enumerate(vector_ids, 1):
-                reciprocal_rank = 1.0 / rank
-                mrr_score_map[result_id].append(("vector", reciprocal_rank))
-
-        if fts_ids:
-            for rank, result_id in enumerate(fts_ids, 1):
-                reciprocal_rank = 1.0 / rank
-                mrr_score_map[result_id].append(("fts", reciprocal_rank))
-
-        final_mrr_scores = {}
-        for result_id, scores in mrr_score_map.items():
-            vector_rr = 0.0
-            fts_rr = 0.0
-
-            for score_type, reciprocal_rank in scores:
-                if score_type == "vector":
-                    vector_rr = reciprocal_rank
-                elif score_type == "fts":
-                    fts_rr = reciprocal_rank
-
-            # If a document doesn't appear, its reciprocal rank is 0
-            weighted_mrr = self.weight_vector * vector_rr + self.weight_fts * fts_rr
-            final_mrr_scores[result_id] = weighted_mrr
-
-        combined_results = self.merge_results(vector_results, fts_results)
-        combined_row_ids = combined_results["_rowid"].to_pylist()
-        relevance_scores = [final_mrr_scores[row_id] for row_id in combined_row_ids]
-        combined_results = combined_results.append_column(
-            "_relevance_score", pa.array(relevance_scores, type=pa.float32())
-        )
-        combined_results = combined_results.sort_by(
-            [("_relevance_score", "descending")]
-        )
-
-        if self.score == "relevance":
-            combined_results = self._keep_relevance_score(combined_results)
-
-        return combined_results
-
-    def rerank_multivector(
-        self,
-        vector_results: Union[List[pa.Table], List["LanceVectorQueryBuilder"]],
-        query: str = None,
-        deduplicate: bool = True,  # noqa: F821
-    ):
-        """
-        Reranks the results from multiple vector searches using MRR algorithm.
-        Each vector search result is treated as a separate ranking system,
-        and MRR calculates the mean of reciprocal ranks across all systems.
-        This cannot reuse rerank_hybrid because MRR semantics require treating
-        each vector result as a separate ranking system.
-        """
-        if not all(isinstance(v, type(vector_results[0])) for v in vector_results):
-            raise ValueError(
-                "All elements in vector_results should be of the same type"
-            )
-
-        # avoid circular import
-        if type(vector_results[0]).__name__ == "LanceVectorQueryBuilder":
-            vector_results = [result.to_arrow() for result in vector_results]
-        elif not isinstance(vector_results[0], pa.Table):
-            raise ValueError(
-                "vector_results should be a list of pa.Table or LanceVectorQueryBuilder"
-            )
-
-        if not all("_rowid" in result.column_names for result in vector_results):
-            raise ValueError(
-                "'_rowid' is required for deduplication. \
-                add _rowid to search results like this: \
-                `search().with_row_id(True)`"
-            )
-
-        mrr_score_map = defaultdict(list)
-
-        for result_table in vector_results:
-            result_ids = result_table["_rowid"].to_pylist()
-            for rank, result_id in enumerate(result_ids, 1):
-                reciprocal_rank = 1.0 / rank
-                mrr_score_map[result_id].append(reciprocal_rank)
-
-        final_mrr_scores = {}
-        for result_id, reciprocal_ranks in mrr_score_map.items():
-            mean_rr = np.mean(reciprocal_ranks)
-            final_mrr_scores[result_id] = mean_rr
-
-        combined = pa.concat_tables(vector_results, **self._concat_tables_args)
-        combined = self._deduplicate(combined)
-
-        combined_row_ids = combined["_rowid"].to_pylist()
-
-        relevance_scores = [final_mrr_scores[row_id] for row_id in combined_row_ids]
-        combined = combined.append_column(
-            "_relevance_score", pa.array(relevance_scores, type=pa.float32())
-        )
-        combined = combined.sort_by([("_relevance_score", "descending")])
-
-        if self.score == "relevance":
-            combined = self._keep_relevance_score(combined)
-
-        return combined
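The weighted scoring in rerank_hybrid above is easy to check by hand. The following self-contained snippet recomputes it for a toy pair of rankings; the row ids and weights are made up for illustration.

# Worked example of the MRR weighting used by rerank_hybrid above.
# Each ranking contributes the reciprocal of a document's rank (0 if absent);
# the two contributions are combined with weight_vector / weight_fts.
vector_ranking = [101, 102, 103]   # row ids ordered by vector-search rank
fts_ranking = [103, 101]           # row ids ordered by FTS rank
weight_vector, weight_fts = 0.5, 0.5

def reciprocal_rank(ranking, doc_id):
    return 1.0 / (ranking.index(doc_id) + 1) if doc_id in ranking else 0.0

for doc_id in (101, 102, 103):
    score = (weight_vector * reciprocal_rank(vector_ranking, doc_id)
             + weight_fts * reciprocal_rank(fts_ranking, doc_id))
    print(doc_id, score)
# 101 -> 0.75, 102 -> 0.25, 103 -> ~0.667, so 101 ranks first after reranking.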
@@ -691,7 +691,6 @@ class Table(ABC):
         ef_construction: int = 300,
         name: Optional[str] = None,
         train: bool = True,
-        target_partition_size: Optional[int] = None,
     ):
         """Create an index on the table.

@@ -1470,7 +1469,10 @@ class Table(ABC):
             be deleted unless they are at least 7 days old. If delete_unverified is True
             then these files will be deleted regardless of their age.
         retrain: bool, default False
-            This parameter is no longer used and is deprecated.
+            If True, retrain the vector indices, this would refine the IVF clustering
+            and quantization, which may improve the search accuracy. It's faster than
+            re-creating the index from scratch, so it's recommended to try this first,
+            when the data distribution has changed significantly.

         Experimental API
         ----------------
@@ -2000,7 +2002,6 @@ class LanceTable(Table):
         *,
         name: Optional[str] = None,
         train: bool = True,
-        target_partition_size: Optional[int] = None,
     ):
         """Create an index on the table."""
         if accelerator is not None:
@@ -2017,7 +2018,6 @@ class LanceTable(Table):
                 num_bits=num_bits,
                 m=m,
                 ef_construction=ef_construction,
-                target_partition_size=target_partition_size,
             )
             self.checkout_latest()
             return
@@ -2027,7 +2027,6 @@ class LanceTable(Table):
                 num_partitions=num_partitions,
                 max_iterations=max_iterations,
                 sample_rate=sample_rate,
-                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_PQ":
             config = IvfPq(
@@ -2037,7 +2036,6 @@ class LanceTable(Table):
                 num_bits=num_bits,
                 max_iterations=max_iterations,
                 sample_rate=sample_rate,
-                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_HNSW_PQ":
             config = HnswPq(
@@ -2049,7 +2047,6 @@ class LanceTable(Table):
                 sample_rate=sample_rate,
                 m=m,
                 ef_construction=ef_construction,
-                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_HNSW_SQ":
             config = HnswSq(
@@ -2059,7 +2056,6 @@ class LanceTable(Table):
                 sample_rate=sample_rate,
                 m=m,
                 ef_construction=ef_construction,
-                target_partition_size=target_partition_size,
             )
         else:
             raise ValueError(f"Unknown index type {index_type}")
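For orientation, the dispatch above is driven by the keyword arguments of the synchronous create_index call; the invocation below follows the test usage that appears later in this diff, assumes an already open LanceTable with a "vector" column, and routes to the HnswSq branch.

# Sketch of a call that reaches the IVF_HNSW_SQ branch above.
# `table` is assumed to be an open LanceTable with a fixed-size-list "vector" column.
table.create_index(
    metric="l2",
    vector_column_name="vector",
    index_type="IVF_HNSW_SQ",
    sample_rate=256,
    m=20,
    ef_construction=300,
    replace=True,
)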
@@ -2832,7 +2828,10 @@ class LanceTable(Table):
             be deleted unless they are at least 7 days old. If delete_unverified is True
             then these files will be deleted regardless of their age.
         retrain: bool, default False
-            This parameter is no longer used and is deprecated.
+            If True, retrain the vector indices, this would refine the IVF clustering
+            and quantization, which may improve the search accuracy. It's faster than
+            re-creating the index from scratch, so it's recommended to try this first,
+            when the data distribution has changed significantly.

         Experimental API
         ----------------
@@ -4292,7 +4291,10 @@ class AsyncTable:
             be deleted unless they are at least 7 days old. If delete_unverified is True
             then these files will be deleted regardless of their age.
         retrain: bool, default False
-            This parameter is no longer used and is deprecated.
+            If True, retrain the vector indices, this would refine the IVF clustering
+            and quantization, which may improve the search accuracy. It's faster than
+            re-creating the index from scratch, so it's recommended to try this first,
+            when the data distribution has changed significantly.

         Experimental API
         ----------------
@@ -4315,19 +4317,10 @@ class AsyncTable:
         cleanup_since_ms: Optional[int] = None
         if cleanup_older_than is not None:
             cleanup_since_ms = round(cleanup_older_than.total_seconds() * 1000)
-
-        if retrain:
-            import warnings
-
-            warnings.warn(
-                "The 'retrain' parameter is deprecated and will be removed in a "
-                "future version.",
-                DeprecationWarning,
-            )
-
         return await self._inner.optimize(
             cleanup_since_ms=cleanup_since_ms,
             delete_unverified=delete_unverified,
+            retrain=retrain,
         )

     async def list_indices(self) -> Iterable[IndexConfig]:
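The hunk above is where the retrain flag either reaches the native optimize call or only raises a deprecation warning, depending on which side of the diff you are on. A hedged async usage sketch follows; the database path and table name are placeholders.

# Sketch of calling AsyncTable.optimize as edited above; treat `retrain` as
# version-dependent (forwarded on one side of the diff, deprecated on the other).
import asyncio
from datetime import timedelta
import lancedb

async def main():
    db = await lancedb.connect_async("/tmp/lancedb-demo")
    tbl = await db.open_table("vectors")
    await tbl.optimize(cleanup_older_than=timedelta(days=7), retrain=True)

asyncio.run(main())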

@@ -747,16 +747,15 @@ def test_local_namespace_operations(tmp_path):
     # Create a local database connection
     db = lancedb.connect(tmp_path)

-    # Test list_namespaces returns empty list for root namespace
+    # Test list_namespaces returns empty list
     namespaces = list(db.list_namespaces())
     assert namespaces == []

-    # Test list_namespaces with non-empty namespace raises NotImplementedError
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace operations are not supported for listing database",
-    ):
-        list(db.list_namespaces(namespace=["test"]))
+    # Test list_namespaces with parameters still returns empty list
+    namespaces_with_params = list(
+        db.list_namespaces(namespace=["test"], page_token="token", limit=5)
+    )
+    assert namespaces_with_params == []


 def test_local_create_namespace_not_supported(tmp_path):
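The test above pins down the local (OSS) behaviour: the root namespace listing is empty, and namespace arguments are either tolerated or rejected depending on the side of the diff. A two-line sketch of the happy path, with a placeholder path:

# Local connections expose an empty root namespace listing.
import lancedb

db = lancedb.connect("/tmp/lancedb-demo")
assert list(db.list_namespaces()) == []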
@@ -831,119 +830,3 @@ def test_local_table_operations_with_namespace_raise_error(tmp_path):
|
|||||||
# Test table_names without namespace - should work normally
|
# Test table_names without namespace - should work normally
|
||||||
tables_root = list(db.table_names())
|
tables_root = list(db.table_names())
|
||||||
assert "test_table" in tables_root
|
assert "test_table" in tables_root
|
||||||
|
|
||||||
|
|
||||||
def test_clone_table_latest_version(tmp_path):
|
|
||||||
"""Test cloning a table with the latest version (default behavior)"""
|
|
||||||
import os
|
|
||||||
|
|
||||||
db = lancedb.connect(tmp_path)
|
|
||||||
|
|
||||||
# Create source table with some data
|
|
||||||
data = [
|
|
||||||
{"id": 1, "text": "hello", "vector": [1.0, 2.0]},
|
|
||||||
{"id": 2, "text": "world", "vector": [3.0, 4.0]},
|
|
||||||
]
|
|
||||||
source_table = db.create_table("source", data=data)
|
|
||||||
|
|
||||||
# Add more data to create a new version
|
|
||||||
more_data = [{"id": 3, "text": "test", "vector": [5.0, 6.0]}]
|
|
||||||
source_table.add(more_data)
|
|
||||||
|
|
||||||
# Clone the table (should get latest version with 3 rows)
|
|
||||||
source_uri = os.path.join(tmp_path, "source.lance")
|
|
||||||
cloned_table = db.clone_table("cloned", source_uri)
|
|
||||||
|
|
||||||
# Verify cloned table has all 3 rows
|
|
||||||
assert cloned_table.count_rows() == 3
|
|
||||||
assert "cloned" in db.table_names()
|
|
||||||
|
|
||||||
# Verify data matches
|
|
||||||
cloned_data = cloned_table.to_pandas()
|
|
||||||
assert len(cloned_data) == 3
|
|
||||||
assert set(cloned_data["id"].tolist()) == {1, 2, 3}
|
|
||||||
|
|
||||||
|
|
||||||
def test_clone_table_specific_version(tmp_path):
|
|
||||||
"""Test cloning a table from a specific version"""
|
|
||||||
import os
|
|
||||||
|
|
||||||
db = lancedb.connect(tmp_path)
|
|
||||||
|
|
||||||
# Create source table with initial data
|
|
||||||
data = [
|
|
||||||
{"id": 1, "text": "hello", "vector": [1.0, 2.0]},
|
|
||||||
{"id": 2, "text": "world", "vector": [3.0, 4.0]},
|
|
||||||
]
|
|
||||||
source_table = db.create_table("source", data=data)
|
|
||||||
|
|
||||||
# Get the initial version
|
|
||||||
initial_version = source_table.version
|
|
||||||
|
|
||||||
# Add more data to create a new version
|
|
||||||
more_data = [{"id": 3, "text": "test", "vector": [5.0, 6.0]}]
|
|
||||||
source_table.add(more_data)
|
|
||||||
|
|
||||||
# Verify source now has 3 rows
|
|
||||||
assert source_table.count_rows() == 3
|
|
||||||
|
|
||||||
# Clone from the initial version (should have only 2 rows)
|
|
||||||
source_uri = os.path.join(tmp_path, "source.lance")
|
|
||||||
cloned_table = db.clone_table("cloned", source_uri, source_version=initial_version)
|
|
||||||
|
|
||||||
# Verify cloned table has only the initial 2 rows
|
|
||||||
assert cloned_table.count_rows() == 2
|
|
||||||
cloned_data = cloned_table.to_pandas()
|
|
||||||
assert set(cloned_data["id"].tolist()) == {1, 2}
|
|
||||||
|
|
||||||
|
|
||||||
def test_clone_table_with_tag(tmp_path):
|
|
||||||
"""Test cloning a table from a tagged version"""
|
|
||||||
import os
|
|
||||||
|
|
||||||
db = lancedb.connect(tmp_path)
|
|
||||||
|
|
||||||
# Create source table with initial data
|
|
||||||
data = [
|
|
||||||
{"id": 1, "text": "hello", "vector": [1.0, 2.0]},
|
|
||||||
{"id": 2, "text": "world", "vector": [3.0, 4.0]},
|
|
||||||
]
|
|
||||||
source_table = db.create_table("source", data=data)
|
|
||||||
|
|
||||||
# Create a tag for the current version
|
|
||||||
source_table.tags.create("v1.0", source_table.version)
|
|
||||||
|
|
||||||
# Add more data after the tag
|
|
||||||
more_data = [{"id": 3, "text": "test", "vector": [5.0, 6.0]}]
|
|
||||||
source_table.add(more_data)
|
|
||||||
|
|
||||||
# Verify source now has 3 rows
|
|
||||||
assert source_table.count_rows() == 3
|
|
||||||
|
|
||||||
# Clone from the tagged version (should have only 2 rows)
|
|
||||||
source_uri = os.path.join(tmp_path, "source.lance")
|
|
||||||
cloned_table = db.clone_table("cloned", source_uri, source_tag="v1.0")
|
|
||||||
|
|
||||||
# Verify cloned table has only the tagged version's 2 rows
|
|
||||||
assert cloned_table.count_rows() == 2
|
|
||||||
cloned_data = cloned_table.to_pandas()
|
|
||||||
assert set(cloned_data["id"].tolist()) == {1, 2}
|
|
||||||
|
|
||||||
|
|
||||||
def test_clone_table_deep_clone_fails(tmp_path):
|
|
||||||
"""Test that deep clone raises an unsupported error"""
|
|
||||||
import os
|
|
||||||
|
|
||||||
db = lancedb.connect(tmp_path)
|
|
||||||
|
|
||||||
# Create source table with some data
|
|
||||||
data = [
|
|
||||||
{"id": 1, "text": "hello", "vector": [1.0, 2.0]},
|
|
||||||
{"id": 2, "text": "world", "vector": [3.0, 4.0]},
|
|
||||||
]
|
|
||||||
db.create_table("source", data=data)
|
|
||||||
|
|
||||||
# Try to create a deep clone (should fail)
|
|
||||||
source_uri = os.path.join(tmp_path, "source.lance")
|
|
||||||
with pytest.raises(Exception, match="Deep clone is not yet implemented"):
|
|
||||||
db.clone_table("cloned", source_uri, is_shallow=False)
|
|
||||||
|
|||||||
@@ -114,63 +114,6 @@ def test_embedding_function_variables():
|
|||||||
assert func.safe_model_dump()["secret_key"] == "$var:secret"
|
assert func.safe_model_dump()["secret_key"] == "$var:secret"
|
||||||
|
|
||||||
|
|
||||||
def test_parse_functions_with_variables():
|
|
||||||
@register("variable-parsing-test")
|
|
||||||
class VariableParsingFunction(TextEmbeddingFunction):
|
|
||||||
api_key: str
|
|
||||||
base_url: Optional[str] = None
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def sensitive_keys():
|
|
||||||
return ["api_key"]
|
|
||||||
|
|
||||||
def ndims(self):
|
|
||||||
return 10
|
|
||||||
|
|
||||||
def generate_embeddings(self, texts):
|
|
||||||
# Mock implementation that just returns random embeddings
|
|
||||||
# In real usage, this would use the api_key to call an API
|
|
||||||
return [np.random.rand(self.ndims()).tolist() for _ in texts]
|
|
||||||
|
|
||||||
registry = EmbeddingFunctionRegistry.get_instance()
|
|
||||||
|
|
||||||
registry.set_var("test_api_key", "sk-test-key-12345")
|
|
||||||
registry.set_var("test_base_url", "https://api.example.com")
|
|
||||||
|
|
||||||
conf = EmbeddingFunctionConfig(
|
|
||||||
source_column="text",
|
|
||||||
vector_column="vector",
|
|
||||||
function=registry.get("variable-parsing-test").create(
|
|
||||||
api_key="$var:test_api_key", base_url="$var:test_base_url"
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
metadata = registry.get_table_metadata([conf])
|
|
||||||
|
|
||||||
# Create a mock arrow table with the metadata
|
|
||||||
schema = pa.schema(
|
|
||||||
[pa.field("text", pa.string()), pa.field("vector", pa.list_(pa.float32(), 10))]
|
|
||||||
)
|
|
||||||
table = pa.table({"text": [], "vector": []}, schema=schema)
|
|
||||||
table = table.replace_schema_metadata(metadata)
|
|
||||||
|
|
||||||
ds = lance.write_dataset(table, "memory://")
|
|
||||||
|
|
||||||
configs = registry.parse_functions(ds.schema.metadata)
|
|
||||||
|
|
||||||
assert "vector" in configs
|
|
||||||
parsed_func = configs["vector"].function
|
|
||||||
|
|
||||||
assert parsed_func.api_key == "sk-test-key-12345"
|
|
||||||
assert parsed_func.base_url == "https://api.example.com"
|
|
||||||
|
|
||||||
embeddings = parsed_func.generate_embeddings(["test text"])
|
|
||||||
assert len(embeddings) == 1
|
|
||||||
assert len(embeddings[0]) == 10
|
|
||||||
|
|
||||||
assert parsed_func.safe_model_dump()["api_key"] == "$var:test_api_key"
|
|
||||||
|
|
||||||
|
|
||||||
def test_embedding_with_bad_results(tmp_path):
|
def test_embedding_with_bad_results(tmp_path):
|
||||||
@register("null-embedding")
|
@register("null-embedding")
|
||||||
class NullEmbeddingFunction(TextEmbeddingFunction):
|
class NullEmbeddingFunction(TextEmbeddingFunction):
|
||||||
|
|||||||
@@ -22,7 +22,6 @@ from lancedb.rerankers import (
|
|||||||
JinaReranker,
|
JinaReranker,
|
||||||
AnswerdotaiRerankers,
|
AnswerdotaiRerankers,
|
||||||
VoyageAIReranker,
|
VoyageAIReranker,
|
||||||
MRRReranker,
|
|
||||||
)
|
)
|
||||||
from lancedb.table import LanceTable
|
from lancedb.table import LanceTable
|
||||||
|
|
||||||
@@ -47,7 +46,6 @@ def get_test_table(tmp_path, use_tantivy):
|
|||||||
db,
|
db,
|
||||||
"my_table",
|
"my_table",
|
||||||
schema=MyTable,
|
schema=MyTable,
|
||||||
mode="overwrite",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Need to test with a bunch of phrases to make sure sorting is consistent
|
# Need to test with a bunch of phrases to make sure sorting is consistent
|
||||||
@@ -98,7 +96,7 @@ def get_test_table(tmp_path, use_tantivy):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create a fts index
|
# Create a fts index
|
||||||
table.create_fts_index("text", use_tantivy=use_tantivy, replace=True)
|
table.create_fts_index("text", use_tantivy=use_tantivy)
|
||||||
|
|
||||||
return table, MyTable
|
return table, MyTable
|
||||||
|
|
||||||
@@ -322,34 +320,6 @@ def test_rrf_reranker(tmp_path, use_tantivy):
|
|||||||
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
|
||||||
def test_mrr_reranker(tmp_path, use_tantivy):
|
|
||||||
reranker = MRRReranker()
|
|
||||||
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
|
||||||
|
|
||||||
# Test multi-vector part
|
|
||||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
|
||||||
query = "single player experience"
|
|
||||||
rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True)
|
|
||||||
rs2 = (
|
|
||||||
table.search(query, vector_column_name="meta_vector")
|
|
||||||
.limit(10)
|
|
||||||
.with_row_id(True)
|
|
||||||
)
|
|
||||||
result = reranker.rerank_multivector([rs1, rs2])
|
|
||||||
assert "_relevance_score" in result.column_names
|
|
||||||
assert len(result) <= 20
|
|
||||||
|
|
||||||
if len(result) > 1:
|
|
||||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
|
||||||
"The _relevance_score should be descending."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Test with duplicate results
|
|
||||||
result_deduped = reranker.rerank_multivector([rs1, rs2, rs1])
|
|
||||||
assert len(result_deduped) == len(result)
|
|
||||||
|
|
||||||
|
|
||||||
def test_rrf_reranker_distance():
|
def test_rrf_reranker_distance():
|
||||||
data = pa.table(
|
data = pa.table(
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -674,45 +674,6 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
|
|||||||
"vector", replace=True, config=expected_config, name=None, train=True
|
"vector", replace=True, config=expected_config, name=None, train=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Test with target_partition_size
|
|
||||||
table.create_index(
|
|
||||||
metric="l2",
|
|
||||||
num_sub_vectors=96,
|
|
||||||
vector_column_name="vector",
|
|
||||||
replace=True,
|
|
||||||
index_cache_size=256,
|
|
||||||
num_bits=4,
|
|
||||||
target_partition_size=8192,
|
|
||||||
)
|
|
||||||
expected_config = IvfPq(
|
|
||||||
distance_type="l2",
|
|
||||||
num_sub_vectors=96,
|
|
||||||
num_bits=4,
|
|
||||||
target_partition_size=8192,
|
|
||||||
)
|
|
||||||
mock_create_index.assert_called_with(
|
|
||||||
"vector", replace=True, config=expected_config, name=None, train=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# target_partition_size has a default value,
|
|
||||||
# so `num_partitions` and `target_partition_size` are not required
|
|
||||||
table.create_index(
|
|
||||||
metric="l2",
|
|
||||||
num_sub_vectors=96,
|
|
||||||
vector_column_name="vector",
|
|
||||||
replace=True,
|
|
||||||
index_cache_size=256,
|
|
||||||
num_bits=4,
|
|
||||||
)
|
|
||||||
expected_config = IvfPq(
|
|
||||||
distance_type="l2",
|
|
||||||
num_sub_vectors=96,
|
|
||||||
num_bits=4,
|
|
||||||
)
|
|
||||||
mock_create_index.assert_called_with(
|
|
||||||
"vector", replace=True, config=expected_config, name=None, train=True
|
|
||||||
)
|
|
||||||
|
|
||||||
table.create_index(
|
table.create_index(
|
||||||
vector_column_name="my_vector",
|
vector_column_name="my_vector",
|
||||||
metric="dot",
|
metric="dot",
|
||||||
|
|||||||
@@ -163,34 +163,6 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (target_table_name, source_uri, target_namespace=vec![], source_version=None, source_tag=None, is_shallow=true))]
|
|
||||||
pub fn clone_table(
|
|
||||||
self_: PyRef<'_, Self>,
|
|
||||||
target_table_name: String,
|
|
||||||
source_uri: String,
|
|
||||||
target_namespace: Vec<String>,
|
|
||||||
source_version: Option<u64>,
|
|
||||||
source_tag: Option<String>,
|
|
||||||
is_shallow: bool,
|
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.get_inner()?.clone();
|
|
||||||
|
|
||||||
let mut builder = inner.clone_table(target_table_name, source_uri);
|
|
||||||
builder = builder.target_namespace(target_namespace);
|
|
||||||
if let Some(version) = source_version {
|
|
||||||
builder = builder.source_version(version);
|
|
||||||
}
|
|
||||||
if let Some(tag) = source_tag {
|
|
||||||
builder = builder.source_tag(tag);
|
|
||||||
}
|
|
||||||
builder = builder.is_shallow(is_shallow);
|
|
||||||
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
let table = builder.execute().await.infer_error()?;
|
|
||||||
Ok(Table::new(table))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyo3(signature = (cur_name, new_name, cur_namespace=vec![], new_namespace=vec![]))]
|
#[pyo3(signature = (cur_name, new_name, cur_namespace=vec![], new_namespace=vec![]))]
|
||||||
pub fn rename_table(
|
pub fn rename_table(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
@@ -283,7 +255,7 @@ impl Connection {
|
|||||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
|
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn connect(
|
pub fn connect(
|
||||||
py: Python<'_>,
|
py: Python,
|
||||||
uri: String,
|
uri: String,
|
||||||
api_key: Option<String>,
|
api_key: Option<String>,
|
||||||
region: Option<String>,
|
region: Option<String>,
|
||||||
|
|||||||
@@ -63,9 +63,6 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
if let Some(num_partitions) = params.num_partitions {
|
if let Some(num_partitions) = params.num_partitions {
|
||||||
ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
|
ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
|
||||||
}
|
}
|
||||||
if let Some(target_partition_size) = params.target_partition_size {
|
|
||||||
ivf_flat_builder = ivf_flat_builder.target_partition_size(target_partition_size);
|
|
||||||
}
|
|
||||||
Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
|
Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
|
||||||
},
|
},
|
||||||
"IvfPq" => {
|
"IvfPq" => {
|
||||||
@@ -79,9 +76,6 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
if let Some(num_partitions) = params.num_partitions {
|
if let Some(num_partitions) = params.num_partitions {
|
||||||
ivf_pq_builder = ivf_pq_builder.num_partitions(num_partitions);
|
ivf_pq_builder = ivf_pq_builder.num_partitions(num_partitions);
|
||||||
}
|
}
|
||||||
if let Some(target_partition_size) = params.target_partition_size {
|
|
||||||
ivf_pq_builder = ivf_pq_builder.target_partition_size(target_partition_size);
|
|
||||||
}
|
|
||||||
if let Some(num_sub_vectors) = params.num_sub_vectors {
|
if let Some(num_sub_vectors) = params.num_sub_vectors {
|
||||||
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
|
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
|
||||||
}
|
}
|
||||||
@@ -100,9 +94,6 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
if let Some(num_partitions) = params.num_partitions {
|
if let Some(num_partitions) = params.num_partitions {
|
||||||
hnsw_pq_builder = hnsw_pq_builder.num_partitions(num_partitions);
|
hnsw_pq_builder = hnsw_pq_builder.num_partitions(num_partitions);
|
||||||
}
|
}
|
||||||
if let Some(target_partition_size) = params.target_partition_size {
|
|
||||||
hnsw_pq_builder = hnsw_pq_builder.target_partition_size(target_partition_size);
|
|
||||||
}
|
|
||||||
if let Some(num_sub_vectors) = params.num_sub_vectors {
|
if let Some(num_sub_vectors) = params.num_sub_vectors {
|
||||||
hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
|
hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
|
||||||
}
|
}
|
||||||
@@ -120,9 +111,6 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
if let Some(num_partitions) = params.num_partitions {
|
if let Some(num_partitions) = params.num_partitions {
|
||||||
hnsw_sq_builder = hnsw_sq_builder.num_partitions(num_partitions);
|
hnsw_sq_builder = hnsw_sq_builder.num_partitions(num_partitions);
|
||||||
}
|
}
|
||||||
if let Some(target_partition_size) = params.target_partition_size {
|
|
||||||
hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
|
|
||||||
}
|
|
||||||
Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
|
Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
|
||||||
},
|
},
|
||||||
not_supported => Err(PyValueError::new_err(format!(
|
not_supported => Err(PyValueError::new_err(format!(
|
||||||
@@ -156,7 +144,6 @@ struct IvfFlatParams {
|
|||||||
num_partitions: Option<u32>,
|
num_partitions: Option<u32>,
|
||||||
max_iterations: u32,
|
max_iterations: u32,
|
||||||
sample_rate: u32,
|
sample_rate: u32,
|
||||||
target_partition_size: Option<u32>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(FromPyObject)]
|
#[derive(FromPyObject)]
|
||||||
@@ -167,7 +154,6 @@ struct IvfPqParams {
|
|||||||
num_bits: u32,
|
num_bits: u32,
|
||||||
max_iterations: u32,
|
max_iterations: u32,
|
||||||
sample_rate: u32,
|
sample_rate: u32,
|
||||||
target_partition_size: Option<u32>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(FromPyObject)]
|
#[derive(FromPyObject)]
|
||||||
@@ -180,7 +166,6 @@ struct IvfHnswPqParams {
|
|||||||
sample_rate: u32,
|
sample_rate: u32,
|
||||||
m: u32,
|
m: u32,
|
||||||
ef_construction: u32,
|
ef_construction: u32,
|
||||||
target_partition_size: Option<u32>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(FromPyObject)]
|
#[derive(FromPyObject)]
|
||||||
@@ -191,7 +176,6 @@ struct IvfHnswSqParams {
|
|||||||
sample_rate: u32,
|
sample_rate: u32,
|
||||||
m: u32,
|
m: u32,
|
||||||
ef_construction: u32,
|
ef_construction: u32,
|
||||||
target_partition_size: Option<u32>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyclass(get_all)]
|
#[pyclass(get_all)]
|
||||||
|
|||||||
@@ -591,11 +591,12 @@ impl Table {
     }

     /// Optimize the on-disk data by compacting and pruning old data, for better performance.
-    #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None))]
+    #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))]
     pub fn optimize(
         self_: PyRef<'_, Self>,
         cleanup_since_ms: Option<u64>,
         delete_unverified: Option<bool>,
+        retrain: Option<bool>,
     ) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner_ref()?.clone();
         let older_than = if let Some(ms) = cleanup_since_ms {
@@ -631,9 +632,10 @@ impl Table {
                 .prune
                 .unwrap();
             inner
-                .optimize(lancedb::table::OptimizeAction::Index(
-                    OptimizeOptions::default(),
-                ))
+                .optimize(lancedb::table::OptimizeAction::Index(match retrain {
+                    Some(true) => OptimizeOptions::retrain(),
+                    _ => OptimizeOptions::default(),
+                }))
                 .await
                 .infer_error()?;
             Ok(OptimizeStats {
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.22.1-beta.3"
+version = "0.22.1-beta.1"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -86,11 +86,11 @@ rand = { version = "0.9", features = ["small_rng"] }
 random_word = { version = "0.4.3", features = ["en"] }
 uuid = { version = "1.7.0", features = ["v4"] }
 walkdir = "2"
-aws-sdk-dynamodb = { version = "1.55.0" }
-aws-sdk-s3 = { version = "1.55.0" }
-aws-sdk-kms = { version = "1.48.0" }
-aws-config = { version = "1.5.10" }
-aws-smithy-runtime = { version = "1.9.1" }
+aws-sdk-dynamodb = { version = "1.38.0" }
+aws-sdk-s3 = { version = "1.38.0" }
+aws-sdk-kms = { version = "1.37" }
+aws-config = { version = "1.0" }
+aws-smithy-runtime = { version = "1.3" }
 datafusion.workspace = true
 http-body = "1" # Matching reqwest
 rstest = "0.23.0"
86
rust/lancedb/src/catalog.rs
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
//! Catalog implementation for managing databases
|
||||||
|
|
||||||
|
pub mod listing;
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use crate::database::Database;
|
||||||
|
use crate::error::Result;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
pub trait CatalogOptions {
|
||||||
|
fn serialize_into_map(&self, map: &mut HashMap<String, String>);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request parameters for listing databases
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct DatabaseNamesRequest {
|
||||||
|
/// Start listing after this name (exclusive)
|
||||||
|
pub start_after: Option<String>,
|
||||||
|
/// Maximum number of names to return
|
||||||
|
pub limit: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request to open an existing database
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct OpenDatabaseRequest {
|
||||||
|
/// The name of the database to open
|
||||||
|
pub name: String,
|
||||||
|
/// A map of database-specific options
|
||||||
|
///
|
||||||
|
/// Consult the catalog / database implementation to determine which options are available
|
||||||
|
pub database_options: HashMap<String, String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Database creation mode
|
||||||
|
///
|
||||||
|
/// The default behavior is Create
|
||||||
|
pub enum CreateDatabaseMode {
|
||||||
|
/// Create new database, error if exists
|
||||||
|
Create,
|
||||||
|
/// Open existing database if present
|
||||||
|
ExistOk,
|
||||||
|
/// Overwrite existing database
|
||||||
|
Overwrite,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for CreateDatabaseMode {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Create
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request to create a new database
|
||||||
|
pub struct CreateDatabaseRequest {
|
||||||
|
/// The name of the database to create
|
||||||
|
pub name: String,
|
||||||
|
/// The creation mode
|
||||||
|
pub mode: CreateDatabaseMode,
|
||||||
|
/// A map of catalog-specific options, consult your catalog implementation to determine what's available
|
||||||
|
pub options: HashMap<String, String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait Catalog: Send + Sync + std::fmt::Debug + 'static {
|
||||||
|
/// List database names with pagination
|
||||||
|
async fn database_names(&self, request: DatabaseNamesRequest) -> Result<Vec<String>>;
|
||||||
|
|
||||||
|
/// Create a new database
|
||||||
|
async fn create_database(&self, request: CreateDatabaseRequest) -> Result<Arc<dyn Database>>;
|
||||||
|
|
||||||
|
/// Open existing database
|
||||||
|
async fn open_database(&self, request: OpenDatabaseRequest) -> Result<Arc<dyn Database>>;
|
||||||
|
|
||||||
|
/// Rename database
|
||||||
|
async fn rename_database(&self, old_name: &str, new_name: &str) -> Result<()>;
|
||||||
|
|
||||||
|
/// Delete database
|
||||||
|
async fn drop_database(&self, name: &str) -> Result<()>;
|
||||||
|
|
||||||
|
/// Delete all databases
|
||||||
|
async fn drop_all_databases(&self) -> Result<()>;
|
||||||
|
}
|
||||||
624
rust/lancedb/src/catalog/listing.rs
Normal file
@@ -0,0 +1,624 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
//! Catalog implementation based on a local file system.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fs::create_dir_all;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
Catalog, CatalogOptions, CreateDatabaseMode, CreateDatabaseRequest, DatabaseNamesRequest,
|
||||||
|
OpenDatabaseRequest,
|
||||||
|
};
|
||||||
|
use crate::connection::ConnectRequest;
|
||||||
|
use crate::database::listing::{ListingDatabase, ListingDatabaseOptions};
|
||||||
|
use crate::database::{Database, DatabaseOptions};
|
||||||
|
use crate::error::{CreateDirSnafu, Error, Result};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use lance::io::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry};
|
||||||
|
use lance_io::local::to_local_path;
|
||||||
|
use object_store::path::Path as ObjectStorePath;
|
||||||
|
use snafu::ResultExt;
|
||||||
|
|
||||||
|
/// Options for the listing catalog
|
||||||
|
///
|
||||||
|
/// Note: the catalog will use the `storage_options` configured on
|
||||||
|
/// db_options to configure storage for listing / creating / deleting
|
||||||
|
/// databases.
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct ListingCatalogOptions {
|
||||||
|
/// The options to use for databases opened by this catalog
|
||||||
|
///
|
||||||
|
/// This also contains the storage options used by the catalog
|
||||||
|
pub db_options: ListingDatabaseOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CatalogOptions for ListingCatalogOptions {
|
||||||
|
fn serialize_into_map(&self, map: &mut HashMap<String, String>) {
|
||||||
|
self.db_options.serialize_into_map(map);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ListingCatalogOptions {
|
||||||
|
pub fn builder() -> ListingCatalogOptionsBuilder {
|
||||||
|
ListingCatalogOptionsBuilder::new()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn parse_from_map(map: &HashMap<String, String>) -> Result<Self> {
|
||||||
|
let db_options = ListingDatabaseOptions::parse_from_map(map)?;
|
||||||
|
Ok(Self { db_options })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct ListingCatalogOptionsBuilder {
|
||||||
|
options: ListingCatalogOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ListingCatalogOptionsBuilder {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
options: ListingCatalogOptions::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn db_options(mut self, db_options: ListingDatabaseOptions) -> Self {
|
||||||
|
self.options.db_options = db_options;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build(self) -> ListingCatalogOptions {
|
||||||
|
self.options
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A catalog implementation that works by listing subfolders in a directory
|
||||||
|
///
|
||||||
|
/// The listing catalog will be created with a base folder specified by the URI. Every subfolder
|
||||||
|
/// in this base folder will be considered a database. These will be opened as a
|
||||||
|
/// [`crate::database::listing::ListingDatabase`]
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct ListingCatalog {
|
||||||
|
object_store: Arc<ObjectStore>,
|
||||||
|
|
||||||
|
uri: String,
|
||||||
|
|
||||||
|
base_path: ObjectStorePath,
|
||||||
|
|
||||||
|
options: ListingCatalogOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ListingCatalog {
|
||||||
|
/// Try to create a local directory to store the lancedb dataset
|
||||||
|
pub fn try_create_dir(path: &str) -> core::result::Result<(), std::io::Error> {
|
||||||
|
let path = Path::new(path);
|
||||||
|
if !path.try_exists()? {
|
||||||
|
create_dir_all(path)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn uri(&self) -> &str {
|
||||||
|
&self.uri
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn open_path(path: &str) -> Result<Self> {
|
||||||
|
let (object_store, base_path) = ObjectStore::from_uri(path).await?;
|
||||||
|
if object_store.is_local() {
|
||||||
|
Self::try_create_dir(path).context(CreateDirSnafu { path })?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
uri: path.to_string(),
|
||||||
|
base_path,
|
||||||
|
object_store,
|
||||||
|
options: ListingCatalogOptions::default(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn connect(request: &ConnectRequest) -> Result<Self> {
|
||||||
|
let uri = &request.uri;
|
||||||
|
let parse_res = url::Url::parse(uri);
|
||||||
|
|
||||||
|
let options = ListingCatalogOptions::parse_from_map(&request.options)?;
|
||||||
|
|
||||||
|
match parse_res {
|
||||||
|
Ok(url) if url.scheme().len() == 1 && cfg!(windows) => Self::open_path(uri).await,
|
||||||
|
Ok(url) => {
|
||||||
|
let plain_uri = url.to_string();
|
||||||
|
|
||||||
|
let registry = Arc::new(ObjectStoreRegistry::default());
|
||||||
|
let storage_options = options.db_options.storage_options.clone();
|
||||||
|
let os_params = ObjectStoreParams {
|
||||||
|
storage_options: Some(storage_options.clone()),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let (object_store, base_path) =
|
||||||
|
ObjectStore::from_uri_and_params(registry, &plain_uri, &os_params).await?;
|
||||||
|
if object_store.is_local() {
|
||||||
|
Self::try_create_dir(&plain_uri).context(CreateDirSnafu { path: plain_uri })?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
uri: String::from(url.clone()),
|
||||||
|
base_path,
|
||||||
|
object_store,
|
||||||
|
options,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
Err(_) => Self::open_path(uri).await,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn database_path(&self, name: &str) -> ObjectStorePath {
|
||||||
|
self.base_path.child(name.replace('\\', "/"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Catalog for ListingCatalog {
|
||||||
|
async fn database_names(&self, request: DatabaseNamesRequest) -> Result<Vec<String>> {
|
||||||
|
let mut f = self
|
||||||
|
.object_store
|
||||||
|
.read_dir(self.base_path.clone())
|
||||||
|
.await?
|
||||||
|
.iter()
|
||||||
|
.map(Path::new)
|
||||||
|
.filter_map(|p| p.file_name().and_then(|s| s.to_str().map(String::from)))
|
||||||
|
.collect::<Vec<String>>();
|
||||||
|
f.sort();
|
||||||
|
|
||||||
|
if let Some(start_after) = request.start_after {
|
||||||
|
let index = f
|
||||||
|
.iter()
|
||||||
|
.position(|name| name.as_str() > start_after.as_str())
|
||||||
|
.unwrap_or(f.len());
|
||||||
|
f.drain(0..index);
|
||||||
|
}
|
||||||
|
if let Some(limit) = request.limit {
|
||||||
|
f.truncate(limit as usize);
|
||||||
|
}
|
||||||
|
Ok(f)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_database(&self, request: CreateDatabaseRequest) -> Result<Arc<dyn Database>> {
|
||||||
|
let db_path = self.database_path(&request.name);
|
||||||
|
let db_path_str = to_local_path(&db_path);
|
||||||
|
let exists = Path::new(&db_path_str).exists();
|
||||||
|
|
||||||
|
match request.mode {
|
||||||
|
CreateDatabaseMode::Create if exists => {
|
||||||
|
return Err(Error::DatabaseAlreadyExists { name: request.name })
|
||||||
|
}
|
||||||
|
CreateDatabaseMode::Create => {
|
||||||
|
create_dir_all(db_path.to_string()).unwrap();
|
||||||
|
}
|
||||||
|
CreateDatabaseMode::ExistOk => {
|
||||||
|
if !exists {
|
||||||
|
create_dir_all(db_path.to_string()).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CreateDatabaseMode::Overwrite => {
|
||||||
|
if exists {
|
||||||
|
self.drop_database(&request.name).await?;
|
||||||
|
}
|
||||||
|
create_dir_all(db_path.to_string()).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let db_uri = format!("/{}/{}", self.base_path, request.name);
|
||||||
|
|
||||||
|
let mut connect_request = ConnectRequest {
|
||||||
|
uri: db_uri,
|
||||||
|
#[cfg(feature = "remote")]
|
||||||
|
client_config: Default::default(),
|
||||||
|
read_consistency_interval: None,
|
||||||
|
options: Default::default(),
|
||||||
|
session: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add the db options to the connect request
|
||||||
|
self.options
|
||||||
|
.db_options
|
||||||
|
.serialize_into_map(&mut connect_request.options);
|
||||||
|
|
||||||
|
Ok(Arc::new(
|
||||||
|
ListingDatabase::connect_with_options(&connect_request).await?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn open_database(&self, request: OpenDatabaseRequest) -> Result<Arc<dyn Database>> {
|
||||||
|
let db_path = self.database_path(&request.name);
|
||||||
|
|
||||||
|
let db_path_str = to_local_path(&db_path);
|
||||||
|
let exists = Path::new(&db_path_str).exists();
|
||||||
|
if !exists {
|
||||||
|
return Err(Error::DatabaseNotFound { name: request.name });
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut connect_request = ConnectRequest {
|
||||||
|
uri: db_path.to_string(),
|
||||||
|
#[cfg(feature = "remote")]
|
||||||
|
client_config: Default::default(),
|
||||||
|
read_consistency_interval: None,
|
||||||
|
options: Default::default(),
|
||||||
|
session: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add the db options to the connect request
|
||||||
|
self.options
|
||||||
|
.db_options
|
||||||
|
.serialize_into_map(&mut connect_request.options);
|
||||||
|
|
||||||
|
Ok(Arc::new(
|
||||||
|
ListingDatabase::connect_with_options(&connect_request).await?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn rename_database(&self, _old_name: &str, _new_name: &str) -> Result<()> {
|
||||||
|
Err(Error::NotSupported {
|
||||||
|
message: "rename_database is not supported in LanceDB OSS yet".to_string(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn drop_database(&self, name: &str) -> Result<()> {
|
||||||
|
let db_path = self.database_path(name);
|
||||||
|
self.object_store
|
||||||
|
.remove_dir_all(db_path.clone())
|
||||||
|
.await
|
||||||
|
.map_err(|err| match err {
|
||||||
|
lance::Error::NotFound { .. } => Error::DatabaseNotFound {
|
||||||
|
name: name.to_owned(),
|
||||||
|
},
|
||||||
|
_ => Error::from(err),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn drop_all_databases(&self) -> Result<()> {
|
||||||
|
self.object_store
|
||||||
|
.remove_dir_all(self.base_path.clone())
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(test, not(windows)))]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// file:/// URIs with drive letters do not work correctly on Windows
|
||||||
|
#[cfg(windows)]
|
||||||
|
fn path_to_uri(path: PathBuf) -> String {
|
||||||
|
path.to_str().unwrap().to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(windows))]
|
||||||
|
fn path_to_uri(path: PathBuf) -> String {
|
||||||
|
Url::from_file_path(path).unwrap().to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn setup_catalog() -> (TempDir, ListingCatalog) {
    let tempdir = tempfile::tempdir().unwrap();
    let catalog_path = tempdir.path().join("catalog");
    std::fs::create_dir_all(&catalog_path).unwrap();

    let uri = path_to_uri(catalog_path);

    let request = ConnectRequest {
        uri: uri.clone(),
        #[cfg(feature = "remote")]
        client_config: Default::default(),
        options: Default::default(),
        read_consistency_interval: None,
        session: None,
    };

    let catalog = ListingCatalog::connect(&request).await.unwrap();

    (tempdir, catalog)
}

use crate::database::{CreateTableData, CreateTableRequest, TableNamesRequest};
use crate::table::TableDefinition;
use arrow_schema::Field;
use std::path::PathBuf;
use std::sync::Arc;
use tempfile::{tempdir, TempDir};
use url::Url;

#[tokio::test]
async fn test_database_names() {
    let (_tempdir, catalog) = setup_catalog().await;

    let names = catalog
        .database_names(DatabaseNamesRequest::default())
        .await
        .unwrap();
    assert!(names.is_empty());
}

#[tokio::test]
async fn test_create_database() {
    let (_tempdir, catalog) = setup_catalog().await;

    catalog
        .create_database(CreateDatabaseRequest {
            name: "db1".into(),
            mode: CreateDatabaseMode::Create,
            options: HashMap::new(),
        })
        .await
        .unwrap();

    let names = catalog
        .database_names(DatabaseNamesRequest::default())
        .await
        .unwrap();
    assert_eq!(names, vec!["db1"]);
}

#[tokio::test]
async fn test_create_database_exist_ok() {
    let (_tempdir, catalog) = setup_catalog().await;

    let db1 = catalog
        .create_database(CreateDatabaseRequest {
            name: "db_exist_ok".into(),
            mode: CreateDatabaseMode::ExistOk,
            options: HashMap::new(),
        })
        .await
        .unwrap();
    let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
    db1.create_table(CreateTableRequest {
        name: "test_table".parse().unwrap(),
        data: CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
        mode: Default::default(),
        write_options: Default::default(),
        namespace: vec![],
    })
    .await
    .unwrap();

    let db2 = catalog
        .create_database(CreateDatabaseRequest {
            name: "db_exist_ok".into(),
            mode: CreateDatabaseMode::ExistOk,
            options: HashMap::new(),
        })
        .await
        .unwrap();

    let tables = db2.table_names(TableNamesRequest::default()).await.unwrap();
    assert_eq!(tables, vec!["test_table".to_string()]);
}

#[tokio::test]
async fn test_create_database_overwrite() {
    let (_tempdir, catalog) = setup_catalog().await;

    let db = catalog
        .create_database(CreateDatabaseRequest {
            name: "db_overwrite".into(),
            mode: CreateDatabaseMode::Create,
            options: HashMap::new(),
        })
        .await
        .unwrap();
    let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
    db.create_table(CreateTableRequest {
        name: "old_table".parse().unwrap(),
        data: CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
        mode: Default::default(),
        write_options: Default::default(),
        namespace: vec![],
    })
    .await
    .unwrap();
    let tables = db.table_names(TableNamesRequest::default()).await.unwrap();
    assert!(!tables.is_empty());

    let new_db = catalog
        .create_database(CreateDatabaseRequest {
            name: "db_overwrite".into(),
            mode: CreateDatabaseMode::Overwrite,
            options: HashMap::new(),
        })
        .await
        .unwrap();

    let tables = new_db
        .table_names(TableNamesRequest::default())
        .await
        .unwrap();
    assert!(tables.is_empty());
}

#[tokio::test]
async fn test_create_database_overwrite_non_existing() {
    let (_tempdir, catalog) = setup_catalog().await;

    catalog
        .create_database(CreateDatabaseRequest {
            name: "new_db".into(),
            mode: CreateDatabaseMode::Overwrite,
            options: HashMap::new(),
        })
        .await
        .unwrap();

    let names = catalog
        .database_names(DatabaseNamesRequest::default())
        .await
        .unwrap();
    assert!(names.contains(&"new_db".to_string()));
}

#[tokio::test]
async fn test_open_database() {
    let (_tempdir, catalog) = setup_catalog().await;

    // Test open non-existent
    let result = catalog
        .open_database(OpenDatabaseRequest {
            name: "missing".into(),
            database_options: HashMap::new(),
        })
        .await;
    assert!(matches!(
        result.unwrap_err(),
        Error::DatabaseNotFound { name } if name == "missing"
    ));

    // Create and open
    catalog
        .create_database(CreateDatabaseRequest {
            name: "valid_db".into(),
            mode: CreateDatabaseMode::Create,
            options: HashMap::new(),
        })
        .await
        .unwrap();

    let db = catalog
        .open_database(OpenDatabaseRequest {
            name: "valid_db".into(),
            database_options: HashMap::new(),
        })
        .await
        .unwrap();
    assert_eq!(
        db.table_names(TableNamesRequest::default()).await.unwrap(),
        Vec::<String>::new()
    );
}

#[tokio::test]
async fn test_drop_database() {
    let (_tempdir, catalog) = setup_catalog().await;

    // Create test database
    catalog
        .create_database(CreateDatabaseRequest {
            name: "to_drop".into(),
            mode: CreateDatabaseMode::Create,
            options: HashMap::new(),
        })
        .await
        .unwrap();

    let names = catalog
        .database_names(DatabaseNamesRequest::default())
        .await
        .unwrap();
    assert!(!names.is_empty());

    // Drop database
    catalog.drop_database("to_drop").await.unwrap();

    let names = catalog
        .database_names(DatabaseNamesRequest::default())
        .await
        .unwrap();
    assert!(names.is_empty());
}

#[tokio::test]
async fn test_drop_all_databases() {
    let (_tempdir, catalog) = setup_catalog().await;

    catalog
        .create_database(CreateDatabaseRequest {
            name: "db1".into(),
            mode: CreateDatabaseMode::Create,
            options: HashMap::new(),
        })
        .await
        .unwrap();
    catalog
        .create_database(CreateDatabaseRequest {
            name: "db2".into(),
            mode: CreateDatabaseMode::Create,
            options: HashMap::new(),
        })
        .await
        .unwrap();

    catalog.drop_all_databases().await.unwrap();

    let names = catalog
        .database_names(DatabaseNamesRequest::default())
        .await
        .unwrap();
    assert!(names.is_empty());
}

#[tokio::test]
async fn test_rename_database_unsupported() {
    let (_tempdir, catalog) = setup_catalog().await;
    let result = catalog.rename_database("old", "new").await;
    assert!(matches!(
        result.unwrap_err(),
        Error::NotSupported { message } if message.contains("rename_database")
    ));
}

#[tokio::test]
async fn test_connect_local_path() {
    let tmp_dir = tempdir().unwrap();
    let path = tmp_dir.path().to_str().unwrap();

    let request = ConnectRequest {
        uri: path.to_string(),
        #[cfg(feature = "remote")]
        client_config: Default::default(),
        options: Default::default(),
        read_consistency_interval: None,
        session: None,
    };

    let catalog = ListingCatalog::connect(&request).await.unwrap();
    assert!(catalog.object_store.is_local());
    assert_eq!(catalog.uri, path);
}

#[tokio::test]
async fn test_connect_file_scheme() {
    let tmp_dir = tempdir().unwrap();
    let path = tmp_dir.path();
    let uri = path_to_uri(path.to_path_buf());

    let request = ConnectRequest {
        uri: uri.clone(),
        #[cfg(feature = "remote")]
        client_config: Default::default(),
        options: Default::default(),
        read_consistency_interval: None,
        session: None,
    };

    let catalog = ListingCatalog::connect(&request).await.unwrap();
    assert!(catalog.object_store.is_local());
    assert_eq!(catalog.uri, uri);
}

#[tokio::test]
async fn test_connect_invalid_uri_fallback() {
    let invalid_uri = "invalid:///path";
    let request = ConnectRequest {
        uri: invalid_uri.to_string(),
        #[cfg(feature = "remote")]
        client_config: Default::default(),
        options: Default::default(),
        read_consistency_interval: None,
        session: None,
    };

    let result = ListingCatalog::connect(&request).await;
    assert!(result.is_err());
}
}
@@ -13,13 +13,15 @@ use lance::dataset::ReadParams;
use object_store::aws::AwsCredential;

use crate::arrow::{IntoArrow, IntoArrowStream, SendableRecordBatchStream};
use crate::catalog::listing::ListingCatalog;
use crate::catalog::CatalogOptions;
use crate::database::listing::{
    ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS,
};
use crate::database::{
    CloneTableRequest, CreateNamespaceRequest, CreateTableData, CreateTableMode,
    CreateTableRequest, Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest,
    OpenTableRequest, TableNamesRequest,
    CreateNamespaceRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database,
    DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
    TableNamesRequest,
};
use crate::embeddings::{
    EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, MemoryRegistry, WithEmbeddings,
@@ -469,62 +471,6 @@ impl OpenTableBuilder {
    }
}

/// Builder for cloning a table.
///
/// A shallow clone creates a new table that shares the underlying data files
/// with the source table but has its own independent manifest. Both the source
/// and cloned tables can evolve independently while initially sharing the same
/// data, deletion, and index files.
///
/// Use this builder to configure the clone operation before executing it.
pub struct CloneTableBuilder {
    parent: Arc<dyn Database>,
    request: CloneTableRequest,
}

impl CloneTableBuilder {
    fn new(parent: Arc<dyn Database>, target_table_name: String, source_uri: String) -> Self {
        Self {
            parent,
            request: CloneTableRequest::new(target_table_name, source_uri),
        }
    }

    /// Set the source version to clone from
    pub fn source_version(mut self, version: u64) -> Self {
        self.request.source_version = Some(version);
        self
    }

    /// Set the source tag to clone from
    pub fn source_tag(mut self, tag: impl Into<String>) -> Self {
        self.request.source_tag = Some(tag.into());
        self
    }

    /// Set the target namespace for the cloned table
    pub fn target_namespace(mut self, namespace: Vec<String>) -> Self {
        self.request.target_namespace = namespace;
        self
    }

    /// Set whether to perform a shallow clone (default: true)
    ///
    /// When true, the cloned table shares data files with the source table.
    /// When false, performs a deep clone (not yet implemented).
    pub fn is_shallow(mut self, is_shallow: bool) -> Self {
        self.request.is_shallow = is_shallow;
        self
    }

    /// Execute the clone operation
    pub async fn execute(self) -> Result<Table> {
        Ok(Table::new(
            self.parent.clone().clone_table(self.request).await?,
        ))
    }
}

/// A connection to LanceDB
#[derive(Clone)]
pub struct Connection {
@@ -631,30 +577,6 @@ impl Connection {
        )
    }

    /// Clone a table in the database
    ///
    /// Creates a new table by cloning from an existing source table.
    /// By default, this performs a shallow clone where the new table shares
    /// the underlying data files with the source table.
    ///
    /// # Parameters
    /// - `target_table_name`: The name of the new table to create
    /// - `source_uri`: The URI of the source table to clone from
    ///
    /// # Returns
    /// A [`CloneTableBuilder`] that can be used to configure the clone operation
    pub fn clone_table(
        &self,
        target_table_name: impl Into<String>,
        source_uri: impl Into<String>,
    ) -> CloneTableBuilder {
        CloneTableBuilder::new(
            self.internal.clone(),
            target_table_name.into(),
            source_uri.into(),
        )
    }

    /// Rename a table in the database.
    ///
    /// This is only supported in LanceDB Cloud.
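For orientation, a minimal usage sketch of the clone API above (the path and table names are made up; it assumes the `connect` builder used elsewhere in this crate):

    let db = connect("/tmp/lancedb").execute().await?;
    let cloned = db
        .clone_table("events_clone", "/tmp/lancedb/events.lance")
        .source_version(42)
        .execute()
        .await?;

Omitting `source_version`/`source_tag` clones the latest version of the source table.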
@@ -738,7 +660,7 @@ pub struct ConnectRequest {
    #[cfg(feature = "remote")]
    pub client_config: ClientConfig,

    /// Database specific options
    /// Database/Catalog specific options
    pub options: HashMap<String, String>,

    /// The interval at which to check for updates from other processes.
@@ -1015,6 +937,50 @@ pub fn connect(uri: &str) -> ConnectBuilder {
    ConnectBuilder::new(uri)
}

/// A builder for configuring a connection to a LanceDB catalog
#[derive(Debug)]
pub struct CatalogConnectBuilder {
    request: ConnectRequest,
}

impl CatalogConnectBuilder {
    /// Create a new [`CatalogConnectBuilder`] with the given catalog URI.
    pub fn new(uri: &str) -> Self {
        Self {
            request: ConnectRequest {
                uri: uri.to_string(),
                #[cfg(feature = "remote")]
                client_config: Default::default(),
                read_consistency_interval: None,
                options: HashMap::new(),
                session: None,
            },
        }
    }

    pub fn catalog_options(mut self, catalog_options: &dyn CatalogOptions) -> Self {
        catalog_options.serialize_into_map(&mut self.request.options);
        self
    }

    /// Establishes a connection to the catalog
    pub async fn execute(self) -> Result<Arc<ListingCatalog>> {
        let catalog = ListingCatalog::connect(&self.request).await?;
        Ok(Arc::new(catalog))
    }
}

/// Connect to a LanceDB catalog.
///
/// A catalog is a container for databases, which in turn are containers for tables.
///
/// # Arguments
///
/// * `uri` - URI where the catalog is located, can be a local directory or supported remote cloud storage.
pub fn connect_catalog(uri: &str) -> CatalogConnectBuilder {
    CatalogConnectBuilder::new(uri)
}

#[cfg(all(test, feature = "remote"))]
mod test_utils {
    use super::*;
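A rough usage sketch of `connect_catalog` (the directory path is hypothetical; the request types are the catalog types exercised in the tests below):

    let catalog = connect_catalog("/tmp/my_catalog").execute().await?;
    catalog
        .create_database(CreateDatabaseRequest {
            name: "db1".into(),
            mode: CreateDatabaseMode::Create,
            options: HashMap::new(),
        })
        .await?;
    let names = catalog.database_names(DatabaseNamesRequest::default()).await?;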
@@ -1056,6 +1022,7 @@ mod test_utils {
mod tests {
    use std::fs::create_dir_all;

    use crate::catalog::{Catalog, DatabaseNamesRequest, OpenDatabaseRequest};
    use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
    use crate::query::QueryBase;
    use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1363,48 +1330,89 @@ mod tests {
    }

    #[tokio::test]
    async fn test_clone_table() {
        let tmp_dir = tempdir().unwrap();
        let uri = tmp_dir.path().to_str().unwrap();
        let db = connect(uri).execute().await.unwrap();

        // Create a source table with some data
        let mut batch_gen = BatchGenerator::new()
            .col(Box::new(IncrementingInt32::new().named("id")))
            .col(Box::new(IncrementingInt32::new().named("value")));
        let reader = batch_gen.batches(5, 100);

        let source_table = db
            .create_table("source_table", reader)
            .execute()
            .await
            .unwrap();

        // Get the source table URI
        let source_table_path = tmp_dir.path().join("source_table.lance");
        let source_uri = source_table_path.to_str().unwrap();

        // Clone the table
        let cloned_table = db
            .clone_table("cloned_table", source_uri)
            .execute()
            .await
            .unwrap();

        // Verify the cloned table exists
        let table_names = db.table_names().execute().await.unwrap();
        assert!(table_names.contains(&"source_table".to_string()));
        assert!(table_names.contains(&"cloned_table".to_string()));

        // Verify the cloned table has the same schema
        assert_eq!(
            source_table.schema().await.unwrap(),
            cloned_table.schema().await.unwrap()
        );

        // Verify the cloned table has the same data
        let source_count = source_table.count_rows(None).await.unwrap();
        let cloned_count = cloned_table.count_rows(None).await.unwrap();
        assert_eq!(source_count, cloned_count);
    }

    #[tokio::test]
    async fn test_connect_catalog() {
        let tmp_dir = tempdir().unwrap();
        let uri = tmp_dir.path().to_str().unwrap();
        let catalog = connect_catalog(uri).execute().await.unwrap();

        // Verify that we can get the uri from the catalog
        let catalog_uri = catalog.uri();
        assert_eq!(catalog_uri, uri);

        // Check that the catalog is initially empty
        let dbs = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert_eq!(dbs.len(), 0);
    }

    #[tokio::test]
    #[cfg(not(windows))]
    async fn test_catalog_create_database() {
        let tmp_dir = tempdir().unwrap();
        let uri = tmp_dir.path().to_str().unwrap();
        let catalog = connect_catalog(uri).execute().await.unwrap();

        let db_name = "test_db";
        catalog
            .create_database(crate::catalog::CreateDatabaseRequest {
                name: db_name.to_string(),
                mode: Default::default(),
                options: Default::default(),
            })
            .await
            .unwrap();

        let dbs = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert_eq!(dbs.len(), 1);
        assert_eq!(dbs[0], db_name);

        let db = catalog
            .open_database(OpenDatabaseRequest {
                name: db_name.to_string(),
                database_options: HashMap::new(),
            })
            .await
            .unwrap();

        let tables = db.table_names(Default::default()).await.unwrap();
        assert_eq!(tables.len(), 0);
    }

    #[tokio::test]
    #[cfg(not(windows))]
    async fn test_catalog_drop_database() {
        let tmp_dir = tempdir().unwrap();
        let uri = tmp_dir.path().to_str().unwrap();
        let catalog = connect_catalog(uri).execute().await.unwrap();

        // Create and then drop a database
        let db_name = "test_db_to_drop";
        catalog
            .create_database(crate::catalog::CreateDatabaseRequest {
                name: db_name.to_string(),
                mode: Default::default(),
                options: Default::default(),
            })
            .await
            .unwrap();

        let dbs = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert_eq!(dbs.len(), 1);

        catalog.drop_database(db_name).await.unwrap();

        let dbs_after = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert_eq!(dbs_after.len(), 0);
    }
}
@@ -176,42 +176,6 @@ impl CreateTableRequest {
    }
}

/// Request to clone a table from a source table.
///
/// A shallow clone creates a new table that shares the underlying data files
/// with the source table but has its own independent manifest. This allows
/// both the source and cloned tables to evolve independently while initially
/// sharing the same data, deletion, and index files.
#[derive(Clone, Debug)]
pub struct CloneTableRequest {
    /// The name of the target table to create
    pub target_table_name: String,
    /// The namespace for the target table. Empty list represents root namespace.
    pub target_namespace: Vec<String>,
    /// The URI of the source table to clone from.
    pub source_uri: String,
    /// Optional version of the source table to clone.
    pub source_version: Option<u64>,
    /// Optional tag of the source table to clone.
    pub source_tag: Option<String>,
    /// Whether to perform a shallow clone (true) or deep clone (false). Defaults to true.
    /// Currently only shallow clone is supported.
    pub is_shallow: bool,
}

impl CloneTableRequest {
    pub fn new(target_table_name: String, source_uri: String) -> Self {
        Self {
            target_table_name,
            target_namespace: vec![],
            source_uri,
            source_version: None,
            source_tag: None,
            is_shallow: true,
        }
    }
}

/// The `Database` trait defines the interface for database implementations.
///
/// A database is responsible for managing tables and their metadata.
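The request above can also be built by hand and passed straight to `Database::clone_table`; a short sketch in the style of the listing-database tests further down this diff (the source URI is made up):

    let cloned = db
        .clone_table(CloneTableRequest {
            target_table_name: "cloned_table".to_string(),
            target_namespace: vec![],
            source_uri: "/data/source_table.lance".to_string(),
            source_version: None,
            source_tag: None,
            is_shallow: true,
        })
        .await?;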
@@ -229,13 +193,6 @@ pub trait Database:
    async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>>;
    /// Create a table in the database
    async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>>;
    /// Clone a table in the database.
    ///
    /// Creates a shallow clone of the source table, sharing underlying data files
    /// but with an independent manifest. Both tables can evolve separately after cloning.
    ///
    /// See [`CloneTableRequest`] for detailed documentation and examples.
    async fn clone_table(&self, request: CloneTableRequest) -> Result<Arc<dyn BaseTable>>;
    /// Open a table in the database
    async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>>;
    /// Rename a table in the database
@@ -7,8 +7,7 @@ use std::fs::create_dir_all;
use std::path::Path;
use std::{collections::HashMap, sync::Arc};

use lance::dataset::refs::Ref;
use lance::dataset::{ReadParams, WriteMode};
use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
use lance_datafusion::utils::StreamingWriteSource;
use lance_encoding::version::LanceFileVersion;
@@ -23,8 +22,8 @@ use crate::table::NativeTable;
use crate::utils::validate_table_name;

use super::{
    BaseTable, CloneTableRequest, CreateNamespaceRequest, CreateTableMode, CreateTableRequest,
    BaseTable, CreateNamespaceRequest, CreateTableMode, CreateTableRequest, Database,
    Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
    DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
    TableNamesRequest,
};

@@ -588,13 +587,7 @@ impl ListingDatabase {

#[async_trait::async_trait]
impl Database for ListingDatabase {
    async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
    async fn list_namespaces(&self, _request: ListNamespacesRequest) -> Result<Vec<String>> {
        if !request.namespace.is_empty() {
            return Err(Error::NotSupported {
                message: "Namespace operations are not supported for listing database".into(),
            });
        }

        Ok(Vec::new())
    }

@@ -685,65 +678,6 @@ impl Database for ListingDatabase {
        }
    }

    async fn clone_table(&self, request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {
        if !request.target_namespace.is_empty() {
            return Err(Error::NotSupported {
                message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
            });
        }

        // TODO: support deep clone
        if !request.is_shallow {
            return Err(Error::NotSupported {
                message: "Deep clone is not yet implemented".to_string(),
            });
        }

        validate_table_name(&request.target_table_name)?;

        let storage_params = ObjectStoreParams {
            storage_options: Some(self.storage_options.clone()),
            ..Default::default()
        };
        let read_params = ReadParams {
            store_options: Some(storage_params.clone()),
            session: Some(self.session.clone()),
            ..Default::default()
        };

        let mut source_dataset = DatasetBuilder::from_uri(&request.source_uri)
            .with_read_params(read_params.clone())
            .load()
            .await
            .map_err(|e| Error::Lance { source: e })?;

        let version_ref = match (request.source_version, request.source_tag) {
            (Some(v), None) => Ok(Ref::Version(v)),
            (None, Some(tag)) => Ok(Ref::Tag(tag)),
            (None, None) => Ok(Ref::Version(source_dataset.version().version)),
            _ => Err(Error::InvalidInput {
                message: "Cannot specify both source_version and source_tag".to_string(),
            }),
        }?;

        let target_uri = self.table_uri(&request.target_table_name)?;
        source_dataset
            .shallow_clone(&target_uri, version_ref, storage_params)
            .await
            .map_err(|e| Error::Lance { source: e })?;

        let cloned_table = NativeTable::open_with_params(
            &target_uri,
            &request.target_table_name,
            self.store_wrapper.clone(),
            None,
            self.read_consistency_interval,
        )
        .await?;

        Ok(Arc::new(cloned_table))
    }

    async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
        if !request.namespace.is_empty() {
            return Err(Error::NotSupported {
@@ -845,694 +779,3 @@ impl Database for ListingDatabase {
        self
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::connection::ConnectRequest;
    use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest};
    use crate::table::{Table, TableDefinition};
    use arrow_array::{Int32Array, RecordBatch, StringArray};
    use arrow_schema::{DataType, Field, Schema};
    use tempfile::tempdir;

    async fn setup_database() -> (tempfile::TempDir, ListingDatabase) {
        let tempdir = tempdir().unwrap();
        let uri = tempdir.path().to_str().unwrap();

        let request = ConnectRequest {
            uri: uri.to_string(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            options: Default::default(),
            read_consistency_interval: None,
            session: None,
        };

        let db = ListingDatabase::connect_with_options(&request)
            .await
            .unwrap();

        (tempdir, db)
    }

    #[tokio::test]
    async fn test_clone_table_basic() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table with schema
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("name", DataType::Utf8, false),
        ]));

        let source_table = db
            .create_table(CreateTableRequest {
                name: "source_table".to_string(),
                namespace: vec![],
                data: CreateTableData::Empty(TableDefinition::new_from_schema(schema.clone())),
                mode: CreateTableMode::Create,
                write_options: Default::default(),
            })
            .await
            .unwrap();

        // Get the source table URI
        let source_uri = db.table_uri("source_table").unwrap();

        // Clone the table
        let cloned_table = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned_table".to_string(),
                target_namespace: vec![],
                source_uri: source_uri.clone(),
                source_version: None,
                source_tag: None,
                is_shallow: true,
            })
            .await
            .unwrap();

        // Verify both tables exist
        let table_names = db.table_names(TableNamesRequest::default()).await.unwrap();
        assert!(table_names.contains(&"source_table".to_string()));
        assert!(table_names.contains(&"cloned_table".to_string()));

        // Verify schemas match
        assert_eq!(
            source_table.schema().await.unwrap(),
            cloned_table.schema().await.unwrap()
        );
    }

    #[tokio::test]
    async fn test_clone_table_with_data() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table with actual data
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("name", DataType::Utf8, false),
        ]));

        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![1, 2, 3])),
                Arc::new(StringArray::from(vec!["a", "b", "c"])),
            ],
        )
        .unwrap();

        let reader = Box::new(arrow_array::RecordBatchIterator::new(
            vec![Ok(batch)],
            schema.clone(),
        ));

        let source_table = db
            .create_table(CreateTableRequest {
                name: "source_with_data".to_string(),
                namespace: vec![],
                data: CreateTableData::Data(reader),
                mode: CreateTableMode::Create,
                write_options: Default::default(),
            })
            .await
            .unwrap();

        let source_uri = db.table_uri("source_with_data").unwrap();

        // Clone the table
        let cloned_table = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned_with_data".to_string(),
                target_namespace: vec![],
                source_uri,
                source_version: None,
                source_tag: None,
                is_shallow: true,
            })
            .await
            .unwrap();

        // Verify data counts match
        let source_count = source_table.count_rows(None).await.unwrap();
        let cloned_count = cloned_table.count_rows(None).await.unwrap();
        assert_eq!(source_count, cloned_count);
        assert_eq!(source_count, 3);
    }

    #[tokio::test]
    async fn test_clone_table_with_storage_options() {
        let tempdir = tempdir().unwrap();
        let uri = tempdir.path().to_str().unwrap();

        // Create database with storage options
        let mut options = HashMap::new();
        options.insert("test_option".to_string(), "test_value".to_string());

        let request = ConnectRequest {
            uri: uri.to_string(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            options: options.clone(),
            read_consistency_interval: None,
            session: None,
        };

        let db = ListingDatabase::connect_with_options(&request)
            .await
            .unwrap();

        // Create source table
        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));

        db.create_table(CreateTableRequest {
            name: "source".to_string(),
            namespace: vec![],
            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
            mode: CreateTableMode::Create,
            write_options: Default::default(),
        })
        .await
        .unwrap();

        let source_uri = db.table_uri("source").unwrap();

        // Clone should work with storage options
        let cloned = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned".to_string(),
                target_namespace: vec![],
                source_uri,
                source_version: None,
                source_tag: None,
                is_shallow: true,
            })
            .await;

        assert!(cloned.is_ok());
    }

    #[tokio::test]
    async fn test_clone_table_deep_not_supported() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table
        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));

        db.create_table(CreateTableRequest {
            name: "source".to_string(),
            namespace: vec![],
            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
            mode: CreateTableMode::Create,
            write_options: Default::default(),
        })
        .await
        .unwrap();

        let source_uri = db.table_uri("source").unwrap();

        // Try deep clone (should fail)
        let result = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned".to_string(),
                target_namespace: vec![],
                source_uri,
                source_version: None,
                source_tag: None,
                is_shallow: false, // Request deep clone
            })
            .await;

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            Error::NotSupported { message } if message.contains("Deep clone")
        ));
    }

    #[tokio::test]
    async fn test_clone_table_with_namespace_not_supported() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table
        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));

        db.create_table(CreateTableRequest {
            name: "source".to_string(),
            namespace: vec![],
            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
            mode: CreateTableMode::Create,
            write_options: Default::default(),
        })
        .await
        .unwrap();

        let source_uri = db.table_uri("source").unwrap();

        // Try clone with namespace (should fail for listing database)
        let result = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned".to_string(),
                target_namespace: vec!["namespace".to_string()], // Non-empty namespace
                source_uri,
                source_version: None,
                source_tag: None,
                is_shallow: true,
            })
            .await;

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            Error::NotSupported { message } if message.contains("Namespace parameter is not supported")
        ));
    }

    #[tokio::test]
    async fn test_clone_table_invalid_target_name() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table
        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));

        db.create_table(CreateTableRequest {
            name: "source".to_string(),
            namespace: vec![],
            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
            mode: CreateTableMode::Create,
            write_options: Default::default(),
        })
        .await
        .unwrap();

        let source_uri = db.table_uri("source").unwrap();

        // Try clone with invalid target name
        let result = db
            .clone_table(CloneTableRequest {
                target_table_name: "invalid/name".to_string(), // Invalid name with slash
                target_namespace: vec![],
                source_uri,
                source_version: None,
                source_tag: None,
                is_shallow: true,
            })
            .await;

        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_clone_table_source_not_found() {
        let (_tempdir, db) = setup_database().await;

        // Try to clone from non-existent source
        let result = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned".to_string(),
                target_namespace: vec![],
                source_uri: "/nonexistent/table.lance".to_string(),
                source_version: None,
                source_tag: None,
                is_shallow: true,
            })
            .await;

        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_clone_table_with_version_and_tag_error() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table
        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));

        db.create_table(CreateTableRequest {
            name: "source".to_string(),
            namespace: vec![],
            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
            mode: CreateTableMode::Create,
            write_options: Default::default(),
        })
        .await
        .unwrap();

        let source_uri = db.table_uri("source").unwrap();

        // Try clone with both version and tag (should fail)
        let result = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned".to_string(),
                target_namespace: vec![],
                source_uri,
                source_version: Some(1),
                source_tag: Some("v1.0".to_string()),
                is_shallow: true,
            })
            .await;

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            Error::InvalidInput { message } if message.contains("Cannot specify both source_version and source_tag")
        ));
    }

    #[tokio::test]
    async fn test_clone_table_with_specific_version() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table with initial data
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("value", DataType::Utf8, false),
        ]));

        let batch1 = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![1, 2])),
                Arc::new(StringArray::from(vec!["a", "b"])),
            ],
        )
        .unwrap();

        let reader = Box::new(arrow_array::RecordBatchIterator::new(
            vec![Ok(batch1)],
            schema.clone(),
        ));

        let source_table = db
            .create_table(CreateTableRequest {
                name: "versioned_source".to_string(),
                namespace: vec![],
                data: CreateTableData::Data(reader),
                mode: CreateTableMode::Create,
                write_options: Default::default(),
            })
            .await
            .unwrap();

        // Get the initial version
        let initial_version = source_table.version().await.unwrap();

        // Add more data to create a new version
        let batch2 = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![3, 4])),
                Arc::new(StringArray::from(vec!["c", "d"])),
            ],
        )
        .unwrap();

        let source_table_obj = Table::new(source_table.clone());
        source_table_obj
            .add(Box::new(arrow_array::RecordBatchIterator::new(
                vec![Ok(batch2)],
                schema.clone(),
            )))
            .execute()
            .await
            .unwrap();

        // Verify source table now has 4 rows
        assert_eq!(source_table.count_rows(None).await.unwrap(), 4);

        let source_uri = db.table_uri("versioned_source").unwrap();

        // Clone from the initial version (should have only 2 rows)
        let cloned_table = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned_from_version".to_string(),
                target_namespace: vec![],
                source_uri,
                source_version: Some(initial_version),
                source_tag: None,
                is_shallow: true,
            })
            .await
            .unwrap();

        // Verify cloned table has only the initial 2 rows
        assert_eq!(cloned_table.count_rows(None).await.unwrap(), 2);

        // Source table should still have 4 rows
        assert_eq!(source_table.count_rows(None).await.unwrap(), 4);
    }

    #[tokio::test]
    async fn test_clone_table_with_tag() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table with initial data
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("value", DataType::Utf8, false),
        ]));

        let batch1 = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![1, 2])),
                Arc::new(StringArray::from(vec!["a", "b"])),
            ],
        )
        .unwrap();

        let reader = Box::new(arrow_array::RecordBatchIterator::new(
            vec![Ok(batch1)],
            schema.clone(),
        ));

        let source_table = db
            .create_table(CreateTableRequest {
                name: "tagged_source".to_string(),
                namespace: vec![],
                data: CreateTableData::Data(reader),
                mode: CreateTableMode::Create,
                write_options: Default::default(),
            })
            .await
            .unwrap();

        // Create a tag for the current version
        let source_table_obj = Table::new(source_table.clone());
        let mut tags = source_table_obj.tags().await.unwrap();
        tags.create("v1.0", source_table.version().await.unwrap())
            .await
            .unwrap();

        // Add more data after the tag
        let batch2 = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![3, 4])),
                Arc::new(StringArray::from(vec!["c", "d"])),
            ],
        )
        .unwrap();

        let source_table_obj = Table::new(source_table.clone());
        source_table_obj
            .add(Box::new(arrow_array::RecordBatchIterator::new(
                vec![Ok(batch2)],
                schema.clone(),
            )))
            .execute()
            .await
            .unwrap();

        // Source table should have 4 rows
        assert_eq!(source_table.count_rows(None).await.unwrap(), 4);

        let source_uri = db.table_uri("tagged_source").unwrap();

        // Clone from the tag (should have only 2 rows)
        let cloned_table = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned_from_tag".to_string(),
                target_namespace: vec![],
                source_uri,
                source_version: None,
                source_tag: Some("v1.0".to_string()),
                is_shallow: true,
            })
            .await
            .unwrap();

        // Verify cloned table has only the tagged version's 2 rows
        assert_eq!(cloned_table.count_rows(None).await.unwrap(), 2);
    }

    #[tokio::test]
    async fn test_cloned_tables_evolve_independently() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table with initial data
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("value", DataType::Utf8, false),
        ]));

        let batch1 = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![1, 2])),
                Arc::new(StringArray::from(vec!["a", "b"])),
            ],
        )
        .unwrap();

        let reader = Box::new(arrow_array::RecordBatchIterator::new(
            vec![Ok(batch1)],
            schema.clone(),
        ));

        let source_table = db
            .create_table(CreateTableRequest {
                name: "independent_source".to_string(),
                namespace: vec![],
                data: CreateTableData::Data(reader),
                mode: CreateTableMode::Create,
                write_options: Default::default(),
            })
            .await
            .unwrap();

        let source_uri = db.table_uri("independent_source").unwrap();

        // Clone the table
        let cloned_table = db
            .clone_table(CloneTableRequest {
                target_table_name: "independent_clone".to_string(),
                target_namespace: vec![],
                source_uri,
                source_version: None,
                source_tag: None,
                is_shallow: true,
            })
            .await
            .unwrap();

        // Both should start with 2 rows
        assert_eq!(source_table.count_rows(None).await.unwrap(), 2);
        assert_eq!(cloned_table.count_rows(None).await.unwrap(), 2);

        // Add data to the cloned table
        let batch_clone = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![3, 4, 5])),
                Arc::new(StringArray::from(vec!["c", "d", "e"])),
            ],
        )
        .unwrap();

        let cloned_table_obj = Table::new(cloned_table.clone());
        cloned_table_obj
            .add(Box::new(arrow_array::RecordBatchIterator::new(
                vec![Ok(batch_clone)],
                schema.clone(),
            )))
            .execute()
            .await
            .unwrap();

        // Add different data to the source table
        let batch_source = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![10, 11])),
                Arc::new(StringArray::from(vec!["x", "y"])),
            ],
        )
        .unwrap();

        let source_table_obj = Table::new(source_table.clone());
        source_table_obj
            .add(Box::new(arrow_array::RecordBatchIterator::new(
                vec![Ok(batch_source)],
                schema.clone(),
            )))
            .execute()
            .await
            .unwrap();

        // Verify they have evolved independently
        assert_eq!(source_table.count_rows(None).await.unwrap(), 4); // 2 + 2
        assert_eq!(cloned_table.count_rows(None).await.unwrap(), 5); // 2 + 3
    }

    #[tokio::test]
    async fn test_clone_latest_version() {
        let (_tempdir, db) = setup_database().await;

        // Create a source table with initial data
        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));

        let batch1 =
            RecordBatch::try_new(schema.clone(), vec![Arc::new(Int32Array::from(vec![1, 2]))])
                .unwrap();

        let reader = Box::new(arrow_array::RecordBatchIterator::new(
            vec![Ok(batch1)],
            schema.clone(),
        ));

        let source_table = db
            .create_table(CreateTableRequest {
                name: "latest_version_source".to_string(),
                namespace: vec![],
                data: CreateTableData::Data(reader),
                mode: CreateTableMode::Create,
                write_options: Default::default(),
            })
            .await
            .unwrap();

        // Add more data to create new versions
        for i in 0..3 {
            let batch = RecordBatch::try_new(
                schema.clone(),
                vec![Arc::new(Int32Array::from(vec![i * 10, i * 10 + 1]))],
            )
            .unwrap();

            let source_table_obj = Table::new(source_table.clone());
            source_table_obj
                .add(Box::new(arrow_array::RecordBatchIterator::new(
                    vec![Ok(batch)],
                    schema.clone(),
                )))
                .execute()
                .await
                .unwrap();
        }

        // Source should have 8 rows total (2 + 2 + 2 + 2)
        let source_count = source_table.count_rows(None).await.unwrap();
        assert_eq!(source_count, 8);

        let source_uri = db.table_uri("latest_version_source").unwrap();

        // Clone without specifying version or tag (should get latest)
        let cloned_table = db
            .clone_table(CloneTableRequest {
                target_table_name: "cloned_latest".to_string(),
                target_namespace: vec![],
                source_uri,
                source_version: None,
                source_tag: None,
                is_shallow: true,
            })
            .await
            .unwrap();

        // Cloned table should have all 8 rows from the latest version
        assert_eq!(cloned_table.count_rows(None).await.unwrap(), 8);
    }
}
@@ -45,10 +45,10 @@ use crate::{
pub trait EmbeddingFunction: std::fmt::Debug + Send + Sync {
    fn name(&self) -> &str;
    /// The type of the input data
    fn source_type(&self) -> Result<Cow<'_, DataType>>;
    fn source_type(&self) -> Result<Cow<DataType>>;
    /// The type of the output data
    /// This should **always** match the output of the `embed` function
    fn dest_type(&self) -> Result<Cow<'_, DataType>>;
    fn dest_type(&self) -> Result<Cow<DataType>>;
    /// Compute the embeddings for the source column in the database
    fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>>;
    /// Compute the embeddings for a given user query
@@ -75,11 +75,11 @@ impl EmbeddingFunction for BedrockEmbeddingFunction {
        "bedrock"
    }

    fn source_type(&self) -> Result<Cow<'_, DataType>> {
    fn source_type(&self) -> Result<Cow<DataType>> {
        Ok(Cow::Owned(DataType::Utf8))
    }

    fn dest_type(&self) -> Result<Cow<'_, DataType>> {
    fn dest_type(&self) -> Result<Cow<DataType>> {
        let n_dims = self.model.ndims();
        Ok(Cow::Owned(DataType::new_fixed_size_list(
            DataType::Float32,
@@ -144,11 +144,11 @@ impl EmbeddingFunction for OpenAIEmbeddingFunction {
        "openai"
    }

    fn source_type(&self) -> Result<Cow<'_, DataType>> {
    fn source_type(&self) -> Result<Cow<DataType>> {
        Ok(Cow::Owned(DataType::Utf8))
    }

    fn dest_type(&self) -> Result<Cow<'_, DataType>> {
    fn dest_type(&self) -> Result<Cow<DataType>> {
        let n_dims = self.model.ndims();
        Ok(Cow::Owned(DataType::new_fixed_size_list(
            DataType::Float32,
@@ -407,11 +407,11 @@ impl EmbeddingFunction for SentenceTransformersEmbeddings {
        "sentence-transformers"
    }

    fn source_type(&self) -> crate::Result<std::borrow::Cow<'_, arrow_schema::DataType>> {
    fn source_type(&self) -> crate::Result<std::borrow::Cow<arrow_schema::DataType>> {
        Ok(Cow::Owned(DataType::Utf8))
    }

    fn dest_type(&self) -> crate::Result<std::borrow::Cow<'_, arrow_schema::DataType>> {
    fn dest_type(&self) -> crate::Result<std::borrow::Cow<arrow_schema::DataType>> {
        let (n_dims, dtype) = self.compute_ndims_and_dtype()?;
        Ok(Cow::Owned(DataType::new_fixed_size_list(
            dtype,
@@ -8,7 +8,7 @@
//! values
use std::cmp::max;

use lance::table::format::{IndexMetadata, Manifest};
use lance::table::format::{Index, Manifest};

use crate::DistanceType;

@@ -19,7 +19,7 @@ pub struct VectorIndex {
}

impl VectorIndex {
    pub fn new_from_format(manifest: &Manifest, index: &IndexMetadata) -> Self {
    pub fn new_from_format(manifest: &Manifest, index: &Index) -> Self {
        let fields = index
            .fields
            .iter()
@@ -112,15 +112,6 @@ macro_rules! impl_ivf_params_setter {
            self.max_iterations = max_iterations;
            self
        }

        /// The target size of each partition.
        ///
        /// This value controls the tradeoff between search performance and accuracy.
        /// The higher the value the faster the search but the less accurate the results will be.
        pub fn target_partition_size(mut self, target_partition_size: u32) -> Self {
            self.target_partition_size = Some(target_partition_size);
            self
        }
    };
}
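For orientation, a brief sketch of how the `target_partition_size` knob above combines with the IVF builders that follow (only setters visible in this diff are used; the values are arbitrary):

    let params = IvfPqIndexBuilder::default()
        .max_iterations(50)
        .target_partition_size(8192);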
@@ -191,7 +182,6 @@ pub struct IvfFlatIndexBuilder {
|
|||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for IvfFlatIndexBuilder {
|
impl Default for IvfFlatIndexBuilder {
|
||||||
@@ -201,7 +191,6 @@ impl Default for IvfFlatIndexBuilder {
|
|||||||
num_partitions: None,
|
num_partitions: None,
|
||||||
sample_rate: 256,
|
sample_rate: 256,
|
||||||
max_iterations: 50,
|
max_iterations: 50,
|
||||||
target_partition_size: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -239,7 +228,6 @@ pub struct IvfPqIndexBuilder {
|
|||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
|
||||||
|
|
||||||
// PQ
|
// PQ
|
||||||
pub(crate) num_sub_vectors: Option<u32>,
|
pub(crate) num_sub_vectors: Option<u32>,
|
||||||
@@ -255,7 +243,6 @@ impl Default for IvfPqIndexBuilder {
|
|||||||
num_bits: None,
|
num_bits: None,
|
||||||
sample_rate: 256,
|
sample_rate: 256,
|
||||||
max_iterations: 50,
|
max_iterations: 50,
|
||||||
target_partition_size: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -306,7 +293,6 @@ pub struct IvfHnswPqIndexBuilder {
|
|||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
|
||||||
|
|
||||||
// HNSW
|
// HNSW
|
||||||
pub(crate) m: u32,
|
pub(crate) m: u32,
|
||||||
@@ -328,7 +314,6 @@ impl Default for IvfHnswPqIndexBuilder {
|
|||||||
max_iterations: 50,
|
max_iterations: 50,
|
||||||
m: 20,
|
m: 20,
|
||||||
ef_construction: 300,
|
ef_construction: 300,
|
||||||
target_partition_size: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -356,7 +341,6 @@ pub struct IvfHnswSqIndexBuilder {
|
|||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
|
||||||
|
|
||||||
// HNSW
|
// HNSW
|
||||||
pub(crate) m: u32,
|
pub(crate) m: u32,
|
||||||
@@ -374,7 +358,6 @@ impl Default for IvfHnswSqIndexBuilder {
|
|||||||
max_iterations: 50,
|
max_iterations: 50,
|
||||||
m: 20,
|
m: 20,
|
||||||
ef_construction: 300,
|
ef_construction: 300,
|
||||||
target_partition_size: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
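Note: the hunks above remove the `target_partition_size` option from every IVF-based index builder (IvfFlat, IvfPq, IvfHnswPq, IvfHnswSq), leaving `num_partitions`, `sample_rate`, and `max_iterations`. A self-contained sketch of the consuming-builder pattern that `impl_ivf_params_setter!` expands to after this change (not the crate's actual code; the struct name is illustrative and the defaults mirror the Default impls shown in the diff):

    #[derive(Debug)]
    struct IvfParams {
        num_partitions: Option<u32>,
        sample_rate: u32,
        max_iterations: u32,
    }

    impl Default for IvfParams {
        fn default() -> Self {
            // Defaults taken from the Default impls in the diff above.
            Self {
                num_partitions: None,
                sample_rate: 256,
                max_iterations: 50,
            }
        }
    }

    impl IvfParams {
        // Each setter takes `self` by value and returns it, so calls can be chained.
        fn num_partitions(mut self, num_partitions: u32) -> Self {
            self.num_partitions = Some(num_partitions);
            self
        }

        fn max_iterations(mut self, max_iterations: u32) -> Self {
            self.max_iterations = max_iterations;
            self
        }
    }

    fn main() {
        let params = IvfParams::default().num_partitions(256).max_iterations(25);
        println!("{:?}", params);
    }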
@@ -191,6 +191,7 @@
 //! ```

 pub mod arrow;
+pub mod catalog;
 pub mod connection;
 pub mod data;
 pub mod database;
@@ -14,9 +14,9 @@ use serde::Deserialize;
 use tokio::task::spawn_blocking;

 use crate::database::{
-    CloneTableRequest, CreateNamespaceRequest, CreateTableData, CreateTableMode,
-    CreateTableRequest, Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest,
-    OpenTableRequest, TableNamesRequest,
+    CreateNamespaceRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database,
+    DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
+    TableNamesRequest,
 };
 use crate::error::Result;
 use crate::table::BaseTable;
@@ -27,18 +27,6 @@ use super::table::RemoteTable;
 use super::util::{batches_to_ipc_bytes, parse_server_version};
 use super::ARROW_STREAM_CONTENT_TYPE;

-// Request structure for the remote clone table API
-#[derive(serde::Serialize)]
-struct RemoteCloneTableRequest {
-    source_location: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    source_version: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    source_tag: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    is_shallow: Option<bool>,
-}
-
 // the versions of the server that we support
 // for any new feature that we need to change the SDK behavior, we should bump the server version,
 // and add a feature flag as method of `ServerVersion` here.
@@ -442,51 +430,6 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
         Ok(table)
     }

-    async fn clone_table(&self, request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {
-        let table_identifier = build_table_identifier(
-            &request.target_table_name,
-            &request.target_namespace,
-            &self.client.id_delimiter,
-        );
-
-        let remote_request = RemoteCloneTableRequest {
-            source_location: request.source_uri,
-            source_version: request.source_version,
-            source_tag: request.source_tag,
-            is_shallow: Some(request.is_shallow),
-        };
-
-        let req = self
-            .client
-            .post(&format!("/v1/table/{}/clone", table_identifier.clone()))
-            .json(&remote_request);
-
-        let (request_id, rsp) = self.client.send(req).await?;
-
-        let status = rsp.status();
-        if status != StatusCode::OK {
-            let body = rsp.text().await.err_to_http(request_id.clone())?;
-            return Err(crate::Error::Http {
-                source: format!("Failed to clone table: {}", body).into(),
-                request_id,
-                status_code: Some(status),
-            });
-        }
-
-        let version = parse_server_version(&request_id, &rsp)?;
-        let cache_key = build_cache_key(&request.target_table_name, &request.target_namespace);
-        let table = Arc::new(RemoteTable::new(
-            self.client.clone(),
-            request.target_table_name.clone(),
-            request.target_namespace.clone(),
-            table_identifier,
-            version,
-        ));
-        self.table_cache.insert(cache_key, table.clone()).await;
-
-        Ok(table)
-    }
-
     async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
         let identifier =
             build_table_identifier(&request.name, &request.namespace, &self.client.id_delimiter);
@@ -1278,146 +1221,4 @@ mod tests {
             _ => panic!("Expected Runtime error from header provider"),
         }
     }
-
-    #[tokio::test]
-    async fn test_clone_table() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/cloned_table/clone");
-            assert_eq!(
-                request.headers().get("Content-Type").unwrap(),
-                JSON_CONTENT_TYPE
-            );
-
-            let body = request.body().unwrap().as_bytes().unwrap();
-            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-            assert_eq!(body["source_location"], "s3://bucket/source_table");
-            assert_eq!(body["is_shallow"], true);
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-
-        let table = conn
-            .clone_table("cloned_table", "s3://bucket/source_table")
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(table.name(), "cloned_table");
-    }
-
-    #[tokio::test]
-    async fn test_clone_table_with_version() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/cloned_table/clone");
-
-            let body = request.body().unwrap().as_bytes().unwrap();
-            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-            assert_eq!(body["source_location"], "s3://bucket/source_table");
-            assert_eq!(body["source_version"], 42);
-            assert_eq!(body["is_shallow"], true);
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-
-        let table = conn
-            .clone_table("cloned_table", "s3://bucket/source_table")
-            .source_version(42)
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(table.name(), "cloned_table");
-    }
-
-    #[tokio::test]
-    async fn test_clone_table_with_tag() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/cloned_table/clone");
-
-            let body = request.body().unwrap().as_bytes().unwrap();
-            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-            assert_eq!(body["source_location"], "s3://bucket/source_table");
-            assert_eq!(body["source_tag"], "v1.0");
-            assert_eq!(body["is_shallow"], true);
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-
-        let table = conn
-            .clone_table("cloned_table", "s3://bucket/source_table")
-            .source_tag("v1.0")
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(table.name(), "cloned_table");
-    }
-
-    #[tokio::test]
-    async fn test_clone_table_deep_clone() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/cloned_table/clone");
-
-            let body = request.body().unwrap().as_bytes().unwrap();
-            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-            assert_eq!(body["source_location"], "s3://bucket/source_table");
-            assert_eq!(body["is_shallow"], false);
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-
-        let table = conn
-            .clone_table("cloned_table", "s3://bucket/source_table")
-            .is_shallow(false)
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(table.name(), "cloned_table");
-    }
-
-    #[tokio::test]
-    async fn test_clone_table_with_namespace() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/ns1$ns2$cloned_table/clone");
-
-            let body = request.body().unwrap().as_bytes().unwrap();
-            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-            assert_eq!(body["source_location"], "s3://bucket/source_table");
-            assert_eq!(body["is_shallow"], true);
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-
-        let table = conn
-            .clone_table("cloned_table", "s3://bucket/source_table")
-            .target_namespace(vec!["ns1".to_string(), "ns2".to_string()])
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(table.name(), "cloned_table");
-    }
-
-    #[tokio::test]
-    async fn test_clone_table_error() {
-        let conn = Connection::new_with_handler(|_| {
-            http::Response::builder()
-                .status(500)
-                .body("Internal server error")
-                .unwrap()
-        });
-
-        let result = conn
-            .clone_table("cloned_table", "s3://bucket/source_table")
-            .execute()
-            .await;
-
-        assert!(result.is_err());
-        if let Err(crate::Error::Http { source, .. }) = result {
-            assert!(source.to_string().contains("Failed to clone table"));
-        } else {
-            panic!("Expected HTTP error");
-        }
-    }
 }
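Note: the hunks above remove the remote clone-table support from this branch: the `RemoteCloneTableRequest` payload, the `Database::clone_table` implementation that POSTs it to `/v1/table/{name}/clone`, and the corresponding tests. The removed struct is reproduced below as a standalone sketch to show what the request body serialized to; the example values come from the removed tests, and serde/serde_json are assumed as dependencies:

    use serde::Serialize;

    // Copy of the removed request struct; optional fields are omitted from the JSON when None.
    #[derive(Serialize)]
    struct RemoteCloneTableRequest {
        source_location: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_version: Option<u64>,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_tag: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        is_shallow: Option<bool>,
    }

    fn main() {
        let req = RemoteCloneTableRequest {
            source_location: "s3://bucket/source_table".to_string(),
            source_version: Some(42),
            source_tag: None,
            is_shallow: Some(true),
        };
        // Prints: {"source_location":"s3://bucket/source_table","source_version":42,"is_shallow":true}
        println!("{}", serde_json::to_string(&req).unwrap());
    }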
@@ -242,15 +242,17 @@ pub struct OptimizeStats {
 /// Describes what happens when a vector either contains NaN or
 /// does not have enough values
 #[derive(Clone, Debug, Default)]
-#[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
 enum BadVectorHandling {
     /// An error is returned
     #[default]
     Error,
+    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The offending row is droppped
     Drop,
+    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The invalid/missing items are replaced by fill_value
     Fill(f32),
+    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The invalid items are replaced by NULL
     None,
 }
@@ -1976,8 +1978,6 @@ impl NativeTable {
     /// Delete keys from the config
     pub async fn delete_config_keys(&self, delete_keys: &[&str]) -> Result<()> {
         let mut dataset = self.dataset.get_mut().await?;
-        // TODO: update this when we implement metadata APIs
-        #[allow(deprecated)]
         dataset.delete_config_keys(delete_keys).await?;
         Ok(())
     }
@@ -1988,8 +1988,6 @@ impl NativeTable {
         upsert_values: impl IntoIterator<Item = (String, String)>,
     ) -> Result<()> {
         let mut dataset = self.dataset.get_mut().await?;
-        // TODO: update this when we implement metadata APIs
-        #[allow(deprecated)]
         dataset.replace_schema_metadata(upsert_values).await?;
         Ok(())
     }
@@ -20,8 +20,6 @@ use datafusion_physical_plan::SendableRecordBatchStream;

 lazy_static! {
     static ref TABLE_NAME_REGEX: regex::Regex = regex::Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap();
-    static ref NAMESPACE_NAME_REGEX: regex::Regex =
-        regex::Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap();
 }

 pub trait PatchStoreParam {
@@ -100,53 +98,6 @@ pub fn validate_table_name(name: &str) -> Result<()> {
     Ok(())
 }

-/// Validate a namespace name component
-///
-/// Namespace names must:
-/// - Not be empty
-/// - Only contain alphanumeric characters, underscores, hyphens, and periods
-///
-/// # Arguments
-/// * `name` - A single namespace component (not the full path)
-///
-/// # Returns
-/// * `Ok(())` if the namespace name is valid
-/// * `Err(Error)` if the namespace name is invalid
-pub fn validate_namespace_name(name: &str) -> Result<()> {
-    if name.is_empty() {
-        return Err(Error::InvalidInput {
-            message: "Namespace names cannot be empty strings".to_string(),
-        });
-    }
-    if !NAMESPACE_NAME_REGEX.is_match(name) {
-        return Err(Error::InvalidInput {
-            message: format!(
-                "Invalid namespace name '{}': Namespace names can only contain alphanumeric characters, underscores, hyphens, and periods",
-                name
-            ),
-        });
-    }
-    Ok(())
-}
-
-/// Validate all components of a namespace
-///
-/// Iterates through all namespace components and validates each one.
-/// Returns an error if any component is invalid.
-///
-/// # Arguments
-/// * `namespace` - The namespace components to validate
-///
-/// # Returns
-/// * `Ok(())` if all namespace components are valid
-/// * `Err(Error)` if any component is invalid
-pub fn validate_namespace(namespace: &[String]) -> Result<()> {
-    for component in namespace {
-        validate_namespace_name(component)?;
-    }
-    Ok(())
-}
-
 /// Find one default column to create index or perform vector query.
 pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result<String> {
     // Try to find a vector column.
@@ -394,61 +345,6 @@ mod tests {
         assert!(validate_table_name("name with space").is_err());
     }

-    #[test]
-    fn test_validate_namespace_name() {
-        // Valid namespace names
-        assert!(validate_namespace_name("ns1").is_ok());
-        assert!(validate_namespace_name("namespace_123").is_ok());
-        assert!(validate_namespace_name("my-namespace").is_ok());
-        assert!(validate_namespace_name("my.namespace").is_ok());
-        assert!(validate_namespace_name("NS_1.2.3").is_ok());
-        assert!(validate_namespace_name("a").is_ok());
-        assert!(validate_namespace_name("123").is_ok());
-        assert!(validate_namespace_name("_underscore").is_ok());
-        assert!(validate_namespace_name("-hyphen").is_ok());
-        assert!(validate_namespace_name(".period").is_ok());
-
-        // Invalid namespace names
-        assert!(validate_namespace_name("").is_err());
-        assert!(validate_namespace_name("namespace with spaces").is_err());
-        assert!(validate_namespace_name("namespace/with/slashes").is_err());
-        assert!(validate_namespace_name("namespace\\with\\backslashes").is_err());
-        assert!(validate_namespace_name("namespace$with$delimiter").is_err());
-        assert!(validate_namespace_name("namespace@special").is_err());
-        assert!(validate_namespace_name("namespace#hash").is_err());
-    }
-
-    #[test]
-    fn test_validate_namespace() {
-        // Valid namespace with single component
-        assert!(validate_namespace(&["ns1".to_string()]).is_ok());
-
-        // Valid namespace with multiple components
-        assert!(
-            validate_namespace(&["ns1".to_string(), "ns2".to_string(), "ns3".to_string()]).is_ok()
-        );
-
-        // Empty namespace (root) is valid
-        assert!(validate_namespace(&[]).is_ok());
-
-        // Invalid: contains empty component
-        assert!(validate_namespace(&["ns1".to_string(), "".to_string()]).is_err());
-
-        // Invalid: contains component with spaces
-        assert!(validate_namespace(&["ns1".to_string(), "ns 2".to_string()]).is_err());
-
-        // Invalid: contains component with special characters
-        assert!(validate_namespace(&["ns1".to_string(), "ns@2".to_string()]).is_err());
-        assert!(validate_namespace(&["ns1".to_string(), "ns/2".to_string()]).is_err());
-        assert!(validate_namespace(&["ns1".to_string(), "ns$2".to_string()]).is_err());
-
-        // Valid: underscores, hyphens, and periods are allowed
-        assert!(
-            validate_namespace(&["ns_1".to_string(), "ns-2".to_string(), "ns.3".to_string()])
-                .is_ok()
-        );
-    }
-
     #[test]
     fn test_string_to_datatype() {
         let string = "int32";
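Note: with `validate_namespace_name`/`validate_namespace` and their tests removed above, only the table-name check remains; both checks used the same character whitelist. A standalone sketch of that check, using the pattern from the `lazy_static!` block (the helper name is illustrative and the regex crate is an assumed dependency):

    use regex::Regex;

    // Same whitelist as TABLE_NAME_REGEX in the diff: ASCII letters, digits, '_', '-', '.'.
    fn is_valid_table_name(name: &str) -> bool {
        Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap().is_match(name)
    }

    fn main() {
        assert!(is_valid_table_name("my_table-v1.2"));
        assert!(!is_valid_table_name("name with space")); // mirrors the remaining test above
        assert!(!is_valid_table_name(""));
        println!("table name validation sketch OK");
    }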
@@ -341,10 +341,10 @@ impl EmbeddingFunction for MockEmbed {
     fn name(&self) -> &str {
         &self.name
     }
-    fn source_type(&self) -> Result<Cow<'_, DataType>> {
+    fn source_type(&self) -> Result<Cow<DataType>> {
         Ok(Cow::Borrowed(&self.source_type))
     }
-    fn dest_type(&self) -> Result<Cow<'_, DataType>> {
+    fn dest_type(&self) -> Result<Cow<DataType>> {
         Ok(Cow::Borrowed(&self.dest_type))
     }
     fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>> {