mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 13:29:57 +00:00
Compare commits
7 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0097c5f0a | ||
|
|
c199708e64 | ||
|
|
4a47150ae7 | ||
|
|
f86b20a564 | ||
|
|
cc81f3e1a5 | ||
|
|
bc49c4db82 | ||
|
|
d2eec46f17 |
32
.github/workflows/python.yml
vendored
32
.github/workflows/python.yml
vendored
@@ -33,11 +33,41 @@ jobs:
|
||||
python-version: "3.12"
|
||||
- name: Install ruff
|
||||
run: |
|
||||
pip install ruff==0.8.4
|
||||
pip install ruff==0.9.9
|
||||
- name: Format check
|
||||
run: ruff format --check .
|
||||
- name: Lint
|
||||
run: ruff check .
|
||||
|
||||
type-check:
|
||||
name: "Type Check"
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-22.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: python
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- name: Install protobuf compiler
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler
|
||||
pip install toml
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python ../ci/parse_requirements.py pyproject.toml --extras dev,tests,embeddings > requirements.txt
|
||||
pip install -r requirements.txt
|
||||
- name: Run pyright
|
||||
run: pyright
|
||||
|
||||
doctest:
|
||||
name: "Doctest"
|
||||
timeout-minutes: 30
|
||||
|
||||
@@ -1,21 +1,27 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v3.2.0
|
||||
hooks:
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
# Ruff version.
|
||||
rev: v0.8.4
|
||||
rev: v0.9.9
|
||||
hooks:
|
||||
- id: ruff
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: local-biome-check
|
||||
name: biome check
|
||||
entry: npx @biomejs/biome@1.8.3 check --config-path nodejs/biome.json nodejs/
|
||||
language: system
|
||||
types: [text]
|
||||
files: "nodejs/.*"
|
||||
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*|nodejs/examples/.*
|
||||
- id: ruff
|
||||
# - repo: https://github.com/RobertCraigie/pyright-python
|
||||
# rev: v1.1.395
|
||||
# hooks:
|
||||
# - id: pyright
|
||||
# args: ["--project", "python"]
|
||||
# additional_dependencies: [pyarrow-stubs]
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: local-biome-check
|
||||
name: biome check
|
||||
entry: npx @biomejs/biome@1.8.3 check --config-path nodejs/biome.json nodejs/
|
||||
language: system
|
||||
types: [text]
|
||||
files: "nodejs/.*"
|
||||
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*|nodejs/examples/.*
|
||||
|
||||
322
Cargo.lock
generated
322
Cargo.lock
generated
@@ -176,9 +176,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
|
||||
|
||||
[[package]]
|
||||
name = "arrow"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eaf3437355979f1e93ba84ba108c38be5767713051f3c8ffbf07c094e2e61f9f"
|
||||
checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -198,24 +198,23 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-arith"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "31dce77d2985522288edae7206bffd5fc4996491841dda01a13a58415867e681"
|
||||
checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"half",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-array"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d45fe6d3faed0435b7313e59a02583b14c6c6339fa7729e94c32a20af319a79"
|
||||
checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-buffer",
|
||||
@@ -230,9 +229,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-buffer"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b02656a35cc103f28084bc80a0159668e0a680d919cef127bd7e0aaccb06ec1"
|
||||
checksum = "4e899dade2c3b7f5642eb8366cfd898958bcca099cde6dfea543c7e8d3ad88d4"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"half",
|
||||
@@ -241,9 +240,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-cast"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c73c6233c5b5d635a56f6010e6eb1ab9e30e94707db21cea03da317f67d84cf3"
|
||||
checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -262,28 +261,25 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-csv"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec222848d70fea5a32af9c3602b08f5d740d5e2d33fbd76bf6fd88759b5b13a7"
|
||||
checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"csv",
|
||||
"csv-core",
|
||||
"lazy_static",
|
||||
"lexical-core",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-data"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7f2861ffa86f107b8ab577d86cff7c7a490243eabe961ba1e1af4f27542bb79"
|
||||
checksum = "0a329fb064477c9ec5f0870d2f5130966f91055c7c5bce2b3a084f116bc28c3b"
|
||||
dependencies = [
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
@@ -293,13 +289,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-ipc"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0270dc511f11bb5fa98a25020ad51a99ca5b08d8a8dfbd17503bb9dba0388f0b"
|
||||
checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"flatbuffers",
|
||||
@@ -309,9 +304,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-json"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0eff38eeb8a971ad3a4caf62c5d57f0cff8a48b64a55e3207c4fd696a9234aad"
|
||||
checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -329,26 +324,23 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-ord"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6f202a879d287099139ff0d121e7f55ae5e0efe634b8cf2106ebc27a8715dee"
|
||||
checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"half",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-row"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f936954991c360ba762dff23f5dda16300774fafd722353d9683abd97630ae"
|
||||
checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
@@ -358,18 +350,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-schema"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9579b9d8bce47aa41389fe344f2c6758279983b7c0ebb4013e283e3e91bb450e"
|
||||
checksum = "85934a9d0261e0fa5d4e2a5295107d743b543a6e0484a835d4b8db2da15306f9"
|
||||
dependencies = [
|
||||
"bitflags 2.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-select"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7471ba126d0b0aaa24b50a36bc6c25e4e74869a1fd1a5553357027a0b1c8d1f1"
|
||||
checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-array",
|
||||
@@ -381,9 +373,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-string"
|
||||
version = "53.4.0"
|
||||
version = "54.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72993b01cb62507b06f1fb49648d7286c8989ecfabdb7b77a750fcb54410731b"
|
||||
checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -1708,9 +1700,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "014fc8c384ecacedaabb3bc8359c2a6c6e9d8f7bea65be3434eccacfc37f52d9"
|
||||
checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -1719,7 +1711,6 @@ dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"dashmap",
|
||||
"datafusion-catalog",
|
||||
"datafusion-common",
|
||||
"datafusion-common-runtime",
|
||||
@@ -1738,7 +1729,7 @@ dependencies = [
|
||||
"datafusion-sql",
|
||||
"futures",
|
||||
"glob",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"object_store",
|
||||
"parking_lot",
|
||||
@@ -1753,30 +1744,38 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-catalog"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee60d33e210ef96070377ae667ece7caa0e959c8387496773d4a1a72f1a5012e"
|
||||
checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1"
|
||||
dependencies = [
|
||||
"arrow-schema",
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"dashmap",
|
||||
"datafusion-common",
|
||||
"datafusion-execution",
|
||||
"datafusion-expr",
|
||||
"datafusion-physical-plan",
|
||||
"datafusion-sql",
|
||||
"futures",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"parking_lot",
|
||||
"sqlparser 0.53.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-common"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b42b7d720fe21ed9cca2ebb635f3f13a12cfab786b41e0fba184fb2e620525b"
|
||||
checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-ipc",
|
||||
"arrow-schema",
|
||||
"base64 0.22.1",
|
||||
"half",
|
||||
"hashbrown 0.14.5",
|
||||
"indexmap 2.7.1",
|
||||
@@ -1791,9 +1790,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-common-runtime"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72fbf14d4079f7ce5306393084fe5057dddfdc2113577e0049310afa12e94281"
|
||||
checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48"
|
||||
dependencies = [
|
||||
"log",
|
||||
"tokio",
|
||||
@@ -1801,15 +1800,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-doc"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c278dbd64860ed0bb5240fc1f4cb6aeea437153910aea69bcf7d5a8d6d0454f3"
|
||||
checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66"
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-execution"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e22cb02af47e756468b3cbfee7a83e3d4f2278d452deb4b033ba933c75169486"
|
||||
checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"dashmap",
|
||||
@@ -1826,9 +1825,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-expr"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62298eadb1d15b525df1315e61a71519ffc563d41d5c3b2a30fda2d70f77b93c"
|
||||
checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
@@ -1846,20 +1845,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-expr-common"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dda7f73c5fc349251cd3dcb05773c5bf55d2505a698ef9d38dfc712161ea2f55"
|
||||
checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.14.0",
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-functions"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd197f3b2975424d3a4898ea46651be855a46721a56727515dbd5c9e2fb597da"
|
||||
checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-buffer",
|
||||
@@ -1875,7 +1875,7 @@ dependencies = [
|
||||
"datafusion-macros",
|
||||
"hashbrown 0.14.5",
|
||||
"hex",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"md-5",
|
||||
"rand",
|
||||
@@ -1887,12 +1887,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-functions-aggregate"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aabbe48fba18f9981b134124381bee9e46f93518b8ad2f9721ee296cef5affb9"
|
||||
checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
"datafusion-common",
|
||||
"datafusion-doc",
|
||||
@@ -1909,9 +1910,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-functions-aggregate-common"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7a3fefed9c8c11268d446d924baca8cabf52fe32f73fdaa20854bac6473590c"
|
||||
checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
@@ -1922,9 +1923,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-functions-nested"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6360f27464fab857bec698af39b2ae331dc07c8bf008fb4de387a19cdc6815a5"
|
||||
checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -1932,21 +1933,23 @@ dependencies = [
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"datafusion-common",
|
||||
"datafusion-doc",
|
||||
"datafusion-execution",
|
||||
"datafusion-expr",
|
||||
"datafusion-functions",
|
||||
"datafusion-functions-aggregate",
|
||||
"datafusion-macros",
|
||||
"datafusion-physical-expr-common",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-functions-table"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c35c070eb705c12795dab399c3809f4dfbc290678c624d3989490ca9b8449c1"
|
||||
checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -1960,9 +1963,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-functions-window"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52229bca26b590b140900752226c829f15fc1a99840e1ca3ce1a9534690b82a8"
|
||||
checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5"
|
||||
dependencies = [
|
||||
"datafusion-common",
|
||||
"datafusion-doc",
|
||||
@@ -1977,9 +1980,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-functions-window-common"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "367befc303b64a668a10ae6988a064a9289e1999e71a7f8e526b6e14d6bdd9d6"
|
||||
checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4"
|
||||
dependencies = [
|
||||
"datafusion-common",
|
||||
"datafusion-physical-expr-common",
|
||||
@@ -1987,19 +1990,20 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-macros"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5de3c8f386ea991696553afe241a326ecbc3c98a12c562867e4be754d3a060c"
|
||||
checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af"
|
||||
dependencies = [
|
||||
"datafusion-expr",
|
||||
"quote",
|
||||
"syn 2.0.98",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-optimizer"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53b520413906f755910422b016fb73884ae6e9e1b376de4f9584b6c0e031da75"
|
||||
checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
@@ -2007,7 +2011,7 @@ dependencies = [
|
||||
"datafusion-expr",
|
||||
"datafusion-physical-expr",
|
||||
"indexmap 2.7.1",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"regex",
|
||||
"regex-syntax 0.8.5",
|
||||
@@ -2015,9 +2019,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-physical-expr"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acd6ddc378f6ad19af95ccd6790dec8f8e1264bc4c70e99ddc1830c1a1c78ccd"
|
||||
checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
@@ -2032,47 +2036,53 @@ dependencies = [
|
||||
"half",
|
||||
"hashbrown 0.14.5",
|
||||
"indexmap 2.7.1",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"paste",
|
||||
"petgraph 0.6.5",
|
||||
"petgraph",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-physical-expr-common"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06e6c05458eccd74b4c77ed6a1fe63d52434240711de7f6960034794dad1caf5"
|
||||
checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
"arrow-buffer",
|
||||
"datafusion-common",
|
||||
"datafusion-expr-common",
|
||||
"hashbrown 0.14.5",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.14.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-physical-optimizer"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9dc3a82190f49c37d377f31317e07ab5d7588b837adadba8ac367baad5dc2351"
|
||||
checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-schema",
|
||||
"datafusion-common",
|
||||
"datafusion-execution",
|
||||
"datafusion-expr",
|
||||
"datafusion-expr-common",
|
||||
"datafusion-physical-expr",
|
||||
"datafusion-physical-expr-common",
|
||||
"datafusion-physical-plan",
|
||||
"itertools 0.13.0",
|
||||
"futures",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-physical-plan"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a6608bc9844b4ddb5ed4e687d173e6c88700b1d0482f43894617d18a1fe75da"
|
||||
checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
@@ -2093,7 +2103,7 @@ dependencies = [
|
||||
"half",
|
||||
"hashbrown 0.14.5",
|
||||
"indexmap 2.7.1",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"parking_lot",
|
||||
"pin-project-lite",
|
||||
@@ -2102,9 +2112,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-sql"
|
||||
version = "44.0.0"
|
||||
version = "45.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a884061c79b33d0c8e84a6f4f4be8bdc12c0f53f5af28ddf5d6d95ac0b15fdc"
|
||||
checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -2484,12 +2494,6 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.5.7"
|
||||
@@ -2570,8 +2574,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "499049427ae23480696a1b728a292fec9fa554742ee26c0f35acbdade47801be"
|
||||
dependencies = [
|
||||
"rand",
|
||||
]
|
||||
@@ -3532,8 +3537,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "748d2bd8e36f25a7fc130398e115678d01a1b821aceafe790965a0511d2443fe"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -3592,8 +3598,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5ca5041940b2623daf6398c1e297aedd99a9fc38070222e69a69f600dd374c9"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -3610,8 +3617,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc3cd36bd1de369dd957e98fbacf8963fd1b147595fd4d2ba5e9f5866f143db9"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -3647,8 +3655,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72791574cb837c5d2e89f3688b71c5fc9cc584f8f79cc50005787f5cbe285506"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -3673,8 +3682,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4739cefb4757f5405c09c848d71e5a3ec05fbff26ac1a6e799774a1e878ba72"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrow",
|
||||
@@ -3712,8 +3722,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "005824228e561b4fda8f3bfd9d755a6cbd14b2d68da93c4f094af9ee6981977f"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -3747,8 +3758,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49177175c77cc1201366aba59ca4aa457893d1a0aad0fa6cbf7095741be7aeba"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -3800,8 +3812,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea5e9a5bbbab0e5efc3038abb49c74add55fe7d72541a448e8c8dc45d7cbe406"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -3839,8 +3852,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c6958010c1f35babc9c68c5a4a49c154a3ecc2d0a9a11a11a970c06e2f2bdb5"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-ord",
|
||||
@@ -3863,8 +3877,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c08830bb24cadbbd24069e76d17570bfa7518eca9f2aa3651afb72035b74331"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -3903,8 +3918,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "0.24.0"
|
||||
source = "git+https://github.com/lancedb/lance.git?tag=v0.24.0-beta.1#33ae43b2944c12e0dbd139e8aa098cffa74edef5"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae923e5c09091bcf48aa166ac362b0dd189b55779ff553281094510728ea0c66"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -4044,7 +4060,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.21.0-beta.0"
|
||||
version = "0.21.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"env_logger",
|
||||
@@ -4053,7 +4069,7 @@ dependencies = [
|
||||
"pin-project",
|
||||
"pyo3",
|
||||
"pyo3-async-runtimes",
|
||||
"pyo3-build-config 0.20.3",
|
||||
"pyo3-build-config",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
@@ -4974,23 +4990,13 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7"
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
|
||||
dependencies = [
|
||||
"fixedbitset 0.4.2",
|
||||
"indexmap 2.7.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
|
||||
dependencies = [
|
||||
"fixedbitset 0.5.7",
|
||||
"fixedbitset",
|
||||
"indexmap 2.7.1",
|
||||
]
|
||||
|
||||
@@ -5508,7 +5514,7 @@ dependencies = [
|
||||
"log",
|
||||
"multimap",
|
||||
"once_cell",
|
||||
"petgraph 0.7.1",
|
||||
"petgraph",
|
||||
"prettyplease",
|
||||
"prost",
|
||||
"prost-types",
|
||||
@@ -5562,9 +5568,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3"
|
||||
version = "0.22.6"
|
||||
version = "0.23.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884"
|
||||
checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"indoc",
|
||||
@@ -5572,7 +5578,7 @@ dependencies = [
|
||||
"memoffset",
|
||||
"once_cell",
|
||||
"portable-atomic",
|
||||
"pyo3-build-config 0.22.6",
|
||||
"pyo3-build-config",
|
||||
"pyo3-ffi",
|
||||
"pyo3-macros",
|
||||
"unindent",
|
||||
@@ -5580,9 +5586,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-async-runtimes"
|
||||
version = "0.22.0"
|
||||
version = "0.23.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2529f0be73ffd2be0cc43c013a640796558aa12d7ca0aab5cc14f375b4733031"
|
||||
checksum = "977dc837525cfd22919ba6a831413854beb7c99a256c03bf8624ad707e45810e"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"once_cell",
|
||||
@@ -5594,9 +5600,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-async-runtimes-macros"
|
||||
version = "0.22.0"
|
||||
version = "0.23.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22c26fd8e9fc19f53f0c1e00bf61471de6789f7eb263056f7f944a9cceb5823e"
|
||||
checksum = "b2df2884957d2476731f987673befac5d521dff10abb0a7cbe12015bc7702fe9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -5605,19 +5611,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-build-config"
|
||||
version = "0.20.3"
|
||||
version = "0.23.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"target-lexicon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-build-config"
|
||||
version = "0.22.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38"
|
||||
checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"target-lexicon",
|
||||
@@ -5625,19 +5621,19 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-ffi"
|
||||
version = "0.22.6"
|
||||
version = "0.23.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636"
|
||||
checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"pyo3-build-config 0.22.6",
|
||||
"pyo3-build-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros"
|
||||
version = "0.22.6"
|
||||
version = "0.23.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453"
|
||||
checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"pyo3-macros-backend",
|
||||
@@ -5647,13 +5643,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros-backend"
|
||||
version = "0.22.6"
|
||||
version = "0.23.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe"
|
||||
checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"pyo3-build-config 0.22.6",
|
||||
"pyo3-build-config",
|
||||
"quote",
|
||||
"syn 2.0.98",
|
||||
]
|
||||
|
||||
46
Cargo.toml
46
Cargo.toml
@@ -21,32 +21,30 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.24.0", "features" = [
|
||||
"dynamodb",
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.24.0-beta.1" }
|
||||
lance-io = { version = "=0.24.0", tag = "v0.24.0-beta.1", git = "https://github.com/lancedb/lance.git" }
|
||||
lance-index = { version = "=0.24.0", tag = "v0.24.0-beta.1", git = "https://github.com/lancedb/lance.git" }
|
||||
lance-linalg = { version = "=0.24.0", tag = "v0.24.0-beta.1", git = "https://github.com/lancedb/lance.git" }
|
||||
lance-table = { version = "=0.24.0", tag = "v0.24.0-beta.1", git = "https://github.com/lancedb/lance.git" }
|
||||
lance-testing = { version = "=0.24.0", tag = "v0.24.0-beta.1", git = "https://github.com/lancedb/lance.git" }
|
||||
lance-datafusion = { version = "=0.24.0", tag = "v0.24.0-beta.1", git = "https://github.com/lancedb/lance.git" }
|
||||
lance-encoding = { version = "=0.24.0", tag = "v0.24.0-beta.1", git = "https://github.com/lancedb/lance.git" }
|
||||
lance = { "version" = "=0.24.1", "features" = ["dynamodb"] }
|
||||
lance-io = { version = "=0.24.1" }
|
||||
lance-index = { version = "=0.24.1" }
|
||||
lance-linalg = { version = "=0.24.1" }
|
||||
lance-table = { version = "=0.24.1" }
|
||||
lance-testing = { version = "=0.24.1" }
|
||||
lance-datafusion = { version = "=0.24.1" }
|
||||
lance-encoding = { version = "=0.24.1" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "53.2", optional = false }
|
||||
arrow-array = "53.2"
|
||||
arrow-data = "53.2"
|
||||
arrow-ipc = "53.2"
|
||||
arrow-ord = "53.2"
|
||||
arrow-schema = "53.2"
|
||||
arrow-arith = "53.2"
|
||||
arrow-cast = "53.2"
|
||||
arrow = { version = "54.1", optional = false }
|
||||
arrow-array = "54.1"
|
||||
arrow-data = "54.1"
|
||||
arrow-ipc = "54.1"
|
||||
arrow-ord = "54.1"
|
||||
arrow-schema = "54.1"
|
||||
arrow-arith = "54.1"
|
||||
arrow-cast = "54.1"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "44.0", default-features = false }
|
||||
datafusion-catalog = "44.0"
|
||||
datafusion-common = { version = "44.0", default-features = false }
|
||||
datafusion-execution = "44.0"
|
||||
datafusion-expr = "44.0"
|
||||
datafusion-physical-plan = "44.0"
|
||||
datafusion = { version = "45.0", default-features = false }
|
||||
datafusion-catalog = "45.0"
|
||||
datafusion-common = { version = "45.0", default-features = false }
|
||||
datafusion-execution = "45.0"
|
||||
datafusion-expr = "45.0"
|
||||
datafusion-physical-plan = "45.0"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "=2.4.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
|
||||
41
ci/parse_requirements.py
Normal file
41
ci/parse_requirements.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import argparse
|
||||
import toml
|
||||
|
||||
|
||||
def parse_dependencies(pyproject_path, extras=None):
|
||||
with open(pyproject_path, "r") as file:
|
||||
pyproject = toml.load(file)
|
||||
|
||||
dependencies = pyproject.get("project", {}).get("dependencies", [])
|
||||
for dependency in dependencies:
|
||||
print(dependency)
|
||||
|
||||
optional_dependencies = pyproject.get("project", {}).get(
|
||||
"optional-dependencies", {}
|
||||
)
|
||||
|
||||
if extras:
|
||||
for extra in extras.split(","):
|
||||
for dep in optional_dependencies.get(extra, []):
|
||||
print(dep)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate requirements.txt from pyproject.toml"
|
||||
)
|
||||
parser.add_argument("path", type=str, help="Path to pyproject.toml")
|
||||
parser.add_argument(
|
||||
"--extras",
|
||||
type=str,
|
||||
help="Comma-separated list of extras to include",
|
||||
default="",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
parse_dependencies(args.path, args.extras)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -175,6 +175,8 @@ maybeDescribe("storage_options", () => {
|
||||
|
||||
tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual([]);
|
||||
|
||||
await db.dropAllTables();
|
||||
});
|
||||
|
||||
it("can configure encryption at connection and table level", async () => {
|
||||
@@ -210,6 +212,8 @@ maybeDescribe("storage_options", () => {
|
||||
await table.add([{ a: 2, b: 3 }]);
|
||||
|
||||
await bucket.assertAllEncrypted("test/table2.lance", kmsKey.keyId);
|
||||
|
||||
await db.dropAllTables();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -298,5 +302,32 @@ maybeDescribe("DynamoDB Lock", () => {
|
||||
|
||||
const rowCount = await table.countRows();
|
||||
expect(rowCount).toBe(6);
|
||||
|
||||
await db.dropAllTables();
|
||||
});
|
||||
|
||||
it("clears dynamodb state after dropping all tables", async () => {
|
||||
const uri = `s3+ddb://${bucket.name}/test?ddbTableName=${commitTable.name}`;
|
||||
const db = await connect(uri, {
|
||||
storageOptions: CONFIG,
|
||||
readConsistencyInterval: 0,
|
||||
});
|
||||
|
||||
await db.createTable("foo", [{ a: 1, b: 2 }]);
|
||||
await db.createTable("bar", [{ a: 1, b: 2 }]);
|
||||
|
||||
let tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual(["bar", "foo"]);
|
||||
|
||||
await db.dropAllTables();
|
||||
tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual([]);
|
||||
|
||||
// We can create a new table with the same name as the one we dropped.
|
||||
await db.createTable("foo", [{ a: 1, b: 2 }]);
|
||||
tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual(["foo"]);
|
||||
|
||||
await db.dropAllTables();
|
||||
});
|
||||
});
|
||||
|
||||
56
pyright_report.csv
Normal file
56
pyright_report.csv
Normal file
@@ -0,0 +1,56 @@
|
||||
file,errors,warnings,total_issues
|
||||
python/python/lancedb/arrow.py,0,0,0
|
||||
python/python/lancedb/background_loop.py,0,0,0
|
||||
python/python/lancedb/embeddings/__init__.py,0,0,0
|
||||
python/python/lancedb/exceptions.py,0,0,0
|
||||
python/python/lancedb/index.py,0,0,0
|
||||
python/python/lancedb/integrations/__init__.py,0,0,0
|
||||
python/python/lancedb/remote/__init__.py,0,0,0
|
||||
python/python/lancedb/remote/errors.py,0,0,0
|
||||
python/python/lancedb/rerankers/__init__.py,0,0,0
|
||||
python/python/lancedb/rerankers/answerdotai.py,0,0,0
|
||||
python/python/lancedb/rerankers/cohere.py,0,0,0
|
||||
python/python/lancedb/rerankers/colbert.py,0,0,0
|
||||
python/python/lancedb/rerankers/cross_encoder.py,0,0,0
|
||||
python/python/lancedb/rerankers/openai.py,0,0,0
|
||||
python/python/lancedb/rerankers/util.py,0,0,0
|
||||
python/python/lancedb/rerankers/voyageai.py,0,0,0
|
||||
python/python/lancedb/schema.py,0,0,0
|
||||
python/python/lancedb/types.py,0,0,0
|
||||
python/python/lancedb/__init__.py,0,1,1
|
||||
python/python/lancedb/conftest.py,1,0,1
|
||||
python/python/lancedb/embeddings/bedrock.py,1,0,1
|
||||
python/python/lancedb/merge.py,1,0,1
|
||||
python/python/lancedb/rerankers/base.py,1,0,1
|
||||
python/python/lancedb/rerankers/jinaai.py,0,1,1
|
||||
python/python/lancedb/rerankers/linear_combination.py,1,0,1
|
||||
python/python/lancedb/embeddings/instructor.py,2,0,2
|
||||
python/python/lancedb/embeddings/openai.py,2,0,2
|
||||
python/python/lancedb/embeddings/watsonx.py,2,0,2
|
||||
python/python/lancedb/embeddings/registry.py,3,0,3
|
||||
python/python/lancedb/embeddings/sentence_transformers.py,3,0,3
|
||||
python/python/lancedb/integrations/pyarrow.py,3,0,3
|
||||
python/python/lancedb/rerankers/rrf.py,3,0,3
|
||||
python/python/lancedb/dependencies.py,4,0,4
|
||||
python/python/lancedb/embeddings/gemini_text.py,4,0,4
|
||||
python/python/lancedb/embeddings/gte.py,4,0,4
|
||||
python/python/lancedb/embeddings/gte_mlx_model.py,4,0,4
|
||||
python/python/lancedb/embeddings/ollama.py,4,0,4
|
||||
python/python/lancedb/embeddings/transformers.py,4,0,4
|
||||
python/python/lancedb/remote/db.py,5,0,5
|
||||
python/python/lancedb/context.py,6,0,6
|
||||
python/python/lancedb/embeddings/cohere.py,6,0,6
|
||||
python/python/lancedb/fts.py,6,0,6
|
||||
python/python/lancedb/db.py,9,0,9
|
||||
python/python/lancedb/embeddings/utils.py,9,0,9
|
||||
python/python/lancedb/common.py,11,0,11
|
||||
python/python/lancedb/util.py,13,0,13
|
||||
python/python/lancedb/embeddings/imagebind.py,14,0,14
|
||||
python/python/lancedb/embeddings/voyageai.py,15,0,15
|
||||
python/python/lancedb/embeddings/open_clip.py,16,0,16
|
||||
python/python/lancedb/pydantic.py,16,0,16
|
||||
python/python/lancedb/embeddings/base.py,17,0,17
|
||||
python/python/lancedb/embeddings/jinaai.py,18,1,19
|
||||
python/python/lancedb/remote/table.py,23,0,23
|
||||
python/python/lancedb/query.py,47,1,48
|
||||
python/python/lancedb/table.py,61,0,61
|
||||
|
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.21.0-beta.1"
|
||||
current_version = "0.21.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -8,9 +8,9 @@ For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).
|
||||
The Python package is a wrapper around the Rust library, `lancedb`. We use
|
||||
[pyo3](https://pyo3.rs/) to create the bindings between Rust and Python.
|
||||
|
||||
* `src/`: Rust bindings source code
|
||||
* `python/lancedb`: Python package source code
|
||||
* `python/tests`: Unit tests
|
||||
- `src/`: Rust bindings source code
|
||||
- `python/lancedb`: Python package source code
|
||||
- `python/tests`: Unit tests
|
||||
|
||||
## Development environment
|
||||
|
||||
@@ -61,6 +61,12 @@ make test
|
||||
make doctest
|
||||
```
|
||||
|
||||
Run type checking:
|
||||
|
||||
```shell
|
||||
make typecheck
|
||||
```
|
||||
|
||||
To run a single test, you can use the `pytest` command directly. Provide the path
|
||||
to the test file, and optionally the test name after `::`.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.21.0-beta.1"
|
||||
version = "0.21.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -14,21 +14,20 @@ name = "_lancedb"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow = { version = "53.2", features = ["pyarrow"] }
|
||||
arrow = { version = "54.1", features = ["pyarrow"] }
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
env_logger.workspace = true
|
||||
pyo3 = { version = "0.22.2", features = [
|
||||
"extension-module",
|
||||
"abi3-py39",
|
||||
"gil-refs"
|
||||
pyo3 = { version = "0.23", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.23", features = [
|
||||
"attributes",
|
||||
"tokio-runtime",
|
||||
] }
|
||||
pyo3-async-runtimes = { version = "0.22", features = ["attributes", "tokio-runtime"] }
|
||||
pin-project = "1.1.5"
|
||||
futures.workspace = true
|
||||
tokio = { version = "1.40", features = ["sync"] }
|
||||
|
||||
[build-dependencies]
|
||||
pyo3-build-config = { version = "0.20.3", features = [
|
||||
pyo3-build-config = { version = "0.23", features = [
|
||||
"extension-module",
|
||||
"abi3-py39",
|
||||
] }
|
||||
|
||||
@@ -23,6 +23,10 @@ check: ## Check formatting and lints.
|
||||
fix: ## Fix python lints
|
||||
ruff check python --fix
|
||||
|
||||
.PHONY: typecheck
|
||||
typecheck: ## Run type checking with pyright.
|
||||
pyright
|
||||
|
||||
.PHONY: doctest
|
||||
doctest: ## Run documentation tests.
|
||||
pytest --doctest-modules python/lancedb
|
||||
@@ -30,3 +34,7 @@ doctest: ## Run documentation tests.
|
||||
.PHONY: test
|
||||
test: ## Run tests.
|
||||
pytest python/tests -vv --durations=10 -m "not slow and not s3_test"
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -rf data
|
||||
|
||||
@@ -54,7 +54,7 @@ tests = [
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy",
|
||||
"pyarrow-stubs",
|
||||
"pylance~=0.23.2",
|
||||
"pylance>=0.23.2",
|
||||
]
|
||||
dev = [
|
||||
"ruff",
|
||||
@@ -92,7 +92,7 @@ requires = ["maturin>=1.4"]
|
||||
build-backend = "maturin"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["F", "E", "W", "G", "TCH", "PERF"]
|
||||
select = ["F", "E", "W", "G", "PERF"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = "--strict-markers --ignore-glob=lancedb/embeddings/*.py"
|
||||
@@ -103,5 +103,28 @@ markers = [
|
||||
]
|
||||
|
||||
[tool.pyright]
|
||||
include = ["python/lancedb/table.py"]
|
||||
include = [
|
||||
"python/lancedb/index.py",
|
||||
"python/lancedb/rerankers/util.py",
|
||||
"python/lancedb/rerankers/__init__.py",
|
||||
"python/lancedb/rerankers/voyageai.py",
|
||||
"python/lancedb/rerankers/jinaai.py",
|
||||
"python/lancedb/rerankers/openai.py",
|
||||
"python/lancedb/rerankers/cross_encoder.py",
|
||||
"python/lancedb/rerankers/colbert.py",
|
||||
"python/lancedb/rerankers/answerdotai.py",
|
||||
"python/lancedb/rerankers/cohere.py",
|
||||
"python/lancedb/arrow.py",
|
||||
"python/lancedb/__init__.py",
|
||||
"python/lancedb/types.py",
|
||||
"python/lancedb/integrations/__init__.py",
|
||||
"python/lancedb/exceptions.py",
|
||||
"python/lancedb/background_loop.py",
|
||||
"python/lancedb/schema.py",
|
||||
"python/lancedb/remote/__init__.py",
|
||||
"python/lancedb/remote/errors.py",
|
||||
"python/lancedb/embeddings/__init__.py",
|
||||
"python/lancedb/_lancedb.pyi",
|
||||
]
|
||||
exclude = ["python/tests/"]
|
||||
pythonVersion = "3.12"
|
||||
|
||||
@@ -14,6 +14,7 @@ from ._lancedb import connect as lancedb_connect
|
||||
from .common import URI, sanitize_uri
|
||||
from .db import AsyncConnection, DBConnection, LanceDBConnection
|
||||
from .remote import ClientConfig
|
||||
from .remote.db import RemoteDBConnection
|
||||
from .schema import vector
|
||||
from .table import AsyncTable
|
||||
|
||||
@@ -86,8 +87,6 @@ def connect(
|
||||
conn : DBConnection
|
||||
A connection to a LanceDB database.
|
||||
"""
|
||||
from .remote.db import RemoteDBConnection
|
||||
|
||||
if isinstance(uri, str) and uri.startswith("db://"):
|
||||
if api_key is None:
|
||||
api_key = os.environ.get("LANCEDB_API_KEY")
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import Dict, List, Optional, Tuple, Any, Union, Literal
|
||||
import pyarrow as pa
|
||||
|
||||
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
||||
from .remote import ClientConfig
|
||||
|
||||
class Connection(object):
|
||||
uri: str
|
||||
@@ -71,11 +72,15 @@ async def connect(
|
||||
region: Optional[str],
|
||||
host_override: Optional[str],
|
||||
read_consistency_interval: Optional[float],
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
) -> Connection: ...
|
||||
|
||||
class RecordBatchStream:
|
||||
@property
|
||||
def schema(self) -> pa.Schema: ...
|
||||
async def next(self) -> Optional[pa.RecordBatch]: ...
|
||||
def __aiter__(self) -> "RecordBatchStream": ...
|
||||
async def __anext__(self) -> pa.RecordBatch: ...
|
||||
|
||||
class Query:
|
||||
def where(self, filter: str): ...
|
||||
|
||||
@@ -9,7 +9,8 @@ from typing import Any, Dict, Iterable, List, Optional, Union
|
||||
from urllib.parse import urlparse
|
||||
import warnings
|
||||
|
||||
from lancedb import connect_async
|
||||
# Remove this import to fix circular dependency
|
||||
# from lancedb import connect_async
|
||||
from lancedb.remote import ClientConfig
|
||||
import pyarrow as pa
|
||||
from overrides import override
|
||||
@@ -78,6 +79,9 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
self.client_config = client_config
|
||||
|
||||
# Import connect_async here to avoid circular import
|
||||
from lancedb import connect_async
|
||||
|
||||
self._conn = LOOP.run(
|
||||
connect_async(
|
||||
db_url,
|
||||
|
||||
@@ -76,12 +76,21 @@ if TYPE_CHECKING:
|
||||
from .index import IndexConfig
|
||||
import pandas
|
||||
import PIL
|
||||
from .types import (
|
||||
QueryType,
|
||||
OnBadVectorsType,
|
||||
AddMode,
|
||||
CreateMode,
|
||||
VectorIndexType,
|
||||
ScalarIndexType,
|
||||
BaseTokenizerType,
|
||||
DistanceType,
|
||||
)
|
||||
|
||||
|
||||
pd = safe_import_pandas()
|
||||
pl = safe_import_polars()
|
||||
|
||||
QueryType = Literal["vector", "fts", "hybrid", "auto"]
|
||||
|
||||
|
||||
def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
|
||||
from lancedb.dependencies import datasets
|
||||
@@ -178,7 +187,7 @@ def _sanitize_data(
|
||||
data: "DATA",
|
||||
target_schema: Optional[pa.Schema] = None,
|
||||
metadata: Optional[dict] = None, # embedding metadata
|
||||
on_bad_vectors: Literal["error", "drop", "fill", "null"] = "error",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
*,
|
||||
allow_subschema: bool = False,
|
||||
@@ -324,7 +333,7 @@ def sanitize_create_table(
|
||||
data,
|
||||
schema: Union[pa.Schema, LanceModel],
|
||||
metadata=None,
|
||||
on_bad_vectors: str = "error",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
):
|
||||
if inspect.isclass(schema) and issubclass(schema, LanceModel):
|
||||
@@ -576,9 +585,7 @@ class Table(ABC):
|
||||
accelerator: Optional[str] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
*,
|
||||
index_type: Literal[
|
||||
"IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
|
||||
] = "IVF_PQ",
|
||||
index_type: VectorIndexType = "IVF_PQ",
|
||||
num_bits: int = 8,
|
||||
max_iterations: int = 50,
|
||||
sample_rate: int = 256,
|
||||
@@ -643,7 +650,7 @@ class Table(ABC):
|
||||
column: str,
|
||||
*,
|
||||
replace: bool = True,
|
||||
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST"] = "BTREE",
|
||||
index_type: ScalarIndexType = "BTREE",
|
||||
):
|
||||
"""Create a scalar index on a column.
|
||||
|
||||
@@ -708,7 +715,7 @@ class Table(ABC):
|
||||
tokenizer_name: Optional[str] = None,
|
||||
with_position: bool = True,
|
||||
# tokenizer configs:
|
||||
base_tokenizer: Literal["simple", "raw", "whitespace"] = "simple",
|
||||
base_tokenizer: BaseTokenizerType = "simple",
|
||||
language: str = "English",
|
||||
max_token_length: Optional[int] = 40,
|
||||
lower_case: bool = True,
|
||||
@@ -777,8 +784,8 @@ class Table(ABC):
|
||||
def add(
|
||||
self,
|
||||
data: DATA,
|
||||
mode: str = "append",
|
||||
on_bad_vectors: str = "error",
|
||||
mode: AddMode = "append",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
):
|
||||
"""Add more data to the [Table](Table).
|
||||
@@ -960,7 +967,7 @@ class Table(ABC):
|
||||
self,
|
||||
merge: LanceMergeInsertBuilder,
|
||||
new_data: DATA,
|
||||
on_bad_vectors: str,
|
||||
on_bad_vectors: OnBadVectorsType,
|
||||
fill_value: float,
|
||||
): ...
|
||||
|
||||
@@ -1572,10 +1579,10 @@ class LanceTable(Table):
|
||||
|
||||
def create_index(
|
||||
self,
|
||||
metric="L2",
|
||||
metric: DistanceType = "l2",
|
||||
num_partitions=None,
|
||||
num_sub_vectors=None,
|
||||
vector_column_name=VECTOR_COLUMN_NAME,
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
replace: bool = True,
|
||||
accelerator: Optional[str] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
@@ -1661,7 +1668,7 @@ class LanceTable(Table):
|
||||
column: str,
|
||||
*,
|
||||
replace: bool = True,
|
||||
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST"] = "BTREE",
|
||||
index_type: ScalarIndexType = "BTREE",
|
||||
):
|
||||
if index_type == "BTREE":
|
||||
config = BTree()
|
||||
@@ -1686,7 +1693,7 @@ class LanceTable(Table):
|
||||
tokenizer_name: Optional[str] = None,
|
||||
with_position: bool = True,
|
||||
# tokenizer configs:
|
||||
base_tokenizer: str = "simple",
|
||||
base_tokenizer: BaseTokenizerType = "simple",
|
||||
language: str = "English",
|
||||
max_token_length: Optional[int] = 40,
|
||||
lower_case: bool = True,
|
||||
@@ -1820,8 +1827,8 @@ class LanceTable(Table):
|
||||
def add(
|
||||
self,
|
||||
data: DATA,
|
||||
mode: str = "append",
|
||||
on_bad_vectors: str = "error",
|
||||
mode: AddMode = "append",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
):
|
||||
"""Add data to the table.
|
||||
@@ -2059,7 +2066,7 @@ class LanceTable(Table):
|
||||
query_type,
|
||||
vector_column_name=vector_column_name,
|
||||
ordering_field_name=ordering_field_name,
|
||||
fts_columns=fts_columns,
|
||||
fts_columns=fts_columns or [],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -2069,13 +2076,13 @@ class LanceTable(Table):
|
||||
name: str,
|
||||
data: Optional[DATA] = None,
|
||||
schema: Optional[pa.Schema] = None,
|
||||
mode: Literal["create", "overwrite"] = "create",
|
||||
mode: CreateMode = "create",
|
||||
exist_ok: bool = False,
|
||||
on_bad_vectors: str = "error",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options: Optional[Dict[str, str | bool]] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
enable_v2_manifest_paths: Optional[bool] = None,
|
||||
):
|
||||
@@ -2229,7 +2236,7 @@ class LanceTable(Table):
|
||||
self,
|
||||
merge: LanceMergeInsertBuilder,
|
||||
new_data: DATA,
|
||||
on_bad_vectors: str,
|
||||
on_bad_vectors: OnBadVectorsType,
|
||||
fill_value: float,
|
||||
):
|
||||
LOOP.run(self._table._do_merge(merge, new_data, on_bad_vectors, fill_value))
|
||||
@@ -2880,7 +2887,7 @@ class AsyncTable:
|
||||
data: DATA,
|
||||
*,
|
||||
mode: Optional[Literal["append", "overwrite"]] = "append",
|
||||
on_bad_vectors: Optional[str] = None,
|
||||
on_bad_vectors: Optional[OnBadVectorsType] = None,
|
||||
fill_value: Optional[float] = None,
|
||||
):
|
||||
"""Add more data to the [Table](Table).
|
||||
@@ -2986,7 +2993,7 @@ class AsyncTable:
|
||||
@overload
|
||||
async def search(
|
||||
self,
|
||||
query: Optional[Union[str]] = None,
|
||||
query: Optional[str] = None,
|
||||
vector_column_name: Optional[str] = None,
|
||||
query_type: Literal["auto"] = ...,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
@@ -2996,7 +3003,7 @@ class AsyncTable:
|
||||
@overload
|
||||
async def search(
|
||||
self,
|
||||
query: Optional[Union[str]] = None,
|
||||
query: Optional[str] = None,
|
||||
vector_column_name: Optional[str] = None,
|
||||
query_type: Literal["hybrid"] = ...,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
@@ -3040,7 +3047,7 @@ class AsyncTable:
|
||||
query_type: QueryType = "auto",
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Optional[Union[str, List[str]]] = None,
|
||||
) -> AsyncQuery:
|
||||
) -> Union[AsyncHybridQuery | AsyncFTSQuery | AsyncVectorQuery]:
|
||||
"""Create a search query to find the nearest neighbors
|
||||
of the given query vector. We currently support [vector search][search]
|
||||
and [full-text search][experimental-full-text-search].
|
||||
@@ -3279,7 +3286,7 @@ class AsyncTable:
|
||||
self,
|
||||
merge: LanceMergeInsertBuilder,
|
||||
new_data: DATA,
|
||||
on_bad_vectors: str,
|
||||
on_bad_vectors: OnBadVectorsType,
|
||||
fill_value: float,
|
||||
):
|
||||
schema = await self.schema()
|
||||
|
||||
28
python/python/lancedb/types.py
Normal file
28
python/python/lancedb/types.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
from typing import Literal
|
||||
|
||||
# Query type literals
|
||||
QueryType = Literal["vector", "fts", "hybrid", "auto"]
|
||||
|
||||
# Distance type literals
|
||||
DistanceType = Literal["l2", "cosine", "dot"]
|
||||
DistanceTypeWithHamming = Literal["l2", "cosine", "dot", "hamming"]
|
||||
|
||||
# Vector handling literals
|
||||
OnBadVectorsType = Literal["error", "drop", "fill", "null"]
|
||||
|
||||
# Mode literals
|
||||
AddMode = Literal["append", "overwrite"]
|
||||
CreateMode = Literal["create", "overwrite"]
|
||||
|
||||
# Index type literals
|
||||
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"]
|
||||
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
|
||||
IndexType = Literal[
|
||||
"IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
|
||||
]
|
||||
|
||||
# Tokenizer literals
|
||||
BaseTokenizerType = Literal["simple", "raw", "whitespace"]
|
||||
@@ -419,17 +419,17 @@ def test_embedding_function_safe_model_dump(embedding_type):
|
||||
|
||||
dumped_model = model.safe_model_dump()
|
||||
|
||||
assert all(
|
||||
not k.startswith("_") for k in dumped_model.keys()
|
||||
), f"{embedding_type}: Dumped model contains keys starting with underscore"
|
||||
assert all(not k.startswith("_") for k in dumped_model.keys()), (
|
||||
f"{embedding_type}: Dumped model contains keys starting with underscore"
|
||||
)
|
||||
|
||||
assert (
|
||||
"max_retries" in dumped_model
|
||||
), f"{embedding_type}: Essential field 'max_retries' is missing from dumped model"
|
||||
assert "max_retries" in dumped_model, (
|
||||
f"{embedding_type}: Essential field 'max_retries' is missing from dumped model"
|
||||
)
|
||||
|
||||
assert isinstance(
|
||||
dumped_model, dict
|
||||
), f"{embedding_type}: Dumped model is not a dictionary"
|
||||
assert isinstance(dumped_model, dict), (
|
||||
f"{embedding_type}: Dumped model is not a dictionary"
|
||||
)
|
||||
|
||||
for key in model.__dict__:
|
||||
if key.startswith("_"):
|
||||
|
||||
@@ -129,6 +129,6 @@ def test_normalize_scores():
|
||||
if invert:
|
||||
expected = pc.subtract(1.0, expected)
|
||||
|
||||
assert pc.equal(
|
||||
result, expected
|
||||
), f"Expected {expected} but got {result} for invert={invert}"
|
||||
assert pc.equal(result, expected), (
|
||||
f"Expected {expected} but got {result} for invert={invert}"
|
||||
)
|
||||
|
||||
@@ -784,8 +784,7 @@ async def test_query_search_auto(mem_db_async: AsyncConnection):
|
||||
with pytest.raises(
|
||||
Exception,
|
||||
match=(
|
||||
"Cannot perform full text search unless an INVERTED index has "
|
||||
"been created"
|
||||
"Cannot perform full text search unless an INVERTED index has been created"
|
||||
),
|
||||
):
|
||||
query = await (await tbl2.search("0.1")).to_arrow()
|
||||
|
||||
@@ -131,9 +131,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
|
||||
"represents the relevance of the result to the query & should "
|
||||
"be descending."
|
||||
)
|
||||
assert np.all(
|
||||
np.diff(result.column("_relevance_score").to_numpy()) <= 0
|
||||
), ascending_relevance_err
|
||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
||||
ascending_relevance_err
|
||||
)
|
||||
|
||||
# Vector search setting
|
||||
result = (
|
||||
@@ -143,9 +143,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
|
||||
.to_arrow()
|
||||
)
|
||||
assert len(result) == 30
|
||||
assert np.all(
|
||||
np.diff(result.column("_relevance_score").to_numpy()) <= 0
|
||||
), ascending_relevance_err
|
||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
||||
ascending_relevance_err
|
||||
)
|
||||
result_explicit = (
|
||||
table.search(query_vector, vector_column_name="vector")
|
||||
.rerank(reranker=reranker, query_string=query)
|
||||
@@ -168,9 +168,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
|
||||
.to_arrow()
|
||||
)
|
||||
assert len(result) > 0
|
||||
assert np.all(
|
||||
np.diff(result.column("_relevance_score").to_numpy()) <= 0
|
||||
), ascending_relevance_err
|
||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
||||
ascending_relevance_err
|
||||
)
|
||||
|
||||
# empty FTS results
|
||||
query = "abcxyz" * 100
|
||||
@@ -185,9 +185,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
|
||||
|
||||
# should return _relevance_score column
|
||||
assert "_relevance_score" in result.column_names
|
||||
assert np.all(
|
||||
np.diff(result.column("_relevance_score").to_numpy()) <= 0
|
||||
), ascending_relevance_err
|
||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
||||
ascending_relevance_err
|
||||
)
|
||||
|
||||
# Multi-vector search setting
|
||||
rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True)
|
||||
@@ -262,9 +262,9 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
|
||||
"represents the relevance of the result to the query & should "
|
||||
"be descending."
|
||||
)
|
||||
assert np.all(
|
||||
np.diff(result.column("_relevance_score").to_numpy()) <= 0
|
||||
), ascending_relevance_err
|
||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
||||
ascending_relevance_err
|
||||
)
|
||||
|
||||
# Test with empty FTS results
|
||||
query = "abcxyz" * 100
|
||||
@@ -278,9 +278,9 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
|
||||
)
|
||||
# should return _relevance_score column
|
||||
assert "_relevance_score" in result.column_names
|
||||
assert np.all(
|
||||
np.diff(result.column("_relevance_score").to_numpy()) <= 0
|
||||
), ascending_relevance_err
|
||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
||||
ascending_relevance_err
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
|
||||
@@ -252,3 +252,27 @@ def test_s3_dynamodb_sync(s3_bucket: str, commit_table: str, monkeypatch):
|
||||
db.drop_table("test_ddb_sync")
|
||||
assert db.table_names() == []
|
||||
db.drop_database()
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
def test_s3_dynamodb_drop_all_tables(s3_bucket: str, commit_table: str, monkeypatch):
|
||||
for key, value in CONFIG.items():
|
||||
monkeypatch.setenv(key.upper(), value)
|
||||
|
||||
uri = f"s3+ddb://{s3_bucket}/test2?ddbTableName={commit_table}"
|
||||
db = lancedb.connect(uri, read_consistency_interval=timedelta(0))
|
||||
data = pa.table({"x": ["a", "b", "c"]})
|
||||
|
||||
db.create_table("foo", data)
|
||||
db.create_table("bar", data)
|
||||
assert db.table_names() == ["bar", "foo"]
|
||||
|
||||
# dropping all tables should clear multiple tables
|
||||
db.drop_all_tables()
|
||||
assert db.table_names() == []
|
||||
|
||||
# create a new table with the same name to ensure DDB is clean
|
||||
db.create_table("foo", data)
|
||||
assert db.table_names() == ["foo"]
|
||||
|
||||
db.drop_all_tables()
|
||||
|
||||
@@ -43,7 +43,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
} => Python::with_gil(|py| {
|
||||
let message = err.to_string();
|
||||
let http_err_cls = py
|
||||
.import_bound(intern!(py, "lancedb.remote.errors"))?
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr(intern!(py, "HttpError"))?;
|
||||
let err = http_err_cls.call1((
|
||||
message,
|
||||
@@ -63,7 +63,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
}
|
||||
|
||||
Err(PyErr::from_value_bound(err))
|
||||
Err(PyErr::from_value(err))
|
||||
}),
|
||||
LanceError::Retry {
|
||||
request_id,
|
||||
@@ -85,7 +85,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
|
||||
let message = err.to_string();
|
||||
let retry_error_cls = py
|
||||
.import_bound(intern!(py, "lancedb.remote.errors"))?
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr("RetryError")?;
|
||||
let err = retry_error_cls.call1((
|
||||
message,
|
||||
@@ -100,7 +100,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
))?;
|
||||
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
Err(PyErr::from_value_bound(err))
|
||||
Err(PyErr::from_value(err))
|
||||
}),
|
||||
_ => self.runtime_error(),
|
||||
},
|
||||
@@ -127,18 +127,16 @@ fn http_from_rust_error(
|
||||
status_code: Option<u16>,
|
||||
) -> PyResult<PyErr> {
|
||||
let message = err.to_string();
|
||||
let http_err_cls = py
|
||||
.import_bound("lancedb.remote.errors")?
|
||||
.getattr("HttpError")?;
|
||||
let http_err_cls = py.import("lancedb.remote.errors")?.getattr("HttpError")?;
|
||||
let py_err = http_err_cls.call1((message, request_id, status_code))?;
|
||||
|
||||
// Reset the traceback since it doesn't provide additional information.
|
||||
let py_err = py_err.call_method1(intern!(py, "with_traceback"), (PyNone::get_bound(py),))?;
|
||||
let py_err = py_err.call_method1(intern!(py, "with_traceback"), (PyNone::get(py),))?;
|
||||
|
||||
if let Some(cause) = err.source() {
|
||||
let cause_err = http_from_rust_error(py, cause, request_id, status_code)?;
|
||||
py_err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
}
|
||||
|
||||
Ok(PyErr::from_value_bound(py_err))
|
||||
Ok(PyErr::from_value(py_err))
|
||||
}
|
||||
|
||||
@@ -7,29 +7,32 @@ use lancedb::index::{
|
||||
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
|
||||
Index as LanceDbIndex,
|
||||
};
|
||||
use pyo3::types::PyStringMethods;
|
||||
use pyo3::IntoPyObject;
|
||||
use pyo3::{
|
||||
exceptions::{PyKeyError, PyValueError},
|
||||
intern, pyclass, pymethods,
|
||||
types::PyAnyMethods,
|
||||
Bound, FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python,
|
||||
Bound, FromPyObject, PyAny, PyResult, Python,
|
||||
};
|
||||
|
||||
use crate::util::parse_distance_type;
|
||||
|
||||
pub fn class_name<'a>(ob: &'a Bound<'_, PyAny>) -> PyResult<&'a str> {
|
||||
let full_name: &str = ob
|
||||
pub fn class_name(ob: &'_ Bound<'_, PyAny>) -> PyResult<String> {
|
||||
let full_name = ob
|
||||
.getattr(intern!(ob.py(), "__class__"))?
|
||||
.getattr(intern!(ob.py(), "__name__"))?
|
||||
.extract()?;
|
||||
.getattr(intern!(ob.py(), "__name__"))?;
|
||||
let full_name = full_name.downcast()?.to_string_lossy();
|
||||
|
||||
match full_name.rsplit_once('.') {
|
||||
Some((_, name)) => Ok(name),
|
||||
None => Ok(full_name),
|
||||
Some((_, name)) => Ok(name.to_string()),
|
||||
None => Ok(full_name.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<LanceDbIndex> {
|
||||
if let Some(source) = source {
|
||||
match class_name(source)? {
|
||||
match class_name(source)?.as_str() {
|
||||
"BTree" => Ok(LanceDbIndex::BTree(BTreeIndexBuilder::default())),
|
||||
"Bitmap" => Ok(LanceDbIndex::Bitmap(Default::default())),
|
||||
"LabelList" => Ok(LanceDbIndex::LabelList(Default::default())),
|
||||
@@ -196,11 +199,11 @@ impl IndexConfig {
|
||||
|
||||
// For backwards-compatibility with the old sync SDK, we also support getting
|
||||
// attributes via __getitem__.
|
||||
pub fn __getitem__(&self, key: String, py: Python<'_>) -> PyResult<PyObject> {
|
||||
pub fn __getitem__<'a>(&self, key: String, py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
|
||||
match key.as_str() {
|
||||
"index_type" => Ok(self.index_type.clone().into_py(py)),
|
||||
"columns" => Ok(self.columns.clone().into_py(py)),
|
||||
"name" | "index_name" => Ok(self.name.clone().into_py(py)),
|
||||
"index_type" => Ok(self.index_type.clone().into_pyobject(py)?.into_any()),
|
||||
"columns" => Ok(self.columns.clone().into_pyobject(py)?.into_any()),
|
||||
"name" | "index_name" => Ok(self.name.clone().into_pyobject(py)?.into_any()),
|
||||
_ => Err(PyKeyError::new_err(format!("Invalid key: {}", key))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ use pyo3::{
|
||||
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
||||
pyclass, pymethods,
|
||||
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
||||
Bound, FromPyObject, PyAny, PyRef, PyResult, Python, ToPyObject,
|
||||
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
use std::collections::HashMap;
|
||||
@@ -222,7 +222,7 @@ impl Table {
|
||||
let stats = inner.index_stats(&index_name).await.infer_error()?;
|
||||
if let Some(stats) = stats {
|
||||
Python::with_gil(|py| {
|
||||
let dict = PyDict::new_bound(py);
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
|
||||
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
|
||||
dict.set_item("index_type", stats.index_type.to_string())?;
|
||||
@@ -235,7 +235,7 @@ impl Table {
|
||||
dict.set_item("num_indices", num_indices)?;
|
||||
}
|
||||
|
||||
Ok(Some(dict.to_object(py)))
|
||||
Ok(Some(dict.unbind()))
|
||||
})
|
||||
} else {
|
||||
Ok(None)
|
||||
@@ -266,7 +266,7 @@ impl Table {
|
||||
versions
|
||||
.iter()
|
||||
.map(|v| {
|
||||
let dict = PyDict::new_bound(py);
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("version", v.version).unwrap();
|
||||
dict.set_item(
|
||||
"timestamp",
|
||||
@@ -275,14 +275,13 @@ impl Table {
|
||||
.unwrap();
|
||||
|
||||
let tup: Vec<(&String, &String)> = v.metadata.iter().collect();
|
||||
dict.set_item("metadata", tup.into_py_dict_bound(py))
|
||||
.unwrap();
|
||||
dict.to_object(py)
|
||||
dict.set_item("metadata", tup.into_py_dict(py)?).unwrap();
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.collect::<PyResult<Vec<_>>>()
|
||||
});
|
||||
|
||||
Ok(versions_as_dict)
|
||||
versions_as_dict
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -4,12 +4,14 @@
|
||||
use std::{pin::Pin, sync::Arc};
|
||||
|
||||
pub use arrow_schema;
|
||||
use futures::{Stream, StreamExt};
|
||||
use datafusion_common::DataFusionError;
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use futures::{Stream, StreamExt, TryStreamExt};
|
||||
|
||||
#[cfg(feature = "polars")]
|
||||
use {crate::polars_arrow_convertors, polars::frame::ArrowChunk, polars::prelude::DataFrame};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::{error::Result, Error};
|
||||
|
||||
/// An iterator of batches that also has a schema
|
||||
pub trait RecordBatchReader: Iterator<Item = Result<arrow_array::RecordBatch>> {
|
||||
@@ -65,6 +67,20 @@ impl<I: lance::io::RecordBatchStream + 'static> From<I> for SendableRecordBatchS
|
||||
}
|
||||
}
|
||||
|
||||
pub trait SendableRecordBatchStreamExt {
|
||||
fn into_df_stream(self) -> datafusion_physical_plan::SendableRecordBatchStream;
|
||||
}
|
||||
|
||||
impl SendableRecordBatchStreamExt for SendableRecordBatchStream {
|
||||
fn into_df_stream(self) -> datafusion_physical_plan::SendableRecordBatchStream {
|
||||
let schema = self.schema();
|
||||
Box::pin(RecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
self.map_err(|ldb_err| DataFusionError::External(ldb_err.into())),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// A simple RecordBatchStream formed from the two parts (stream + schema)
|
||||
#[pin_project::pin_project]
|
||||
pub struct SimpleRecordBatchStream<S: Stream<Item = Result<arrow_array::RecordBatch>>> {
|
||||
@@ -101,7 +117,7 @@ impl<S: Stream<Item = Result<arrow_array::RecordBatch>>> RecordBatchStream
|
||||
/// used in methods like [`crate::connection::Connection::create_table`]
|
||||
/// or [`crate::table::Table::add`]
|
||||
pub trait IntoArrow {
|
||||
/// Convert the data into an Arrow array
|
||||
/// Convert the data into an iterator of Arrow batches
|
||||
fn into_arrow(self) -> Result<Box<dyn arrow_array::RecordBatchReader + Send>>;
|
||||
}
|
||||
|
||||
@@ -113,11 +129,38 @@ impl<T: arrow_array::RecordBatchReader + Send + 'static> IntoArrow for T {
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait for converting incoming data to Arrow asynchronously
|
||||
///
|
||||
/// Serves the same purpose as [`IntoArrow`], but for asynchronous data.
|
||||
///
|
||||
/// Note: Arrow has no async equivalent to RecordBatchReader and so
|
||||
pub trait IntoArrowStream {
|
||||
/// Convert the data into a stream of Arrow batches
|
||||
fn into_arrow(self) -> Result<SendableRecordBatchStream>;
|
||||
}
|
||||
|
||||
impl<S: Stream<Item = Result<arrow_array::RecordBatch>>> SimpleRecordBatchStream<S> {
|
||||
pub fn new(stream: S, schema: Arc<arrow_schema::Schema>) -> Self {
|
||||
Self { schema, stream }
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoArrowStream for SendableRecordBatchStream {
|
||||
fn into_arrow(self) -> Result<SendableRecordBatchStream> {
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoArrowStream for datafusion_physical_plan::SendableRecordBatchStream {
|
||||
fn into_arrow(self) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema();
|
||||
let stream = self.map_err(|df_err| Error::Runtime {
|
||||
message: df_err.to_string(),
|
||||
});
|
||||
Ok(Box::pin(SimpleRecordBatchStream::new(stream, schema)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "polars")]
|
||||
/// An iterator of record batches formed from a Polars DataFrame.
|
||||
pub struct PolarsDataFrameRecordBatchReader {
|
||||
|
||||
@@ -11,7 +11,7 @@ use arrow_schema::{Field, SchemaRef};
|
||||
use lance::dataset::ReadParams;
|
||||
use object_store::aws::AwsCredential;
|
||||
|
||||
use crate::arrow::IntoArrow;
|
||||
use crate::arrow::{IntoArrow, IntoArrowStream, SendableRecordBatchStream};
|
||||
use crate::database::listing::{
|
||||
ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS,
|
||||
};
|
||||
@@ -75,6 +75,14 @@ impl IntoArrow for NoData {
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the value given from the initial CreateTableBuilder::new call
|
||||
// and defers errors until `execute` is called
|
||||
enum CreateTableBuilderInitialData {
|
||||
None,
|
||||
Iterator(Result<Box<dyn RecordBatchReader + Send>>),
|
||||
Stream(Result<SendableRecordBatchStream>),
|
||||
}
|
||||
|
||||
/// A builder for configuring a [`Connection::create_table`] operation
|
||||
pub struct CreateTableBuilder<const HAS_DATA: bool> {
|
||||
parent: Arc<dyn Database>,
|
||||
@@ -83,7 +91,7 @@ pub struct CreateTableBuilder<const HAS_DATA: bool> {
|
||||
request: CreateTableRequest,
|
||||
// This is a bit clumsy but we defer errors until `execute` is called
|
||||
// to maintain backwards compatibility
|
||||
data: Option<Result<Box<dyn RecordBatchReader + Send>>>,
|
||||
data: CreateTableBuilderInitialData,
|
||||
}
|
||||
|
||||
// Builder methods that only apply when we have initial data
|
||||
@@ -103,7 +111,26 @@ impl CreateTableBuilder<true> {
|
||||
),
|
||||
embeddings: Vec::new(),
|
||||
embedding_registry,
|
||||
data: Some(data.into_arrow()),
|
||||
data: CreateTableBuilderInitialData::Iterator(data.into_arrow()),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_streaming<T: IntoArrowStream>(
|
||||
parent: Arc<dyn Database>,
|
||||
name: String,
|
||||
data: T,
|
||||
embedding_registry: Arc<dyn EmbeddingRegistry>,
|
||||
) -> Self {
|
||||
let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
|
||||
Self {
|
||||
parent,
|
||||
request: CreateTableRequest::new(
|
||||
name,
|
||||
CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
|
||||
),
|
||||
embeddings: Vec::new(),
|
||||
embedding_registry,
|
||||
data: CreateTableBuilderInitialData::Stream(data.into_arrow()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,17 +152,37 @@ impl CreateTableBuilder<true> {
|
||||
}
|
||||
|
||||
fn into_request(self) -> Result<CreateTableRequest> {
|
||||
let data = if self.embeddings.is_empty() {
|
||||
self.data.unwrap()?
|
||||
if self.embeddings.is_empty() {
|
||||
match self.data {
|
||||
CreateTableBuilderInitialData::Iterator(maybe_iter) => {
|
||||
let data = maybe_iter?;
|
||||
Ok(CreateTableRequest {
|
||||
data: CreateTableData::Data(data),
|
||||
..self.request
|
||||
})
|
||||
}
|
||||
CreateTableBuilderInitialData::None => {
|
||||
unreachable!("No data provided for CreateTableBuilder<true>")
|
||||
}
|
||||
CreateTableBuilderInitialData::Stream(maybe_stream) => {
|
||||
let data = maybe_stream?;
|
||||
Ok(CreateTableRequest {
|
||||
data: CreateTableData::StreamingData(data),
|
||||
..self.request
|
||||
})
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let data = self.data.unwrap()?;
|
||||
Box::new(WithEmbeddings::new(data, self.embeddings))
|
||||
};
|
||||
let req = self.request;
|
||||
Ok(CreateTableRequest {
|
||||
data: CreateTableData::Data(data),
|
||||
..req
|
||||
})
|
||||
let CreateTableBuilderInitialData::Iterator(maybe_iter) = self.data else {
|
||||
return Err(Error::NotSupported { message: "Creating a table with embeddings is currently not support when the input is streaming".to_string() });
|
||||
};
|
||||
let data = maybe_iter?;
|
||||
let data = Box::new(WithEmbeddings::new(data, self.embeddings));
|
||||
Ok(CreateTableRequest {
|
||||
data: CreateTableData::Data(data),
|
||||
..self.request
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -151,7 +198,7 @@ impl CreateTableBuilder<false> {
|
||||
Self {
|
||||
parent,
|
||||
request: CreateTableRequest::new(name, CreateTableData::Empty(table_definition)),
|
||||
data: None,
|
||||
data: CreateTableBuilderInitialData::None,
|
||||
embeddings: Vec::default(),
|
||||
embedding_registry,
|
||||
}
|
||||
@@ -432,7 +479,7 @@ impl Connection {
|
||||
TableNamesBuilder::new(self.internal.clone())
|
||||
}
|
||||
|
||||
/// Create a new table from data
|
||||
/// Create a new table from an iterator of data
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
@@ -451,6 +498,25 @@ impl Connection {
|
||||
)
|
||||
}
|
||||
|
||||
/// Create a new table from a stream of data
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// * `name` - The name of the table
|
||||
/// * `initial_data` - The initial data to write to the table
|
||||
pub fn create_table_streaming<T: IntoArrowStream>(
|
||||
&self,
|
||||
name: impl Into<String>,
|
||||
initial_data: T,
|
||||
) -> CreateTableBuilder<true> {
|
||||
CreateTableBuilder::<true>::new_streaming(
|
||||
self.internal.clone(),
|
||||
name.into(),
|
||||
initial_data,
|
||||
self.embedding_registry.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Create an empty table with a given schema
|
||||
///
|
||||
/// # Parameters
|
||||
@@ -788,12 +854,16 @@ mod test_utils {
|
||||
mod tests {
|
||||
use std::fs::create_dir_all;
|
||||
|
||||
use arrow::compute::concat_batches;
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use futures::{stream, TryStreamExt};
|
||||
use lance::error::{ArrowResult, DataFusionResult};
|
||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::arrow::SimpleRecordBatchStream;
|
||||
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
|
||||
use crate::query::QueryBase;
|
||||
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
||||
@@ -976,6 +1046,63 @@ mod tests {
|
||||
assert_eq!(batches.len(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_streaming() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
|
||||
let batches = make_data().collect::<ArrowResult<Vec<_>>>().unwrap();
|
||||
|
||||
let schema = batches.first().unwrap().schema();
|
||||
let one_batch = concat_batches(&schema, batches.iter()).unwrap();
|
||||
|
||||
let ldb_stream = stream::iter(batches.clone().into_iter().map(Result::Ok));
|
||||
let ldb_stream: SendableRecordBatchStream =
|
||||
Box::pin(SimpleRecordBatchStream::new(ldb_stream, schema.clone()));
|
||||
|
||||
let tbl1 = db
|
||||
.create_table_streaming("one", ldb_stream)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let df_stream = stream::iter(batches.into_iter().map(DataFusionResult::Ok));
|
||||
let df_stream: datafusion_physical_plan::SendableRecordBatchStream =
|
||||
Box::pin(RecordBatchStreamAdapter::new(schema.clone(), df_stream));
|
||||
|
||||
let tbl2 = db
|
||||
.create_table_streaming("two", df_stream)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let tbl1_data = tbl1
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let tbl1_data = concat_batches(&schema, tbl1_data.iter()).unwrap();
|
||||
assert_eq!(tbl1_data, one_batch);
|
||||
|
||||
let tbl2_data = tbl2
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let tbl2_data = concat_batches(&schema, tbl2_data.iter()).unwrap();
|
||||
assert_eq!(tbl2_data, one_batch);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn drop_table() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
|
||||
@@ -18,8 +18,13 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use async_trait::async_trait;
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use futures::stream;
|
||||
use lance::dataset::ReadParams;
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
|
||||
use crate::arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt};
|
||||
use crate::error::Result;
|
||||
use crate::table::{BaseTable, TableDefinition, WriteOptions};
|
||||
|
||||
@@ -81,12 +86,41 @@ impl Default for CreateTableMode {
|
||||
|
||||
/// The data to start a table or a schema to create an empty table
|
||||
pub enum CreateTableData {
|
||||
/// Creates a table using data, no schema required as it will be obtained from the data
|
||||
/// Creates a table using an iterator of data, the schema will be obtained from the data
|
||||
Data(Box<dyn RecordBatchReader + Send>),
|
||||
/// Creates a table using a stream of data, the schema will be obtained from the data
|
||||
StreamingData(SendableRecordBatchStream),
|
||||
/// Creates an empty table, the definition / schema must be provided separately
|
||||
Empty(TableDefinition),
|
||||
}
|
||||
|
||||
impl CreateTableData {
|
||||
pub fn schema(&self) -> Arc<arrow_schema::Schema> {
|
||||
match self {
|
||||
Self::Data(reader) => reader.schema(),
|
||||
Self::StreamingData(stream) => stream.schema(),
|
||||
Self::Empty(definition) => definition.schema.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl StreamingWriteSource for CreateTableData {
|
||||
fn arrow_schema(&self) -> Arc<arrow_schema::Schema> {
|
||||
self.schema()
|
||||
}
|
||||
fn into_stream(self) -> datafusion_physical_plan::SendableRecordBatchStream {
|
||||
match self {
|
||||
Self::Data(reader) => reader.into_stream(),
|
||||
Self::StreamingData(stream) => stream.into_df_stream(),
|
||||
Self::Empty(table_definition) => {
|
||||
let schema = table_definition.schema.clone();
|
||||
Box::pin(RecordBatchStreamAdapter::new(schema, stream::empty()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A request to create a table
|
||||
pub struct CreateTableRequest {
|
||||
/// The name of the new table
|
||||
|
||||
@@ -7,9 +7,9 @@ use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use arrow_array::RecordBatchIterator;
|
||||
use lance::dataset::{ReadParams, WriteMode};
|
||||
use lance::io::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry, WrappingObjectStore};
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
use lance_encoding::version::LanceFileVersion;
|
||||
use lance_table::io::commit::commit_handler_from_url;
|
||||
use object_store::local::LocalFileSystem;
|
||||
@@ -22,8 +22,8 @@ use crate::table::NativeTable;
|
||||
use crate::utils::validate_table_name;
|
||||
|
||||
use super::{
|
||||
BaseTable, CreateTableData, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
|
||||
OpenTableRequest, TableNamesRequest,
|
||||
BaseTable, CreateTableMode, CreateTableRequest, Database, DatabaseOptions, OpenTableRequest,
|
||||
TableNamesRequest,
|
||||
};
|
||||
|
||||
/// File extension to indicate a lance table
|
||||
@@ -322,6 +322,37 @@ impl ListingDatabase {
|
||||
|
||||
Ok(uri)
|
||||
}
|
||||
|
||||
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
|
||||
let object_store_params = ObjectStoreParams {
|
||||
storage_options: Some(self.storage_options.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
let mut uri = self.uri.clone();
|
||||
if let Some(query_string) = &self.query_string {
|
||||
uri.push_str(&format!("?{}", query_string));
|
||||
}
|
||||
let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params)).await?;
|
||||
for name in names {
|
||||
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
|
||||
let full_path = self.base_path.child(dir_name.clone());
|
||||
|
||||
commit_handler.delete(&full_path).await?;
|
||||
|
||||
self.object_store
|
||||
.remove_dir_all(full_path.clone())
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
// this error is not lance::Error::DatasetNotFound, as the method
|
||||
// `remove_dir_all` may be used to remove something not be a dataset
|
||||
lance::Error::NotFound { .. } => Error::TableNotFound {
|
||||
name: name.to_owned(),
|
||||
},
|
||||
_ => Error::from(err),
|
||||
})?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -401,19 +432,12 @@ impl Database for ListingDatabase {
|
||||
write_params.mode = WriteMode::Overwrite;
|
||||
}
|
||||
|
||||
let data = match request.data {
|
||||
CreateTableData::Data(data) => data,
|
||||
CreateTableData::Empty(table_definition) => {
|
||||
let schema = table_definition.schema.clone();
|
||||
Box::new(RecordBatchIterator::new(vec![], schema))
|
||||
}
|
||||
};
|
||||
let data_schema = data.schema();
|
||||
let data_schema = request.data.arrow_schema();
|
||||
|
||||
match NativeTable::create(
|
||||
&table_uri,
|
||||
&request.name,
|
||||
data,
|
||||
request.data,
|
||||
self.store_wrapper.clone(),
|
||||
Some(write_params),
|
||||
self.read_consistency_interval,
|
||||
@@ -500,40 +524,12 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn drop_table(&self, name: &str) -> Result<()> {
|
||||
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
|
||||
let full_path = self.base_path.child(dir_name.clone());
|
||||
self.object_store
|
||||
.remove_dir_all(full_path.clone())
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
// this error is not lance::Error::DatasetNotFound,
|
||||
// as the method `remove_dir_all` may be used to remove something not be a dataset
|
||||
lance::Error::NotFound { .. } => Error::TableNotFound {
|
||||
name: name.to_owned(),
|
||||
},
|
||||
_ => Error::from(err),
|
||||
})?;
|
||||
|
||||
let object_store_params = ObjectStoreParams {
|
||||
storage_options: Some(self.storage_options.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
let mut uri = self.uri.clone();
|
||||
if let Some(query_string) = &self.query_string {
|
||||
uri.push_str(&format!("?{}", query_string));
|
||||
}
|
||||
let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params))
|
||||
.await
|
||||
.unwrap();
|
||||
commit_handler.delete(&full_path).await.unwrap();
|
||||
Ok(())
|
||||
self.drop_tables(vec![name.to_string()]).await
|
||||
}
|
||||
|
||||
async fn drop_all_tables(&self) -> Result<()> {
|
||||
self.object_store
|
||||
.remove_dir_all(self.base_path.clone())
|
||||
.await?;
|
||||
Ok(())
|
||||
let tables = self.table_names(TableNamesRequest::default()).await?;
|
||||
self.drop_tables(tables).await
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
|
||||
@@ -164,6 +164,11 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
let data = match request.data {
|
||||
CreateTableData::Data(data) => data,
|
||||
CreateTableData::StreamingData(_) => {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Creating a remote table from a streaming source".to_string(),
|
||||
})
|
||||
}
|
||||
CreateTableData::Empty(table_definition) => {
|
||||
let schema = table_definition.schema.clone();
|
||||
Box::new(RecordBatchIterator::new(vec![], schema))
|
||||
|
||||
@@ -28,13 +28,13 @@ pub use lance::dataset::NewColumnTransform;
|
||||
pub use lance::dataset::ReadParams;
|
||||
pub use lance::dataset::Version;
|
||||
use lance::dataset::{
|
||||
Dataset, InsertBuilder, UpdateBuilder as LanceUpdateBuilder, WhenMatched, WriteMode,
|
||||
WriteParams,
|
||||
InsertBuilder, UpdateBuilder as LanceUpdateBuilder, WhenMatched, WriteMode, WriteParams,
|
||||
};
|
||||
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
|
||||
use lance::index::vector::utils::infer_vector_dim;
|
||||
use lance::io::WrappingObjectStore;
|
||||
use lance_datafusion::exec::execute_plan;
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
use lance_index::vector::hnsw::builder::HnswBuildParams;
|
||||
use lance_index::vector::ivf::IvfBuildParams;
|
||||
use lance_index::vector::pq::PQBuildParams;
|
||||
@@ -1264,7 +1264,7 @@ impl NativeTable {
|
||||
pub async fn create(
|
||||
uri: &str,
|
||||
name: &str,
|
||||
batches: impl RecordBatchReader + Send + 'static,
|
||||
batches: impl StreamingWriteSource,
|
||||
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
@@ -1279,7 +1279,9 @@ impl NativeTable {
|
||||
None => params,
|
||||
};
|
||||
|
||||
let dataset = Dataset::write(batches, uri, Some(params))
|
||||
let insert_builder = InsertBuilder::new(uri).with_params(¶ms);
|
||||
let dataset = insert_builder
|
||||
.execute_stream(batches)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
|
||||
@@ -1287,6 +1289,7 @@ impl NativeTable {
|
||||
},
|
||||
source => Error::Lance { source },
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
name: name.to_string(),
|
||||
uri: uri.to_string(),
|
||||
@@ -2391,8 +2394,9 @@ mod tests {
|
||||
use arrow_data::ArrayDataBuilder;
|
||||
use arrow_schema::{DataType, Field, Schema, TimeUnit};
|
||||
use futures::TryStreamExt;
|
||||
use lance::dataset::{Dataset, WriteMode};
|
||||
use lance::dataset::WriteMode;
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
use lance::Dataset;
|
||||
use rand::Rng;
|
||||
use tempfile::tempdir;
|
||||
|
||||
@@ -2442,6 +2446,7 @@ mod tests {
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let batches = make_test_batches();
|
||||
let batches = Box::new(batches) as Box<dyn RecordBatchReader + Send>;
|
||||
let table = NativeTable::create(uri, "test", batches, None, None, None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -151,7 +151,7 @@ impl TableProvider for BaseTableAdapter {
|
||||
|
||||
async fn scan(
|
||||
&self,
|
||||
_state: &dyn Session,
|
||||
state: &dyn Session,
|
||||
projection: Option<&Vec<usize>>,
|
||||
filters: &[Expr],
|
||||
limit: Option<usize>,
|
||||
@@ -177,9 +177,15 @@ impl TableProvider for BaseTableAdapter {
|
||||
// Need to override the default of 10
|
||||
query.limit = None;
|
||||
}
|
||||
|
||||
let options = QueryExecutionOptions {
|
||||
max_batch_length: state.config().batch_size() as u32,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let plan = self
|
||||
.table
|
||||
.create_plan(&AnyQuery::Query(query), QueryExecutionOptions::default())
|
||||
.create_plan(&AnyQuery::Query(query), options)
|
||||
.map_err(|err| DataFusionError::External(err.into()))
|
||||
.await?;
|
||||
Ok(Arc::new(MetadataEraserExec::new(plan)))
|
||||
@@ -208,11 +214,14 @@ pub mod tests {
|
||||
RecordBatchReader, StringArray, UInt32Array,
|
||||
};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use datafusion::{datasource::provider_as_source, prelude::SessionContext};
|
||||
use datafusion::{
|
||||
datasource::provider_as_source,
|
||||
prelude::{SessionConfig, SessionContext},
|
||||
};
|
||||
use datafusion_catalog::TableProvider;
|
||||
use datafusion_execution::SendableRecordBatchStream;
|
||||
use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
|
||||
use futures::TryStreamExt;
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::{
|
||||
@@ -332,7 +341,14 @@ pub mod tests {
|
||||
}
|
||||
|
||||
async fn plan_to_stream(plan: LogicalPlan) -> SendableRecordBatchStream {
|
||||
SessionContext::new()
|
||||
Self::plan_to_stream_with_config(plan, SessionConfig::default()).await
|
||||
}
|
||||
|
||||
async fn plan_to_stream_with_config(
|
||||
plan: LogicalPlan,
|
||||
config: SessionConfig,
|
||||
) -> SendableRecordBatchStream {
|
||||
SessionContext::new_with_config(config)
|
||||
.execute_logical_plan(plan)
|
||||
.await
|
||||
.unwrap()
|
||||
@@ -382,6 +398,30 @@ pub mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_size() {
|
||||
let fixture = TestFixture::new().await;
|
||||
|
||||
let plan = LogicalPlanBuilder::scan("foo", provider_as_source(fixture.adapter2), None)
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = SessionConfig::default().with_batch_size(100);
|
||||
|
||||
let stream = TestFixture::plan_to_stream_with_config(plan.clone(), config).await;
|
||||
|
||||
let batch_count = stream.count().await;
|
||||
assert_eq!(batch_count, 10);
|
||||
|
||||
let config = SessionConfig::default().with_batch_size(250);
|
||||
|
||||
let stream = TestFixture::plan_to_stream_with_config(plan, config).await;
|
||||
|
||||
let batch_count = stream.count().await;
|
||||
assert_eq!(batch_count, 4);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_metadata_erased() {
|
||||
let fixture = TestFixture::new().await;
|
||||
|
||||
Reference in New Issue
Block a user