Mirror of https://github.com/lancedb/lancedb.git (synced 2025-12-23 05:19:58 +00:00)

Compare commits: python-v0. ... python-v0. (23 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 0e4c52b8a6 |  |
|  | c8bebf4776 |  |
|  | c14ad91df0 |  |
|  | ad48242ffb |  |
|  | 1a9a392e20 |  |
|  | b489edc576 |  |
|  | 8708fde3ef |  |
|  | cc7e54298b |  |
|  | d1e8a97a2a |  |
|  | 01dadb0862 |  |
|  | 0724d41c4b |  |
|  | cbb56e25ab |  |
|  | 78de8f5782 |  |
|  | a6544c2a31 |  |
|  | 39ed70896a |  |
|  | ae672df1b7 |  |
|  | 15c3f42387 |  |
|  | f65d85efcc |  |
|  | 6b5c046c3b |  |
|  | d00f4e51d0 |  |
|  | fbc44d4243 |  |
|  | b53eee42ce |  |
|  | 7e0d6088ca |  |
.bumpversion.cfg (Normal file, 12 lines changed)

@@ -0,0 +1,12 @@
[bumpversion]
current_version = 0.1.9
commit = True
message = Bump version: {current_version} → {new_version}
tag = True
tag_name = v{new_version}

[bumpversion:file:node/package.json]

[bumpversion:file:rust/ffi/node/Cargo.toml]

[bumpversion:file:rust/vectordb/Cargo.toml]
.github/workflows/cargo-publish.yml (vendored, Normal file, 29 lines changed)

@@ -0,0 +1,29 @@
name: Cargo Publish

on:
  release:
    types: [ published ]

env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
  # key, so we set it to make sure it is always consistent.
  CARGO_TERM_COLOR: always

jobs:
  build:
    runs-on: ubuntu-22.04
    timeout-minutes: 30
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/checkout@v3
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Publish the package
        run: |
          cargo publish -p vectordb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
.github/workflows/make-release-commit.yml (vendored, Normal file, 55 lines changed)

@@ -0,0 +1,55 @@
name: Create release commit

on:
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Dry run (create the local commit/tags but do not push it)'
        required: true
        default: "false"
        type: choice
        options:
          - "true"
          - "false"
      part:
        description: 'What kind of release is this?'
        required: true
        default: 'patch'
        type: choice
        options:
          - patch
          - minor
          - major

jobs:
  bump-version:
    runs-on: ubuntu-latest
    steps:
      - name: Check out main
        uses: actions/checkout@v3
        with:
          ref: main
          persist-credentials: false
          fetch-depth: 0
          lfs: true
      - name: Set git configs for bumpversion
        shell: bash
        run: |
          git config user.name 'Lance Release'
          git config user.email 'lance-dev@lancedb.com'
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Bump version, create tag and commit
        run: |
          pip install bump2version
          bumpversion --verbose ${{ inputs.part }}
      - name: Push new version and tag
        if: ${{ inputs.dry_run }} == "false"
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
          branch: main
          tags: true
.github/workflows/pypi-publish.yml (vendored, 4 lines changed)

@@ -3,12 +3,12 @@ name: PyPI Publish
on:
  release:
    types: [ published ]
  tags:
    - 'python-v*'  # Push events that matches the python-make-release action

jobs:
  publish:
    runs-on: ubuntu-latest
    # Only runs on tags that matches the python-make-release action
    if: startsWith(github.ref, 'refs/tags/python-v')
    defaults:
      run:
        shell: bash
.github/workflows/python.yml (vendored, 4 lines changed)

@@ -32,9 +32,11 @@ jobs:
        run: |
          pip install -e .
          pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
          pip install pytest pytest-mock black
          pip install pytest pytest-mock black isort
      - name: Black
        run: black --check --diff --no-color --quiet .
      - name: isort
        run: isort --check --diff --quiet .
      - name: Run tests
        run: pytest -x -v --durations=30 tests
      - name: doctest
.github/workflows/rust.yml (vendored, Normal file, 67 lines changed)

@@ -0,0 +1,67 @@
name: Rust

on:
  push:
    branches:
      - main
  pull_request:
    paths:
      - rust/**
      - .github/workflows/rust.yml

env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
  # key, so we set it to make sure it is always consistent.
  CARGO_TERM_COLOR: always
  # Disable full debug symbol generation to speed up CI build and keep memory down
  # "1" means line tables only, which is useful for panic tracebacks.
  RUSTFLAGS: "-C debuginfo=1"
  RUST_BACKTRACE: "1"

jobs:
  linux:
    timeout-minutes: 30
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: bash
        working-directory: rust
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Build
        run: cargo build --all-features
      - name: Run tests
        run: cargo test --all-features
  macos:
    runs-on: macos-12
    timeout-minutes: 30
    defaults:
      run:
        shell: bash
        working-directory: rust
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      - name: CPU features
        run: sysctl -a | grep cpu
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
      - name: Install dependencies
        run: brew install protobuf
      - name: Build
        run: cargo build --all-features
      - name: Run tests
        run: cargo test --all-features
Cargo.lock (generated, 242 lines changed)

@@ -2,12 +2,6 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "accelerate-src"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "415ed64958754dbe991900f3940677e6a7eefb4d7367afd70d642677b0c7d19d"
|
||||
|
||||
[[package]]
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
@@ -68,9 +62,9 @@ checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
|
||||
|
||||
[[package]]
|
||||
name = "arrow"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1aea9fcb25bbb70f7f922f95b99ca29c1013dab47f6df61a6f24861842dd7f2e"
|
||||
checksum = "6619cab21a0cdd8c9b9f1d9e09bfaa9b1974e5ef809a6566aef0b998caf38ace"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-arith",
|
||||
@@ -90,9 +84,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-arith"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d967b42f7b12c91fd78acd396b20c2973b184c8866846674abbb00c963e93ab"
|
||||
checksum = "e0dc95485623a76e00929bda8caa40c1f838190952365c4f43a7b9ae86d03e94"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -105,9 +99,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-array"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3190f208ee7aa0f3596fa0098d42911dec5e123ca88c002a08b24877ad14c71e"
|
||||
checksum = "3267847f53d3042473cfd2c769afd8d74a6d7d201fc3a34f5cb84c0282ef47a7"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-buffer",
|
||||
@@ -122,9 +116,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-buffer"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d33c733c5b6c44a0fc526f29c09546e04eb56772a7a21e48e602f368be381f6"
|
||||
checksum = "c5f66553e66e120ac4b21570368ee9ebf35ff3f5399f872b0667699e145678f5"
|
||||
dependencies = [
|
||||
"half",
|
||||
"num",
|
||||
@@ -132,9 +126,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-cast"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "abd349520b6a1ed4924ae2afc9d23330a3044319e4ec3d5b124c09e4d440ae87"
|
||||
checksum = "65e6f3579dbf0d97c683d451b2550062b0f0e62a3169bf74238b5f59f44ad6d8"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -149,9 +143,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-csv"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c80af3c3e290a2a7e1cc518f1471dff331878cb4af9a5b088bf030b89debf649"
|
||||
checksum = "373579c4c1a8f5307d3125b7a89c700fcf8caf85821c77eb4baab3855ae0aba5"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -168,9 +162,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-data"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1c8361947aaa96d331da9df3f7a08bdd8ab805a449994c97f5c4d24c4b7e2cf"
|
||||
checksum = "61bc8df9912cca6642665fdf989d6fa0de2570f18a7f709bcf59d29de96d2097"
|
||||
dependencies = [
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
@@ -180,9 +174,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-ipc"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a46ee000b9fbd1e8db6e8b26acb8c760838512b39d8c9f9d73892cb55351d50"
|
||||
checksum = "0105dcf5f91daa7182d87b713ee0b32b3bfc88e0c48e7dc3e9d6f1277a07d1ae"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -190,13 +184,14 @@ dependencies = [
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"flatbuffers",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-json"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bf2366607be867ced681ad7f272371a5cf1fc2941328eef7b4fee14565166fb"
|
||||
checksum = "e73134fb5b5ec8770f8cbb214c2c487b2d350081e403ca4eeeb6f8f5e19846ac"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -214,9 +209,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-ord"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "304069901c867200e21ec868ae7521165875470ef2f1f6d58f979a443d63997e"
|
||||
checksum = "89f25bc66e18d4c2aa1fe2f9bb03e2269da60e636213210385ae41a107f9965a"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -229,9 +224,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-row"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0d57fe8ceef3392fdd493269d8a2d589de17bafce151aacbffbddac7a57f441a"
|
||||
checksum = "1095ff85ea4f5ff02d17b30b089de31b51a50be01c6b674f0a0509ab771232f1"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-array",
|
||||
@@ -244,15 +239,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-schema"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a16b88a93ac8350f0200b1cd336a1f887315925b8dd7aa145a37b8bdbd8497a4"
|
||||
checksum = "25187bbef474151a2e4ddec67b9e34bda5cbfba292dc571392fa3a1f71ff5a82"
|
||||
|
||||
[[package]]
|
||||
name = "arrow-select"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "98e8a4d6ca37d5212439b24caad4d80743fcbb706706200dd174bb98e68fe9d8"
|
||||
checksum = "fd0d4ee884aec3aa05e41478e3cd312bf609de9babb5d187a43fb45931da4da4"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -263,9 +258,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow-string"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cbb594efa397eb6a546f42b1f8df3d242ea84dbfda5232e06035dc2b2e2c8459"
|
||||
checksum = "d6d71c3ffe4c07e66ce8fdc6aed5b00e0e60c5144911879b10546f5b72d8fa1c"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -273,7 +268,7 @@ dependencies = [
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"regex",
|
||||
"regex-syntax 0.6.29",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -654,6 +649,12 @@ version = "3.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
@@ -676,26 +677,6 @@ dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cblas"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3de46dff748ed7e891bc46faae117f48d2a7911041c6630aed3c61a3fe12326f"
|
||||
dependencies = [
|
||||
"cblas-sys",
|
||||
"libc",
|
||||
"num-complex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cblas-sys"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6feecd82cce51b0204cf063f0041d69f24ce83f680d87514b004248e7b0fa65"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
@@ -922,9 +903,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion"
|
||||
version = "23.0.0"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8a7d4b334f4512ff2fdbce87f511f570ae895af1ac7c729e77c12583253b22a"
|
||||
checksum = "9992c267436551d40b52d65289b144712e7b0ebdc62c8c859fd1574e5f73efbb"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
@@ -949,7 +930,7 @@ dependencies = [
|
||||
"lazy_static",
|
||||
"log",
|
||||
"num_cpus",
|
||||
"object_store",
|
||||
"object_store 0.5.6",
|
||||
"parking_lot",
|
||||
"parquet",
|
||||
"percent-encoding",
|
||||
@@ -967,31 +948,31 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-common"
|
||||
version = "23.0.0"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "80abfcb1dbc6390f952f21de9069e6177ad6318fcae5fbceabb50666d96533dd"
|
||||
checksum = "c3be97f7a7c720cdbb71e9eeabf814fa6ad8102b9022390f6cac74d3b4af6392"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"chrono",
|
||||
"num_cpus",
|
||||
"object_store",
|
||||
"object_store 0.5.6",
|
||||
"parquet",
|
||||
"sqlparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-execution"
|
||||
version = "23.0.0"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df2524f1b4b58319895b112809d2a59e54fa662d0e46330a455f22882c2cb7b9"
|
||||
checksum = "c77c4b14b809b0e4c5bb101b6834504f06cdbb0d3c643400c61d0d844b33264e"
|
||||
dependencies = [
|
||||
"dashmap",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"hashbrown 0.13.2",
|
||||
"log",
|
||||
"object_store",
|
||||
"object_store 0.5.6",
|
||||
"parking_lot",
|
||||
"rand",
|
||||
"tempfile",
|
||||
@@ -1000,21 +981,24 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-expr"
|
||||
version = "23.0.0"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af8040b7a75b04685f4db0a1b11ffa93cd163c1bc13751df3f5cf76baabaf5a1"
|
||||
checksum = "e6ec7409bd45cf4fae6395d7d1024c8a97e543cadc88363e405d2aad5330e5e7"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"lazy_static",
|
||||
"sqlparser",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-optimizer"
|
||||
version = "23.0.0"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74ceae25accc0f640a4238283f55f3a9fd181d55398703a4330fb2c46261e6a2"
|
||||
checksum = "64b537c93f87989c212db92a448a0f5eb4f0995e27199bb7687ae94f8b64a7a8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -1025,14 +1009,14 @@ dependencies = [
|
||||
"hashbrown 0.13.2",
|
||||
"itertools",
|
||||
"log",
|
||||
"regex-syntax 0.6.29",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-physical-expr"
|
||||
version = "23.0.0"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df4cf228b312f2758cb78e93fe3d2dc602345028efdf7cfa5b338cb370d0a347"
|
||||
checksum = "f60ee3f53340fdef36ee54d9e12d446ae2718b1d0196ac581f791d34808ec876"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow",
|
||||
@@ -1058,9 +1042,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-row"
|
||||
version = "23.0.0"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b52b486fb3d81bb132e400304be01af5aba0ad6737e3518045bb98944991fe32"
|
||||
checksum = "d58fc64058aa3bcb00077a0d19474a0d584d31dec8c7ac3406868f485f659af9"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
@@ -1070,9 +1054,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-sql"
|
||||
version = "23.0.0"
|
||||
version = "26.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773e985c182e41cfd68f7a7b483ab6bfb68beaac241c348cd4b1bf9f9d61b762"
|
||||
checksum = "1531f0314151a34bf6c0a83c7261525688b7c729876f53e7896b8f4ca8f57d07"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-schema",
|
||||
@@ -1483,6 +1467,12 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.14.26"
|
||||
@@ -1646,18 +1636,19 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "0.4.17"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86dda8185bd1ffae7b910c1f68035af23be9b717c52e9cc4de176cd30b47f772"
|
||||
checksum = "84dfe2a2af3e7b079a4743e303617c6ac19f43d212b7d6def8873305266f2bcd"
|
||||
dependencies = [
|
||||
"accelerate-src",
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-row",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"async-recursion",
|
||||
@@ -1666,15 +1657,15 @@ dependencies = [
|
||||
"aws-credential-types",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"cblas",
|
||||
"chrono",
|
||||
"dashmap",
|
||||
"datafusion",
|
||||
"futures",
|
||||
"lapack",
|
||||
"log",
|
||||
"lru_time_cache",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
"object_store",
|
||||
"object_store 0.6.1",
|
||||
"openblas-src",
|
||||
"ordered-float 3.7.0",
|
||||
"path-absolutize",
|
||||
@@ -1684,6 +1675,7 @@ dependencies = [
|
||||
"prost-types",
|
||||
"rand",
|
||||
"reqwest",
|
||||
"roaring",
|
||||
"shellexpand",
|
||||
"snafu",
|
||||
"sqlparser-lance",
|
||||
@@ -1693,26 +1685,6 @@ dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lapack"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad676a6b4df7e76a9fd80a0c50c619a3948d6105b62a0ab135f064d99c51d207"
|
||||
dependencies = [
|
||||
"lapack-sys",
|
||||
"libc",
|
||||
"num-complex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lapack-sys"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "447f56c85fb410a7a3d36701b2153c1018b1d2b908c5fbaf01c1b04fac33bcbe"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
@@ -2057,13 +2029,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec9cd6ca25e796a49fa242876d1c4de36a24a6da5258e9f0bc062dbf5e81c53b"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"aws-config",
|
||||
"aws-credential-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"futures",
|
||||
"itertools",
|
||||
"parking_lot",
|
||||
"percent-encoding",
|
||||
"snafu",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"url",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object_store"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "27c776db4f332b571958444982ff641d2531417a326ca368995073b639205d58"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.21.0",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"futures",
|
||||
"humantime",
|
||||
"hyper",
|
||||
"itertools",
|
||||
"parking_lot",
|
||||
"percent-encoding",
|
||||
@@ -2212,9 +2203,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet"
|
||||
version = "37.0.0"
|
||||
version = "40.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5022d98333271f4ca3e87bab760498e61726bf5a6ca919123c80517e20ded29"
|
||||
checksum = "d6a656fcc17e641657c955742c689732684e096f790ff30865d9f8dcc39f7c4a"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-array",
|
||||
@@ -2234,6 +2225,7 @@ dependencies = [
|
||||
"lz4",
|
||||
"num",
|
||||
"num-bigint",
|
||||
"object_store 0.5.6",
|
||||
"paste",
|
||||
"seq-macro",
|
||||
"snap",
|
||||
@@ -2539,15 +2531,9 @@ checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax 0.7.1",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.1"
|
||||
@@ -2598,6 +2584,12 @@ dependencies = [
|
||||
"winreg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "retain_mut"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.16.20"
|
||||
@@ -2613,6 +2605,17 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "roaring"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef0fb5e826a8bde011ecae6a8539dd333884335c57ff0f003fbe27c25bbe8f71"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"retain_mut",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.4.0"
|
||||
@@ -2901,9 +2904,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.33.0"
|
||||
version = "0.34.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
|
||||
checksum = "37d3706eefb17039056234df6b566b0014f303f867f2656108334a55b8096f59"
|
||||
dependencies = [
|
||||
"log",
|
||||
"sqlparser_derive",
|
||||
@@ -2940,6 +2943,9 @@ name = "strum"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
@@ -3358,13 +3364,13 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "vectordb"
|
||||
version = "0.0.1"
|
||||
version = "0.1.8"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"lance",
|
||||
"object_store",
|
||||
"object_store 0.5.6",
|
||||
"rand",
|
||||
"snafu",
|
||||
"tempfile",
|
||||
@@ -3373,7 +3379,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "vectordb-node"
|
||||
version = "0.1.0"
|
||||
version = "0.1.8"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-ipc",
|
||||
|
||||
@@ -6,11 +6,13 @@ docs_dir: src
theme:
  name: "material"
  logo: assets/logo.png
  favicon: assets/logo.png
  features:
    - content.code.copy
    - content.tabs.link
  icon:
    repo: fontawesome/brands/github
  custom_dir: overrides

plugins:
  - search
docs/overrides/partials/header.html (Normal file, 176 lines changed)

@@ -0,0 +1,176 @@
|
||||
<!--
|
||||
Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
IN THE SOFTWARE.
|
||||
-->
|
||||
|
||||
{% set class = "md-header" %}
|
||||
{% if "navigation.tabs.sticky" in features %}
|
||||
{% set class = class ~ " md-header--shadow md-header--lifted" %}
|
||||
{% elif "navigation.tabs" not in features %}
|
||||
{% set class = class ~ " md-header--shadow" %}
|
||||
{% endif %}
|
||||
|
||||
<!-- Header -->
|
||||
<header class="{{ class }}" data-md-component="header">
|
||||
<nav
|
||||
class="md-header__inner md-grid"
|
||||
aria-label="{{ lang.t('header') }}"
|
||||
>
|
||||
|
||||
<!-- Link to home -->
|
||||
<a
|
||||
href="{{ config.extra.homepage | d(nav.homepage.url, true) | url }}"
|
||||
title="{{ config.site_name | e }}"
|
||||
class="md-header__button md-logo"
|
||||
aria-label="{{ config.site_name }}"
|
||||
data-md-component="logo"
|
||||
>
|
||||
{% include "partials/logo.html" %}
|
||||
</a>
|
||||
|
||||
<!-- Button to open drawer -->
|
||||
<label class="md-header__button md-icon" for="__drawer">
|
||||
{% include ".icons/material/menu" ~ ".svg" %}
|
||||
</label>
|
||||
|
||||
<!-- Header title -->
|
||||
<div class="md-header__title" style="width: auto !important;" data-md-component="header-title">
|
||||
<div class="md-header__ellipsis">
|
||||
<div class="md-header__topic">
|
||||
<span class="md-ellipsis">
|
||||
{{ config.site_name }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="md-header__topic" data-md-component="header-topic">
|
||||
<span class="md-ellipsis">
|
||||
{% if page.meta and page.meta.title %}
|
||||
{{ page.meta.title }}
|
||||
{% else %}
|
||||
{{ page.title }}
|
||||
{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Color palette -->
|
||||
{% if config.theme.palette %}
|
||||
{% if not config.theme.palette is mapping %}
|
||||
<form class="md-header__option" data-md-component="palette">
|
||||
{% for option in config.theme.palette %}
|
||||
{% set scheme = option.scheme | d("default", true) %}
|
||||
{% set primary = option.primary | d("indigo", true) %}
|
||||
{% set accent = option.accent | d("indigo", true) %}
|
||||
<input
|
||||
class="md-option"
|
||||
data-md-color-media="{{ option.media }}"
|
||||
data-md-color-scheme="{{ scheme | replace(' ', '-') }}"
|
||||
data-md-color-primary="{{ primary | replace(' ', '-') }}"
|
||||
data-md-color-accent="{{ accent | replace(' ', '-') }}"
|
||||
{% if option.toggle %}
|
||||
aria-label="{{ option.toggle.name }}"
|
||||
{% else %}
|
||||
aria-hidden="true"
|
||||
{% endif %}
|
||||
type="radio"
|
||||
name="__palette"
|
||||
id="__palette_{{ loop.index }}"
|
||||
/>
|
||||
{% if option.toggle %}
|
||||
<label
|
||||
class="md-header__button md-icon"
|
||||
title="{{ option.toggle.name }}"
|
||||
for="__palette_{{ loop.index0 or loop.length }}"
|
||||
hidden
|
||||
>
|
||||
{% include ".icons/" ~ option.toggle.icon ~ ".svg" %}
|
||||
</label>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</form>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
<!-- Site language selector -->
|
||||
{% if config.extra.alternate %}
|
||||
<div class="md-header__option">
|
||||
<div class="md-select">
|
||||
{% set icon = config.theme.icon.alternate or "material/translate" %}
|
||||
<button
|
||||
class="md-header__button md-icon"
|
||||
aria-label="{{ lang.t('select.language') }}"
|
||||
>
|
||||
{% include ".icons/" ~ icon ~ ".svg" %}
|
||||
</button>
|
||||
<div class="md-select__inner">
|
||||
<ul class="md-select__list">
|
||||
{% for alt in config.extra.alternate %}
|
||||
<li class="md-select__item">
|
||||
<a
|
||||
href="{{ alt.link | url }}"
|
||||
hreflang="{{ alt.lang }}"
|
||||
class="md-select__link"
|
||||
>
|
||||
{{ alt.name }}
|
||||
</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Button to open search modal -->
|
||||
{% if "material/search" in config.plugins %}
|
||||
<label class="md-header__button md-icon" for="__search">
|
||||
{% include ".icons/material/magnify.svg" %}
|
||||
</label>
|
||||
|
||||
<!-- Search interface -->
|
||||
{% include "partials/search.html" %}
|
||||
{% endif %}
|
||||
|
||||
<div style="margin-left: 10px; margin-right: 5px;">
|
||||
<a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
|
||||
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
|
||||
</a>
|
||||
</div>
|
||||
<div style="margin-left: 5px; margin-right: 5px;">
|
||||
<a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<!-- Repository information -->
|
||||
{% if config.repo_url %}
|
||||
<div class="md-header__source" style="margin-left: -5px !important;">
|
||||
{% include "partials/source.html" %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</nav>
|
||||
|
||||
<!-- Navigation tabs (sticky) -->
|
||||
{% if "navigation.tabs.sticky" in features %}
|
||||
{% if "navigation.tabs" in features %}
|
||||
{% include "partials/tabs.html" %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</header>
|
||||
@@ -67,7 +67,7 @@ There are a couple of parameters that can be used to fine-tune the search:
  e.g., for 1M vectors divided up into 256 partitions, nprobes should be set to ~20-40.<br/>
  Note: nprobes is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
- **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
  A higher number makes search more accurate but also slower. If you find the recall is less than idea, try refine_factor=10 to start.<br/>
  A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
  e.g., for 1M vectors divided into 256 partitions, if you're looking for top 20, then refine_factor=200 reranks the whole partition.<br/>
  Note: refine_factor is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
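To see both knobs in one place, here is a minimal Python sketch (not part of this diff): it assumes a local database at `./data/sample-lancedb` and a table named `my_table` that already has an ANN index on its vector column, and uses the `nprobes` / `refine_factor` methods of the Python query builder described above.

```python
import lancedb

# Minimal sketch, assuming a local database and a table "my_table"
# that already has an ANN index on its vector column.
db = lancedb.connect("./data/sample-lancedb")
table = db.open_table("my_table")

results = (
    table.search([0.1, 0.3])  # query vector
    .limit(20)                # top-k results to return
    .nprobes(20)              # probe ~20 of the index partitions
    .refine_factor(10)        # re-rank 10x the requested results in memory
    .to_df()
)
```

As the note above says, both settings are silently ignored when the table has no ANN index.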
@@ -1,18 +1,19 @@
import sys
from modal import Secret, Stub, Image, web_endpoint
import lancedb
import re
import pickle
import requests
import re
import sys
import zipfile
from pathlib import Path

import requests
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import LanceDB
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from modal import Image, Secret, Stub, web_endpoint

import lancedb

lancedb_image = Image.debian_slim().pip_install(
    "lancedb", "langchain", "openai", "pandas", "tiktoken", "unstructured", "tabulate"
@@ -14,7 +14,7 @@ The key features of LanceDB include:

* Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.

* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lanecdb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.

LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/lancedb/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.
@@ -26,7 +26,7 @@ data = pd.DataFrame({
table = db.create_table("pd_table", data=data)
```

You will find detailed instructions of creating dataset and index in [Basic Operations](basic.md) and [Indexing](indexing.md)
You will find detailed instructions of creating dataset and index in [Basic Operations](basic.md) and [Indexing](ann_indexes.md)
sections.
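Since this hunk points readers to the indexing docs, a rough sketch of that follow-up step (not part of this diff) is shown below; it assumes the `pd_table` data contains a vector column, and the `num_partitions` / `num_sub_vectors` values are illustrative placeholders rather than recommendations.

```python
import lancedb

# Hypothetical follow-up to the quickstart above: build an IVF_PQ index
# on the table so that ANN search parameters (nprobes, refine_factor) apply.
db = lancedb.connect("./data/sample-lancedb")  # placeholder path
table = db.open_table("pd_table")
table.create_index(num_partitions=256, num_sub_vectors=96)  # illustrative values
```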
@@ -21,12 +21,13 @@ from argparse import ArgumentParser
from multiprocessing import Pool

import lance
import lancedb
import pyarrow as pa
from datasets import load_dataset
from PIL import Image
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizerFast

import lancedb

MODEL_ID = "openai/clip-vit-base-patch32"

device = "cuda"
node/package-lock.json (generated, 133 lines changed)

@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.1.5",
|
||||
"version": "0.1.8",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.1.5",
|
||||
"version": "0.1.8",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@apache-arrow/ts": "^12.0.0",
|
||||
@@ -23,7 +23,7 @@
|
||||
"chai": "^4.3.7",
|
||||
"eslint": "^8.39.0",
|
||||
"eslint-config-standard-with-typescript": "^34.0.1",
|
||||
"eslint-plugin-import": "^2.27.5",
|
||||
"eslint-plugin-import": "^2.26.0",
|
||||
"eslint-plugin-n": "^15.7.0",
|
||||
"eslint-plugin-promise": "^6.1.1",
|
||||
"mocha": "^10.2.0",
|
||||
@@ -787,24 +787,6 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/array.prototype.flatmap": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.1.tgz",
|
||||
"integrity": "sha512-8UGn9O1FDVvMNB0UlLv4voxRMze7+FpHyF5mSMRjWHUMlpoDViniy05870VlxhfgTnLbpuwTzvD76MTtWxB/mQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"call-bind": "^1.0.2",
|
||||
"define-properties": "^1.1.4",
|
||||
"es-abstract": "^1.20.4",
|
||||
"es-shim-unscopables": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/assertion-error": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz",
|
||||
@@ -1633,25 +1615,23 @@
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-import": {
|
||||
"version": "2.27.5",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz",
|
||||
"integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==",
|
||||
"version": "2.26.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.26.0.tgz",
|
||||
"integrity": "sha512-hYfi3FXaM8WPLf4S1cikh/r4IxnO6zrhZbEGz2b660EJRbuxgpDS5gkCuYgGWg2xxh2rBuIr4Pvhve/7c31koA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"array-includes": "^3.1.6",
|
||||
"array.prototype.flat": "^1.3.1",
|
||||
"array.prototype.flatmap": "^1.3.1",
|
||||
"debug": "^3.2.7",
|
||||
"array-includes": "^3.1.4",
|
||||
"array.prototype.flat": "^1.2.5",
|
||||
"debug": "^2.6.9",
|
||||
"doctrine": "^2.1.0",
|
||||
"eslint-import-resolver-node": "^0.3.7",
|
||||
"eslint-module-utils": "^2.7.4",
|
||||
"eslint-import-resolver-node": "^0.3.6",
|
||||
"eslint-module-utils": "^2.7.3",
|
||||
"has": "^1.0.3",
|
||||
"is-core-module": "^2.11.0",
|
||||
"is-core-module": "^2.8.1",
|
||||
"is-glob": "^4.0.3",
|
||||
"minimatch": "^3.1.2",
|
||||
"object.values": "^1.1.6",
|
||||
"resolve": "^1.22.1",
|
||||
"semver": "^6.3.0",
|
||||
"object.values": "^1.1.5",
|
||||
"resolve": "^1.22.0",
|
||||
"tsconfig-paths": "^3.14.1"
|
||||
},
|
||||
"engines": {
|
||||
@@ -1662,12 +1642,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-import/node_modules/debug": {
|
||||
"version": "3.2.7",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
|
||||
"integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
|
||||
"version": "2.6.9",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
|
||||
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"ms": "^2.1.1"
|
||||
"ms": "2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-import/node_modules/doctrine": {
|
||||
@@ -1682,14 +1662,11 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-import/node_modules/semver": {
|
||||
"version": "6.3.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz",
|
||||
"integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==",
|
||||
"dev": true,
|
||||
"bin": {
|
||||
"semver": "bin/semver.js"
|
||||
}
|
||||
"node_modules/eslint-plugin-import/node_modules/ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
|
||||
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/eslint-plugin-n": {
|
||||
"version": "15.7.0",
|
||||
@@ -3619,9 +3596,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.5.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.0.tgz",
|
||||
"integrity": "sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA==",
|
||||
"version": "7.5.3",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.3.tgz",
|
||||
"integrity": "sha512-QBlUtyVk/5EeHbi7X0fw6liDZc7BBmEaSYn01fMU1OUYbf6GPsbTtd8WmnqbI20SeycoHSeiybkE/q1Q+qlThQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"lru-cache": "^6.0.0"
|
||||
@@ -5038,18 +5015,6 @@
|
||||
"es-shim-unscopables": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"array.prototype.flatmap": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.1.tgz",
|
||||
"integrity": "sha512-8UGn9O1FDVvMNB0UlLv4voxRMze7+FpHyF5mSMRjWHUMlpoDViniy05870VlxhfgTnLbpuwTzvD76MTtWxB/mQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"call-bind": "^1.0.2",
|
||||
"define-properties": "^1.1.4",
|
||||
"es-abstract": "^1.20.4",
|
||||
"es-shim-unscopables": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"assertion-error": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz",
|
||||
@@ -5707,35 +5672,33 @@
|
||||
}
|
||||
},
|
||||
"eslint-plugin-import": {
|
||||
"version": "2.27.5",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz",
|
||||
"integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==",
|
||||
"version": "2.26.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.26.0.tgz",
|
||||
"integrity": "sha512-hYfi3FXaM8WPLf4S1cikh/r4IxnO6zrhZbEGz2b660EJRbuxgpDS5gkCuYgGWg2xxh2rBuIr4Pvhve/7c31koA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"array-includes": "^3.1.6",
|
||||
"array.prototype.flat": "^1.3.1",
|
||||
"array.prototype.flatmap": "^1.3.1",
|
||||
"debug": "^3.2.7",
|
||||
"array-includes": "^3.1.4",
|
||||
"array.prototype.flat": "^1.2.5",
|
||||
"debug": "^2.6.9",
|
||||
"doctrine": "^2.1.0",
|
||||
"eslint-import-resolver-node": "^0.3.7",
|
||||
"eslint-module-utils": "^2.7.4",
|
||||
"eslint-import-resolver-node": "^0.3.6",
|
||||
"eslint-module-utils": "^2.7.3",
|
||||
"has": "^1.0.3",
|
||||
"is-core-module": "^2.11.0",
|
||||
"is-core-module": "^2.8.1",
|
||||
"is-glob": "^4.0.3",
|
||||
"minimatch": "^3.1.2",
|
||||
"object.values": "^1.1.6",
|
||||
"resolve": "^1.22.1",
|
||||
"semver": "^6.3.0",
|
||||
"object.values": "^1.1.5",
|
||||
"resolve": "^1.22.0",
|
||||
"tsconfig-paths": "^3.14.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"debug": {
|
||||
"version": "3.2.7",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
|
||||
"integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
|
||||
"version": "2.6.9",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
|
||||
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"ms": "^2.1.1"
|
||||
"ms": "2.0.0"
|
||||
}
|
||||
},
|
||||
"doctrine": {
|
||||
@@ -5747,10 +5710,10 @@
|
||||
"esutils": "^2.0.2"
|
||||
}
|
||||
},
|
||||
"semver": {
|
||||
"version": "6.3.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz",
|
||||
"integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==",
|
||||
"ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
|
||||
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
@@ -7078,9 +7041,9 @@
|
||||
}
|
||||
},
|
||||
"semver": {
|
||||
"version": "7.5.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.0.tgz",
|
||||
"integrity": "sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA==",
|
||||
"version": "7.5.3",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.3.tgz",
|
||||
"integrity": "sha512-QBlUtyVk/5EeHbi7X0fw6liDZc7BBmEaSYn01fMU1OUYbf6GPsbTtd8WmnqbI20SeycoHSeiybkE/q1Q+qlThQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"lru-cache": "^6.0.0"
|
||||
|
||||
@@ -1,6 +1,6 @@
{
  "name": "vectordb",
  "version": "0.1.5",
  "version": "0.1.9",
  "description": " Serverless, low-latency vector database for AI applications",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -35,7 +35,7 @@
    "chai": "^4.3.7",
    "eslint": "^8.39.0",
    "eslint-config-standard-with-typescript": "^34.0.1",
    "eslint-plugin-import": "^2.27.5",
    "eslint-plugin-import": "^2.26.0",
    "eslint-plugin-n": "^15.7.0",
    "eslint-plugin-promise": "^6.1.1",
    "mocha": "^10.2.0",
@@ -22,7 +22,7 @@ import { fromRecordsToBuffer } from './arrow'
import type { EmbeddingFunction } from './embedding/embedding_function'

// eslint-disable-next-line @typescript-eslint/no-var-requires
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js')
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete } = require('../native.js')

export type { EmbeddingFunction }
export { OpenAIEmbeddingFunction } from './embedding/openai'
@@ -111,6 +111,14 @@ export class Connection {
    await tableCreate.call(this._db, name, Buffer.from(await writer.toUint8Array()))
    return await this.openTable(name)
  }

  /**
   * Drop an existing table.
   * @param name The name of the table to drop.
   */
  async dropTable (name: string): Promise<void> {
    await databaseDropTable.call(this._db, name)
  }
}

export class Table<T = number[]> {
@@ -178,6 +186,22 @@ export class Table<T = number[]> {
  async create_index (indexParams: VectorIndexParams): Promise<any> {
    return await this.createIndex(indexParams)
  }

  /**
   * Returns the number of rows in this table.
   */
  async countRows (): Promise<number> {
    return tableCountRows.call(this._tbl)
  }

  /**
   * Delete rows from this table.
   *
   * @param filter The filter to be applied to this table.
   */
  async delete (filter: string): Promise<void> {
    return tableDelete.call(this._tbl, filter)
  }
}

interface IvfPQIndexConfig {
@@ -293,6 +317,8 @@ export class Query<T = number[]> {
    return this
  }

  where = this.filter

  /** Return only the specified columns.
   *
   * @param value Only select the specified columns. If not specified, all columns will be returned.
@@ -64,13 +64,20 @@ describe('LanceDB client', function () {
      assert.equal(results[0].id, 1)
    })

    it('uses a filter', async function () {
    it('uses a filter / where clause', async function () {
      // eslint-disable-next-line @typescript-eslint/explicit-function-return-type
      const assertResults = (results: Array<Record<string, unknown>>) => {
        assert.equal(results.length, 1)
        assert.equal(results[0].id, 2)
      }

      const uri = await createTestDB()
      const con = await lancedb.connect(uri)
      const table = await con.openTable('vectors')
      const results = await table.search([0.1, 0.1]).filter('id == 2').execute()
      assert.equal(results.length, 1)
      assert.equal(results[0].id, 2)
      let results = await table.search([0.1, 0.1]).filter('id == 2').execute()
      assertResults(results)
      results = await table.search([0.1, 0.1]).where('id == 2').execute()
      assertResults(results)
    })

    it('select only a subset of columns', async function () {
@@ -103,9 +110,7 @@ describe('LanceDB client', function () {
      const tableName = `vectors_${Math.floor(Math.random() * 100)}`
      const table = await con.createTable(tableName, data)
      assert.equal(table.name, tableName)

      const results = await table.search([0.1, 0.3]).execute()
      assert.equal(results.length, 2)
      assert.equal(await table.countRows(), 2)
    })

    it('appends records to an existing table ', async function () {
@@ -118,16 +123,14 @@ describe('LanceDB client', function () {
      ]

      const table = await con.createTable('vectors', data)
      const results = await table.search([0.1, 0.3]).execute()
      assert.equal(results.length, 2)
      assert.equal(await table.countRows(), 2)

      const dataAdd = [
        { id: 3, vector: [2.1, 2.2], price: 10, name: 'c' },
        { id: 4, vector: [3.1, 3.2], price: 50, name: 'd' }
      ]
      await table.add(dataAdd)
      const resultsAdd = await table.search([0.1, 0.3]).execute()
      assert.equal(resultsAdd.length, 4)
      assert.equal(await table.countRows(), 4)
    })

    it('overwrite all records in a table', async function () {
@@ -135,16 +138,25 @@ describe('LanceDB client', function () {
      const con = await lancedb.connect(uri)

      const table = await con.openTable('vectors')
      const results = await table.search([0.1, 0.3]).execute()
      assert.equal(results.length, 2)
      assert.equal(await table.countRows(), 2)

      const dataOver = [
        { vector: [2.1, 2.2], price: 10, name: 'foo' },
        { vector: [3.1, 3.2], price: 50, name: 'bar' }
      ]
      await table.overwrite(dataOver)
      const resultsAdd = await table.search([0.1, 0.3]).execute()
      assert.equal(resultsAdd.length, 2)
      assert.equal(await table.countRows(), 2)
    })

    it('can delete records from a table', async function () {
      const uri = await createTestDB()
      const con = await lancedb.connect(uri)

      const table = await con.openTable('vectors')
      assert.equal(await table.countRows(), 2)

      await table.delete('price = 10')
      assert.equal(await table.countRows(), 1)
    })
  })
@@ -223,3 +235,22 @@ async function createTestDB (numDimensions: number = 2, numRows: number = 2): Pr
  await con.createTable('vectors', data)
  return dir
}

describe('Drop table', function () {
  it('drop a table', async function () {
    const dir = await track().mkdir('lancejs')
    const con = await lancedb.connect(dir)

    const data = [
      { price: 10, name: 'foo', vector: [1, 2, 3] },
      { price: 50, name: 'bar', vector: [4, 5, 6] }
    ]
    await con.createTable('t1', data)
    await con.createTable('t2', data)

    assert.deepEqual(await con.tableNames(), ['t1', 't2'])

    await con.dropTable('t1')
    assert.deepEqual(await con.tableNames(), ['t2'])
  })
})
@@ -13,7 +13,8 @@
from __future__ import annotations

import pandas as pd
from .exceptions import MissingValueError, MissingColumnError

from .exceptions import MissingColumnError, MissingValueError


def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
@@ -42,34 +43,38 @@ def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
    paragraphs, messages, etc.

    >>> contextualize(data).window(3).stride(1).text_col('token').to_df()
    token document_id
    0 The quick brown 1
    1 quick brown fox 1
    2 brown fox jumped 1
    3 fox jumped over 1
    4 jumped over the 1
    5 over the lazy 1
    6 the lazy dog 1
    7 lazy dog I 1
    8 dog I love 1
    >>> contextualize(data).window(7).stride(1).text_col('token').to_df()
    token document_id
    0 The quick brown 1
    1 quick brown fox 1
    2 brown fox jumped 1
    3 fox jumped over 1
    4 jumped over the 1
    5 over the lazy 1
    6 the lazy dog 1
    7 lazy dog I 1
    8 dog I love 1
    9 I love sandwiches 2
    10 love sandwiches 2
    >>> contextualize(data).window(7).stride(1).min_window_size(7).text_col('token').to_df()
    token document_id
    0 The quick brown fox jumped over the 1
    1 quick brown fox jumped over the lazy 1
    2 brown fox jumped over the lazy dog 1
    3 fox jumped over the lazy dog I 1
    4 jumped over the lazy dog I love 1
    5 over the lazy dog I love sandwiches 1

    ``stride`` determines how many rows to skip between each window start. This can
    be used to reduce the total number of windows generated.

    >>> contextualize(data).window(4).stride(2).text_col('token').to_df()
    token document_id
    0 The quick brown fox 1
    2 brown fox jumped over 1
    4 jumped over the lazy 1
    6 the lazy dog I 1
    token document_id
    0 The quick brown fox 1
    2 brown fox jumped over 1
    4 jumped over the lazy 1
    6 the lazy dog I 1
    8 dog I love sandwiches 1
    10 love sandwiches 2

    ``groupby`` determines how to group the rows. For example, we would like to have
    context windows that don't cross document boundaries. In this case, we can
@@ -80,6 +85,25 @@ def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
    0 The quick brown fox 1
    2 brown fox jumped over 1
    4 jumped over the lazy 1
    6 the lazy dog 1
    9 I love sandwiches 2

    ``min_window_size`` determines the minimum size of the context windows that are
    generated. This can be used to trim the last few context windows which have size
    less than ``min_window_size``. By default, context windows of size 1 are skipped.

    >>> contextualize(data).window(6).stride(3).text_col('token').groupby('document_id').to_df()
    token document_id
    0 The quick brown fox jumped over 1
    3 fox jumped over the lazy dog 1
    6 the lazy dog 1
    9 I love sandwiches 2

    >>> contextualize(data).window(6).stride(3).min_window_size(4).text_col('token').groupby('document_id').to_df()
    token document_id
    0 The quick brown fox jumped over 1
    3 fox jumped over the lazy dog 1

    """
    return Contextualizer(raw_df)
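
A minimal usage sketch of the API documented above, assuming the same token/document_id frame that the doctests and the new python/tests/test_context.py use (the variable names here are illustrative only):

    import pandas as pd
    from lancedb.context import contextualize

    # Two "documents" worth of tokens, mirroring the doctest data.
    data = pd.DataFrame(
        {
            "token": ["The", "quick", "brown", "fox", "jumped", "over",
                      "the", "lazy", "dog", "I", "love", "sandwiches"],
            "document_id": [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2],
        }
    )

    # Windows of 6 tokens, starting every 3 rows, never crossing documents,
    # and dropping trailing windows shorter than 4 tokens.
    windows = (
        contextualize(data)
        .window(6)
        .stride(3)
        .min_window_size(4)
        .text_col("token")
        .groupby("document_id")
        .to_df()
    )
    print(windows["token"].to_list())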

@@ -92,6 +116,7 @@ class Contextualizer:
        self._groupby = None
        self._stride = None
        self._window = None
        self._min_window_size = 2
        self._raw_df = raw_df

    def window(self, window: int) -> Contextualizer:
@@ -139,6 +164,17 @@ class Contextualizer:
        self._text_col = text_col
        return self

    def min_window_size(self, min_window_size: int) -> Contextualizer:
        """Set the (optional) minimum window size for the context window.

        Parameters
        ----------
        min_window_size: int
            The minimum window size.
        """
        self._min_window_size = min_window_size
        return self

    def to_df(self) -> pd.DataFrame:
        """Create the context windows and return a DataFrame."""

@@ -159,12 +195,19 @@ class Contextualizer:

        def process_group(grp):
            # For each group, create the text rolling window
            # with values of size >= min_window_size
            text = grp[self._text_col].values
            contexts = grp.iloc[: -self._window : self._stride, :].copy()
            contexts[self._text_col] = [
                " ".join(text[start_i : start_i + self._window])
                for start_i in range(0, len(grp) - self._window, self._stride)
            contexts = grp.iloc[:: self._stride, :].copy()
            windows = [
                " ".join(text[start_i : min(start_i + self._window, len(grp))])
                for start_i in range(0, len(grp), self._stride)
                if start_i + self._window <= len(grp)
                or len(grp) - start_i >= self._min_window_size
            ]
            # if the last few rows were dropped, trim the frame to match
            if len(windows) < len(contexts):
                contexts = contexts.iloc[: len(windows)]
            contexts[self._text_col] = windows
            return contexts

        if self._groupby is None:

@@ -13,16 +13,16 @@
from __future__ import annotations

import functools
import os
from pathlib import Path
import os

import pyarrow as pa
from pyarrow import fs

from .common import DATA, URI
from .table import LanceTable
from .util import get_uri_scheme, get_uri_location
from .util import get_uri_location, get_uri_scheme


class LanceDBConnection:
@@ -43,7 +43,7 @@ class LanceDBConnection:
    LanceTable(my_table)
    >>> db.create_table("another_table", data=[{"vector": [0.4, 0.4], "b": 6}])
    LanceTable(another_table)
    >>> db.table_names()
    >>> sorted(db.table_names())
    ['another_table', 'my_table']
    >>> len(db)
    2
@@ -56,7 +56,16 @@ class LanceDBConnection:
    """

    def __init__(self, uri: URI):
        is_local = isinstance(uri, Path) or get_uri_scheme(uri) == "file"
        if not isinstance(uri, Path):
            scheme = get_uri_scheme(uri)
        is_local = isinstance(uri, Path) or scheme == "file"
        # managed LanceDB remote uses a scheme like lancedb+[http|grpc|...]://
        self._is_managed_remote = not is_local and scheme.startswith("lancedb")
        if self._is_managed_remote:
            if len(scheme.split("+")) != 2:
                raise ValueError(
                    f"Invalid LanceDB URI: {uri}, expected uri to have scheme like lancedb+<flavor>://..."
                )
        if is_local:
            if isinstance(uri, str):
                uri = Path(uri)
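
To make the URI convention concrete, here is a small, self-contained sketch (not the library's own helper) of how a managed-remote URI of the form lancedb+<flavor>://host:port can be split into its flavor and endpoint, which is essentially what the connection and REST client code in this diff rely on:

    import urllib.parse

    def split_managed_uri(uri: str) -> tuple[str, str]:
        """Return (flavor, endpoint) for a lancedb+<flavor>:// URI."""
        parsed = urllib.parse.urlparse(uri)
        if not parsed.scheme.startswith("lancedb+"):
            raise ValueError(f"not a managed LanceDB URI: {uri}")
        flavor = parsed.scheme.split("+", 1)[1]            # e.g. "http" or "grpc"
        endpoint = f"{flavor}://{parsed.hostname}:{parsed.port}"
        return flavor, endpoint

    # Example with a hypothetical endpoint:
    # split_managed_uri("lancedb+http://localhost:10024") -> ("http", "http://localhost:10024")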
@@ -64,10 +73,49 @@
            Path(uri).mkdir(parents=True, exist_ok=True)
        self._uri = str(uri)

        self._entered = False

    @property
    def uri(self) -> str:
        return self._uri

    @functools.cached_property
    def is_managed_remote(self) -> bool:
        return self._is_managed_remote

    @functools.cached_property
    def remote_flavor(self) -> str:
        if not self.is_managed_remote:
            raise ValueError(
                "Not a managed remote LanceDB, there should be no server flavor"
            )
        return get_uri_scheme(self.uri).split("+")[1]

    @functools.cached_property
    def _client(self) -> "lancedb.remote.LanceDBClient":
        if not self.is_managed_remote:
            raise ValueError("Not a managed remote LanceDB, there should be no client")

        # don't import unless we are really using remote
        from lancedb.remote.client import RestfulLanceDBClient

        if self.remote_flavor == "http":
            return RestfulLanceDBClient(self._uri)

        raise ValueError("Unsupported remote flavor: " + self.remote_flavor)

    async def close(self):
        if self._entered:
            raise ValueError("Cannot re-enter the same LanceDBConnection twice")
        self._entered = True
        await self._client.close()

    async def __aenter__(self) -> LanceDBConnection:
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        await self.close()
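
Because __aenter__ and __aexit__ are defined above, a managed-remote connection can be closed deterministically with async with. A minimal sketch under the assumption that nothing is queried (the endpoint is a placeholder; the REST client and its HTTP session are created lazily and closed by __aexit__ -> close() when the block ends):

    import asyncio
    from lancedb.db import LanceDBConnection

    async def main() -> None:
        async with LanceDBConnection("lancedb+http://localhost:10024") as db:
            # remote_flavor is parsed from the lancedb+<flavor>:// scheme
            print(db.remote_flavor)   # expected: "http"

    asyncio.run(main())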

    def table_names(self) -> list[str]:
        """Get the names of all tables in the database.

@@ -11,7 +11,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from typing import Literal

import asyncio
from typing import Awaitable, Literal

import numpy as np
import pandas as pd
@@ -168,8 +170,28 @@ class LanceQueryBuilder:
        and also the "score" column, which is the distance between the query
        vector and the returned vector.
        """
        return self.to_arrow().to_pandas()

    def to_arrow(self) -> pa.Table:
        """
        Execute the query and return the results as an Arrow Table.
        In addition to the selected columns, LanceDB also returns a vector
        and also the "score" column, which is the distance between the query
        vector and the returned vector.
        """
        if self._table._conn.is_managed_remote:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                loop = asyncio.get_event_loop()
            result = self._table._conn._client.query(
                self._table.name, self.to_remote_query()
            )
            return loop.run_until_complete(result).to_arrow()

        ds = self._table.to_lance()
        tbl = ds.to_table(
        return ds.to_table(
            columns=self._columns,
            filter=self._where,
            nearest={
@@ -181,7 +203,20 @@
                "refine_factor": self._refine_factor,
            },
        )
        return tbl.to_pandas()

    def to_remote_query(self) -> "VectorQuery":
        # don't import unless we are connecting to remote
        from lancedb.remote.client import VectorQuery

        return VectorQuery(
            vector=self._query.tolist(),
            filter=self._where,
            k=self._limit,
            _metric=self._metric,
            columns=self._columns,
            nprobes=self._nprobes,
            refine_factor=self._refine_factor,
        )


class LanceFtsQueryBuilder(LanceQueryBuilder):

python/lancedb/remote/__init__.py (new file, 61 lines)
@@ -0,0 +1,61 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
from typing import List, Optional

import attr
import pandas as pd
import pyarrow as pa
from pydantic import BaseModel

__all__ = ["LanceDBClient", "VectorQuery", "VectorQueryResult"]


class VectorQuery(BaseModel):
    # vector to search for
    vector: List[float]

    # sql filter to refine the query with
    filter: Optional[str] = None

    # top k results to return
    k: int

    # distance metric
    _metric: str = "L2"

    # which columns to return in the results
    columns: Optional[List[str]] = None

    # optional query parameters for tuning the results,
    # e.g. `{"nprobes": "10", "refine_factor": "10"}`
    nprobes: int = 10

    refine_factor: Optional[int] = None


@attr.define
class VectorQueryResult:
    # for now the response is directly serialized into an Arrow table
    tbl: pa.Table

    def to_arrow(self) -> pa.Table:
        return self.tbl


class LanceDBClient(abc.ABC):
    @abc.abstractmethod
    def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
        """Query the LanceDB server for the given table and query."""
        pass
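
As a concrete illustration of the request model above, the sketch below builds a VectorQuery and dumps the JSON body that the REST client posts. The field values are illustrative only; dict(exclude_none=True) matches how the client serializes the request:

    from lancedb.remote import VectorQuery

    q = VectorQuery(
        vector=[0.1, 0.2, 0.3, 0.4],   # query embedding (made-up values)
        k=5,                           # top-k results to return
        filter="id > 100",             # optional SQL filter
        columns=["id", "vector"],      # columns to include in the response
        nprobes=10,
    )
    # The HTTP client posts this as the JSON request body:
    print(q.dict(exclude_none=True))
    # -> {'vector': [0.1, 0.2, 0.3, 0.4], 'filter': 'id > 100', 'k': 5,
    #     'columns': ['id', 'vector'], 'nprobes': 10}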

python/lancedb/remote/client.py (new file, 79 lines)
@@ -0,0 +1,79 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import functools
import urllib.parse

import aiohttp
import attr
import pyarrow as pa

from lancedb.remote import VectorQuery, VectorQueryResult
from lancedb.remote.errors import LanceDBClientError


def _check_not_closed(f):
    @functools.wraps(f)
    def wrapped(self, *args, **kwargs):
        if self.closed:
            raise ValueError("Connection is closed")
        return f(self, *args, **kwargs)

    return wrapped


@attr.define(slots=False)
class RestfulLanceDBClient:
    url: str
    closed: bool = attr.field(default=False, init=False)

    @functools.cached_property
    def session(self) -> aiohttp.ClientSession:
        parsed = urllib.parse.urlparse(self.url)
        scheme = parsed.scheme
        if not scheme.startswith("lancedb"):
            raise ValueError(
                f"Invalid scheme: {scheme}, must be like lancedb+<flavor>://"
            )
        flavor = scheme.split("+")[1]
        url = f"{flavor}://{parsed.hostname}:{parsed.port}"
        return aiohttp.ClientSession(url)

    async def close(self):
        await self.session.close()
        self.closed = True

    @_check_not_closed
    async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
        async with self.session.post(
            f"/table/{table_name}/", json=query.dict(exclude_none=True)
        ) as resp:
            resp: aiohttp.ClientResponse = resp
            if 400 <= resp.status < 500:
                raise LanceDBClientError(
                    f"Bad Request: {resp.status}, error: {await resp.text()}"
                )
            if 500 <= resp.status < 600:
                raise LanceDBClientError(
                    f"Internal Server Error: {resp.status}, error: {await resp.text()}"
                )
            if resp.status != 200:
                raise LanceDBClientError(
                    f"Unknown Error: {resp.status}, error: {await resp.text()}"
                )

            resp_body = await resp.read()
            with pa.ipc.open_file(pa.BufferReader(resp_body)) as reader:
                tbl = reader.read_all()
            return VectorQueryResult(tbl)
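
Putting the client together with the VectorQuery model, here is a rough asyncio sketch of a single round trip. The endpoint, table name, and vector length are placeholders, and a server speaking this REST protocol has to be listening for the call to return anything:

    import asyncio
    import numpy as np

    from lancedb.remote.client import RestfulLanceDBClient, VectorQuery

    async def main() -> None:
        client = RestfulLanceDBClient("lancedb+http://localhost:8111")  # placeholder endpoint
        try:
            result = await client.query(
                "test_table",   # placeholder table name
                VectorQuery(vector=np.random.rand(128).tolist(), k=10, columns=["id", "vector"]),
            )
            # The Arrow IPC body decoded by query() can be converted for inspection.
            print(result.to_arrow().to_pandas().head())
        finally:
            await client.close()

    asyncio.run(main())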

python/lancedb/remote/errors.py (new file, 16 lines)
@@ -0,0 +1,16 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class LanceDBClientError(RuntimeError):
    pass

@@ -14,7 +14,6 @@
from __future__ import annotations

import os
import shutil
from functools import cached_property
from typing import List, Union

@@ -27,7 +26,6 @@ from lance.vector import vec_to_table

from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .query import LanceFtsQueryBuilder, LanceQueryBuilder
from .util import get_uri_scheme


def _sanitize_data(data, schema):
@@ -294,6 +292,34 @@ class LanceTable:
        lance.write_dataset(data, tbl._dataset_uri, mode=mode)
        return tbl

    def delete(self, where: str):
        """Delete rows from the table.

        Parameters
        ----------
        where: str
            The SQL where clause to use when deleting rows.

        Examples
        --------
        >>> import lancedb
        >>> import pandas as pd
        >>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
        >>> db = lancedb.connect("./.lancedb")
        >>> table = db.create_table("my_table", data)
        >>> table.to_pandas()
           x      vector
        0  1  [1.0, 2.0]
        1  2  [3.0, 4.0]
        2  3  [5.0, 6.0]
        >>> table.delete("x = 2")
        >>> table.to_pandas()
           x      vector
        0  1  [1.0, 2.0]
        1  3  [5.0, 6.0]
        """
        self._dataset.delete(where)


def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
    """Ensure that the table has the expected schema.

@@ -1,7 +1,7 @@
[project]
name = "lancedb"
version = "0.1.8"
dependencies = ["pylance>=0.4.20", "ratelimiter", "retry", "tqdm"]
version = "0.1.9"
dependencies = ["pylance~=0.5.0", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr"]
description = "lancedb"
authors = [
    { name = "LanceDB Devs", email = "dev@lancedb.com" },
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"

[project.optional-dependencies]
tests = [
    "pytest", "pytest-mock", "doctest"
    "pytest", "pytest-mock", "pytest-asyncio"
]
dev = [
    "ruff", "pre-commit", "black"

python/tests/test_context.py (new file, 77 lines)
@@ -0,0 +1,77 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pandas as pd
import pytest

from lancedb.context import contextualize


@pytest.fixture
def raw_df() -> pd.DataFrame:
    return pd.DataFrame(
        {
            "token": [
                "The",
                "quick",
                "brown",
                "fox",
                "jumped",
                "over",
                "the",
                "lazy",
                "dog",
                "I",
                "love",
                "sandwiches",
            ],
            "document_id": [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2],
        }
    )


def test_contextualizer(raw_df: pd.DataFrame):
    result = (
        contextualize(raw_df)
        .window(6)
        .stride(3)
        .text_col("token")
        .groupby("document_id")
        .to_df()["token"]
        .to_list()
    )

    assert result == [
        "The quick brown fox jumped over",
        "fox jumped over the lazy dog",
        "the lazy dog",
        "I love sandwiches",
    ]


def test_contextualizer_with_threshold(raw_df: pd.DataFrame):
    result = (
        contextualize(raw_df)
        .window(6)
        .stride(3)
        .text_col("token")
        .groupby("document_id")
        .min_window_size(4)
        .to_df()["token"]
        .to_list()
    )

    assert result == [
        "The quick brown fox jumped over",
        "fox jumped over the lazy dog",
    ]

python/tests/test_e2e_remote_db.py (new file, 27 lines)
@@ -0,0 +1,27 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

from lancedb import LanceDBConnection

# TODO: setup integ test mark and script


@pytest.mark.skip(reason="Need to set up a local server")
def test_against_local_server():
    conn = LanceDBConnection("lancedb+http://localhost:10024")
    table = conn.open_table("sift1m_ivf1024_pq16")
    df = table.search(np.random.rand(128)).to_df()
    assert len(df) == 10
@@ -14,6 +14,7 @@ import sys

import numpy as np
import pyarrow as pa

from lancedb.embeddings import with_embeddings


@@ -13,13 +13,13 @@
import os
import random

import lancedb.fts
import numpy as np
import pandas as pd
import pytest
import tantivy

import lancedb as ldb
import lancedb.fts


@pytest.fixture

@@ -12,6 +12,7 @@
# limitations under the License.

import os

import pytest

import lancedb

@@ -17,12 +17,15 @@ import pandas as pd
import pandas.testing as tm
import pyarrow as pa
import pytest

from lancedb.db import LanceDBConnection
from lancedb.query import LanceQueryBuilder


class MockTable:
    def __init__(self, tmp_path):
        self.uri = tmp_path
        self._conn = LanceDBConnection("/tmp/lance/")

    def to_lance(self):
        return lance.dataset(self.uri)

python/tests/test_remote_client.py (new file, 95 lines)
@@ -0,0 +1,95 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import attr
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest
from aiohttp import web

from lancedb.remote.client import RestfulLanceDBClient, VectorQuery


@attr.define
class MockLanceDBServer:
    runner: web.AppRunner = attr.field(init=False)
    site: web.TCPSite = attr.field(init=False)

    async def query_handler(self, request: web.Request) -> web.Response:
        table_name = request.match_info["table_name"]
        assert table_name == "test_table"

        request_json = await request.json()
        # TODO: do some matching

        vecs = pd.Series([np.random.rand(128) for x in range(10)], name="vector")
        ids = pd.Series(range(10), name="id")
        df = pd.DataFrame([vecs, ids]).T

        batch = pa.RecordBatch.from_pandas(
            df,
            schema=pa.schema(
                [
                    pa.field("vector", pa.list_(pa.float32(), 128)),
                    pa.field("id", pa.int64()),
                ]
            ),
        )

        sink = pa.BufferOutputStream()
        with pa.ipc.new_file(sink, batch.schema) as writer:
            writer.write_batch(batch)

        return web.Response(body=sink.getvalue().to_pybytes())

    async def setup(self):
        app = web.Application()
        app.add_routes([web.post("/table/{table_name}", self.query_handler)])
        self.runner = web.AppRunner(app)
        await self.runner.setup()
        self.site = web.TCPSite(self.runner, "localhost", 8111)

    async def start(self):
        await self.site.start()

    async def stop(self):
        await self.runner.cleanup()


@pytest.mark.skip(reason="flaky somehow, fix later")
@pytest.mark.asyncio
async def test_e2e_with_mock_server():
    mock_server = MockLanceDBServer()
    await mock_server.setup()
    await mock_server.start()

    try:
        client = RestfulLanceDBClient("lancedb+http://localhost:8111")
        df = (
            await client.query(
                "test_table",
                VectorQuery(
                    vector=np.random.rand(128).tolist(),
                    k=10,
                    _metric="L2",
                    columns=["id", "vector"],
                ),
            )
        ).to_df()

        assert "vector" in df.columns
        assert "id" in df.columns
    finally:
        # make sure we don't leak resources
        await mock_server.stop()

python/tests/test_remote_db.py (new file, 35 lines)
@@ -0,0 +1,35 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pyarrow as pa

from lancedb.db import LanceDBConnection
from lancedb.remote.client import VectorQuery, VectorQueryResult


class FakeLanceDBClient:
    async def close(self):
        pass

    async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
        assert table_name == "test"
        t = pa.schema([]).empty_table()
        return VectorQueryResult(t)


def test_remote_db():
    conn = LanceDBConnection("lancedb+http://client-will-be-injected")
    setattr(conn, "_client", FakeLanceDBClient())

    table = conn["test"]
    table.search([1.0, 2.0]).to_df()
@@ -11,11 +11,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import functools
from pathlib import Path

import pandas as pd
import pyarrow as pa
import pytest

from lancedb.table import LanceTable


@@ -23,6 +25,10 @@ class MockDB:
    def __init__(self, uri: Path):
        self.uri = uri

    @functools.cached_property
    def is_managed_remote(self) -> bool:
        return False


@pytest.fixture
def db(tmp_path) -> MockDB:

@@ -1,6 +1,6 @@
[package]
name = "vectordb-node"
version = "0.1.0"
version = "0.1.9"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
edition = "2018"
@@ -10,12 +10,12 @@ exclude = ["index.node"]
crate-type = ["cdylib"]

[dependencies]
arrow-array = "37.0"
arrow-ipc = "37.0"
arrow-schema = "37.0"
arrow-array = "40.0"
arrow-ipc = "40.0"
arrow-schema = "40.0"
once_cell = "1"
futures = "0.3"
lance = "0.4.17"
lance = "0.5.0"
vectordb = { path = "../../vectordb" }
tokio = { version = "1.23", features = ["rt-multi-thread"] }
neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }

@@ -97,6 +97,7 @@ fn get_index_params_builder(
        let ivf_params = IvfBuildParams {
            num_partitions: np,
            max_iters,
            centroids: None,
        };
        index_builder.ivf_params(ivf_params)
    });

@@ -122,6 +122,27 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
    Ok(promise)
}

fn database_drop_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let db = cx
        .this()
        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
    let table_name = cx.argument::<JsString>(0)?.value(&mut cx);

    let rt = runtime(&mut cx)?;
    let channel = cx.channel();
    let database = db.database.clone();

    let (deferred, promise) = cx.promise();
    rt.spawn(async move {
        let result = database.drop_table(&table_name).await;
        deferred.settle_with(&channel, move |mut cx| {
            result.or_else(|err| cx.throw_error(err.to_string()))?;
            Ok(cx.null())
        });
    });
    Ok(promise)
}

fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
    let query_obj = cx.argument::<JsObject>(0)?;
@@ -264,14 +285,56 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
    Ok(promise)
}

fn table_count_rows(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
    let rt = runtime(&mut cx)?;
    let channel = cx.channel();

    let (deferred, promise) = cx.promise();
    let table = js_table.table.clone();

    rt.block_on(async move {
        let num_rows_result = table.lock().unwrap().count_rows().await;

        deferred.settle_with(&channel, move |mut cx| {
            let num_rows = num_rows_result.or_else(|err| cx.throw_error(err.to_string()))?;
            Ok(cx.number(num_rows as f64))
        });
    });
    Ok(promise)
}

fn table_delete(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
    let rt = runtime(&mut cx)?;
    let channel = cx.channel();

    let (deferred, promise) = cx.promise();
    let table = js_table.table.clone();

    let predicate = cx.argument::<JsString>(0)?.value(&mut cx);

    let delete_result = rt.block_on(async move { table.lock().unwrap().delete(&predicate).await });

    deferred.settle_with(&channel, move |mut cx| {
        delete_result.or_else(|err| cx.throw_error(err.to_string()))?;
        Ok(cx.undefined())
    });

    Ok(promise)
}

#[neon::main]
fn main(mut cx: ModuleContext) -> NeonResult<()> {
    cx.export_function("databaseNew", database_new)?;
    cx.export_function("databaseTableNames", database_table_names)?;
    cx.export_function("databaseOpenTable", database_open_table)?;
    cx.export_function("databaseDropTable", database_drop_table)?;
    cx.export_function("tableSearch", table_search)?;
    cx.export_function("tableCreate", table_create)?;
    cx.export_function("tableAdd", table_add)?;
    cx.export_function("tableCountRows", table_count_rows)?;
    cx.export_function("tableDelete", table_delete)?;
    cx.export_function(
        "tableCreateVectorIndex",
        index::vector::table_create_vector_index,

@@ -1,6 +1,6 @@
[package]
name = "vectordb"
version = "0.0.1"
version = "0.1.9"
edition = "2021"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
@@ -9,12 +9,12 @@ repository = "https://github.com/lancedb/lancedb"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow-array = "37.0"
arrow-data = "37.0"
arrow-schema = "37.0"
object_store = "0.5.6"
arrow-array = "40.0"
arrow-data = "40.0"
arrow-schema = "40.0"
object_store = "0.6.1"
snafu = "0.7.4"
lance = "0.4.17"
lance = "0.5.0"
tokio = { version = "1.23", features = ["rt-multi-thread"] }

[dev-dependencies]

@@ -1,4 +1,4 @@
// Copyright 2023 Lance Developers.
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ impl Database {
    ///
    /// * A [Database] object.
    pub async fn connect(uri: &str) -> Result<Database> {
        let object_store = ObjectStore::new(uri).await?;
        let (object_store, _) = ObjectStore::from_uri(uri).await?;
        if object_store.is_local() {
            Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
        }
@@ -69,7 +69,7 @@ impl Database {
    pub async fn table_names(&self) -> Result<Vec<String>> {
        let f = self
            .object_store
            .read_dir("/")
            .read_dir(self.uri.as_str())
            .await?
            .iter()
            .map(|fname| Path::new(fname))
@@ -109,6 +109,16 @@ impl Database {
    pub async fn open_table(&self, name: &str) -> Result<Table> {
        Table::open(&self.uri, name).await
    }

    /// Drop a table in the database.
    ///
    /// # Arguments
    /// * `name` - The name of the table.
    pub async fn drop_table(&self, name: &str) -> Result<()> {
        let dir_name = format!("{}/{}.{}", self.uri, name, LANCE_EXTENSION);
        self.object_store.remove_dir_all(dir_name).await?;
        Ok(())
    }
}

#[cfg(test)]
@@ -146,4 +156,17 @@ mod tests {
    async fn test_connect_s3() {
        // let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
    }

    #[tokio::test]
    async fn drop_table() {
        let tmp_dir = tempdir().unwrap();
        create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();

        let uri = tmp_dir.path().to_str().unwrap();
        let db = Database::connect(uri).await.unwrap();
        db.drop_table("table1").await.unwrap();

        let tables = db.table_names().await.unwrap();
        assert_eq!(tables.len(), 0);
    }
}

@@ -20,6 +20,8 @@ pub trait VectorIndexBuilder {
    fn get_column(&self) -> Option<String>;
    fn get_index_name(&self) -> Option<String>;
    fn build(&self) -> VectorIndexParams;

    fn get_replace(&self) -> bool;
}

pub struct IvfPQIndexBuilder {
@@ -28,6 +30,7 @@ pub struct IvfPQIndexBuilder {
    metric_type: Option<MetricType>,
    ivf_params: Option<IvfBuildParams>,
    pq_params: Option<PQBuildParams>,
    replace: bool,
}

impl IvfPQIndexBuilder {
@@ -38,6 +41,7 @@ impl IvfPQIndexBuilder {
            metric_type: None,
            ivf_params: None,
            pq_params: None,
            replace: true,
        }
    }
}
@@ -67,6 +71,11 @@ impl IvfPQIndexBuilder {
        self.pq_params = Some(pq_params);
        self
    }

    pub fn replace(&mut self, replace: bool) -> &mut IvfPQIndexBuilder {
        self.replace = replace;
        self
    }
}

impl VectorIndexBuilder for IvfPQIndexBuilder {
@@ -84,6 +93,10 @@ impl VectorIndexBuilder for IvfPQIndexBuilder {

        VectorIndexParams::with_ivf_pq_params(pq_params.metric_type, ivf_params, pq_params)
    }

    fn get_replace(&self) -> bool {
        self.replace
    }
}

#[cfg(test)]

@@ -17,3 +17,5 @@ pub mod error;
pub mod index;
pub mod query;
pub mod table;

pub use database::Database;

@@ -74,9 +74,7 @@ impl Query {
        )?;
        scanner.nprobs(self.nprobes);
        scanner.use_index(self.use_index);
        self.select
            .as_ref()
            .map(|p| scanner.project(p.as_slice()));
        self.select.as_ref().map(|p| scanner.project(p.as_slice()));
        self.filter.as_ref().map(|f| scanner.filter(f));
        self.refine_factor.map(|rf| scanner.refine(rf));
        self.metric_type.map(|mt| scanner.distance_metric(mt));
@@ -177,7 +175,7 @@ mod tests {
    #[tokio::test]
    async fn test_setters_getters() {
        let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
        let ds = Dataset::write(&mut batches, ":memory:", None)
        let ds = Dataset::write(&mut batches, "memory://foo", None)
            .await
            .unwrap();

@@ -206,7 +204,7 @@ mod tests {
    #[tokio::test]
    async fn test_execute() {
        let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
        let ds = Dataset::write(&mut batches, ":memory:", None)
        let ds = Dataset::write(&mut batches, "memory://foo", None)
            .await
            .unwrap();


@@ -28,7 +28,7 @@ pub const VECTOR_COLUMN_NAME: &str = "vector";
pub const LANCE_FILE_EXTENSION: &str = "lance";

/// A table in a LanceDB database.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct Table {
    name: String,
    uri: String,
@@ -130,6 +130,7 @@ impl Table {
                IndexType::Vector,
                index_builder.get_index_name(),
                &index_builder.build(),
                index_builder.get_replace(),
            )
            .await?;
        self.dataset = Arc::new(dataset);
@@ -174,6 +175,27 @@ impl Table {
    pub async fn count_rows(&self) -> Result<usize> {
        Ok(self.dataset.count_rows().await?)
    }

    /// Merge new data into this table.
    pub async fn merge(
        &mut self,
        mut batches: Box<dyn RecordBatchReader>,
        left_on: &str,
        right_on: &str,
    ) -> Result<()> {
        let mut dataset = self.dataset.as_ref().clone();
        dataset.merge(&mut batches, left_on, right_on).await?;
        self.dataset = Arc::new(dataset);
        Ok(())
    }

    /// Delete rows from the table
    pub async fn delete(&mut self, predicate: &str) -> Result<()> {
        let mut dataset = self.dataset.as_ref().clone();
        dataset.delete(predicate).await?;
        self.dataset = Arc::new(dataset);
        Ok(())
    }
}

#[cfg(test)]
@@ -233,7 +255,7 @@ mod tests {
        let uri = tmp_dir.path().to_str().unwrap();

        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
        let schema = batches.schema().clone();
        let _ = batches.schema().clone();
        Table::create(&uri, "test", batches).await.unwrap();

        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());