Mirror of https://github.com/lancedb/lancedb.git (synced 2025-12-23 21:39:57 +00:00)

Compare commits: python-v0. ... v0.1.10-py
47 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 97364a2514 | |
| | e6c6da6104 | |
| | a5eb665b7d | |
| | e2325c634b | |
| | 507eeae9c8 | |
| | bb3df62dce | |
| | dc7146b2cb | |
| | d701947f0b | |
| | 3c46d7f268 | |
| | 9600a38ff0 | |
| | 148ed82607 | |
| | fc725c99f0 | |
| | a6bdffd75b | |
| | 051c03c3c9 | |
| | 39479dcf8e | |
| | b731a6aed9 | |
| | 0f58bd7af2 | |
| | 01abf82808 | |
| | eb5bcda337 | |
| | 4bc676e26a | |
| | c68c236f17 | |
| | 313e66c4c5 | |
| | e850df56f1 | |
| | 8c5507075c | |
| | 0e4c52b8a6 | |
| | c8bebf4776 | |
| | c14ad91df0 | |
| | ad48242ffb | |
| | 1a9a392e20 | |
| | b489edc576 | |
| | 8708fde3ef | |
| | cc7e54298b | |
| | d1e8a97a2a | |
| | 01dadb0862 | |
| | 0724d41c4b | |
| | cbb56e25ab | |
| | 78de8f5782 | |
| | a6544c2a31 | |
| | 39ed70896a | |
| | ae672df1b7 | |
| | 15c3f42387 | |
| | f65d85efcc | |
| | 6b5c046c3b | |
| | d00f4e51d0 | |
| | fbc44d4243 | |
| | b53eee42ce | |
| | 7e0d6088ca | |
12 .bumpversion.cfg Normal file

@@ -0,0 +1,12 @@
[bumpversion]
current_version = 0.1.10
commit = True
message = Bump version: {current_version} → {new_version}
tag = True
tag_name = v{new_version}

[bumpversion:file:node/package.json]

[bumpversion:file:rust/ffi/node/Cargo.toml]

[bumpversion:file:rust/vectordb/Cargo.toml]
29 .github/workflows/cargo-publish.yml vendored Normal file

@@ -0,0 +1,29 @@
name: Cargo Publish

on:
  release:
    types: [ published ]

env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
  # key, so we set it to make sure it is always consistent.
  CARGO_TERM_COLOR: always

jobs:
  build:
    runs-on: ubuntu-22.04
    timeout-minutes: 30
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/checkout@v3
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Publish the package
        run: |
          cargo publish -p vectordb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
24 .github/workflows/docs.yml vendored

@@ -39,6 +39,28 @@ jobs:
         run: |
           python -m pip install -e .
           python -m pip install -r ../docs/requirements.txt
+      - name: Set up node
+        uses: actions/setup-node@v3
+        with:
+          node-version: ${{ matrix.node-version }}
+          cache: 'npm'
+          cache-dependency-path: node/package-lock.json
+      - uses: Swatinem/rust-cache@v2
+      - name: Install node dependencies
+        working-directory: node
+        run: |
+          sudo apt update
+          sudo apt install -y protobuf-compiler libssl-dev
+      - name: Build node
+        working-directory: node
+        run: |
+          npm ci
+          npm run build
+          npm run tsc
+      - name: Create markdown files
+        working-directory: node
+        run: |
+          npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
       - name: Build docs
         run: |
           PYTHONPATH=. mkdocs build -f docs/mkdocs.yml
@@ -50,4 +72,4 @@ jobs:
         path: "docs/site"
       - name: Deploy to GitHub Pages
         id: deployment
         uses: actions/deploy-pages@v1
93 .github/workflows/docs_test.yml vendored Normal file

@@ -0,0 +1,93 @@
name: Documentation Code Testing

on:
  push:
    branches:
      - main
    paths:
      - docs/**
      - .github/workflows/docs_test.yml
  pull_request:
    paths:
      - docs/**
      - .github/workflows/docs_test.yml

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

env:
  # Disable full debug symbol generation to speed up CI build and keep memory down
  # "1" means line tables only, which is useful for panic tracebacks.
  RUSTFLAGS: "-C debuginfo=1"
  RUST_BACKTRACE: "1"

jobs:
  test-python:
    name: Test doc python code
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-minor-version: [ "11" ]
        os: ["ubuntu-22.04"]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: 3.${{ matrix.python-minor-version }}
          cache: "pip"
          cache-dependency-path: "docs/test/requirements.txt"
      - name: Build Python
        working-directory: docs/test
        run:
          python -m pip install -r requirements.txt
      - name: Create test files
        run: |
          cd docs/test
          python md_testing.py
      - name: Test
        run: |
          cd docs/test/python
          for d in *; do cd "$d"; echo "$d".py; python "$d".py; cd ..; done
  test-node:
    name: Test doc nodejs code
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        node-version: [ "18" ]
        os: ["ubuntu-22.04"]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      - name: Set up Node
        uses: actions/setup-node@v3
        with:
          node-version: ${{ matrix.node-version }}
      - name: Install dependecies needed for ubuntu
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        run: |
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Install node dependencies
        run: |
          cd docs/test
          npm install
      - name: Rust cache
        uses: swatinem/rust-cache@v2
      - name: Install LanceDB
        run: |
          cd docs/test/node_modules/vectordb
          npm ci
          npm run build
          npm run tsc
      - name: Create test files
        run: |
          cd docs/test
          node md_testing.js
      - name: Test
        run: |
          cd docs/test/node
          for d in *; do cd "$d"; echo "$d".js; node "$d".js; cd ..; done
55 .github/workflows/make-release-commit.yml vendored Normal file

@@ -0,0 +1,55 @@
name: Create release commit

on:
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Dry run (create the local commit/tags but do not push it)'
        required: true
        default: "false"
        type: choice
        options:
          - "true"
          - "false"
      part:
        description: 'What kind of release is this?'
        required: true
        default: 'patch'
        type: choice
        options:
          - patch
          - minor
          - major

jobs:
  bump-version:
    runs-on: ubuntu-latest
    steps:
      - name: Check out main
        uses: actions/checkout@v3
        with:
          ref: main
          persist-credentials: false
          fetch-depth: 0
          lfs: true
      - name: Set git configs for bumpversion
        shell: bash
        run: |
          git config user.name 'Lance Release'
          git config user.email 'lance-dev@lancedb.com'
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Bump version, create tag and commit
        run: |
          pip install bump2version
          bumpversion --verbose ${{ inputs.part }}
      - name: Push new version and tag
        if: ${{ inputs.dry_run }} == "false"
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
          branch: main
          tags: true

4 .github/workflows/pypi-publish.yml vendored

@@ -3,12 +3,12 @@ name: PyPI Publish
 on:
   release:
     types: [ published ]
-    tags:
-      - 'python-v*' # Push events that matches the python-make-release action
 
 jobs:
   publish:
     runs-on: ubuntu-latest
+    # Only runs on tags that matches the python-make-release action
+    if: startsWith(github.ref, 'refs/tags/python-v')
     defaults:
       run:
         shell: bash
10 .github/workflows/python.yml vendored

@@ -32,9 +32,11 @@ jobs:
        run: |
          pip install -e .
          pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
-         pip install pytest pytest-mock black
+         pip install pytest pytest-mock black isort
      - name: Black
        run: black --check --diff --no-color --quiet .
+     - name: isort
+       run: isort --check --diff --quiet .
      - name: Run tests
        run: pytest -x -v --durations=30 tests
      - name: doctest
@@ -59,6 +61,8 @@ jobs:
        run: |
          pip install -e .
          pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
-         pip install pytest pytest-mock
+         pip install pytest pytest-mock black
+     - name: Black
+       run: black --check --diff --no-color --quiet .
      - name: Run tests
        run: pytest -x -v --durations=30 tests
67 .github/workflows/rust.yml vendored Normal file

@@ -0,0 +1,67 @@
name: Rust

on:
  push:
    branches:
      - main
  pull_request:
    paths:
      - rust/**
      - .github/workflows/rust.yml

env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
  # key, so we set it to make sure it is always consistent.
  CARGO_TERM_COLOR: always
  # Disable full debug symbol generation to speed up CI build and keep memory down
  # "1" means line tables only, which is useful for panic tracebacks.
  RUSTFLAGS: "-C debuginfo=1"
  RUST_BACKTRACE: "1"

jobs:
  linux:
    timeout-minutes: 30
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: bash
        working-directory: rust
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Build
        run: cargo build --all-features
      - name: Run tests
        run: cargo test --all-features
  macos:
    runs-on: macos-12
    timeout-minutes: 30
    defaults:
      run:
        shell: bash
        working-directory: rust
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      - name: CPU features
        run: sysctl -a | grep cpu
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
      - name: Install dependencies
        run: brew install protobuf
      - name: Build
        run: cargo build --all-features
      - name: Run tests
        run: cargo test --all-features
2 .gitignore vendored

@@ -3,6 +3,7 @@
 *.egg-info
 **/__pycache__
 .DS_Store
+venv
 
 rust/target
 rust/Cargo.lock
@@ -30,3 +31,4 @@ node/examples/**/dist
 ## Rust
 target
 
+Cargo.lock
@@ -8,4 +8,14 @@ repos:
   - repo: https://github.com/psf/black
     rev: 22.12.0
     hooks:
       - id: black
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.0.277
+    hooks:
+      - id: ruff
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: isort (python)
3797 Cargo.lock generated

File diff suppressed because it is too large
@@ -4,3 +4,11 @@ members = [
     "rust/ffi/node"
 ]
 resolver = "2"
+
+[workspace.dependencies]
+lance = "0.5.3"
+arrow-array = "40.0"
+arrow-data = "40.0"
+arrow-schema = "40.0"
+arrow-ipc = "40.0"
+object_store = "0.6.1"
@@ -65,7 +65,7 @@ pip install lancedb
 ```python
 import lancedb
 
-uri = "/tmp/lancedb"
+uri = "data/sample-lancedb"
 db = lancedb.connect(uri)
 table = db.create_table("my_table",
                         data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
@@ -6,11 +6,13 @@ docs_dir: src
 theme:
   name: "material"
   logo: assets/logo.png
+  favicon: assets/logo.png
   features:
     - content.code.copy
     - content.tabs.link
   icon:
     repo: fontawesome/brands/github
+  custom_dir: overrides
 
 plugins:
   - search
@@ -36,6 +38,7 @@ plugins:
 
 markdown_extensions:
   - admonition
+  - footnotes
   - pymdownx.superfences
   - pymdownx.details
   - pymdownx.highlight:
@@ -64,6 +67,7 @@ nav:
     - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
 - References:
     - Vector Search: search.md
+    - SQL filters: sql.md
     - Indexing: ann_indexes.md
 - API references:
     - Python API: python/python.md
176 docs/overrides/partials/header.html Normal file

@@ -0,0 +1,176 @@
<!--
Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
-->

{% set class = "md-header" %}
{% if "navigation.tabs.sticky" in features %}
{% set class = class ~ " md-header--shadow md-header--lifted" %}
{% elif "navigation.tabs" not in features %}
{% set class = class ~ " md-header--shadow" %}
{% endif %}

<!-- Header -->
<header class="{{ class }}" data-md-component="header">
<nav
class="md-header__inner md-grid"
aria-label="{{ lang.t('header') }}"
>

<!-- Link to home -->
<a
href="{{ config.extra.homepage | d(nav.homepage.url, true) | url }}"
title="{{ config.site_name | e }}"
class="md-header__button md-logo"
aria-label="{{ config.site_name }}"
data-md-component="logo"
>
{% include "partials/logo.html" %}
</a>

<!-- Button to open drawer -->
<label class="md-header__button md-icon" for="__drawer">
{% include ".icons/material/menu" ~ ".svg" %}
</label>

<!-- Header title -->
<div class="md-header__title" style="width: auto !important;" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
{{ config.site_name }}
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
{% if page.meta and page.meta.title %}
{{ page.meta.title }}
{% else %}
{{ page.title }}
{% endif %}
</span>
</div>
</div>
</div>

<!-- Color palette -->
{% if config.theme.palette %}
{% if not config.theme.palette is mapping %}
<form class="md-header__option" data-md-component="palette">
{% for option in config.theme.palette %}
{% set scheme = option.scheme | d("default", true) %}
{% set primary = option.primary | d("indigo", true) %}
{% set accent = option.accent | d("indigo", true) %}
<input
class="md-option"
data-md-color-media="{{ option.media }}"
data-md-color-scheme="{{ scheme | replace(' ', '-') }}"
data-md-color-primary="{{ primary | replace(' ', '-') }}"
data-md-color-accent="{{ accent | replace(' ', '-') }}"
{% if option.toggle %}
aria-label="{{ option.toggle.name }}"
{% else %}
aria-hidden="true"
{% endif %}
type="radio"
name="__palette"
id="__palette_{{ loop.index }}"
/>
{% if option.toggle %}
<label
class="md-header__button md-icon"
title="{{ option.toggle.name }}"
for="__palette_{{ loop.index0 or loop.length }}"
hidden
>
{% include ".icons/" ~ option.toggle.icon ~ ".svg" %}
</label>
{% endif %}
{% endfor %}
</form>
{% endif %}
{% endif %}

<!-- Site language selector -->
{% if config.extra.alternate %}
<div class="md-header__option">
<div class="md-select">
{% set icon = config.theme.icon.alternate or "material/translate" %}
<button
class="md-header__button md-icon"
aria-label="{{ lang.t('select.language') }}"
>
{% include ".icons/" ~ icon ~ ".svg" %}
</button>
<div class="md-select__inner">
<ul class="md-select__list">
{% for alt in config.extra.alternate %}
<li class="md-select__item">
<a
href="{{ alt.link | url }}"
hreflang="{{ alt.lang }}"
class="md-select__link"
>
{{ alt.name }}
</a>
</li>
{% endfor %}
</ul>
</div>
</div>
</div>
{% endif %}

<!-- Button to open search modal -->
{% if "material/search" in config.plugins %}
<label class="md-header__button md-icon" for="__search">
{% include ".icons/material/magnify.svg" %}
</label>

<!-- Search interface -->
{% include "partials/search.html" %}
{% endif %}

<div style="margin-left: 10px; margin-right: 5px;">
<a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
</a>
</div>
<div style="margin-left: 5px; margin-right: 5px;">
<a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>
</a>
</div>

<!-- Repository information -->
{% if config.repo_url %}
<div class="md-header__source" style="margin-left: -5px !important;">
{% include "partials/source.html" %}
</div>
{% endif %}
</nav>

<!-- Navigation tabs (sticky) -->
{% if "navigation.tabs.sticky" in features %}
{% if "navigation.tabs" in features %}
{% include "partials/tabs.html" %}
{% endif %}
{% endif %}
</header>
@@ -23,7 +23,7 @@ In the future we will look to automatically create and configure the ANN index.
 
     # Create 10,000 sample vectors
     data = [{"vector": row, "item": f"item {i}"}
-            for i, row in enumerate(np.random.random((10_000, 768)).astype('float32'))]
+            for i, row in enumerate(np.random.random((10_000, 1536)).astype('float32'))]
 
     # Add the vectors to a table
     tbl = db.create_table("my_vectors", data=data)
@@ -41,8 +41,8 @@ In the future we will look to automatically create and configure the ANN index.
     for (let i = 0; i < 10_000; i++) {
       data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
     }
-    const table = await db.createTable('vectors', data)
-    await table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 256, num_sub_vectors: 96 })
+    const table = await db.createTable('my_vectors', data)
+    await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 256, num_sub_vectors: 96 })
     ```
 
 Since `create_index` has a training step, it can take a few minutes to finish for large tables. You can control the index
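The hunk above only updates the Javascript `createIndex` call; for readers following in Python, a minimal sketch of the equivalent flow is shown below. It assumes the `lancedb` and `numpy` packages and reuses the 1536-dimension sample data from the Python tab; the partition and sub-vector values simply mirror the Javascript example rather than being a recommendation.

```python
# Sketch only: build the same IVF_PQ index from the Python API.
import lancedb
import numpy as np

db = lancedb.connect("data/sample-lancedb")
data = [{"vector": row, "item": f"item {i}"}
        for i, row in enumerate(np.random.random((10_000, 1536)).astype("float32"))]
tbl = db.create_table("my_vectors", data=data)

# Same settings as the Javascript createIndex call above.
tbl.create_index(num_partitions=256, num_sub_vectors=96)
```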
@@ -67,18 +67,19 @@ There are a couple of parameters that can be used to fine-tune the search:
   e.g., for 1M vectors divided up into 256 partitions, nprobes should be set to ~20-40.<br/>
   Note: nprobes is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
 - **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
-  A higher number makes search more accurate but also slower. If you find the recall is less than idea, try refine_factor=10 to start.<br/>
+  A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
   e.g., for 1M vectors divided into 256 partitions, if you're looking for top 20, then refine_factor=200 reranks the whole partition.<br/>
   Note: refine_factor is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
 
 === "Python"
     ```python
-    tbl.search(np.random.random((768))) \
+    tbl.search(np.random.random((1536))) \
         .limit(2) \
         .nprobes(20) \
         .refine_factor(10) \
         .to_df()
+    ```
+    ```
     vector  item  score
     0 [0.44949695, 0.8444449, 0.06281311, 0.23338133...  item 1141  103.575333
     1 [0.48587373, 0.269207, 0.15095535, 0.65531915,...  item 3953  108.393867
@@ -86,8 +87,8 @@ There are a couple of parameters that can be used to fine-tune the search:
 
 === "Javascript"
     ```javascript
-    const results = await table
-      .search(Array(768).fill(1.2))
+    const results_1 = await table
+      .search(Array(1536).fill(1.2))
       .limit(2)
       .nprobes(20)
       .refineFactor(10)
@@ -104,14 +105,14 @@ You can further filter the elements returned by a search using a where clause.
 
 === "Python"
     ```python
-    tbl.search(np.random.random((768))).where("item != 'item 1141'").to_df()
+    tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_df()
     ```
 
 === "Javascript"
     ```javascript
-    const results = await table
+    const results_2 = await table
       .search(Array(1536).fill(1.2))
-      .where("item != 'item 1141'")
+      .where("id != '1141'")
       .execute()
     ```
 
@@ -121,7 +122,9 @@ You can select the columns returned by the query using a select clause.
 
 === "Python"
     ```python
-    tbl.search(np.random.random((768))).select(["vector"]).to_df()
+    tbl.search(np.random.random((1536))).select(["vector"]).to_df()
+    ```
+    ```
     vector  score
     0 [0.30928212, 0.022668175, 0.1756372, 0.4911822...  93.971092
     1 [0.2525465, 0.01723831, 0.261568, 0.002007689,...  95.173485
@@ -130,7 +133,7 @@ You can select the columns returned by the query using a select clause.
 
 === "Javascript"
     ```javascript
-    const results = await table
+    const results_3 = await table
       .search(Array(1536).fill(1.2))
       .select(["id"])
       .execute()
@@ -23,7 +23,7 @@ We'll cover the basics of using LanceDB on your local machine in this section.
 === "Python"
     ```python
     import lancedb
-    uri = "~/.lancedb"
+    uri = "data/sample-lancedb"
     db = lancedb.connect(uri)
     ```
 
@@ -35,7 +35,7 @@ We'll cover the basics of using LanceDB on your local machine in this section.
     ```javascript
     const lancedb = require("vectordb");
 
-    const uri = "~./lancedb";
+    const uri = "data/sample-lancedb";
     const db = await lancedb.connect(uri);
     ```
 
@@ -102,7 +102,7 @@ Once created, you can open a table using the following code:
 If you forget the name of your table, you can always get a listing of all table names:
 
 ```javascript
-console.log(db.tableNames());
+console.log(await db.tableNames());
 ```
 
 ## How to add data to a table
@@ -118,7 +118,7 @@ After a table has been created, you can always add more data to it using
 
 === "Javascript"
     ```javascript
-    await tbl.add([vector: [1.3, 1.4], item: "fizz", price: 100.0},
+    await tbl.add([{vector: [1.3, 1.4], item: "fizz", price: 100.0},
                   {vector: [9.5, 56.2], item: "buzz", price: 200.0}])
     ```
 
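The hunks above touch only the Javascript snippets. As a rough Python counterpart (a sketch that assumes the `lancedb` Python package and the same sample table as the docs), listing tables and appending rows looks like this:

```python
# Sketch only: Python equivalents of the Javascript calls shown above.
import lancedb

db = lancedb.connect("data/sample-lancedb")
print(db.table_names())  # like await db.tableNames() in Javascript

tbl = db.open_table("my_table")
tbl.add([{"vector": [1.3, 1.4], "item": "fizz", "price": 100.0},
         {"vector": [9.5, 56.2], "item": "buzz", "price": 200.0}])
```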
@@ -98,7 +98,7 @@ You can also use an external API like OpenAI to generate embeddings
 embededings for your data.
 
 ```javascript
-const db = await lancedb.connect("/tmp/lancedb");
+const db = await lancedb.connect("data/sample-lancedb");
 const data = [
   { text: 'pepperoni' },
   { text: 'pineapple' }
@@ -1,18 +1,19 @@
-import sys
-from modal import Secret, Stub, Image, web_endpoint
-import lancedb
-import re
 import pickle
-import requests
+import re
+import sys
 import zipfile
 from pathlib import Path
 
+import requests
+from langchain.chains import RetrievalQA
 from langchain.document_loaders import UnstructuredHTMLLoader
 from langchain.embeddings import OpenAIEmbeddings
+from langchain.llms import OpenAI
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import LanceDB
-from langchain.llms import OpenAI
-from langchain.chains import RetrievalQA
+from modal import Image, Secret, Stub, web_endpoint
+
+import lancedb
 
 lancedb_image = Image.debian_slim().pip_install(
     "lancedb", "langchain", "openai", "pandas", "tiktoken", "unstructured", "tabulate"
@@ -78,10 +79,7 @@ def qanda_langchain(query):
     download_docs()
     docs = store_docs()
 
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,
-        chunk_overlap=200,
-    )
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200,)
     documents = text_splitter.split_documents(docs)
     embeddings = OpenAIEmbeddings()
 
@@ -18,6 +18,20 @@ Assume:
 1. `table` is a LanceDB Table
 2. `text` is the name of the Table column that we want to index
 
+For example,
+
+```python
+import lancedb
+
+uri = "data/sample-lancedb"
+db = lancedb.connect(uri)
+
+table = db.create_table("my_table",
+            data=[{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"},
+                  {"vector": [5.9, 26.5], "text": "There are several kittens playing"}])
+```
+
 To create the index:
 
 ```python
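The hunk cuts off right where the docs page begins the index-creation snippet. As a hedged sketch only (assuming LanceDB's tantivy-backed full-text search support and its `create_fts_index` method are available, as the `tantivy` install added in python.yml above suggests), creating and querying a full-text index on the `text` column could look like:

```python
# Sketch only: full-text index over the "text" column of the table above.
table.create_fts_index("text")

# A string query is served by the FTS index rather than the vector index.
results = table.search("puppy").limit(1).to_df()
print(results)
```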
@@ -14,7 +14,7 @@ The key features of LanceDB include:
 
 * Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.
 
-* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lanecdb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
+* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
 
 LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/lancedb/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.
 
@@ -28,7 +28,7 @@ LanceDB's core is written in Rust 🦀 and is built using <a href="https://githu
 ```python
 import lancedb
 
-uri = "/tmp/lancedb"
+uri = "data/sample-lancedb"
 db = lancedb.connect(uri)
 table = db.create_table("my_table",
                         data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
@@ -44,7 +44,7 @@ LanceDB's core is written in Rust 🦀 and is built using <a href="https://githu
 ```javascript
 const lancedb = require("vectordb");
 
-const uri = "/tmp/lancedb";
+const uri = "data/sample-lancedb";
 const db = await lancedb.connect(uri);
 const table = await db.createTable("my_table",
     [{ id: 1, vector: [3.1, 4.1], item: "foo", price: 10.0 },
@@ -6,11 +6,11 @@ Built on top of Apache Arrow, `LanceDB` is easy to integrate with the Python eco
 
 First, we need to connect to a `LanceDB` database.
 
-``` py
+```py
 
 import lancedb
 
-db = lancedb.connect("/tmp/lancedb")
+db = lancedb.connect("data/sample-lancedb")
 ```
 
 And write a `Pandas DataFrame` to LanceDB directly.
@@ -26,7 +26,7 @@ data = pd.DataFrame({
 table = db.create_table("pd_table", data=data)
 ```
 
-You will find detailed instructions of creating dataset and index in [Basic Operations](basic.md) and [Indexing](indexing.md)
+You will find detailed instructions of creating dataset and index in [Basic Operations](basic.md) and [Indexing](ann_indexes.md)
 sections.
 
 
@@ -79,7 +79,7 @@ We will re-use the dataset created previously
 ```python
 import lancedb
 
-db = lancedb.connect("/tmp/lancedb")
+db = lancedb.connect("data/sample-lancedb")
 table = db.open_table("pd_table")
 arrow_table = table.to_arrow()
 ```
@@ -87,8 +87,12 @@ arrow_table = table.to_arrow()
 `DuckDB` can directly query the `arrow_table`:
 
 ```python
-In [15]: duckdb.query("SELECT * FROM t")
-Out[15]:
+import duckdb
+
+duckdb.query("SELECT * FROM arrow_table")
+```
+
+```
 ┌─────────────┬─────────┬────────┐
 │   vector    │  item   │ price  │
 │   float[]   │ varchar │ double │
@@ -96,8 +100,12 @@ Out[15]:
 │ [3.1, 4.1]  │ foo     │  10.0  │
 │ [5.9, 26.5] │ bar     │  20.0  │
 └─────────────┴─────────┴────────┘
+```
+
+```python
+duckdb.query("SELECT mean(price) FROM arrow_table")
+```
 
-In [16]: duckdb.query("SELECT mean(price) FROM t")
+```
 Out[16]:
 ┌─────────────┐
 │ mean(price) │
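Pieced together, the hunks above imply a flow like the following. This is an illustrative end-to-end sketch only (the column values are the sample data from the page, and it assumes the `lancedb`, `pandas`, and `duckdb` packages are installed): write a pandas DataFrame to LanceDB, export it to Arrow, then let DuckDB query the Arrow table by variable name.

```python
# Sketch: pandas -> LanceDB -> Arrow -> DuckDB, using the docs' sample data.
import duckdb
import lancedb
import pandas as pd

db = lancedb.connect("data/sample-lancedb")
data = pd.DataFrame({
    "vector": [[3.1, 4.1], [5.9, 26.5]],
    "item": ["foo", "bar"],
    "price": [10.0, 20.0],
})
table = db.create_table("pd_table", data=data)

arrow_table = table.to_arrow()
print(duckdb.query("SELECT mean(price) FROM arrow_table"))
```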
@@ -16,9 +16,11 @@ npm install vectordb
 
 ```javascript
 const lancedb = require('vectordb');
-const db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>');
-const table = await db.openTable('my_table');
-const query = await table.search([0.1, 0.3]).setLimit(20).execute();
+const db = await lancedb.connect('data/sample-lancedb');
+const table = await db.createTable("my_table",
+      [{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
+       { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
+const results = await table.search([0.1, 0.3]).limit(20).execute();
 console.log(results);
 ```
 
@@ -26,12 +28,6 @@ The [examples](./examples) folder contains complete examples.
 
 ## Development
 
-The LanceDB javascript is built with npm:
-
-```bash
-npm run tsc
-```
-
 Run the tests with
 
 ```bash
@@ -1,211 +0,0 @@
[vectordb](../README.md) / [Exports](../modules.md) / Connection

# Class: Connection

A connection to a LanceDB database.

## Table of contents

### Constructors

- [constructor](Connection.md#constructor)

### Properties

- [\_db](Connection.md#_db)
- [\_uri](Connection.md#_uri)

### Accessors

- [uri](Connection.md#uri)

### Methods

- [createTable](Connection.md#createtable)
- [createTableArrow](Connection.md#createtablearrow)
- [openTable](Connection.md#opentable)
- [tableNames](Connection.md#tablenames)

## Constructors

### constructor

• **new Connection**(`db`, `uri`)

#### Parameters

| Name | Type |
| :------ | :------ |
| `db` | `any` |
| `uri` | `string` |

#### Defined in

[index.ts:46](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L46)

## Properties

### \_db

• `Private` `Readonly` **\_db**: `any`

#### Defined in

[index.ts:44](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L44)

___

### \_uri

• `Private` `Readonly` **\_uri**: `string`

#### Defined in

[index.ts:43](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L43)

## Accessors

### uri

• `get` **uri**(): `string`

#### Returns

`string`

#### Defined in

[index.ts:51](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L51)

## Methods

### createTable

▸ **createTable**(`name`, `data`): `Promise`<[`Table`](Table.md)<`number`[]\>\>

Creates a new Table and initialize it with new data.

#### Parameters

| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the table. |
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table |

#### Returns

`Promise`<[`Table`](Table.md)<`number`[]\>\>

#### Defined in

[index.ts:91](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L91)

▸ **createTable**<`T`\>(`name`, `data`, `embeddings`): `Promise`<[`Table`](Table.md)<`T`\>\>

Creates a new Table and initialize it with new data.

#### Type parameters

| Name |
| :------ |
| `T` |

#### Parameters

| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the table. |
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table |
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table |

#### Returns

`Promise`<[`Table`](Table.md)<`T`\>\>

#### Defined in

[index.ts:99](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L99)

___

### createTableArrow

▸ **createTableArrow**(`name`, `table`): `Promise`<[`Table`](Table.md)<`number`[]\>\>

#### Parameters

| Name | Type |
| :------ | :------ |
| `name` | `string` |
| `table` | `Table`<`any`\> |

#### Returns

`Promise`<[`Table`](Table.md)<`number`[]\>\>

#### Defined in

[index.ts:109](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L109)

___

### openTable

▸ **openTable**(`name`): `Promise`<[`Table`](Table.md)<`number`[]\>\>

Open a table in the database.

#### Parameters

| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the table. |

#### Returns

`Promise`<[`Table`](Table.md)<`number`[]\>\>

#### Defined in

[index.ts:67](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L67)

▸ **openTable**<`T`\>(`name`, `embeddings`): `Promise`<[`Table`](Table.md)<`T`\>\>

Open a table in the database.

#### Type parameters

| Name |
| :------ |
| `T` |

#### Parameters

| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the table. |
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table |

#### Returns

`Promise`<[`Table`](Table.md)<`T`\>\>

#### Defined in

[index.ts:74](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L74)

___

### tableNames

▸ **tableNames**(): `Promise`<`string`[]\>

Get the names of all tables in the database.

#### Returns

`Promise`<`string`[]\>

#### Defined in

[index.ts:58](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L58)
294
docs/src/javascript/classes/LocalConnection.md
Normal file
294
docs/src/javascript/classes/LocalConnection.md
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
[vectordb](../README.md) / [Exports](../modules.md) / LocalConnection
|
||||||
|
|
||||||
|
# Class: LocalConnection
|
||||||
|
|
||||||
|
A connection to a LanceDB database.
|
||||||
|
|
||||||
|
## Implements
|
||||||
|
|
||||||
|
- [`Connection`](../interfaces/Connection.md)
|
||||||
|
|
||||||
|
## Table of contents
|
||||||
|
|
||||||
|
### Constructors
|
||||||
|
|
||||||
|
- [constructor](LocalConnection.md#constructor)
|
||||||
|
|
||||||
|
### Properties
|
||||||
|
|
||||||
|
- [\_db](LocalConnection.md#_db)
|
||||||
|
- [\_uri](LocalConnection.md#_uri)
|
||||||
|
|
||||||
|
### Accessors
|
||||||
|
|
||||||
|
- [uri](LocalConnection.md#uri)
|
||||||
|
|
||||||
|
### Methods
|
||||||
|
|
||||||
|
- [createTable](LocalConnection.md#createtable)
|
||||||
|
- [createTableArrow](LocalConnection.md#createtablearrow)
|
||||||
|
- [dropTable](LocalConnection.md#droptable)
|
||||||
|
- [openTable](LocalConnection.md#opentable)
|
||||||
|
- [tableNames](LocalConnection.md#tablenames)
|
||||||
|
|
||||||
|
## Constructors
|
||||||
|
|
||||||
|
### constructor
|
||||||
|
|
||||||
|
• **new LocalConnection**(`db`, `uri`)
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `db` | `any` |
|
||||||
|
| `uri` | `string` |
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:132](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L132)
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### \_db
|
||||||
|
|
||||||
|
• `Private` `Readonly` **\_db**: `any`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:130](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L130)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### \_uri
|
||||||
|
|
||||||
|
• `Private` `Readonly` **\_uri**: `string`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:129](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L129)
|
||||||
|
|
||||||
|
## Accessors
|
||||||
|
|
||||||
|
### uri
|
||||||
|
|
||||||
|
• `get` **uri**(): `string`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`string`
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Connection](../interfaces/Connection.md).[uri](../interfaces/Connection.md#uri)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:137](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L137)
|
||||||
|
|
||||||
|
## Methods
|
||||||
|
|
||||||
|
### createTable
|
||||||
|
|
||||||
|
▸ **createTable**(`name`, `data`, `mode?`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
Creates a new Table and initialize it with new data.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `name` | `string` | The name of the table. |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table |
|
||||||
|
| `mode?` | [`WriteMode`](../enums/WriteMode.md) | The write mode to use when creating the table. |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Connection](../interfaces/Connection.md).[createTable](../interfaces/Connection.md#createtable)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:177](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L177)
|
||||||
|
|
||||||
|
▸ **createTable**(`name`, `data`, `mode`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `name` | `string` |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] |
|
||||||
|
| `mode` | [`WriteMode`](../enums/WriteMode.md) |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
Connection.createTable
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:178](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L178)
|
||||||
|
|
||||||
|
▸ **createTable**<`T`\>(`name`, `data`, `mode`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
Creates a new Table and initialize it with new data.
|
||||||
|
|
||||||
|
#### Type parameters
|
||||||
|
|
||||||
|
| Name |
|
||||||
|
| :------ |
|
||||||
|
| `T` |
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `name` | `string` | The name of the table. |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table |
|
||||||
|
| `mode` | [`WriteMode`](../enums/WriteMode.md) | The write mode to use when creating the table. |
|
||||||
|
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
Connection.createTable
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:188](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L188)
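
A minimal usage sketch, not part of the generated reference; the URI, table name, record fields, and the use of `WriteMode` re-exported from the package root are illustrative assumptions:

```javascript
const vectordb = require('vectordb')

async function createExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  // Non-empty array of records; each record carries a vector plus scalar fields.
  const data = [
    { vector: [1.1, 2.3], item: 'foo' },
    { vector: [0.4, 0.9], item: 'bar' }
  ]
  // Overwrite replaces a table of the same name if one already exists.
  const tbl = await db.createTable('my_table', data, vectordb.WriteMode.Overwrite)
  return tbl
}
```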
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### createTableArrow
|
||||||
|
|
||||||
|
▸ **createTableArrow**(`name`, `table`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `name` | `string` |
|
||||||
|
| `table` | `Table`<`any`\> |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Connection](../interfaces/Connection.md).[createTableArrow](../interfaces/Connection.md#createtablearrow)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:201](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L201)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### dropTable
|
||||||
|
|
||||||
|
▸ **dropTable**(`name`): `Promise`<`void`\>
|
||||||
|
|
||||||
|
Drop an existing table.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `name` | `string` | The name of the table to drop. |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`void`\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Connection](../interfaces/Connection.md).[dropTable](../interfaces/Connection.md#droptable)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:211](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L211)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### openTable
|
||||||
|
|
||||||
|
▸ **openTable**(`name`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
Open a table in the database.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `name` | `string` | The name of the table. |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Connection](../interfaces/Connection.md).[openTable](../interfaces/Connection.md#opentable)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:153](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L153)
|
||||||
|
|
||||||
|
▸ **openTable**<`T`\>(`name`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
Open a table in the database.
|
||||||
|
|
||||||
|
#### Type parameters
|
||||||
|
|
||||||
|
| Name |
|
||||||
|
| :------ |
|
||||||
|
| `T` |
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `name` | `string` | The name of the table. |
|
||||||
|
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
Connection.openTable
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:160](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L160)
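
A sketch of listing tables and opening one by name (the URI and table name are illustrative assumptions, not from the reference):

```javascript
const vectordb = require('vectordb')

async function openExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  const names = await db.tableNames()         // e.g. [ 'my_table' ]
  const tbl = await db.openTable('my_table')  // vectors typed as number[]
  console.log(tbl.name, names.length)
  return tbl
}
```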
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### tableNames
|
||||||
|
|
||||||
|
▸ **tableNames**(): `Promise`<`string`[]\>
|
||||||
|
|
||||||
|
Get the names of all tables in the database.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`string`[]\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Connection](../interfaces/Connection.md).[tableNames](../interfaces/Connection.md#tablenames)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:144](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L144)
|
||||||
docs/src/javascript/classes/LocalTable.md (new file, 289 lines)
@@ -0,0 +1,289 @@
|
|||||||
|
[vectordb](../README.md) / [Exports](../modules.md) / LocalTable
|
||||||
|
|
||||||
|
# Class: LocalTable<T\>
|
||||||
|
|
||||||
|
A LanceDB Table is a collection of Records. Each Record has one or more vector fields.
|
||||||
|
|
||||||
|
## Type parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `T` | `number`[] |
|
||||||
|
|
||||||
|
## Implements
|
||||||
|
|
||||||
|
- [`Table`](../interfaces/Table.md)<`T`\>
|
||||||
|
|
||||||
|
## Table of contents
|
||||||
|
|
||||||
|
### Constructors
|
||||||
|
|
||||||
|
- [constructor](LocalTable.md#constructor)
|
||||||
|
|
||||||
|
### Properties
|
||||||
|
|
||||||
|
- [\_embeddings](LocalTable.md#_embeddings)
|
||||||
|
- [\_name](LocalTable.md#_name)
|
||||||
|
- [\_tbl](LocalTable.md#_tbl)
|
||||||
|
|
||||||
|
### Accessors
|
||||||
|
|
||||||
|
- [name](LocalTable.md#name)
|
||||||
|
|
||||||
|
### Methods
|
||||||
|
|
||||||
|
- [add](LocalTable.md#add)
|
||||||
|
- [countRows](LocalTable.md#countrows)
|
||||||
|
- [createIndex](LocalTable.md#createindex)
|
||||||
|
- [delete](LocalTable.md#delete)
|
||||||
|
- [overwrite](LocalTable.md#overwrite)
|
||||||
|
- [search](LocalTable.md#search)
|
||||||
|
|
||||||
|
## Constructors
|
||||||
|
|
||||||
|
### constructor
|
||||||
|
|
||||||
|
• **new LocalTable**<`T`\>(`tbl`, `name`)
|
||||||
|
|
||||||
|
#### Type parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `T` | `number`[] |
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `tbl` | `any` |
|
||||||
|
| `name` | `string` |
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:221](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L221)
|
||||||
|
|
||||||
|
• **new LocalTable**<`T`\>(`tbl`, `name`, `embeddings`)
|
||||||
|
|
||||||
|
#### Type parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `T` | `number`[] |
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `tbl` | `any` | |
|
||||||
|
| `name` | `string` | |
|
||||||
|
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use when interacting with this table |
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:227](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L227)
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### \_embeddings
|
||||||
|
|
||||||
|
• `Private` `Optional` `Readonly` **\_embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:219](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L219)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### \_name
|
||||||
|
|
||||||
|
• `Private` `Readonly` **\_name**: `string`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:218](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L218)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### \_tbl
|
||||||
|
|
||||||
|
• `Private` `Readonly` **\_tbl**: `any`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:217](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L217)
|
||||||
|
|
||||||
|
## Accessors
|
||||||
|
|
||||||
|
### name
|
||||||
|
|
||||||
|
• `get` **name**(): `string`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`string`
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Table](../interfaces/Table.md).[name](../interfaces/Table.md#name)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:234](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L234)
|
||||||
|
|
||||||
|
## Methods
|
||||||
|
|
||||||
|
### add
|
||||||
|
|
||||||
|
▸ **add**(`data`): `Promise`<`number`\>
|
||||||
|
|
||||||
|
Insert records into this Table.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`number`\>
|
||||||
|
|
||||||
|
The number of rows added to the table
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Table](../interfaces/Table.md).[add](../interfaces/Table.md#add)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:252](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L252)
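
A sketch of appending records to an opened table (field names and values are illustrative assumptions):

```javascript
const vectordb = require('vectordb')

async function addExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  const tbl = await db.openTable('my_table')
  // add() resolves to the number of rows written.
  const added = await tbl.add([{ vector: [0.5, 0.2], item: 'baz' }])
  console.log(`${added} rows added`)
}
```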
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### countRows
|
||||||
|
|
||||||
|
▸ **countRows**(): `Promise`<`number`\>
|
||||||
|
|
||||||
|
Returns the number of rows in this table.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`number`\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Table](../interfaces/Table.md).[countRows](../interfaces/Table.md#countrows)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:278](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L278)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### createIndex
|
||||||
|
|
||||||
|
▸ **createIndex**(`indexParams`): `Promise`<`any`\>
|
||||||
|
|
||||||
|
Create an ANN index on this Table's vector column.
|
||||||
|
|
||||||
|
**`See`**
|
||||||
|
|
||||||
|
VectorIndexParams.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `indexParams` | `IvfPQIndexConfig` | The parameters of this index. |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`any`\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Table](../interfaces/Table.md).[createIndex](../interfaces/Table.md#createindex)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:271](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L271)
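
A hedged sketch of building an IVF-PQ index; the `IvfPQIndexConfig` field names used below (`type`, `column`, `num_partitions`, `num_sub_vectors`) are assumptions and are not taken from this reference:

```javascript
const vectordb = require('vectordb')

async function indexExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  const tbl = await db.openTable('my_table')
  // Assumed IvfPQIndexConfig shape; adjust to the actual interface definition.
  await tbl.createIndex({
    type: 'ivf_pq',
    column: 'vector',
    num_partitions: 256,
    num_sub_vectors: 16
  })
}
```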
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### delete
|
||||||
|
|
||||||
|
▸ **delete**(`filter`): `Promise`<`void`\>
|
||||||
|
|
||||||
|
Delete rows from this table.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `filter` | `string` | A filter in the same format used by a SQL WHERE clause. |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`void`\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Table](../interfaces/Table.md).[delete](../interfaces/Table.md#delete)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:287](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L287)
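
A sketch of deleting rows with a SQL filter (the column name and value are illustrative assumptions):

```javascript
const vectordb = require('vectordb')

async function deleteExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  const tbl = await db.openTable('my_table')
  // The filter uses the same SQL WHERE syntax accepted elsewhere in LanceDB.
  await tbl.delete("item = 'baz'")
}
```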
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### overwrite
|
||||||
|
|
||||||
|
▸ **overwrite**(`data`): `Promise`<`number`\>
|
||||||
|
|
||||||
|
Insert records into this Table, replacing its contents.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`number`\>
|
||||||
|
|
||||||
|
The number of rows added to the table
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Table](../interfaces/Table.md).[overwrite](../interfaces/Table.md#overwrite)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:262](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L262)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### search
|
||||||
|
|
||||||
|
▸ **search**(`query`): [`Query`](Query.md)<`T`\>
|
||||||
|
|
||||||
|
Creates a search query to find the nearest neighbors of the given search term
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `query` | `T` | The query search term |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Query`](Query.md)<`T`\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
[Table](../interfaces/Table.md).[search](../interfaces/Table.md#search)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:242](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L242)
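
A sketch of a nearest-neighbor query against an opened table (the query vector and limit are illustrative assumptions):

```javascript
const vectordb = require('vectordb')

async function searchExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  const tbl = await db.openTable('my_table')
  // search() returns a Query builder; execute() runs it and resolves to an array of records.
  const results = await tbl.search([0.1, 0.3]).limit(5).execute()
  console.log(results.length)
}
```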
|
||||||
@@ -40,7 +40,7 @@ An embedding function that automatically creates vector representation for a giv
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:21](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L21)
|
[embedding/openai.ts:21](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L21)
|
||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
@@ -50,7 +50,7 @@ An embedding function that automatically creates vector representation for a giv
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L19)
|
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L19)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -60,7 +60,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:18](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L18)
|
[embedding/openai.ts:18](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L18)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -76,7 +76,7 @@ The name of the column that will be used as input for the Embedding Function.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:50](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L50)
|
[embedding/openai.ts:50](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L50)
|
||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
@@ -102,4 +102,4 @@ Creates a vector representation for the given values.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:38](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L38)
|
[embedding/openai.ts:38](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L38)
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ A builder for nearest neighbor queries for LanceDB.
|
|||||||
|
|
||||||
### Properties
|
### Properties
|
||||||
|
|
||||||
- [\_columns](Query.md#_columns)
|
|
||||||
- [\_embeddings](Query.md#_embeddings)
|
- [\_embeddings](Query.md#_embeddings)
|
||||||
- [\_filter](Query.md#_filter)
|
- [\_filter](Query.md#_filter)
|
||||||
- [\_limit](Query.md#_limit)
|
- [\_limit](Query.md#_limit)
|
||||||
@@ -27,7 +26,9 @@ A builder for nearest neighbor queries for LanceDB.
|
|||||||
- [\_query](Query.md#_query)
|
- [\_query](Query.md#_query)
|
||||||
- [\_queryVector](Query.md#_queryvector)
|
- [\_queryVector](Query.md#_queryvector)
|
||||||
- [\_refineFactor](Query.md#_refinefactor)
|
- [\_refineFactor](Query.md#_refinefactor)
|
||||||
|
- [\_select](Query.md#_select)
|
||||||
- [\_tbl](Query.md#_tbl)
|
- [\_tbl](Query.md#_tbl)
|
||||||
|
- [where](Query.md#where)
|
||||||
|
|
||||||
### Methods
|
### Methods
|
||||||
|
|
||||||
@@ -37,6 +38,7 @@ A builder for nearest neighbor queries for LanceDB.
|
|||||||
- [metricType](Query.md#metrictype)
|
- [metricType](Query.md#metrictype)
|
||||||
- [nprobes](Query.md#nprobes)
|
- [nprobes](Query.md#nprobes)
|
||||||
- [refineFactor](Query.md#refinefactor)
|
- [refineFactor](Query.md#refinefactor)
|
||||||
|
- [select](Query.md#select)
|
||||||
|
|
||||||
## Constructors
|
## Constructors
|
||||||
|
|
||||||
@@ -60,27 +62,17 @@ A builder for nearest neighbor queries for LanceDB.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:241](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L241)
|
[index.ts:362](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L362)
|
||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
### \_columns
|
|
||||||
|
|
||||||
• `Private` `Optional` `Readonly` **\_columns**: `string`[]
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:236](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L236)
|
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
### \_embeddings
|
### \_embeddings
|
||||||
|
|
||||||
• `Private` `Optional` `Readonly` **\_embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\>
|
• `Private` `Optional` `Readonly` **\_embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\>
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:239](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L239)
|
[index.ts:360](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L360)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -90,7 +82,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:237](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L237)
|
[index.ts:358](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L358)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -100,7 +92,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:233](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L233)
|
[index.ts:354](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L354)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -110,7 +102,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:238](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L238)
|
[index.ts:359](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L359)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -120,7 +112,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:235](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L235)
|
[index.ts:356](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L356)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -130,7 +122,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:231](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L231)
|
[index.ts:352](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L352)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -140,7 +132,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:232](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L232)
|
[index.ts:353](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L353)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -150,7 +142,17 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:234](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L234)
|
[index.ts:355](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L355)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### \_select
|
||||||
|
|
||||||
|
• `Private` `Optional` **\_select**: `string`[]
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:357](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L357)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -160,7 +162,33 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:230](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L230)
|
[index.ts:351](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L351)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### where
|
||||||
|
|
||||||
|
• **where**: (`value`: `string`) => [`Query`](Query.md)<`T`\>
|
||||||
|
|
||||||
|
#### Type declaration
|
||||||
|
|
||||||
|
▸ (`value`): [`Query`](Query.md)<`T`\>
|
||||||
|
|
||||||
|
A filter statement to be applied to this query.
|
||||||
|
|
||||||
|
##### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `value` | `string` | A filter in the same format used by a SQL WHERE clause. |
|
||||||
|
|
||||||
|
##### Returns
|
||||||
|
|
||||||
|
[`Query`](Query.md)<`T`\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:410](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L410)
|
||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
@@ -182,7 +210,7 @@ Execute the query and return the results as an Array of Objects
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:301](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L301)
|
[index.ts:433](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L433)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -204,7 +232,7 @@ A filter statement to be applied to this query.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:284](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L284)
|
[index.ts:405](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L405)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -226,7 +254,7 @@ Sets the number of results that will be returned
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:257](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L257)
|
[index.ts:378](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L378)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -252,7 +280,7 @@ MetricType for the different options
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:293](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L293)
|
[index.ts:425](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L425)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -274,7 +302,7 @@ The number of probes used. A higher number makes search more accurate but also s
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:275](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L275)
|
[index.ts:396](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L396)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -296,4 +324,26 @@ Refine the results by reading extra elements and re-ranking them in memory.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:266](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L266)
|
[index.ts:387](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L387)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### select
|
||||||
|
|
||||||
|
▸ **select**(`value`): [`Query`](Query.md)<`T`\>
|
||||||
|
|
||||||
|
Return only the specified columns.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `value` | `string`[] | Only select the specified columns. If not specified, all columns will be returned. |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Query`](Query.md)<`T`\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:416](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L416)
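
A sketch chaining the builder methods documented above (column names, query vector, and tuning values are illustrative assumptions):

```javascript
const vectordb = require('vectordb')

async function queryExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  const tbl = await db.openTable('my_table')
  const results = await tbl.search([0.1, 0.3])
    .where("item != 'baz'")   // SQL-style filter
    .select(['item'])         // return only these columns
    .nprobes(20)              // trade accuracy for speed on an ANN index
    .refineFactor(10)         // re-rank extra candidates in memory
    .limit(10)
    .execute()
  return results
}
```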
|
||||||
|
|||||||
@@ -1,215 +0,0 @@
|
|||||||
[vectordb](../README.md) / [Exports](../modules.md) / Table
|
|
||||||
|
|
||||||
# Class: Table<T\>
|
|
||||||
|
|
||||||
## Type parameters
|
|
||||||
|
|
||||||
| Name | Type |
|
|
||||||
| :------ | :------ |
|
|
||||||
| `T` | `number`[] |
|
|
||||||
|
|
||||||
## Table of contents
|
|
||||||
|
|
||||||
### Constructors
|
|
||||||
|
|
||||||
- [constructor](Table.md#constructor)
|
|
||||||
|
|
||||||
### Properties
|
|
||||||
|
|
||||||
- [\_embeddings](Table.md#_embeddings)
|
|
||||||
- [\_name](Table.md#_name)
|
|
||||||
- [\_tbl](Table.md#_tbl)
|
|
||||||
|
|
||||||
### Accessors
|
|
||||||
|
|
||||||
- [name](Table.md#name)
|
|
||||||
|
|
||||||
### Methods
|
|
||||||
|
|
||||||
- [add](Table.md#add)
|
|
||||||
- [create\_index](Table.md#create_index)
|
|
||||||
- [overwrite](Table.md#overwrite)
|
|
||||||
- [search](Table.md#search)
|
|
||||||
|
|
||||||
## Constructors
|
|
||||||
|
|
||||||
### constructor
|
|
||||||
|
|
||||||
• **new Table**<`T`\>(`tbl`, `name`)
|
|
||||||
|
|
||||||
#### Type parameters
|
|
||||||
|
|
||||||
| Name | Type |
|
|
||||||
| :------ | :------ |
|
|
||||||
| `T` | `number`[] |
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
| Name | Type |
|
|
||||||
| :------ | :------ |
|
|
||||||
| `tbl` | `any` |
|
|
||||||
| `name` | `string` |
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:121](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L121)
|
|
||||||
|
|
||||||
• **new Table**<`T`\>(`tbl`, `name`, `embeddings`)
|
|
||||||
|
|
||||||
#### Type parameters
|
|
||||||
|
|
||||||
| Name | Type |
|
|
||||||
| :------ | :------ |
|
|
||||||
| `T` | `number`[] |
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
| :------ | :------ | :------ |
|
|
||||||
| `tbl` | `any` | |
|
|
||||||
| `name` | `string` | |
|
|
||||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use when interacting with this table |
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:127](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L127)
|
|
||||||
|
|
||||||
## Properties
|
|
||||||
|
|
||||||
### \_embeddings
|
|
||||||
|
|
||||||
• `Private` `Optional` `Readonly` **\_embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\>
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:119](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L119)
|
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
### \_name
|
|
||||||
|
|
||||||
• `Private` `Readonly` **\_name**: `string`
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:118](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L118)
|
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
### \_tbl
|
|
||||||
|
|
||||||
• `Private` `Readonly` **\_tbl**: `any`
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:117](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L117)
|
|
||||||
|
|
||||||
## Accessors
|
|
||||||
|
|
||||||
### name
|
|
||||||
|
|
||||||
• `get` **name**(): `string`
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`string`
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:134](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L134)
|
|
||||||
|
|
||||||
## Methods
|
|
||||||
|
|
||||||
### add
|
|
||||||
|
|
||||||
▸ **add**(`data`): `Promise`<`number`\>
|
|
||||||
|
|
||||||
Insert records into this Table.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
| :------ | :------ | :------ |
|
|
||||||
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`number`\>
|
|
||||||
|
|
||||||
The number of rows added to the table
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:152](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L152)
|
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
### create\_index
|
|
||||||
|
|
||||||
▸ **create_index**(`indexParams`): `Promise`<`any`\>
|
|
||||||
|
|
||||||
Create an ANN index on this Table's vector column.
|
|
||||||
|
|
||||||
**`See`**
|
|
||||||
|
|
||||||
VectorIndexParams.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
| :------ | :------ | :------ |
|
|
||||||
| `indexParams` | `IvfPQIndexConfig` | The parameters of this index. |
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`any`\>
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:171](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L171)
|
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
### overwrite
|
|
||||||
|
|
||||||
▸ **overwrite**(`data`): `Promise`<`number`\>
|
|
||||||
|
|
||||||
Insert records into this Table, replacing its contents.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
| :------ | :------ | :------ |
|
|
||||||
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`number`\>
|
|
||||||
|
|
||||||
The number of rows added to the table
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:162](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L162)
|
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
### search
|
|
||||||
|
|
||||||
▸ **search**(`query`): [`Query`](Query.md)<`T`\>
|
|
||||||
|
|
||||||
Creates a search query to find the nearest neighbors of the given search term
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
| :------ | :------ | :------ |
|
|
||||||
| `query` | `T` | The query search term |
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`Query`](Query.md)<`T`\>
|
|
||||||
|
|
||||||
#### Defined in
|
|
||||||
|
|
||||||
[index.ts:142](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L142)
|
|
||||||
@@ -9,6 +9,7 @@ Distance metrics type.
|
|||||||
### Enumeration Members
|
### Enumeration Members
|
||||||
|
|
||||||
- [Cosine](MetricType.md#cosine)
|
- [Cosine](MetricType.md#cosine)
|
||||||
|
- [Dot](MetricType.md#dot)
|
||||||
- [L2](MetricType.md#l2)
|
- [L2](MetricType.md#l2)
|
||||||
|
|
||||||
## Enumeration Members
|
## Enumeration Members
|
||||||
@@ -21,7 +22,19 @@ Cosine distance
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:341](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L341)
|
[index.ts:481](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L481)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### Dot
|
||||||
|
|
||||||
|
• **Dot** = ``"dot"``
|
||||||
|
|
||||||
|
Dot product
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:486](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L486)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -33,4 +46,4 @@ Euclidean distance
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:336](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L336)
|
[index.ts:476](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L476)
|
||||||
|
|||||||
@@ -2,11 +2,14 @@
|
|||||||
|
|
||||||
# Enumeration: WriteMode
|
# Enumeration: WriteMode
|
||||||
|
|
||||||
|
Write mode for writing a table.
|
||||||
|
|
||||||
## Table of contents
|
## Table of contents
|
||||||
|
|
||||||
### Enumeration Members
|
### Enumeration Members
|
||||||
|
|
||||||
- [Append](WriteMode.md#append)
|
- [Append](WriteMode.md#append)
|
||||||
|
- [Create](WriteMode.md#create)
|
||||||
- [Overwrite](WriteMode.md#overwrite)
|
- [Overwrite](WriteMode.md#overwrite)
|
||||||
|
|
||||||
## Enumeration Members
|
## Enumeration Members
|
||||||
@@ -15,9 +18,23 @@
|
|||||||
|
|
||||||
• **Append** = ``"append"``
|
• **Append** = ``"append"``
|
||||||
|
|
||||||
|
Append new data to the table.
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:326](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L326)
|
[index.ts:466](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L466)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### Create
|
||||||
|
|
||||||
|
• **Create** = ``"create"``
|
||||||
|
|
||||||
|
Create a new [Table](../interfaces/Table.md).
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:462](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L462)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -25,6 +42,8 @@ ___
|
|||||||
|
|
||||||
• **Overwrite** = ``"overwrite"``
|
• **Overwrite** = ``"overwrite"``
|
||||||
|
|
||||||
|
Overwrite the existing [Table](../interfaces/Table.md) if present.
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:325](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L325)
|
[index.ts:464](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L464)
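
A sketch of passing a write mode to `createTable`, assuming `WriteMode` is re-exported from the package root; the data is illustrative:

```javascript
const vectordb = require('vectordb')

async function overwriteExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  // Overwrite replaces an existing table of the same name instead of failing.
  await db.createTable(
    'my_table',
    [{ vector: [0.0, 1.0], item: 'seed' }],
    vectordb.WriteMode.Overwrite
  )
}
```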
|
||||||
|
|||||||
docs/src/javascript/interfaces/Connection.md (new file, 152 lines)
@@ -0,0 +1,152 @@
|
|||||||
|
[vectordb](../README.md) / [Exports](../modules.md) / Connection
|
||||||
|
|
||||||
|
# Interface: Connection
|
||||||
|
|
||||||
|
A LanceDB Connection that allows you to open tables and create new ones.
|
||||||
|
|
||||||
|
A Connection can be local (against the filesystem) or remote (against a server).
|
||||||
|
|
||||||
|
## Implemented by
|
||||||
|
|
||||||
|
- [`LocalConnection`](../classes/LocalConnection.md)
|
||||||
|
|
||||||
|
## Table of contents
|
||||||
|
|
||||||
|
### Properties
|
||||||
|
|
||||||
|
- [uri](Connection.md#uri)
|
||||||
|
|
||||||
|
### Methods
|
||||||
|
|
||||||
|
- [createTable](Connection.md#createtable)
|
||||||
|
- [createTableArrow](Connection.md#createtablearrow)
|
||||||
|
- [dropTable](Connection.md#droptable)
|
||||||
|
- [openTable](Connection.md#opentable)
|
||||||
|
- [tableNames](Connection.md#tablenames)
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### uri
|
||||||
|
|
||||||
|
• **uri**: `string`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:45](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L45)
|
||||||
|
|
||||||
|
## Methods
|
||||||
|
|
||||||
|
### createTable
|
||||||
|
|
||||||
|
▸ **createTable**<`T`\>(`name`, `data`, `mode?`, `embeddings?`): `Promise`<[`Table`](Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
Creates a new Table and initializes it with the provided data.
|
||||||
|
|
||||||
|
#### Type parameters
|
||||||
|
|
||||||
|
| Name |
|
||||||
|
| :------ |
|
||||||
|
| `T` |
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `name` | `string` | The name of the table. |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||||
|
| `mode?` | [`WriteMode`](../enums/WriteMode.md) | The write mode to use when creating the table. |
|
||||||
|
| `embeddings?` | [`EmbeddingFunction`](EmbeddingFunction.md)<`T`\> | An embedding function to use on this table |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:65](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L65)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### createTableArrow
|
||||||
|
|
||||||
|
▸ **createTableArrow**(`name`, `table`): `Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `name` | `string` |
|
||||||
|
| `table` | `Table`<`any`\> |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:67](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L67)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### dropTable
|
||||||
|
|
||||||
|
▸ **dropTable**(`name`): `Promise`<`void`\>
|
||||||
|
|
||||||
|
Drop an existing table.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `name` | `string` | The name of the table to drop. |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`void`\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:73](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L73)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### openTable
|
||||||
|
|
||||||
|
▸ **openTable**<`T`\>(`name`, `embeddings?`): `Promise`<[`Table`](Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
Open a table in the database.
|
||||||
|
|
||||||
|
#### Type parameters
|
||||||
|
|
||||||
|
| Name |
|
||||||
|
| :------ |
|
||||||
|
| `T` |
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `name` | `string` | The name of the table. |
|
||||||
|
| `embeddings?` | [`EmbeddingFunction`](EmbeddingFunction.md)<`T`\> | An embedding function to use on this table |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:55](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L55)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### tableNames
|
||||||
|
|
||||||
|
▸ **tableNames**(): `Promise`<`string`[]\>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`string`[]\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:47](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L47)
|
||||||
@@ -45,7 +45,7 @@ Creates a vector representation for the given values.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/embedding_function.ts#L27)
|
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/embedding_function.ts#L27)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -57,4 +57,4 @@ The name of the column that will be used as input for the Embedding Function.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/embedding_function.ts#L22)
|
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/embedding_function.ts#L22)
|
||||||
|
|||||||
docs/src/javascript/interfaces/Table.md (new file, 195 lines)
@@ -0,0 +1,195 @@
|
|||||||
|
[vectordb](../README.md) / [Exports](../modules.md) / Table
|
||||||
|
|
||||||
|
# Interface: Table<T\>
|
||||||
|
|
||||||
|
A LanceDB Table is a collection of Records. Each Record has one or more vector fields.
|
||||||
|
|
||||||
|
## Type parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `T` | `number`[] |
|
||||||
|
|
||||||
|
## Implemented by
|
||||||
|
|
||||||
|
- [`LocalTable`](../classes/LocalTable.md)
|
||||||
|
|
||||||
|
## Table of contents
|
||||||
|
|
||||||
|
### Properties
|
||||||
|
|
||||||
|
- [add](Table.md#add)
|
||||||
|
- [countRows](Table.md#countrows)
|
||||||
|
- [createIndex](Table.md#createindex)
|
||||||
|
- [delete](Table.md#delete)
|
||||||
|
- [name](Table.md#name)
|
||||||
|
- [overwrite](Table.md#overwrite)
|
||||||
|
- [search](Table.md#search)
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### add
|
||||||
|
|
||||||
|
• **add**: (`data`: `Record`<`string`, `unknown`\>[]) => `Promise`<`number`\>
|
||||||
|
|
||||||
|
#### Type declaration
|
||||||
|
|
||||||
|
▸ (`data`): `Promise`<`number`\>
|
||||||
|
|
||||||
|
Insert records into this Table.
|
||||||
|
|
||||||
|
##### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||||
|
|
||||||
|
##### Returns
|
||||||
|
|
||||||
|
`Promise`<`number`\>
|
||||||
|
|
||||||
|
The number of rows added to the table
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:95](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L95)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### countRows
|
||||||
|
|
||||||
|
• **countRows**: () => `Promise`<`number`\>
|
||||||
|
|
||||||
|
#### Type declaration
|
||||||
|
|
||||||
|
▸ (): `Promise`<`number`\>
|
||||||
|
|
||||||
|
Returns the number of rows in this table.
|
||||||
|
|
||||||
|
##### Returns
|
||||||
|
|
||||||
|
`Promise`<`number`\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:115](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L115)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### createIndex
|
||||||
|
|
||||||
|
• **createIndex**: (`indexParams`: `IvfPQIndexConfig`) => `Promise`<`any`\>
|
||||||
|
|
||||||
|
#### Type declaration
|
||||||
|
|
||||||
|
▸ (`indexParams`): `Promise`<`any`\>
|
||||||
|
|
||||||
|
Create an ANN index on this Table's vector column.
|
||||||
|
|
||||||
|
**`See`**
|
||||||
|
|
||||||
|
VectorIndexParams.
|
||||||
|
|
||||||
|
##### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `indexParams` | `IvfPQIndexConfig` | The parameters of this index. |
|
||||||
|
|
||||||
|
##### Returns
|
||||||
|
|
||||||
|
`Promise`<`any`\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:110](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L110)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### delete
|
||||||
|
|
||||||
|
• **delete**: (`filter`: `string`) => `Promise`<`void`\>
|
||||||
|
|
||||||
|
#### Type declaration
|
||||||
|
|
||||||
|
▸ (`filter`): `Promise`<`void`\>
|
||||||
|
|
||||||
|
Delete rows from this table.
|
||||||
|
|
||||||
|
##### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `filter` | `string` | A filter in the same format used by a SQL WHERE clause. |
|
||||||
|
|
||||||
|
##### Returns
|
||||||
|
|
||||||
|
`Promise`<`void`\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:122](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L122)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### name
|
||||||
|
|
||||||
|
• **name**: `string`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:81](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L81)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### overwrite
|
||||||
|
|
||||||
|
• **overwrite**: (`data`: `Record`<`string`, `unknown`\>[]) => `Promise`<`number`\>
|
||||||
|
|
||||||
|
#### Type declaration
|
||||||
|
|
||||||
|
▸ (`data`): `Promise`<`number`\>
|
||||||
|
|
||||||
|
Insert records into this Table, replacing its contents.
|
||||||
|
|
||||||
|
##### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||||
|
|
||||||
|
##### Returns
|
||||||
|
|
||||||
|
`Promise`<`number`\>
|
||||||
|
|
||||||
|
The number of rows added to the table
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:103](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L103)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### search
|
||||||
|
|
||||||
|
• **search**: (`query`: `T`) => [`Query`](../classes/Query.md)<`T`\>
|
||||||
|
|
||||||
|
#### Type declaration
|
||||||
|
|
||||||
|
▸ (`query`): [`Query`](../classes/Query.md)<`T`\>
|
||||||
|
|
||||||
|
Creates a search query to find the nearest neighbors of the given search term
|
||||||
|
|
||||||
|
##### Parameters
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| :------ | :------ | :------ |
|
||||||
|
| `query` | `T` | The query search term |
|
||||||
|
|
||||||
|
##### Returns
|
||||||
|
|
||||||
|
[`Query`](../classes/Query.md)<`T`\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:87](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L87)
|
||||||
@@ -11,14 +11,16 @@
|
|||||||
|
|
||||||
### Classes
|
### Classes
|
||||||
|
|
||||||
- [Connection](classes/Connection.md)
|
- [LocalConnection](classes/LocalConnection.md)
|
||||||
|
- [LocalTable](classes/LocalTable.md)
|
||||||
- [OpenAIEmbeddingFunction](classes/OpenAIEmbeddingFunction.md)
|
- [OpenAIEmbeddingFunction](classes/OpenAIEmbeddingFunction.md)
|
||||||
- [Query](classes/Query.md)
|
- [Query](classes/Query.md)
|
||||||
- [Table](classes/Table.md)
|
|
||||||
|
|
||||||
### Interfaces
|
### Interfaces
|
||||||
|
|
||||||
|
- [Connection](interfaces/Connection.md)
|
||||||
- [EmbeddingFunction](interfaces/EmbeddingFunction.md)
|
- [EmbeddingFunction](interfaces/EmbeddingFunction.md)
|
||||||
|
- [Table](interfaces/Table.md)
|
||||||
|
|
||||||
### Type Aliases
|
### Type Aliases
|
||||||
|
|
||||||
@@ -36,13 +38,13 @@
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:224](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L224)
|
[index.ts:345](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L345)
|
||||||
|
|
||||||
## Functions
|
## Functions
|
||||||
|
|
||||||
### connect
|
### connect
|
||||||
|
|
||||||
▸ **connect**(`uri`): `Promise`<[`Connection`](classes/Connection.md)\>
|
▸ **connect**(`uri`): `Promise`<[`Connection`](interfaces/Connection.md)\>
|
||||||
|
|
||||||
Connect to a LanceDB instance at the given URI
|
Connect to a LanceDB instance at the given URI
|
||||||
|
|
||||||
@@ -54,8 +56,8 @@ Connect to a LanceDB instance at the given URI
|
|||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Promise`<[`Connection`](classes/Connection.md)\>
|
`Promise`<[`Connection`](interfaces/Connection.md)\>
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:34](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L34)
|
[index.ts:34](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L34)
|
||||||
|
|||||||
@@ -21,12 +21,13 @@ from argparse import ArgumentParser
|
|||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
|
|
||||||
import lance
|
import lance
|
||||||
import lancedb
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizerFast
|
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizerFast
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
|
||||||
MODEL_ID = "openai/clip-vit-base-patch32"
|
MODEL_ID = "openai/clip-vit-base-patch32"
|
||||||
|
|
||||||
device = "cuda"
|
device = "cuda"
|
||||||
|
|||||||
@@ -10,14 +10,16 @@ pip install lancedb
|
|||||||
|
|
||||||
::: lancedb.connect
|
::: lancedb.connect
|
||||||
|
|
||||||
::: lancedb.LanceDBConnection
|
::: lancedb.db.DBConnection
|
||||||
|
|
||||||
## Table
|
## Table
|
||||||
|
|
||||||
::: lancedb.table.LanceTable
|
::: lancedb.table.Table
|
||||||
|
|
||||||
## Querying
|
## Querying
|
||||||
|
|
||||||
|
::: lancedb.query.Query
|
||||||
|
|
||||||
::: lancedb.query.LanceQueryBuilder
|
::: lancedb.query.LanceQueryBuilder
|
||||||
|
|
||||||
::: lancedb.query.LanceFtsQueryBuilder
|
::: lancedb.query.LanceFtsQueryBuilder
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ Currently, we support the following metrics:
|
|||||||
| ----------- | ------------------------------------ |
|
| ----------- | ------------------------------------ |
|
||||||
| `L2` | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
|
| `L2` | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
|
||||||
| `Cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity)|
|
| `Cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity)|
|
||||||
|
| `Dot` | [Dot Product](https://en.wikipedia.org/wiki/Dot_product) |
|
||||||
|
|
||||||
|
|
||||||
## Search
|
## Search
|
||||||
@@ -28,16 +29,44 @@ Currently, we support the following metrics:
|
|||||||
If no [vector index has been created](ann_indexes.md), LanceDB will brute-force scan
the vector column and compute the distance.
|
||||||
|
|
||||||
|
<!-- Setup Code
|
||||||
|
```python
|
||||||
|
import lancedb
|
||||||
|
import numpy as np
|
||||||
|
uri = "data/sample-lancedb"
|
||||||
|
db = lancedb.connect(uri)
|
||||||
|
|
||||||
|
data = [{"vector": row, "item": f"item {i}"}
|
||||||
|
for i, row in enumerate(np.random.random((10_000, 1536)).astype('float32'))]
|
||||||
|
|
||||||
|
db.create_table("my_vectors", data=data)
|
||||||
|
```
|
||||||
|
-->
|
||||||
|
<!-- Setup Code
|
||||||
|
```javascript
|
||||||
|
const vectordb_setup = require('vectordb')
|
||||||
|
const db_setup = await vectordb_setup.connect('data/sample-lancedb')
|
||||||
|
|
||||||
|
let data = []
|
||||||
|
for (let i = 0; i < 10_000; i++) {
|
||||||
|
data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
|
||||||
|
}
|
||||||
|
await db_setup.createTable('my_vectors', data)
|
||||||
|
```
|
||||||
|
-->
|
||||||
=== "Python"
|
=== "Python"
|
||||||
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import lancedb
|
import lancedb
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
db = lancedb.connect("data/sample-lancedb")
|
db = lancedb.connect("data/sample-lancedb")
|
||||||
|
|
||||||
tbl = db.open_table("my_vectors")
|
tbl = db.open_table("my_vectors")
|
||||||
|
|
||||||
df = tbl.search(np.random.random((768)))
|
df = tbl.search(np.random.random((1536))) \
|
||||||
.limit(10)
|
.limit(10) \
|
||||||
.to_df()
|
.to_df()
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -47,38 +76,41 @@ the vector column and compute the distance.
|
|||||||
const vectordb = require('vectordb')
|
const vectordb = require('vectordb')
|
||||||
const db = await vectordb.connect('data/sample-lancedb')
|
const db = await vectordb.connect('data/sample-lancedb')
|
||||||
|
|
||||||
tbl = db.open_table("my_vectors")
|
const tbl = await db.openTable("my_vectors")
|
||||||
|
|
||||||
const results = await tbl.search(Array(768))
|
const results_1 = await tbl.search(Array(1536).fill(1.2))
|
||||||
.limit(20)
|
.limit(20)
|
||||||
.execute()
|
.execute()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Commenting out for now since metricType fails for JS on Ubuntu 22.04.
|
||||||
|
|
||||||
By default, `l2` will be used as `Metric` type. You can customize the metric type
|
By default, `l2` will be used as `Metric` type. You can customize the metric type
|
||||||
as well.
|
as well.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
=== "Python"
|
=== "Python"
|
||||||
|
-->
|
||||||
```python
|
<!-- ```python
|
||||||
df = tbl.search(np.random.random((768)))
|
df = tbl.search(np.random.random((1536))) \
|
||||||
.metric("cosine")
|
.metric("cosine") \
|
||||||
.limit(10)
|
.limit(10) \
|
||||||
.to_df()
|
.to_df()
|
||||||
```
|
```
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
=== "JavaScript"
|
=== "JavaScript"
|
||||||
|
-->
|
||||||
|
|
||||||
```javascript
|
<!-- ```javascript
|
||||||
const vectordb = require('vectordb')
|
const results_2 = await tbl.search(Array(1536).fill(1.2))
|
||||||
const db = await vectordb.connect('data/sample-lancedb')
|
.metricType("cosine")
|
||||||
|
|
||||||
tbl = db.open_table("my_vectors")
|
|
||||||
|
|
||||||
const results = await tbl.search(Array(768))
|
|
||||||
.metric("cosine")
|
|
||||||
.limit(20)
|
.limit(20)
|
||||||
.execute()
|
.execute()
|
||||||
```
|
```
|
||||||
|
-->
|
||||||
|
|
||||||
### Search with Vector Index.
|
### Search with Vector Index.
|
||||||
|
|
||||||
|
|||||||
docs/src/sql.md (new file, 120 lines)
@@ -0,0 +1,120 @@
|
|||||||
|
# SQL filters
|
||||||
|
|
||||||
|
LanceDB supports standard SQL expressions as predicates for filtering. They can be
used during hybrid vector search and during deletion operations.
|
||||||
|
|
||||||
|
Currently, Lance supports a growing list of expressions.
|
||||||
|
|
||||||
|
* ``>``, ``>=``, ``<``, ``<=``, ``=``
|
||||||
|
* ``AND``, ``OR``, ``NOT``
|
||||||
|
* ``IS NULL``, ``IS NOT NULL``
|
||||||
|
* ``IS TRUE``, ``IS NOT TRUE``, ``IS FALSE``, ``IS NOT FALSE``
|
||||||
|
* ``IN``
|
||||||
|
* ``LIKE``, ``NOT LIKE``
|
||||||
|
* ``CAST``
|
||||||
|
* ``regexp_match(column, pattern)``
|
||||||
|
|
||||||
|
For example, the following filter string is acceptable:
|
||||||
|
<!-- Setup Code
|
||||||
|
```python
|
||||||
|
import lancedb
|
||||||
|
import numpy as np
|
||||||
|
uri = "data/sample-lancedb"
|
||||||
|
db = lancedb.connect(uri)
|
||||||
|
|
||||||
|
data = [{"vector": row, "item": f"item {i}"}
|
||||||
|
for i, row in enumerate(np.random.random((10_000, 2)).astype('int'))]
|
||||||
|
|
||||||
|
tbl = db.create_table("my_vectors", data=data)
|
||||||
|
```
|
||||||
|
-->
|
||||||
|
<!-- Setup Code
|
||||||
|
```javascript
|
||||||
|
const vectordb = require('vectordb')
|
||||||
|
const db = await vectordb.connect('data/sample-lancedb')
|
||||||
|
|
||||||
|
let data = []
|
||||||
|
for (let i = 0; i < 10_000; i++) {
|
||||||
|
data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
|
||||||
|
}
|
||||||
|
const tbl = await db.createTable('my_vectors', data)
|
||||||
|
```
|
||||||
|
-->
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
```python
|
||||||
|
tbl.search([100, 102]) \
|
||||||
|
.where("""(
|
||||||
|
(label IN [10, 20])
|
||||||
|
AND
|
||||||
|
(note.email IS NOT NULL)
|
||||||
|
) OR NOT note.created
|
||||||
|
""")
|
||||||
|
|
||||||
|
```
|
||||||
|
=== "Javascript"
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
tbl.search([100, 102])
|
||||||
|
.where(`(
|
||||||
|
(label IN [10, 20])
|
||||||
|
AND
|
||||||
|
(note.email IS NOT NULL)
|
||||||
|
) OR NOT note.created
|
||||||
|
`)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
If your column name contains special characters or is a [SQL Keyword](https://docs.rs/sqlparser/latest/sqlparser/keywords/index.html),
|
||||||
|
you can use backtick (`` ` ``) to escape it. For nested fields, each segment of the
|
||||||
|
path must be wrapped in backticks.
|
||||||
|
|
||||||
|
=== "SQL"
|
||||||
|
```sql
|
||||||
|
`CUBE` = 10 AND `column name with space` IS NOT NULL
|
||||||
|
AND `nested with space`.`inner with space` < 2
|
||||||
|
```
|
||||||
|
|
||||||
|
!!! warning
|
||||||
|
Field names containing periods (``.``) are not supported.
|
||||||
|
|
||||||
|
Date, timestamp, and decimal literals are written by placing the string value after
the type name. For example:
|
||||||
|
|
||||||
|
=== "SQL"
|
||||||
|
```sql
|
||||||
|
date_col = date '2021-01-01'
|
||||||
|
and timestamp_col = timestamp '2021-01-01 00:00:00'
|
||||||
|
and decimal_col = decimal(8,3) '1.000'
|
||||||
|
```
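
The same literals can be used from the JavaScript API; a sketch assuming a hypothetical `date_col` column on the table:

```javascript
const vectordb = require('vectordb')

async function dateFilterExample () {
  const db = await vectordb.connect('data/sample-lancedb')
  const tbl = await db.openTable('my_vectors')
  const rows = await tbl.search([100, 102])
    .where("date_col = date '2021-01-01'")
    .limit(10)
    .execute()
  return rows
}
```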
|
||||||
|
|
||||||
|
For timestamp columns, the precision can be specified as a number in the type
|
||||||
|
parameter. Microsecond precision (6) is the default.
|
||||||
|
|
||||||
|
| SQL | Time unit |
|
||||||
|
|------------------|--------------|
|
||||||
|
| ``timestamp(0)`` | Seconds |
|
||||||
|
| ``timestamp(3)`` | Milliseconds |
|
||||||
|
| ``timestamp(6)`` | Microseconds |
|
||||||
|
| ``timestamp(9)`` | Nanoseconds |
|
||||||
|
|
||||||
|
LanceDB internally stores data in [Apache Arrow](https://arrow.apache.org/) format.
|
||||||
|
The mapping from SQL types to Arrow types is:
|
||||||
|
|
||||||
|
| SQL type | Arrow type |
|
||||||
|
|----------|------------|
|
||||||
|
| ``boolean`` | ``Boolean`` |
|
||||||
|
| ``tinyint`` / ``tinyint unsigned`` | ``Int8`` / ``UInt8`` |
|
||||||
|
| ``smallint`` / ``smallint unsigned`` | ``Int16`` / ``UInt16`` |
|
||||||
|
| ``int`` or ``integer`` / ``int unsigned`` or ``integer unsigned`` | ``Int32`` / ``UInt32`` |
|
||||||
|
| ``bigint`` / ``bigint unsigned`` | ``Int64`` / ``UInt64`` |
|
||||||
|
| ``float`` | ``Float32`` |
|
||||||
|
| ``double`` | ``Float64`` |
|
||||||
|
| ``decimal(precision, scale)`` | ``Decimal128`` |
|
||||||
|
| ``date`` | ``Date32`` |
|
||||||
|
| ``timestamp`` | ``Timestamp`` [^1] |
|
||||||
|
| ``string`` | ``Utf8`` |
|
||||||
|
| ``binary`` | ``Binary`` |
|
||||||
|
|
||||||
|
[^1]: See precision mapping in previous table.
|
||||||
|
|
||||||
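These literals plug directly into a `where` clause. A minimal sketch follows; the
`orders` table and its `date_col`/`decimal_col` columns are hypothetical and only
illustrate the literal syntax.

```python
# Sketch only: table and column names are hypothetical.
import lancedb

db = lancedb.connect("data/sample-lancedb")
tbl = db.open_table("orders")

df = (
    tbl.search([0.1, 0.3])
       .where("date_col = date '2021-01-01' AND decimal_col = decimal(8,3) '1.000'")
       .to_df()
)
```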
51
docs/test/md_testing.js
Normal file
@@ -0,0 +1,51 @@
const glob = require("glob");
const fs = require("fs");
const path = require("path");

const excludedFiles = [
  "../src/fts.md",
  "../src/embedding.md",
  "../src/examples/serverless_lancedb_with_s3_and_lambda.md",
  "../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
  "../src/examples/youtube_transcript_bot_with_nodejs.md",
];

const nodePrefix = "javascript";
const nodeFile = ".js";
const nodeFolder = "node";
const globString = "../src/**/*.md";
const asyncPrefix = "(async () => {\n";
const asyncSuffix = "})();";

function* yieldLines(lines, prefix, suffix) {
  let inCodeBlock = false;
  for (const line of lines) {
    if (line.trim().startsWith(prefix + nodePrefix)) {
      inCodeBlock = true;
    } else if (inCodeBlock && line.trim().startsWith(suffix)) {
      inCodeBlock = false;
      yield "\n";
    } else if (inCodeBlock) {
      yield line;
    }
  }
}

const files = glob.sync(globString, { recursive: true });

for (const file of files.filter((file) => !excludedFiles.includes(file))) {
  const lines = [];
  const data = fs.readFileSync(file, "utf-8");
  const fileLines = data.split("\n");

  for (const line of yieldLines(fileLines, "```", "```")) {
    lines.push(line);
  }

  if (lines.length > 0) {
    const fileName = path.basename(file, ".md");
    const outPath = path.join(nodeFolder, fileName, `${fileName}${nodeFile}`);
    console.log(outPath)
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, asyncPrefix + "\n" + lines.join("\n") + asyncSuffix);
  }
}
41
docs/test/md_testing.py
Normal file
@@ -0,0 +1,41 @@
import glob
from typing import Iterator
from pathlib import Path

excluded_files = [
    "../src/fts.md",
    "../src/embedding.md",
    "../src/examples/serverless_lancedb_with_s3_and_lambda.md",
    "../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
    "../src/examples/youtube_transcript_bot_with_nodejs.md"
]

python_prefix = "py"
python_file = ".py"
python_folder = "python"
glob_string = "../src/**/*.md"


def yield_lines(lines: Iterator[str], prefix: str, suffix: str):
    in_code_block = False
    # Python code has strict indentation
    strip_length = 0
    for line in lines:
        if line.strip().startswith(prefix + python_prefix):
            in_code_block = True
            strip_length = len(line) - len(line.lstrip())
        elif in_code_block and line.strip().startswith(suffix):
            in_code_block = False
            yield "\n"
        elif in_code_block:
            yield line[strip_length:]


for file in filter(lambda file: file not in excluded_files, glob.glob(glob_string, recursive=True)):
    with open(file, "r") as f:
        lines = list(yield_lines(iter(f), "```", "```"))

    if len(lines) > 0:
        out_path = Path(python_folder) / Path(file).name.strip(".md") / (Path(file).name.strip(".md") + python_file)
        print(out_path)
        out_path.parent.mkdir(exist_ok=True, parents=True)
        with open(out_path, "w") as out:
            out.writelines(lines)
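These two scripts only extract the fenced doc snippets into runnable files; this
changeset does not show how the generated files are executed. A rough sketch of one
plausible driver for the Python output is below; the actual CI wiring is an
assumption, not part of this diff.

```python
# Sketch only: one plausible way to run the extracted snippet files.
import subprocess
import sys
from pathlib import Path

for script in sorted(Path("python").rglob("*.py")):
    print(f"running {script}")
    subprocess.run([sys.executable, str(script)], check=True)
```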
13
docs/test/package.json
Normal file
@@ -0,0 +1,13 @@
{
  "name": "lancedb-docs-test",
  "version": "1.0.0",
  "description": "",
  "author": "",
  "license": "ISC",
  "dependencies": {
    "fs": "^0.0.1-security",
    "glob": "^10.2.7",
    "path": "^0.12.7",
    "vectordb": "https://gitpkg.now.sh/lancedb/lancedb/node?main"
  }
}
5
docs/test/requirements.txt
Normal file
@@ -0,0 +1,5 @@
lancedb @ git+https://github.com/lancedb/lancedb.git#egg=subdir&subdirectory=python
numpy
pandas
pylance
duckdb
@@ -12,5 +12,6 @@ module.exports = {
     sourceType: 'module'
   },
   rules: {
+    "@typescript-eslint/method-signature-style": "off",
   }
 }
@@ -14,9 +14,11 @@ npm install vectordb

 ```javascript
 const lancedb = require('vectordb');
-const db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>');
-const table = await db.openTable('my_table');
-const query = await table.search([0.1, 0.3]).setLimit(20).execute();
+const db = await lancedb.connect('data/sample-lancedb');
+const table = await db.createTable("my_table",
+  [{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
+   { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
+const results = await table.search([0.1, 0.3]).limit(20).execute();
 console.log(results);
 ```

@@ -24,12 +26,6 @@ The [examples](./examples) folder contains complete examples.

 ## Development

-The LanceDB javascript is built with npm:
-
-```bash
-npm run tsc
-```
-
 Run the tests with

 ```bash
@@ -46,4 +42,4 @@ To build documentation

 ```bash
 npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
 ```
174
node/package-lock.json
generated
@@ -1,12 +1,12 @@
 {
   "name": "vectordb",
-  "version": "0.1.5",
+  "version": "0.1.9",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "vectordb",
-      "version": "0.1.5",
+      "version": "0.1.9",
       "license": "Apache-2.0",
       "dependencies": {
         "@apache-arrow/ts": "^12.0.0",
@@ -14,6 +14,7 @@
       },
       "devDependencies": {
         "@types/chai": "^4.3.4",
+        "@types/chai-as-promised": "^7.1.5",
         "@types/mocha": "^10.0.1",
         "@types/node": "^18.16.2",
         "@types/sinon": "^10.0.15",
@@ -21,9 +22,10 @@
         "@typescript-eslint/eslint-plugin": "^5.59.1",
         "cargo-cp-artifact": "^0.1",
         "chai": "^4.3.7",
+        "chai-as-promised": "^7.1.1",
         "eslint": "^8.39.0",
         "eslint-config-standard-with-typescript": "^34.0.1",
-        "eslint-plugin-import": "^2.27.5",
+        "eslint-plugin-import": "^2.26.0",
         "eslint-plugin-n": "^15.7.0",
         "eslint-plugin-promise": "^6.1.1",
         "mocha": "^10.2.0",
@@ -311,6 +313,15 @@
       "integrity": "sha512-KnRanxnpfpjUTqTCXslZSEdLfXExwgNxYPdiO2WGUj8+HDjFi8R3k5RVKPeSCzLjCcshCAtVO2QBbVuAV4kTnw==",
       "dev": true
     },
+    "node_modules/@types/chai-as-promised": {
+      "version": "7.1.5",
+      "resolved": "https://registry.npmjs.org/@types/chai-as-promised/-/chai-as-promised-7.1.5.tgz",
+      "integrity": "sha512-jStwss93SITGBwt/niYrkf2C+/1KTeZCZl1LaeezTlqppAKeoQC7jxyqYuP72sxBGKCIbw7oHgbYssIRzT5FCQ==",
+      "dev": true,
+      "dependencies": {
+        "@types/chai": "*"
+      }
+    },
     "node_modules/@types/command-line-args": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.0.tgz",
@@ -787,24 +798,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/array.prototype.flatmap": {
-      "version": "1.3.1",
-      "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.1.tgz",
-      "integrity": "sha512-8UGn9O1FDVvMNB0UlLv4voxRMze7+FpHyF5mSMRjWHUMlpoDViniy05870VlxhfgTnLbpuwTzvD76MTtWxB/mQ==",
-      "dev": true,
-      "dependencies": {
-        "call-bind": "^1.0.2",
-        "define-properties": "^1.1.4",
-        "es-abstract": "^1.20.4",
-        "es-shim-unscopables": "^1.0.0"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
     "node_modules/assertion-error": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz",
@@ -960,6 +953,18 @@
         "node": ">=4"
       }
     },
+    "node_modules/chai-as-promised": {
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/chai-as-promised/-/chai-as-promised-7.1.1.tgz",
+      "integrity": "sha512-azL6xMoi+uxu6z4rhWQ1jbdUhOMhis2PvscD/xjLqNMkv3BPPp2JyyuTHOrf9BOosGpNQ11v6BKv/g57RXbiaA==",
+      "dev": true,
+      "dependencies": {
+        "check-error": "^1.0.2"
+      },
+      "peerDependencies": {
+        "chai": ">= 2.1.2 < 5"
+      }
+    },
     "node_modules/chalk": {
       "version": "4.1.2",
       "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
@@ -1633,25 +1638,23 @@
       }
     },
     "node_modules/eslint-plugin-import": {
-      "version": "2.27.5",
-      "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz",
-      "integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==",
+      "version": "2.26.0",
+      "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.26.0.tgz",
+      "integrity": "sha512-hYfi3FXaM8WPLf4S1cikh/r4IxnO6zrhZbEGz2b660EJRbuxgpDS5gkCuYgGWg2xxh2rBuIr4Pvhve/7c31koA==",
       "dev": true,
       "dependencies": {
-        "array-includes": "^3.1.6",
-        "array.prototype.flat": "^1.3.1",
-        "array.prototype.flatmap": "^1.3.1",
-        "debug": "^3.2.7",
+        "array-includes": "^3.1.4",
+        "array.prototype.flat": "^1.2.5",
+        "debug": "^2.6.9",
         "doctrine": "^2.1.0",
-        "eslint-import-resolver-node": "^0.3.7",
-        "eslint-module-utils": "^2.7.4",
+        "eslint-import-resolver-node": "^0.3.6",
+        "eslint-module-utils": "^2.7.3",
         "has": "^1.0.3",
-        "is-core-module": "^2.11.0",
+        "is-core-module": "^2.8.1",
         "is-glob": "^4.0.3",
         "minimatch": "^3.1.2",
-        "object.values": "^1.1.6",
-        "resolve": "^1.22.1",
-        "semver": "^6.3.0",
+        "object.values": "^1.1.5",
+        "resolve": "^1.22.0",
         "tsconfig-paths": "^3.14.1"
       },
       "engines": {
@@ -1662,12 +1665,12 @@
       }
     },
     "node_modules/eslint-plugin-import/node_modules/debug": {
-      "version": "3.2.7",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
-      "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
+      "version": "2.6.9",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
+      "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
       "dev": true,
       "dependencies": {
-        "ms": "^2.1.1"
+        "ms": "2.0.0"
       }
     },
     "node_modules/eslint-plugin-import/node_modules/doctrine": {
@@ -1682,14 +1685,11 @@
         "node": ">=0.10.0"
       }
     },
-    "node_modules/eslint-plugin-import/node_modules/semver": {
-      "version": "6.3.0",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz",
-      "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==",
-      "dev": true,
-      "bin": {
-        "semver": "bin/semver.js"
-      }
+    "node_modules/eslint-plugin-import/node_modules/ms": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
+      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
+      "dev": true
     },
     "node_modules/eslint-plugin-n": {
       "version": "15.7.0",
@@ -3619,9 +3619,9 @@
       }
     },
     "node_modules/semver": {
-      "version": "7.5.0",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.0.tgz",
-      "integrity": "sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA==",
+      "version": "7.5.3",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.3.tgz",
+      "integrity": "sha512-QBlUtyVk/5EeHbi7X0fw6liDZc7BBmEaSYn01fMU1OUYbf6GPsbTtd8WmnqbI20SeycoHSeiybkE/q1Q+qlThQ==",
       "dev": true,
       "dependencies": {
         "lru-cache": "^6.0.0"
@@ -4703,6 +4703,15 @@
       "integrity": "sha512-KnRanxnpfpjUTqTCXslZSEdLfXExwgNxYPdiO2WGUj8+HDjFi8R3k5RVKPeSCzLjCcshCAtVO2QBbVuAV4kTnw==",
       "dev": true
     },
+    "@types/chai-as-promised": {
+      "version": "7.1.5",
+      "resolved": "https://registry.npmjs.org/@types/chai-as-promised/-/chai-as-promised-7.1.5.tgz",
+      "integrity": "sha512-jStwss93SITGBwt/niYrkf2C+/1KTeZCZl1LaeezTlqppAKeoQC7jxyqYuP72sxBGKCIbw7oHgbYssIRzT5FCQ==",
+      "dev": true,
+      "requires": {
+        "@types/chai": "*"
+      }
+    },
     "@types/command-line-args": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.0.tgz",
@@ -5038,18 +5047,6 @@
         "es-shim-unscopables": "^1.0.0"
       }
     },
-    "array.prototype.flatmap": {
-      "version": "1.3.1",
-      "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.1.tgz",
-      "integrity": "sha512-8UGn9O1FDVvMNB0UlLv4voxRMze7+FpHyF5mSMRjWHUMlpoDViniy05870VlxhfgTnLbpuwTzvD76MTtWxB/mQ==",
-      "dev": true,
-      "requires": {
-        "call-bind": "^1.0.2",
-        "define-properties": "^1.1.4",
-        "es-abstract": "^1.20.4",
-        "es-shim-unscopables": "^1.0.0"
-      }
-    },
     "assertion-error": {
       "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz",
@@ -5172,6 +5169,15 @@
         "type-detect": "^4.0.5"
       }
     },
+    "chai-as-promised": {
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/chai-as-promised/-/chai-as-promised-7.1.1.tgz",
+      "integrity": "sha512-azL6xMoi+uxu6z4rhWQ1jbdUhOMhis2PvscD/xjLqNMkv3BPPp2JyyuTHOrf9BOosGpNQ11v6BKv/g57RXbiaA==",
+      "dev": true,
+      "requires": {
+        "check-error": "^1.0.2"
+      }
+    },
     "chalk": {
       "version": "4.1.2",
       "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
@@ -5707,35 +5713,33 @@
       }
     },
     "eslint-plugin-import": {
-      "version": "2.27.5",
-      "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz",
-      "integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==",
+      "version": "2.26.0",
+      "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.26.0.tgz",
+      "integrity": "sha512-hYfi3FXaM8WPLf4S1cikh/r4IxnO6zrhZbEGz2b660EJRbuxgpDS5gkCuYgGWg2xxh2rBuIr4Pvhve/7c31koA==",
       "dev": true,
       "requires": {
-        "array-includes": "^3.1.6",
-        "array.prototype.flat": "^1.3.1",
-        "array.prototype.flatmap": "^1.3.1",
-        "debug": "^3.2.7",
+        "array-includes": "^3.1.4",
+        "array.prototype.flat": "^1.2.5",
+        "debug": "^2.6.9",
         "doctrine": "^2.1.0",
-        "eslint-import-resolver-node": "^0.3.7",
-        "eslint-module-utils": "^2.7.4",
+        "eslint-import-resolver-node": "^0.3.6",
+        "eslint-module-utils": "^2.7.3",
         "has": "^1.0.3",
-        "is-core-module": "^2.11.0",
+        "is-core-module": "^2.8.1",
         "is-glob": "^4.0.3",
         "minimatch": "^3.1.2",
-        "object.values": "^1.1.6",
-        "resolve": "^1.22.1",
-        "semver": "^6.3.0",
+        "object.values": "^1.1.5",
+        "resolve": "^1.22.0",
         "tsconfig-paths": "^3.14.1"
       },
       "dependencies": {
         "debug": {
-          "version": "3.2.7",
-          "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
-          "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
+          "version": "2.6.9",
+          "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
+          "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
          "dev": true,
           "requires": {
-            "ms": "^2.1.1"
+            "ms": "2.0.0"
           }
         },
         "doctrine": {
@@ -5747,10 +5751,10 @@
             "esutils": "^2.0.2"
           }
         },
-        "semver": {
-          "version": "6.3.0",
-          "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz",
-          "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==",
+        "ms": {
+          "version": "2.0.0",
+          "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
+          "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
           "dev": true
         }
       }
@@ -7078,9 +7082,9 @@
       }
     },
     "semver": {
-      "version": "7.5.0",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.0.tgz",
-      "integrity": "sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA==",
+      "version": "7.5.3",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.3.tgz",
+      "integrity": "sha512-QBlUtyVk/5EeHbi7X0fw6liDZc7BBmEaSYn01fMU1OUYbf6GPsbTtd8WmnqbI20SeycoHSeiybkE/q1Q+qlThQ==",
      "dev": true,
       "requires": {
         "lru-cache": "^6.0.0"
@@ -1,6 +1,6 @@
 {
   "name": "vectordb",
-  "version": "0.1.5",
+  "version": "0.1.10",
   "description": " Serverless, low-latency vector database for AI applications",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -8,7 +8,7 @@
     "tsc": "tsc -b",
     "build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json-render-diagnostics",
     "build-release": "npm run build -- --release",
-    "test": "mocha -recursive dist/test",
+    "test": "npm run tsc; mocha -recursive dist/test",
     "lint": "eslint src --ext .js,.ts",
     "clean": "rm -rf node_modules *.node dist/"
   },
@@ -26,6 +26,7 @@
   "license": "Apache-2.0",
   "devDependencies": {
     "@types/chai": "^4.3.4",
+    "@types/chai-as-promised": "^7.1.5",
     "@types/mocha": "^10.0.1",
     "@types/node": "^18.16.2",
     "@types/sinon": "^10.0.15",
@@ -33,9 +34,10 @@
     "@typescript-eslint/eslint-plugin": "^5.59.1",
     "cargo-cp-artifact": "^0.1",
     "chai": "^4.3.7",
+    "chai-as-promised": "^7.1.1",
     "eslint": "^8.39.0",
     "eslint-config-standard-with-typescript": "^34.0.1",
-    "eslint-plugin-import": "^2.27.5",
+    "eslint-plugin-import": "^2.26.0",
     "eslint-plugin-n": "^15.7.0",
     "eslint-plugin-promise": "^6.1.1",
     "mocha": "^10.2.0",
@@ -22,7 +22,7 @@ import { fromRecordsToBuffer } from './arrow'
 import type { EmbeddingFunction } from './embedding/embedding_function'

 // eslint-disable-next-line @typescript-eslint/no-var-requires
-const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js')
+const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete } = require('../native.js')

 export type { EmbeddingFunction }
 export { OpenAIEmbeddingFunction } from './embedding/openai'
@@ -33,13 +33,99 @@ export { OpenAIEmbeddingFunction } from './embedding/openai'
  */
 export async function connect (uri: string): Promise<Connection> {
   const db = await databaseNew(uri)
-  return new Connection(db, uri)
+  return new LocalConnection(db, uri)
+}
+
+/**
+ * A LanceDB Connection that allows you to open tables and create new ones.
+ *
+ * Connection could be local against filesystem or remote against a server.
+ */
+export interface Connection {
+  uri: string
+
+  tableNames(): Promise<string[]>
+
+  /**
+   * Open a table in the database.
+   *
+   * @param name The name of the table.
+   * @param embeddings An embedding function to use on this table
+   */
+  openTable<T>(name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
+
+  /**
+   * Creates a new Table and initialize it with new data.
+   *
+   * @param {string} name - The name of the table.
+   * @param data - Non-empty Array of Records to be inserted into the table
+   * @param {WriteMode} mode - The write mode to use when creating the table.
+   * @param {EmbeddingFunction} embeddings - An embedding function to use on this table
+   */
+  createTable<T>(name: string, data: Array<Record<string, unknown>>, mode?: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
+
+  createTableArrow(name: string, table: ArrowTable): Promise<Table>
+
+  /**
+   * Drop an existing table.
+   * @param name The name of the table to drop.
+   */
+  dropTable(name: string): Promise<void>
+
+}
+
+/**
+ * A LanceDB Table is the collection of Records. Each Record has one or more vector fields.
+ */
+export interface Table<T = number[]> {
+  name: string
+
+  /**
+   * Creates a search query to find the nearest neighbors of the given search term
+   * @param query The query search term
+   */
+  search: (query: T) => Query<T>
+
+  /**
+   * Insert records into this Table.
+   *
+   * @param data Records to be inserted into the Table
+   * @return The number of rows added to the table
+   */
+  add: (data: Array<Record<string, unknown>>) => Promise<number>
+
+  /**
+   * Insert records into this Table, replacing its contents.
+   *
+   * @param data Records to be inserted into the Table
+   * @return The number of rows added to the table
+   */
+  overwrite: (data: Array<Record<string, unknown>>) => Promise<number>
+
+  /**
+   * Create an ANN index on this Table vector index.
+   *
+   * @param indexParams The parameters of this Index, @see VectorIndexParams.
+   */
+  createIndex: (indexParams: VectorIndexParams) => Promise<any>
+
+  /**
+   * Returns the number of rows in this table.
+   */
+  countRows: () => Promise<number>
+
+  /**
+   * Delete rows from this table.
+   *
+   * @param filter A filter in the same format used by a sql WHERE clause.
+   */
+  delete: (filter: string) => Promise<void>
 }

 /**
  * A connection to a LanceDB database.
  */
-export class Connection {
+export class LocalConnection implements Connection {
   private readonly _uri: string
   private readonly _db: any

@@ -75,9 +161,9 @@ export class Connection {
   async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
     const tbl = await databaseOpenTable.call(this._db, name)
     if (embeddings !== undefined) {
-      return new Table(tbl, name, embeddings)
+      return new LocalTable(tbl, name, embeddings)
     } else {
-      return new Table(tbl, name)
+      return new LocalTable(tbl, name)
     }
   }

@@ -86,23 +172,29 @@ export class Connection {
    *
    * @param name The name of the table.
    * @param data Non-empty Array of Records to be inserted into the Table
+   * @param mode The write mode to use when creating the table.
    */
+  async createTable (name: string, data: Array<Record<string, unknown>>, mode?: WriteMode): Promise<Table>
+  async createTable (name: string, data: Array<Record<string, unknown>>, mode: WriteMode): Promise<Table>
-  async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
   /**
    * Creates a new Table and initialize it with new data.
    *
    * @param name The name of the table.
    * @param data Non-empty Array of Records to be inserted into the Table
+   * @param mode The write mode to use when creating the table.
    * @param embeddings An embedding function to use on this Table
    */
-  async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
-  async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
-    const tbl = await tableCreate.call(this._db, name, await fromRecordsToBuffer(data, embeddings))
+  async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
+  async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
+    if (mode === undefined) {
+      mode = WriteMode.Create
+    }
+    const tbl = await tableCreate.call(this._db, name, await fromRecordsToBuffer(data, embeddings), mode.toLowerCase())
     if (embeddings !== undefined) {
-      return new Table(tbl, name, embeddings)
+      return new LocalTable(tbl, name, embeddings)
     } else {
-      return new Table(tbl, name)
+      return new LocalTable(tbl, name)
     }
   }

@@ -111,9 +203,17 @@ export class Connection {
     await tableCreate.call(this._db, name, Buffer.from(await writer.toUint8Array()))
     return await this.openTable(name)
   }
+
+  /**
+   * Drop an existing table.
+   * @param name The name of the table to drop.
+   */
+  async dropTable (name: string): Promise<void> {
+    await databaseDropTable.call(this._db, name)
+  }
 }

-export class Table<T = number[]> {
+export class LocalTable<T = number[]> implements Table<T> {
   private readonly _tbl: any
   private readonly _name: string
   private readonly _embeddings?: EmbeddingFunction<T>
@@ -173,14 +273,25 @@ export class Table<T = number[]> {
   }

   /**
-   * @deprecated Use [Table.createIndex]
+   * Returns the number of rows in this table.
    */
-  async create_index (indexParams: VectorIndexParams): Promise<any> {
-    return await this.createIndex(indexParams)
+  async countRows (): Promise<number> {
+    return tableCountRows.call(this._tbl)
+  }
+
+  /**
+   * Delete rows from this table.
+   *
+   * @param filter A filter in the same format used by a sql WHERE clause.
+   */
+  async delete (filter: string): Promise<void> {
+    return tableDelete.call(this._tbl, filter)
   }
 }

-interface IvfPQIndexConfig {
+/// Config to build IVF_PQ index.
+///
+export interface IvfPQIndexConfig {
   /**
    * The column to be indexed
    */
@@ -225,6 +336,11 @@ interface IvfPQIndexConfig {
    */
   max_opq_iters?: number

+  /**
+   * Replace an existing index with the same name if it exists.
+   */
+  replace?: boolean
+
   type: 'ivf_pq'
 }

@@ -293,6 +409,8 @@ export class Query<T = number[]> {
     return this
   }

+  where = this.filter
+
   /** Return only the specified columns.
    *
    * @param value Only select the specified columns. If not specified, all columns will be returned.
@@ -323,6 +441,7 @@ export class Query<T = number[]> {

     const buffer = await tableSearch.call(this._tbl, this)
     const data = tableFromIPC(buffer)
+
     return data.toArray().map((entry: Record<string, unknown>) => {
       const newObject: Record<string, unknown> = {}
       Object.keys(entry).forEach((key: string) => {
@@ -337,8 +456,15 @@ export class Query<T = number[]> {
   }
 }

+/**
+ * Write mode for writing a table.
+ */
 export enum WriteMode {
+  /** Create a new {@link Table}. */
+  Create = 'create',
+  /** Overwrite the existing {@link Table} if presented. */
   Overwrite = 'overwrite',
+  /** Append new data to the table. */
   Append = 'append'
 }

@@ -354,5 +480,10 @@ export enum MetricType {
   /**
    * Cosine distance
    */
-  Cosine = 'cosine'
+  Cosine = 'cosine',
+
+  /**
+   * Dot product
+   */
+  Dot = 'dot'
 }
@@ -1,4 +1,4 @@
-// Copyright 2023 Lance Developers.
+// Copyright 2023 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -13,11 +13,16 @@
 // limitations under the License.

 import { describe } from 'mocha'
-import { assert } from 'chai'
 import { track } from 'temp'
+import * as chai from 'chai'
+import * as chaiAsPromised from 'chai-as-promised'

 import * as lancedb from '../index'
-import { type EmbeddingFunction, MetricType, Query } from '../index'
+import { type EmbeddingFunction, MetricType, Query, WriteMode } from '../index'
+
+const expect = chai.expect
+const assert = chai.assert
+chai.use(chaiAsPromised)

 describe('LanceDB client', function () {
   describe('when creating a connection to lancedb', function () {
@@ -64,13 +69,20 @@ describe('LanceDB client', function () {
       assert.equal(results[0].id, 1)
     })

-    it('uses a filter', async function () {
+    it('uses a filter / where clause', async function () {
+      // eslint-disable-next-line @typescript-eslint/explicit-function-return-type
+      const assertResults = (results: Array<Record<string, unknown>>) => {
+        assert.equal(results.length, 1)
+        assert.equal(results[0].id, 2)
+      }
+
       const uri = await createTestDB()
       const con = await lancedb.connect(uri)
       const table = await con.openTable('vectors')
-      const results = await table.search([0.1, 0.1]).filter('id == 2').execute()
-      assert.equal(results.length, 1)
-      assert.equal(results[0].id, 2)
+      let results = await table.search([0.1, 0.1]).filter('id == 2').execute()
+      assertResults(results)
+      results = await table.search([0.1, 0.1]).where('id == 2').execute()
+      assertResults(results)
     })

     it('select only a subset of columns', async function () {
@@ -103,9 +115,32 @@ describe('LanceDB client', function () {
       const tableName = `vectors_${Math.floor(Math.random() * 100)}`
       const table = await con.createTable(tableName, data)
       assert.equal(table.name, tableName)
+      assert.equal(await table.countRows(), 2)
+    })

-      const results = await table.search([0.1, 0.3]).execute()
-      assert.equal(results.length, 2)
+    it('use overwrite flag to overwrite existing table', async function () {
+      const dir = await track().mkdir('lancejs')
+      const con = await lancedb.connect(dir)
+
+      const data = [
+        { id: 1, vector: [0.1, 0.2], price: 10 },
+        { id: 2, vector: [1.1, 1.2], price: 50 }
+      ]
+
+      const tableName = 'overwrite'
+      await con.createTable(tableName, data, WriteMode.Create)
+
+      const newData = [
+        { id: 1, vector: [0.1, 0.2], price: 10 },
+        { id: 2, vector: [1.1, 1.2], price: 50 },
+        { id: 3, vector: [1.1, 1.2], price: 50 }
+      ]
+
+      await expect(con.createTable(tableName, newData)).to.be.rejectedWith(Error, 'already exists')
+
+      const table = await con.createTable(tableName, newData, WriteMode.Overwrite)
+      assert.equal(table.name, tableName)
+      assert.equal(await table.countRows(), 3)
     })

     it('appends records to an existing table ', async function () {
@@ -118,16 +153,14 @@ describe('LanceDB client', function () {
       ]

       const table = await con.createTable('vectors', data)
-      const results = await table.search([0.1, 0.3]).execute()
-      assert.equal(results.length, 2)
+      assert.equal(await table.countRows(), 2)

       const dataAdd = [
         { id: 3, vector: [2.1, 2.2], price: 10, name: 'c' },
         { id: 4, vector: [3.1, 3.2], price: 50, name: 'd' }
       ]
       await table.add(dataAdd)
-      const resultsAdd = await table.search([0.1, 0.3]).execute()
-      assert.equal(resultsAdd.length, 4)
+      assert.equal(await table.countRows(), 4)
     })

     it('overwrite all records in a table', async function () {
@@ -135,16 +168,25 @@ describe('LanceDB client', function () {
       const con = await lancedb.connect(uri)

       const table = await con.openTable('vectors')
-      const results = await table.search([0.1, 0.3]).execute()
-      assert.equal(results.length, 2)
+      assert.equal(await table.countRows(), 2)

       const dataOver = [
         { vector: [2.1, 2.2], price: 10, name: 'foo' },
         { vector: [3.1, 3.2], price: 50, name: 'bar' }
       ]
       await table.overwrite(dataOver)
-      const resultsAdd = await table.search([0.1, 0.3]).execute()
-      assert.equal(resultsAdd.length, 2)
+      assert.equal(await table.countRows(), 2)
+    })
+
+    it('can delete records from a table', async function () {
+      const uri = await createTestDB()
+      const con = await lancedb.connect(uri)
+
+      const table = await con.openTable('vectors')
+      assert.equal(await table.countRows(), 2)
+
+      await table.delete('price = 10')
+      assert.equal(await table.countRows(), 1)
     })
   })

@@ -153,8 +195,25 @@ describe('LanceDB client', function () {
       const uri = await createTestDB(32, 300)
       const con = await lancedb.connect(uri)
       const table = await con.openTable('vectors')
-      await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 })
+      await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
     }).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow
+
+    it('replace an existing index', async function () {
+      const uri = await createTestDB(16, 300)
+      const con = await lancedb.connect(uri)
+      const table = await con.openTable('vectors')
+
+      await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
+
+      // Replace should fail if the index already exists
+      await expect(table.createIndex({
+        type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2, replace: false
+      })
+      ).to.be.rejectedWith('LanceError(Index)')
+
+      // Default replace = true
+      await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
+    }).timeout(50_000)
   })

   describe('when using a custom embedding function', function () {
@@ -184,7 +243,7 @@ describe('LanceDB client', function () {
         { price: 10, name: 'foo' },
         { price: 50, name: 'bar' }
       ]
-      const table = await con.createTable('vectors', data, embeddings)
+      const table = await con.createTable('vectors', data, WriteMode.Create, embeddings)
       const results = await table.search('foo').execute()
       assert.equal(results.length, 2)
     })
@@ -223,3 +282,22 @@ async function createTestDB (numDimensions: number = 2, numRows: number = 2): Pr
   await con.createTable('vectors', data)
   return dir
 }
+
+describe('Drop table', function () {
+  it('drop a table', async function () {
+    const dir = await track().mkdir('lancejs')
+    const con = await lancedb.connect(dir)
+
+    const data = [
+      { price: 10, name: 'foo', vector: [1, 2, 3] },
+      { price: 50, name: 'bar', vector: [4, 5, 6] }
+    ]
+    await con.createTable('t1', data)
+    await con.createTable('t2', data)
+
+    assert.deepEqual(await con.tableNames(), ['t1', 't2'])
+
+    await con.dropTable('t1')
+    assert.deepEqual(await con.tableNames(), ['t2'])
+  })
+})
85
python/README.md
Normal file
@@ -0,0 +1,85 @@
# LanceDB

A Python library for [LanceDB](https://github.com/lancedb/lancedb).

## Installation

```bash
pip install lancedb
```

## Usage

### Basic Example

```python
import lancedb
db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>')
table = db.open_table('my_table')
results = table.search([0.1, 0.3]).limit(20).to_df()
print(results)
```

## Development

Create a virtual environment and activate it:

```bash
python -m venv venv
. ./venv/bin/activate
```

Install the necessary packages:

```bash
python -m pip install .
```

To run the unit tests:

```bash
pytest
```

To run the linters and automatically fix all errors:

```bash
black .
isort .
```

If any packages are missing, install them with:

```bash
pip install <PACKAGE_NAME>
```

___
For **Windows** users, there may be errors when installing packages, so these commands may be helpful:

Activate the virtual environment:
```bash
. .\venv\Scripts\activate
```

You may need to run the installs separately:
```bash
pip install -e .[tests]
pip install -e .[dev]
```

`tantivy` requires `rust` to be installed, so install it with `conda`, as it doesn't support Windows installation:
```bash
pip install wheel
pip install cargo
conda install rust
pip install tantivy
```

To run the unit tests:
```bash
pytest
```
@@ -11,16 +11,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from .db import URI, LanceDBConnection
+from typing import Optional
+
+from .db import URI, DBConnection, LanceDBConnection
+from .remote.db import RemoteDBConnection


-def connect(uri: URI) -> LanceDBConnection:
-    """Connect to a LanceDB instance at the given URI
+def connect(
+    uri: URI, *, api_key: Optional[str] = None, region: str = "us-west-2"
+) -> DBConnection:
+    """Connect to a LanceDB database.

     Parameters
     ----------
     uri: str or Path
         The uri of the database.
+    api_token: str, optional
+        If presented, connect to LanceDB cloud.
+        Otherwise, connect to a database on file system or cloud storage.

     Examples
     --------
@@ -34,9 +42,17 @@ def connect(uri: URI) -> LanceDBConnection:

     >>> db = lancedb.connect("s3://my-bucket/lancedb")

+    Connect to LanceDB cloud:
+
+    >>> db = lancedb.connect("db://my_database", api_key="ldb_...")
+
     Returns
     -------
-    conn : LanceDBConnection
+    conn : DBConnection
         A connection to a LanceDB database.
     """
+    if isinstance(uri, str) and uri.startswith("db://"):
+        if api_key is None:
+            raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
+        return RemoteDBConnection(uri, api_key, region)
     return LanceDBConnection(uri)
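Based on the `connect()` change above, usage now splits on the URI scheme. A short
sketch (the api key and database name are placeholders):

```python
import lancedb

# Local or object-store database: returns a LanceDBConnection.
db = lancedb.connect("data/sample-lancedb")

# LanceDB Cloud: a "db://" URI routes to RemoteDBConnection and requires an api_key.
remote = lancedb.connect("db://my_database", api_key="ldb_...", region="us-west-2")

# A "db://" URI without an api_key raises ValueError, per the check above.
try:
    lancedb.connect("db://my_database")
except ValueError as err:
    print(err)
```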
@@ -23,3 +23,13 @@ URI = Union[str, Path]
 # TODO support generator
 DATA = Union[List[dict], dict, pd.DataFrame]
 VECTOR_COLUMN_NAME = "vector"
+
+
+class Credential(str):
+    """Credential field"""
+
+    def __repr__(self) -> str:
+        return "********"
+
+    def __str__(self) -> str:
+        return "********"
@@ -1,10 +1,8 @@
-import builtins
 import os

 import pytest

 # import lancedb so we don't have to in every example
-import lancedb


 @pytest.fixture(autouse=True)
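The `contextualize` docstring updates in the next diff assume a small `data`
DataFrame of tokens. As a sketch only, reconstructed from the example outputs shown
below (the real test fixture may differ):

```python
# Sketch only: `data` is inferred from the docstring outputs in the diff below.
import pandas as pd
from lancedb.context import contextualize

tokens = "The quick brown fox jumped over the lazy dog".split() + ["I", "love", "sandwiches"]
doc_ids = [1] * 9 + [2] * 3
data = pd.DataFrame({"token": tokens, "document_id": doc_ids})

df = (
    contextualize(data)
    .window(6)
    .stride(3)
    .min_window_size(4)
    .text_col("token")
    .groupby("document_id")
    .to_df()
)
print(df)
```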
@@ -13,7 +13,8 @@
 from __future__ import annotations

 import pandas as pd
-from .exceptions import MissingValueError, MissingColumnError
+from .exceptions import MissingColumnError, MissingValueError


 def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
@@ -42,34 +43,38 @@ def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
     paragraphs, messages, etc.

     >>> contextualize(data).window(3).stride(1).text_col('token').to_df()
     token document_id
     0 The quick brown 1
     1 quick brown fox 1
     2 brown fox jumped 1
     3 fox jumped over 1
     4 jumped over the 1
     5 over the lazy 1
     6 the lazy dog 1
     7 lazy dog I 1
     8 dog I love 1
-    >>> contextualize(data).window(7).stride(1).text_col('token').to_df()
+    9 I love sandwiches 2
+    10 love sandwiches 2
+    >>> contextualize(data).window(7).stride(1).min_window_size(7).text_col('token').to_df()
     token document_id
     0 The quick brown fox jumped over the 1
     1 quick brown fox jumped over the lazy 1
     2 brown fox jumped over the lazy dog 1
     3 fox jumped over the lazy dog I 1
     4 jumped over the lazy dog I love 1
+    5 over the lazy dog I love sandwiches 1

     ``stride`` determines how many rows to skip between each window start. This can
     be used to reduce the total number of windows generated.

     >>> contextualize(data).window(4).stride(2).text_col('token').to_df()
     token document_id
     0 The quick brown fox 1
     2 brown fox jumped over 1
     4 jumped over the lazy 1
     6 the lazy dog I 1
+    8 dog I love sandwiches 1
+    10 love sandwiches 2

     ``groupby`` determines how to group the rows. For example, we would like to have
     context windows that don't cross document boundaries. In this case, we can
@@ -80,6 +85,25 @@ def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
     0 The quick brown fox 1
     2 brown fox jumped over 1
     4 jumped over the lazy 1
+    6 the lazy dog 1
+    9 I love sandwiches 2
+
+    ``min_window_size`` determines the minimum size of the context windows that are generated
+    This can be used to trim the last few context windows which have size less than
+    ``min_window_size``. By default context windows of size 1 are skipped.
+
+    >>> contextualize(data).window(6).stride(3).text_col('token').groupby('document_id').to_df()
+    token document_id
+    0 The quick brown fox jumped over 1
+    3 fox jumped over the lazy dog 1
+    6 the lazy dog 1
+    9 I love sandwiches 2
+
+    >>> contextualize(data).window(6).stride(3).min_window_size(4).text_col('token').groupby('document_id').to_df()
+    token document_id
+    0 The quick brown fox jumped over 1
+    3 fox jumped over the lazy dog 1
+
     """
     return Contextualizer(raw_df)

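The new ``min_window_size`` option composes with ``window``, ``stride`` and ``groupby`` as in the doctests above. A minimal end-to-end sketch, assuming ``contextualize`` is importable from ``lancedb.context`` and reusing the same 12-token, two-document frame:

import pandas as pd
from lancedb.context import contextualize

tokens = ("The quick brown fox jumped over the lazy dog".split()
          + "I love sandwiches".split())
data = pd.DataFrame({"token": tokens, "document_id": [1] * 9 + [2] * 3})

# 6-token windows, starting every 3 tokens, never crossing documents,
# and dropping any trailing window shorter than 4 tokens
windows = (
    contextualize(data)
    .window(6)
    .stride(3)
    .min_window_size(4)
    .text_col("token")
    .groupby("document_id")
    .to_df()
)
print(windows)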
@@ -92,6 +116,7 @@ class Contextualizer:
         self._groupby = None
         self._stride = None
         self._window = None
+        self._min_window_size = 2
         self._raw_df = raw_df

     def window(self, window: int) -> Contextualizer:
@@ -139,6 +164,17 @@ class Contextualizer:
         self._text_col = text_col
         return self

+    def min_window_size(self, min_window_size: int) -> Contextualizer:
+        """Set the (optional) minimum window size for the context window.
+
+        Parameters
+        ----------
+        min_window_size: int
+            The min_window_size.
+        """
+        self._min_window_size = min_window_size
+        return self
+
     def to_df(self) -> pd.DataFrame:
         """Create the context windows and return a DataFrame."""

@@ -159,12 +195,19 @@ class Contextualizer:

         def process_group(grp):
             # For each group, create the text rolling window
+            # with values of size >= min_window_size
             text = grp[self._text_col].values
-            contexts = grp.iloc[: -self._window : self._stride, :].copy()
+            contexts = grp.iloc[:: self._stride, :].copy()
-            contexts[self._text_col] = [
+            windows = [
-                " ".join(text[start_i : start_i + self._window])
+                " ".join(text[start_i : min(start_i + self._window, len(grp))])
-                for start_i in range(0, len(grp) - self._window, self._stride)
+                for start_i in range(0, len(grp), self._stride)
+                if start_i + self._window <= len(grp)
+                or len(grp) - start_i >= self._min_window_size
             ]
+            # if last few rows dropped
+            if len(windows) < len(contexts):
+                contexts = contexts.iloc[: len(windows)]
+            contexts[self._text_col] = windows
             return contexts

         if self._groupby is None:

@@ -13,105 +13,38 @@

 from __future__ import annotations

+import functools
 import os
+from abc import ABC, abstractmethod
 from pathlib import Path
-import os

 import pyarrow as pa
 from pyarrow import fs

 from .common import DATA, URI
-from .table import LanceTable
+from .table import LanceTable, Table
-from .util import get_uri_scheme, get_uri_location
+from .util import get_uri_location, get_uri_scheme


-class LanceDBConnection:
+class DBConnection(ABC):
-    """
+    """An active LanceDB connection interface."""
-    A connection to a LanceDB database.
-
-    Parameters
-    ----------
-    uri: str or Path
-        The root uri of the database.
-
-    Examples
-    --------
-    >>> import lancedb
-    >>> db = lancedb.connect("./.lancedb")
-    >>> db.create_table("my_table", data=[{"vector": [1.1, 1.2], "b": 2},
-    ...                                   {"vector": [0.5, 1.3], "b": 4}])
-    LanceTable(my_table)
-    >>> db.create_table("another_table", data=[{"vector": [0.4, 0.4], "b": 6}])
-    LanceTable(another_table)
-    >>> db.table_names()
-    ['another_table', 'my_table']
-    >>> len(db)
-    2
-    >>> db["my_table"]
-    LanceTable(my_table)
-    >>> "my_table" in db
-    True
-    >>> db.drop_table("my_table")
-    >>> db.drop_table("another_table")
-    """
-
-    def __init__(self, uri: URI):
-        is_local = isinstance(uri, Path) or get_uri_scheme(uri) == "file"
-        if is_local:
-            if isinstance(uri, str):
-                uri = Path(uri)
-            uri = uri.expanduser().absolute()
-            Path(uri).mkdir(parents=True, exist_ok=True)
-        self._uri = str(uri)
-
-    @property
-    def uri(self) -> str:
-        return self._uri
-
+    @abstractmethod
     def table_names(self) -> list[str]:
-        """Get the names of all tables in the database.
+        """List all table names in the database."""
+        pass
-
-        Returns
-        -------
-        list of str
-            A list of table names.
-        """
-        try:
-            filesystem, path = fs.FileSystem.from_uri(self.uri)
-        except pa.ArrowInvalid:
-            raise NotImplementedError("Unsupported scheme: " + self.uri)
-
-        try:
-            paths = filesystem.get_file_info(
-                fs.FileSelector(get_uri_location(self.uri))
-            )
-        except FileNotFoundError:
-            # It is ok if the file does not exist since it will be created
-            paths = []
-        tables = [
-            os.path.splitext(file_info.base_name)[0]
-            for file_info in paths
-            if file_info.extension == "lance"
-        ]
-        return tables
-
-    def __len__(self) -> int:
-        return len(self.table_names())
-
-    def __contains__(self, name: str) -> bool:
-        return name in self.table_names()
-
-    def __getitem__(self, name: str) -> LanceTable:
-        return self.open_table(name)
-
+    @abstractmethod
     def create_table(
         self,
         name: str,
         data: DATA = None,
         schema: pa.Schema = None,
         mode: str = "create",
-    ) -> LanceTable:
+        on_bad_vectors: str = "error",
-        """Create a table in the database.
+        fill_value: float = 0.0,
+    ) -> Table:
+        """Create a [Table][lancedb.table.Table] in the database.

         Parameters
         ----------
@@ -122,9 +55,14 @@ class LanceDBConnection:
         schema: pyarrow.Schema; optional
             The schema of the table.
         mode: str; default "create"
-            The mode to use when creating the table.
+            The mode to use when creating the table. Can be either "create" or "overwrite".
             By default, if the table already exists, an exception is raised.
             If you want to overwrite the table, use mode="overwrite".
+        on_bad_vectors: str, default "error"
+            What to do if any of the vectors are not the same size or contains NaNs.
+            One of "error", "drop", "fill".
+        fill_value: float
+            The value to use when filling vectors. Only used if on_bad_vectors="fill".

         Note
         ----
@@ -201,10 +139,235 @@ class LanceDBConnection:
        lat: [[45.5,40.1]]
        long: [[-122.7,-74.1]]
        """
+        raise NotImplementedError
+
+    def __getitem__(self, name: str) -> LanceTable:
+        return self.open_table(name)
+
+    def open_table(self, name: str) -> Table:
+        """Open a Lance Table in the database.
+
+        Parameters
+        ----------
+        name: str
+            The name of the table.
+
+        Returns
+        -------
+        A LanceTable object representing the table.
+        """
+        raise NotImplementedError
+
+    def drop_table(self, name: str):
+        """Drop a table from the database.
+
+        Parameters
+        ----------
+        name: str
+            The name of the table.
+        """
+        raise NotImplementedError
+
+
+class LanceDBConnection(DBConnection):
+    """
+    A connection to a LanceDB database.
+
+    Parameters
+    ----------
+    uri: str or Path
+        The root uri of the database.
+
+    Examples
+    --------
+    >>> import lancedb
+    >>> db = lancedb.connect("./.lancedb")
+    >>> db.create_table("my_table", data=[{"vector": [1.1, 1.2], "b": 2},
+    ...                                   {"vector": [0.5, 1.3], "b": 4}])
+    LanceTable(my_table)
+    >>> db.create_table("another_table", data=[{"vector": [0.4, 0.4], "b": 6}])
+    LanceTable(another_table)
+    >>> sorted(db.table_names())
+    ['another_table', 'my_table']
+    >>> len(db)
+    2
+    >>> db["my_table"]
+    LanceTable(my_table)
+    >>> "my_table" in db
+    True
+    >>> db.drop_table("my_table")
+    >>> db.drop_table("another_table")
+    """
+
+    def __init__(self, uri: URI):
+        if not isinstance(uri, Path):
+            scheme = get_uri_scheme(uri)
+        is_local = isinstance(uri, Path) or scheme == "file"
+        if is_local:
+            if isinstance(uri, str):
+                uri = Path(uri)
+            uri = uri.expanduser().absolute()
+            Path(uri).mkdir(parents=True, exist_ok=True)
+        self._uri = str(uri)
+
+        self._entered = False
+
+    @property
+    def uri(self) -> str:
+        return self._uri
+
+    def table_names(self) -> list[str]:
+        """Get the names of all tables in the database.
+
+        Returns
+        -------
+        list of str
+            A list of table names.
+        """
+        try:
+            filesystem, path = fs.FileSystem.from_uri(self.uri)
+        except pa.ArrowInvalid:
+            raise NotImplementedError("Unsupported scheme: " + self.uri)
+
+        try:
+            paths = filesystem.get_file_info(
+                fs.FileSelector(get_uri_location(self.uri))
+            )
+        except FileNotFoundError:
+            # It is ok if the file does not exist since it will be created
+            paths = []
+        tables = [
+            os.path.splitext(file_info.base_name)[0]
+            for file_info in paths
+            if file_info.extension == "lance"
+        ]
+        return tables
+
+    def __len__(self) -> int:
+        return len(self.table_names())
+
+    def __contains__(self, name: str) -> bool:
+        return name in self.table_names()
+
+    def create_table(
+        self,
+        name: str,
+        data: DATA = None,
+        schema: pa.Schema = None,
+        mode: str = "create",
+        on_bad_vectors: str = "error",
+        fill_value: float = 0.0,
+    ) -> LanceTable:
+        """Create a table in the database.
+
+        Parameters
+        ----------
+        name: str
+            The name of the table.
+        data: list, tuple, dict, pd.DataFrame; optional
+            The data to insert into the table.
+        schema: pyarrow.Schema; optional
+            The schema of the table.
+        mode: str; default "create"
+            The mode to use when creating the table. Can be either "create" or "overwrite".
+            By default, if the table already exists, an exception is raised.
+            If you want to overwrite the table, use mode="overwrite".
+        on_bad_vectors: str, default "error"
+            What to do if any of the vectors are not the same size or contains NaNs.
+            One of "error", "drop", "fill".
+        fill_value: float
+            The value to use when filling vectors. Only used if on_bad_vectors="fill".
+
+        Note
+        ----
+        The vector index won't be created by default.
+        To create the index, call the `create_index` method on the table.
+
+        Returns
+        -------
+        LanceTable
+            A reference to the newly created table.
+
+        Examples
+        --------
+
+        Can create with list of tuples or dictionaries:
+
+        >>> import lancedb
+        >>> db = lancedb.connect("./.lancedb")
+        >>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
+        ...         {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}]
+        >>> db.create_table("my_table", data)
+        LanceTable(my_table)
+        >>> db["my_table"].head()
+        pyarrow.Table
+        vector: fixed_size_list<item: float>[2]
+          child 0, item: float
+        lat: double
+        long: double
+        ----
+        vector: [[[1.1,1.2],[0.2,1.8]]]
+        lat: [[45.5,40.1]]
+        long: [[-122.7,-74.1]]
+
+        You can also pass a pandas DataFrame:
+
+        >>> import pandas as pd
+        >>> data = pd.DataFrame({
+        ...    "vector": [[1.1, 1.2], [0.2, 1.8]],
+        ...    "lat": [45.5, 40.1],
+        ...    "long": [-122.7, -74.1]
+        ... })
+        >>> db.create_table("table2", data)
+        LanceTable(table2)
+        >>> db["table2"].head()
+        pyarrow.Table
+        vector: fixed_size_list<item: float>[2]
+          child 0, item: float
+        lat: double
+        long: double
+        ----
+        vector: [[[1.1,1.2],[0.2,1.8]]]
+        lat: [[45.5,40.1]]
+        long: [[-122.7,-74.1]]
+
+        Data is converted to Arrow before being written to disk. For maximum
+        control over how data is saved, either provide the PyArrow schema to
+        convert to or else provide a PyArrow table directly.
+
+        >>> custom_schema = pa.schema([
+        ...   pa.field("vector", pa.list_(pa.float32(), 2)),
+        ...   pa.field("lat", pa.float32()),
+        ...   pa.field("long", pa.float32())
+        ... ])
+        >>> db.create_table("table3", data, schema = custom_schema)
+        LanceTable(table3)
+        >>> db["table3"].head()
+        pyarrow.Table
+        vector: fixed_size_list<item: float>[2]
+          child 0, item: float
+        lat: float
+        long: float
+        ----
+        vector: [[[1.1,1.2],[0.2,1.8]]]
+        lat: [[45.5,40.1]]
+        long: [[-122.7,-74.1]]
+        """
+        if mode.lower() not in ["create", "overwrite"]:
+            raise ValueError("mode must be either 'create' or 'overwrite'")
+
         if data is not None:
-            tbl = LanceTable.create(self, name, data, schema, mode=mode)
+            tbl = LanceTable.create(
+                self,
+                name,
+                data,
+                schema,
+                mode=mode,
+                on_bad_vectors=on_bad_vectors,
+                fill_value=fill_value,
+            )
         else:
-            tbl = LanceTable(self, name)
+            tbl = LanceTable.open(self, name)
         return tbl

     def open_table(self, name: str) -> LanceTable:
@@ -219,7 +382,7 @@ class LanceDBConnection:
        -------
        A LanceTable object representing the table.
        """
-        return LanceTable(self, name)
+        return LanceTable.open(self, name)

     def drop_table(self, name: str):
        """Drop a table from the database.
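A short usage sketch of the connection API described above (the local path, table name, and fill value are arbitrary): rows whose vectors have the wrong length or contain NaNs are replaced with fill_value instead of raising.

import lancedb

db = lancedb.connect("./.lancedb")
table = db.create_table(
    "demo",
    data=[
        {"vector": [1.1, 1.2], "label": "a"},
        {"vector": [0.5, float("nan")], "label": "b"},
    ],
    mode="overwrite",
    on_bad_vectors="fill",
    fill_value=0.0,
)
print(db.table_names())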
@@ -10,16 +10,47 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 from __future__ import annotations
-from typing import Literal
+from typing import List, Literal, Optional, Union

 import numpy as np
 import pandas as pd
 import pyarrow as pa
+from pydantic import BaseModel

 from .common import VECTOR_COLUMN_NAME


+class Query(BaseModel):
+    """A Query"""
+
+    vector_column: str = VECTOR_COLUMN_NAME
+
+    # vector to search for
+    vector: List[float]
+
+    # sql filter to refine the query with
+    filter: Optional[str] = None
+
+    # top k results to return
+    k: int
+
+    # # metrics
+    metric: str = "L2"
+
+    # which columns to return in the results
+    columns: Optional[List[str]] = None
+
+    # optional query parameters for tuning the results,
+    # e.g. `{"nprobes": "10", "refine_factor": "10"}`
+    nprobes: int = 10
+
+    # Refine factor.
+    refine_factor: Optional[int] = None


 class LanceQueryBuilder:
     """
     A builder for nearest neighbor queries for LanceDB.
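The new Query model is what the builder now hands to the table layer. A sketch of what it carries, with illustrative values (the pydantic v1 dict() call matches how the REST client later serializes it):

from lancedb.query import Query

q = Query(
    vector=[0.4, 0.4],
    filter="b > 1",
    k=10,
    metric="L2",
    columns=["b"],
    nprobes=10,
    refine_factor=None,
)
print(q.dict(exclude_none=True))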
@@ -43,7 +74,12 @@ class LanceQueryBuilder:
     0 6 [0.4, 0.4] 0.0
     """

-    def __init__(self, table: "lancedb.table.LanceTable", query: np.ndarray):
+    def __init__(
+        self,
+        table: "lancedb.table.Table",
+        query: Union[np.ndarray, str],
+        vector_column: str = VECTOR_COLUMN_NAME,
+    ):
         self._metric = "L2"
         self._nprobes = 20
         self._refine_factor = None
@@ -52,6 +88,7 @@ class LanceQueryBuilder:
         self._limit = 10
         self._columns = None
         self._where = None
+        self._vector_column = vector_column

     def limit(self, limit: int) -> LanceQueryBuilder:
         """Set the maximum number of results to return.
@@ -168,24 +205,33 @@ class LanceQueryBuilder:
         and also the "score" column which is the distance between the query
         vector and the returned vector.
         """
-        ds = self._table.to_lance()
-        tbl = ds.to_table(
+        return self.to_arrow().to_pandas()
-            columns=self._columns,
+
+    def to_arrow(self) -> pa.Table:
+        """
+        Execute the query and return the results as an
+        [Apache Arrow Table](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table).
+
+        In addition to the selected columns, LanceDB also returns a vector
+        and also the "score" column which is the distance between the query
+        vector and the returned vectors.
+        """
+        vector = self._query if isinstance(self._query, list) else self._query.tolist()
+        query = Query(
+            vector=vector,
             filter=self._where,
-            nearest={
+            k=self._limit,
-                "column": VECTOR_COLUMN_NAME,
+            metric=self._metric,
-                "q": self._query,
+            columns=self._columns,
-                "k": self._limit,
+            nprobes=self._nprobes,
-                "metric": self._metric,
+            refine_factor=self._refine_factor,
-                "nprobes": self._nprobes,
-                "refine_factor": self._refine_factor,
-            },
         )
-        return tbl.to_pandas()
+        return self._table._execute_query(query)


 class LanceFtsQueryBuilder(LanceQueryBuilder):
-    def to_df(self) -> pd.DataFrame:
+    def to_arrow(self) -> pd.Table:
         try:
             import tantivy
         except ImportError:
@@ -202,8 +248,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
         # get the scores and doc ids
         row_ids, scores = search_index(index, self._query, self._limit)
         if len(row_ids) == 0:
-            return pd.DataFrame()
+            empty_schema = pa.schema([pa.field("score", pa.float32())])
+            return pa.Table.from_pylist([], schema=empty_schema)
         scores = pa.array(scores)
         output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
         output_tbl = output_tbl.append_column("score", scores)
-        return output_tbl.to_pandas()
+        return output_tbl
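With to_df() now delegating to to_arrow(), both entry points run the same Query. A small sketch, assuming a local database with an existing "my_table":

import lancedb

db = lancedb.connect("./.lancedb")
tbl = db.open_table("my_table")
results = tbl.search([0.4, 0.4]).limit(5).to_arrow()   # pyarrow.Table
df = tbl.search([0.4, 0.4]).limit(5).to_df()           # same query, pandas DataFrame
print(results.schema)
print(df.head())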
python/lancedb/remote/__init__.py (new file, 60 lines)
@@ -0,0 +1,60 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
from typing import List, Optional

import attr
import pyarrow as pa
from pydantic import BaseModel

__all__ = ["LanceDBClient", "VectorQuery", "VectorQueryResult"]


class VectorQuery(BaseModel):
    # vector to search for
    vector: List[float]

    # sql filter to refine the query with
    filter: Optional[str] = None

    # top k results to return
    k: int

    # # metrics
    _metric: str = "L2"

    # which columns to return in the results
    columns: Optional[List[str]] = None

    # optional query parameters for tuning the results,
    # e.g. `{"nprobes": "10", "refine_factor": "10"}`
    nprobes: int = 10

    refine_factor: Optional[int] = None


@attr.define
class VectorQueryResult:
    # for now the response is deserialized directly into an Arrow table
    tbl: pa.Table

    def to_arrow(self) -> pa.Table:
        return self.tbl


class LanceDBClient(abc.ABC):
    @abc.abstractmethod
    def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
        """Query the LanceDB server for the given table and query."""
        pass
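A sketch of the request/response pair these types model (field values are illustrative):

from lancedb.remote import VectorQuery

query = VectorQuery(vector=[0.1, 0.2], k=5, columns=["label"])
payload = query.dict(exclude_none=True)   # the JSON body the REST client posts
# A server response is wrapped as VectorQueryResult(tbl=<arrow table>) and
# read back out with result.to_arrow().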
python/lancedb/remote/client.py (new file, 83 lines)
@@ -0,0 +1,83 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import functools
from typing import Dict

import aiohttp
import attr
import pyarrow as pa

from lancedb.common import Credential
from lancedb.remote import VectorQuery, VectorQueryResult
from lancedb.remote.errors import LanceDBClientError


def _check_not_closed(f):
    @functools.wraps(f)
    def wrapped(self, *args, **kwargs):
        if self.closed:
            raise ValueError("Connection is closed")
        return f(self, *args, **kwargs)

    return wrapped


@attr.define(slots=False)
class RestfulLanceDBClient:
    db_name: str
    region: str
    api_key: Credential
    closed: bool = attr.field(default=False, init=False)

    @functools.cached_property
    def session(self) -> aiohttp.ClientSession:
        url = f"https://{self.db_name}.{self.region}.api.lancedb.com"
        return aiohttp.ClientSession(url)

    async def close(self):
        await self.session.close()
        self.closed = True

    @functools.cached_property
    def headers(self) -> Dict[str, str]:
        return {
            "x-api-key": self.api_key,
        }

    @_check_not_closed
    async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
        async with self.session.post(
            f"/1/table/{table_name}/",
            json=query.dict(exclude_none=True),
            headers=self.headers,
        ) as resp:
            resp: aiohttp.ClientResponse = resp
            if 400 <= resp.status < 500:
                raise LanceDBClientError(
                    f"Bad Request: {resp.status}, error: {await resp.text()}"
                )
            if 500 <= resp.status < 600:
                raise LanceDBClientError(
                    f"Internal Server Error: {resp.status}, error: {await resp.text()}"
                )
            if resp.status != 200:
                raise LanceDBClientError(
                    f"Unknown Error: {resp.status}, error: {await resp.text()}"
                )

            resp_body = await resp.read()
            with pa.ipc.open_file(pa.BufferReader(resp_body)) as reader:
                tbl = reader.read_all()
        return VectorQueryResult(tbl)
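A minimal sketch of how this client is meant to be driven; the database name, region, and API key are placeholders and the hosted endpoint must exist for the call to succeed:

import asyncio

from lancedb.remote import VectorQuery
from lancedb.remote.client import RestfulLanceDBClient


async def main():
    client = RestfulLanceDBClient("my-db", "us-east-1", "sk-...")
    try:
        result = await client.query("my_table", VectorQuery(vector=[0.1, 0.2], k=5))
        print(result.to_arrow())
    finally:
        await client.close()


asyncio.run(main())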
python/lancedb/remote/db.py (new file, 71 lines)
@@ -0,0 +1,71 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List
from urllib.parse import urlparse

import pyarrow as pa

from lancedb.common import DATA
from lancedb.db import DBConnection
from lancedb.table import Table

from .client import RestfulLanceDBClient


class RemoteDBConnection(DBConnection):
    """A connection to a remote LanceDB database."""

    def __init__(self, db_url: str, api_key: str, region: str):
        """Connect to a remote LanceDB database."""
        parsed = urlparse(db_url)
        if parsed.scheme != "db":
            raise ValueError(f"Invalid scheme: {parsed.scheme}, only accepts db://")
        self.db_name = parsed.netloc
        self.api_key = api_key
        self._client = RestfulLanceDBClient(self.db_name, region, api_key)

    def __repr__(self) -> str:
        return f"RemoteDBConnection(name={self.db_name})"

    def table_names(self) -> List[str]:
        raise NotImplementedError

    def open_table(self, name: str) -> Table:
        """Open a Lance Table in the database.

        Parameters
        ----------
        name: str
            The name of the table.

        Returns
        -------
        A LanceTable object representing the table.
        """
        from .table import RemoteTable

        # TODO: check if table exists

        return RemoteTable(self, name)

    def create_table(
        self,
        name: str,
        data: DATA = None,
        schema: pa.Schema = None,
        mode: str = "create",
        on_bad_vectors: str = "error",
        fill_value: float = 0.0,
    ) -> Table:
        raise NotImplementedError
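The db:// scheme check above implies connections are created along these lines (URL, key, and region are placeholders):

from lancedb.remote.db import RemoteDBConnection

conn = RemoteDBConnection("db://my-db", api_key="sk-...", region="us-east-1")
table = conn.open_table("my_table")   # returns a RemoteTable backed by the REST client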
python/lancedb/remote/errors.py (new file, 16 lines)
@@ -0,0 +1,16 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class LanceDBClientError(RuntimeError):
    pass
python/lancedb/remote/table.py (new file, 70 lines)
@@ -0,0 +1,70 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
from typing import Union

import pyarrow as pa

from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME

from ..query import LanceQueryBuilder, Query
from ..table import Query, Table
from .db import RemoteDBConnection


class RemoteTable(Table):
    def __init__(self, conn: RemoteDBConnection, name: str):
        self._conn = conn
        self._name = name

    def __repr__(self) -> str:
        return f"RemoteTable({self._conn.db_name}.{self.name})"

    def schema(self) -> pa.Schema:
        raise NotImplementedError

    def to_arrow(self) -> pa.Table:
        raise NotImplementedError

    def create_index(
        self,
        metric="L2",
        num_partitions=256,
        num_sub_vectors=96,
        vector_column_name: str = VECTOR_COLUMN_NAME,
        replace: bool = True,
    ):
        raise NotImplementedError

    def add(
        self,
        data: DATA,
        mode: str = "append",
        on_bad_vectors: str = "error",
        fill_value: float = 0.0,
    ) -> int:
        raise NotImplementedError

    def search(
        self, query: Union[VEC, str], vector_column: str = VECTOR_COLUMN_NAME
    ) -> LanceQueryBuilder:
        return LanceQueryBuilder(self, query, vector_column)

    def _execute_query(self, query: Query) -> pa.Table:
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = asyncio.get_event_loop()
        result = self._conn._client.query(self._name, query)
        return loop.run_until_complete(result).to_arrow()
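End to end, a remote search funnels through the query builder into _execute_query; a sketch, assuming the remote table opened above actually exists:

# builder -> Query -> RestfulLanceDBClient.query -> Arrow IPC -> pandas
df = table.search([0.1, 0.2]).limit(5).to_df()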
@@ -14,7 +14,7 @@
 from __future__ import annotations

 import os
-import shutil
+from abc import ABC, abstractmethod
 from functools import cached_property
 from typing import List, Union

@@ -22,36 +22,41 @@ import lance
 import numpy as np
 import pandas as pd
 import pyarrow as pa
+import pyarrow.compute as pc
+import pyarrow.fs
 from lance import LanceDataset
 from lance.vector import vec_to_table

 from .common import DATA, VEC, VECTOR_COLUMN_NAME
-from .query import LanceFtsQueryBuilder, LanceQueryBuilder
+from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query
-from .util import get_uri_scheme


-def _sanitize_data(data, schema):
+def _sanitize_data(data, schema, on_bad_vectors, fill_value):
     if isinstance(data, list):
         data = pa.Table.from_pylist(data)
-        data = _sanitize_schema(data, schema=schema)
+        data = _sanitize_schema(
+            data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
+        )
     if isinstance(data, dict):
         data = vec_to_table(data)
     if isinstance(data, pd.DataFrame):
         data = pa.Table.from_pandas(data)
-        data = _sanitize_schema(data, schema=schema)
+        data = _sanitize_schema(
+            data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
+        )
     if not isinstance(data, pa.Table):
         raise TypeError(f"Unsupported data type: {type(data)}")
     return data


-class LanceTable:
+class Table(ABC):
     """
-    A table in a LanceDB database.
+    A [Table](Table) is a collection of Records in a LanceDB [Database](Database).

     Examples
     --------

-    Create using [LanceDBConnection.create_table][lancedb.LanceDBConnection.create_table]
+    Create using [DBConnection.create_table][lancedb.DBConnection.create_table]
     (more examples in that method's documentation).

     >>> import lancedb
@@ -66,12 +71,12 @@ class LanceTable:
     vector: [[[1.1,1.2]]]
     b: [[2]]

-    Can append new data with [LanceTable.add][lancedb.table.LanceTable.add].
+    Can append new data with [Table.add()][lancedb.table.Table.add].

     >>> table.add([{"vector": [0.5, 1.3], "b": 4}])
     2

-    Can query the table with [LanceTable.search][lancedb.table.LanceTable.search].
+    Can query the table with [Table.search][lancedb.table.Table.search].

     >>> table.search([0.4, 0.4]).select(["b"]).to_df()
     b vector score
@@ -79,8 +84,128 @@ class LanceTable:
     1 2 [1.1, 1.2] 1.13

     Search queries are much faster when an index is created. See
-    [LanceTable.create_index][lancedb.table.LanceTable.create_index].
+    [Table.create_index][lancedb.table.Table.create_index].
+    """
+
+    @abstractmethod
+    def schema(self) -> pa.Schema:
+        """Return the [Arrow Schema](https://arrow.apache.org/docs/python/api/datatypes.html#) of
+        this [Table](Table)
+
+        """
+        raise NotImplementedError
+
+    def to_pandas(self) -> pd.DataFrame:
+        """Return the table as a pandas DataFrame.
+
+        Returns
+        -------
+        pd.DataFrame
+        """
+        return self.to_arrow().to_pandas()
+
+    @abstractmethod
+    def to_arrow(self) -> pa.Table:
+        """Return the table as a pyarrow Table.
+
+        Returns
+        -------
+        pa.Table
+        """
+        raise NotImplementedError
+
+    def create_index(
+        self,
+        metric="L2",
+        num_partitions=256,
+        num_sub_vectors=96,
+        vector_column_name: str = VECTOR_COLUMN_NAME,
+        replace: bool = True,
+    ):
+        """Create an index on the table.
+
+        Parameters
+        ----------
+        metric: str, default "L2"
+            The distance metric to use when creating the index.
+            Valid values are "L2", "cosine", or "dot".
+            L2 is euclidean distance.
+        num_partitions: int
+            The number of IVF partitions to use when creating the index.
+            Default is 256.
+        num_sub_vectors: int
+            The number of PQ sub-vectors to use when creating the index.
+            Default is 96.
+        vector_column_name: str, default "vector"
+            The vector column name to create the index.
+        replace: bool, default True
+            If True, replace the existing index if it exists.
+            If False, raise an error if duplicate index exists.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def add(
+        self,
+        data: DATA,
+        mode: str = "append",
+        on_bad_vectors: str = "error",
+        fill_value: float = 0.0,
+    ) -> int:
+        """Add more data to the [Table](Table).
+
+        Parameters
+        ----------
+        data: list-of-dict, dict, pd.DataFrame
+            The data to insert into the table.
+        mode: str
+            The mode to use when writing the data. Valid values are
+            "append" and "overwrite".
+        on_bad_vectors: str, default "error"
+            What to do if any of the vectors are not the same size or contains NaNs.
+            One of "error", "drop", "fill".
+        fill_value: float, default 0.
+            The value to use when filling vectors. Only used if on_bad_vectors="fill".
+
+        Returns
+        -------
+        int
+            The number of vectors in the table.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def search(
+        self, query: Union[VEC, str], vector_column: str = VECTOR_COLUMN_NAME
+    ) -> LanceQueryBuilder:
+        """Create a search query to find the nearest neighbors
+        of the given query vector.
+
+        Parameters
+        ----------
+        query: list, np.ndarray
+            The query vector.
+        vector_column: str, default "vector"
+            The name of the vector column to search.
+
+        Returns
+        -------
+        LanceQueryBuilder
+            A query builder object representing the query.
+            Once executed, the query returns selected columns, the vector,
+            and also the "score" column which is the distance between the query
+            vector and the returned vector.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def _execute_query(self, query: Query) -> pa.Table:
+        pass
+
+
+class LanceTable(Table):
+    """
+    A table in a LanceDB database.
     """

     def __init__(
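Any concrete backend now only has to fill in the abstract pieces of Table. A minimal, hypothetical in-memory subclass (not part of LanceDB) might look like this:

import pyarrow as pa

from lancedb.query import LanceQueryBuilder, Query
from lancedb.table import Table


class InMemoryTable(Table):
    """Hypothetical example backend used only to illustrate the interface."""

    def __init__(self, tbl: pa.Table):
        self._tbl = tbl

    def schema(self) -> pa.Schema:
        return self._tbl.schema

    def to_arrow(self) -> pa.Table:
        return self._tbl

    def add(self, data, mode="append", on_bad_vectors="error", fill_value=0.0) -> int:
        # simplified: accepts a list of dicts only
        self._tbl = pa.concat_tables([self._tbl, pa.Table.from_pylist(data)])
        return len(self._tbl)

    def search(self, query, vector_column="vector") -> LanceQueryBuilder:
        return LanceQueryBuilder(self, query, vector_column)

    def _execute_query(self, query: Query) -> pa.Table:
        # a brute-force scan and distance computation would go here
        raise NotImplementedError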
@@ -92,7 +217,8 @@ class LanceTable:

     def _reset_dataset(self):
         try:
-            del self.__dict__["_dataset"]
+            if "_dataset" in self.__dict__:
+                del self.__dict__["_dataset"]
         except AttributeError:
             pass

@@ -184,27 +310,22 @@ class LanceTable:
     def _dataset_uri(self) -> str:
         return os.path.join(self._conn.uri, f"{self.name}.lance")

-    def create_index(self, metric="L2", num_partitions=256, num_sub_vectors=96):
+    def create_index(
-        """Create an index on the table.
+        self,
+        metric="L2",
-        Parameters
+        num_partitions=256,
-        ----------
+        num_sub_vectors=96,
-        metric: str, default "L2"
+        vector_column_name=VECTOR_COLUMN_NAME,
-            The distance metric to use when creating the index. Valid values are "L2" or "cosine".
+        replace: bool = True,
-            L2 is euclidean distance.
+    ):
-        num_partitions: int
+        """Create an index on the table."""
-            The number of IVF partitions to use when creating the index.
-            Default is 256.
-        num_sub_vectors: int
-            The number of PQ sub-vectors to use when creating the index.
-            Default is 96.
-        """
         self._dataset.create_index(
-            column=VECTOR_COLUMN_NAME,
+            column=vector_column_name,
             index_type="IVF_PQ",
             metric=metric,
             num_partitions=num_partitions,
             num_sub_vectors=num_sub_vectors,
+            replace=replace,
         )
         self._reset_dataset()

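With the new signature, an index on a non-default vector column can be rebuilt in place, e.g. (assumes an open LanceTable with enough rows for IVF_PQ training):

table.create_index(
    metric="cosine",
    num_partitions=256,
    num_sub_vectors=96,
    vector_column_name="vector",
    replace=True,
)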
@@ -237,7 +358,13 @@ class LanceTable:
         """Return the LanceDataset backing this table."""
         return self._dataset

-    def add(self, data: DATA, mode: str = "append") -> int:
+    def add(
+        self,
+        data: DATA,
+        mode: str = "append",
+        on_bad_vectors: str = "error",
+        fill_value: float = 0.0,
+    ) -> int:
         """Add data to the table.

         Parameters
@@ -247,18 +374,28 @@ class LanceTable:
         mode: str
             The mode to use when writing the data. Valid values are
             "append" and "overwrite".
+        on_bad_vectors: str, default "error"
+            What to do if any of the vectors are not the same size or contains NaNs.
+            One of "error", "drop", "fill".
+        fill_value: float, default 0.
+            The value to use when filling vectors. Only used if on_bad_vectors="fill".

         Returns
         -------
         int
             The number of vectors in the table.
         """
-        data = _sanitize_data(data, self.schema)
+        # TODO: manage table listing and metadata separately
+        data = _sanitize_data(
+            data, self.schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
+        )
         lance.write_dataset(data, self._dataset_uri, mode=mode)
         self._reset_dataset()
         return len(self)

-    def search(self, query: Union[VEC, str]) -> LanceQueryBuilder:
+    def search(
+        self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME
+    ) -> LanceQueryBuilder:
         """Create a search query to find the nearest neighbors
         of the given query vector.

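For example, appending rows while filling any malformed vectors rather than erroring (assumes an open LanceTable as in the earlier examples; values are illustrative):

table.add(
    [{"vector": [0.5, float("nan")], "b": 7}],
    mode="append",
    on_bad_vectors="fill",
    fill_value=0.0,
)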
@@ -266,6 +403,8 @@ class LanceTable:
|
|||||||
----------
|
----------
|
||||||
query: list, np.ndarray
|
query: list, np.ndarray
|
||||||
The query vector.
|
The query vector.
|
||||||
|
vector_column_name: str, default "vector"
|
||||||
|
The name of the vector column to search.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
@@ -277,7 +416,7 @@ class LanceTable:
|
|||||||
"""
|
"""
|
||||||
if isinstance(query, str):
|
if isinstance(query, str):
|
||||||
# fts
|
# fts
|
||||||
return LanceFtsQueryBuilder(self, query)
|
return LanceFtsQueryBuilder(self, query, vector_column_name)
|
||||||
|
|
||||||
if isinstance(query, list):
|
if isinstance(query, list):
|
||||||
query = np.array(query)
|
query = np.array(query)
|
||||||
@@ -285,17 +424,127 @@ class LanceTable:
|
|||||||
query = query.astype(np.float32)
|
query = query.astype(np.float32)
|
||||||
else:
|
else:
|
||||||
raise TypeError(f"Unsupported query type: {type(query)}")
|
raise TypeError(f"Unsupported query type: {type(query)}")
|
||||||
return LanceQueryBuilder(self, query)
|
return LanceQueryBuilder(self, query, vector_column_name)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, db, name, data, schema=None, mode="create"):
|
def create(
|
||||||
|
cls,
|
||||||
|
db,
|
||||||
|
name,
|
||||||
|
data=None,
|
||||||
|
schema=None,
|
||||||
|
mode="create",
|
||||||
|
on_bad_vectors: str = "error",
|
||||||
|
fill_value: float = 0.0,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Create a new table.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> import lancedb
|
||||||
|
>>> import pandas as pd
|
||||||
|
>>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
|
||||||
|
>>> db = lancedb.connect("./.lancedb")
|
||||||
|
>>> table = db.create_table("my_table", data)
|
||||||
|
>>> table.to_pandas()
|
||||||
|
x vector
|
||||||
|
0 1 [1.0, 2.0]
|
||||||
|
1 2 [3.0, 4.0]
|
||||||
|
2 3 [5.0, 6.0]
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
db: LanceDB
|
||||||
|
The LanceDB instance to create the table in.
|
||||||
|
name: str
|
||||||
|
The name of the table to create.
|
||||||
|
data: list-of-dict, dict, pd.DataFrame, default None
|
||||||
|
The data to insert into the table.
|
||||||
|
At least one of `data` or `schema` must be provided.
|
||||||
|
schema: dict, optional
|
||||||
|
The schema of the table. If not provided, the schema is inferred from the data.
|
||||||
|
At least one of `data` or `schema` must be provided.
|
||||||
|
mode: str, default "create"
|
||||||
|
The mode to use when writing the data. Valid values are
|
||||||
|
"create", "overwrite", and "append".
|
||||||
|
on_bad_vectors: str, default "error"
|
||||||
|
What to do if any of the vectors are not the same size or contains NaNs.
|
||||||
|
One of "error", "drop", "fill".
|
||||||
|
fill_value: float, default 0.
|
||||||
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
"""
|
||||||
tbl = LanceTable(db, name)
|
tbl = LanceTable(db, name)
|
||||||
data = _sanitize_data(data, schema)
|
if data is not None:
|
||||||
|
data = _sanitize_data(
|
||||||
|
data, schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
if schema is None:
|
||||||
|
raise ValueError("Either data or schema must be provided")
|
||||||
|
data = pa.Table.from_pylist([], schema=schema)
|
||||||
lance.write_dataset(data, tbl._dataset_uri, mode=mode)
|
lance.write_dataset(data, tbl._dataset_uri, mode=mode)
|
||||||
|
return LanceTable(db, name)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def open(cls, db, name):
|
||||||
|
tbl = cls(db, name)
|
||||||
|
if not os.path.exists(tbl._dataset_uri):
|
||||||
|
raise FileNotFoundError(
|
||||||
|
f"Table {name} does not exist. Please first call db.create_table({name}, data)"
|
||||||
|
)
|
||||||
return tbl
|
return tbl
|
||||||
|
|
||||||
|
def delete(self, where: str):
|
||||||
|
"""Delete rows from the table.
|
||||||
|
|
||||||
def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
|
Parameters
|
||||||
|
----------
|
||||||
|
where: str
|
||||||
|
The SQL where clause to use when deleting rows.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> import lancedb
|
||||||
|
>>> import pandas as pd
|
||||||
|
>>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
|
||||||
|
>>> db = lancedb.connect("./.lancedb")
|
||||||
|
>>> table = db.create_table("my_table", data)
|
||||||
|
>>> table.to_pandas()
|
||||||
|
x vector
|
||||||
|
0 1 [1.0, 2.0]
|
||||||
|
1 2 [3.0, 4.0]
|
||||||
|
2 3 [5.0, 6.0]
|
||||||
|
>>> table.delete("x = 2")
|
||||||
|
>>> table.to_pandas()
|
||||||
|
x vector
|
||||||
|
0 1 [1.0, 2.0]
|
||||||
|
1 3 [5.0, 6.0]
|
||||||
|
"""
|
||||||
|
self._dataset.delete(where)
|
||||||
|
|
||||||
|
def _execute_query(self, query: Query) -> pa.Table:
|
||||||
|
ds = self.to_lance()
|
||||||
|
return ds.to_table(
|
||||||
|
columns=query.columns,
|
||||||
|
filter=query.filter,
|
||||||
|
nearest={
|
||||||
|
"column": query.vector_column,
|
||||||
|
"q": query.vector,
|
||||||
|
"k": query.k,
|
||||||
|
"metric": query.metric,
|
||||||
|
"nprobes": query.nprobes,
|
||||||
|
"refine_factor": query.refine_factor,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_schema(
|
||||||
|
data: pa.Table,
|
||||||
|
schema: pa.Schema = None,
|
||||||
|
on_bad_vectors: str = "error",
|
||||||
|
fill_value: float = 0.0,
|
||||||
|
) -> pa.Table:
|
||||||
"""Ensure that the table has the expected schema.
|
"""Ensure that the table has the expected schema.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
@@ -305,21 +554,41 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
|
|||||||
schema: pa.Schema; optional
|
schema: pa.Schema; optional
|
||||||
The expected schema. If not provided, this just converts the
|
The expected schema. If not provided, this just converts the
|
||||||
vector column to fixed_size_list(float32) if necessary.
|
vector column to fixed_size_list(float32) if necessary.
|
||||||
|
on_bad_vectors: str, default "error"
|
||||||
|
What to do if any of the vectors are not the same size or contains NaNs.
|
||||||
|
One of "error", "drop", "fill".
|
||||||
|
fill_value: float, default 0.
|
||||||
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
"""
|
"""
|
||||||
if schema is not None:
|
if schema is not None:
|
||||||
if data.schema == schema:
|
if data.schema == schema:
|
||||||
return data
|
return data
|
||||||
# cast the columns to the expected types
|
# cast the columns to the expected types
|
||||||
data = data.combine_chunks()
|
data = data.combine_chunks()
|
||||||
data = _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)
|
data = _sanitize_vector_column(
|
||||||
|
data,
|
||||||
|
vector_column_name=VECTOR_COLUMN_NAME,
|
||||||
|
on_bad_vectors=on_bad_vectors,
|
||||||
|
fill_value=fill_value,
|
||||||
|
)
|
||||||
return pa.Table.from_arrays(
|
return pa.Table.from_arrays(
|
||||||
[data[name] for name in schema.names], schema=schema
|
[data[name] for name in schema.names], schema=schema
|
||||||
)
|
)
|
||||||
# just check the vector column
|
# just check the vector column
|
||||||
return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)
|
return _sanitize_vector_column(
|
||||||
|
data,
|
||||||
|
vector_column_name=VECTOR_COLUMN_NAME,
|
||||||
|
on_bad_vectors=on_bad_vectors,
|
||||||
|
fill_value=fill_value,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_vector_column(data: pa.Table, vector_column_name: str) -> pa.Table:
|
def _sanitize_vector_column(
|
||||||
|
data: pa.Table,
|
||||||
|
vector_column_name: str,
|
||||||
|
on_bad_vectors: str = "error",
|
||||||
|
fill_value: float = 0.0,
|
||||||
|
) -> pa.Table:
|
||||||
"""
|
"""
|
||||||
Ensure that the vector column exists and has type fixed_size_list(float32)
|
Ensure that the vector column exists and has type fixed_size_list(float32)
|
||||||
|
|
||||||
@@ -329,19 +598,103 @@ def _sanitize_vector_column(data: pa.Table, vector_column_name: str) -> pa.Table
        The table to sanitize.
    vector_column_name: str
        The name of the vector column.
+    on_bad_vectors: str, default "error"
+        What to do if any of the vectors are not the same size or contains NaNs.
+        One of "error", "drop", "fill".
+    fill_value: float, default 0.0
+        The value to use when filling vectors. Only used if on_bad_vectors="fill".
    """
    if vector_column_name not in data.column_names:
        raise ValueError(f"Missing vector column: {vector_column_name}")
+    # ChunkedArray is annoying to work with, so we combine chunks here
    vec_arr = data[vector_column_name].combine_chunks()
-    if pa.types.is_fixed_size_list(vec_arr.type):
-        return data
-    if not pa.types.is_list(vec_arr.type):
+    if pa.types.is_list(data[vector_column_name].type):
+        # if it's a variable size list array we make sure the dimensions are all the same
+        has_jagged_ndims = len(vec_arr.values) % len(data) != 0
+        if has_jagged_ndims:
+            data = _sanitize_jagged(
+                data, fill_value, on_bad_vectors, vec_arr, vector_column_name
+            )
+            vec_arr = data[vector_column_name].combine_chunks()
+    elif not pa.types.is_fixed_size_list(vec_arr.type):
        raise TypeError(f"Unsupported vector column type: {vec_arr.type}")

+    vec_arr = ensure_fixed_size_list_of_f32(vec_arr)
+    data = data.set_column(
+        data.column_names.index(vector_column_name), vector_column_name, vec_arr
+    )
+
+    has_nans = pc.any(pc.is_nan(vec_arr.values)).as_py()
+    if has_nans:
+        data = _sanitize_nans(
+            data, fill_value, on_bad_vectors, vec_arr, vector_column_name
+        )
+
+    return data
+
+
+def ensure_fixed_size_list_of_f32(vec_arr):
    values = vec_arr.values
    if not pa.types.is_float32(values.type):
        values = values.cast(pa.float32())
-    list_size = len(values) / len(data)
+    if pa.types.is_fixed_size_list(vec_arr.type):
+        list_size = vec_arr.type.list_size
+    else:
+        list_size = len(values) / len(vec_arr)
    vec_arr = pa.FixedSizeListArray.from_arrays(values, list_size)
-    return data.set_column(
-        data.column_names.index(vector_column_name), vector_column_name, vec_arr
-    )
+    return vec_arr
+
+
+def _sanitize_jagged(data, fill_value, on_bad_vectors, vec_arr, vector_column_name):
+    """Sanitize jagged vectors."""
+    if on_bad_vectors == "error":
+        raise ValueError(
+            f"Vector column {vector_column_name} has variable length vectors "
+            "Set on_bad_vectors='drop' to remove them, or "
+            "set on_bad_vectors='fill' and fill_value=<value> to replace them."
+        )
+
+    lst_lengths = pc.list_value_length(vec_arr)
+    ndims = pc.max(lst_lengths).as_py()
+    correct_ndims = pc.equal(lst_lengths, ndims)
+
+    if on_bad_vectors == "fill":
+        if fill_value is None:
+            raise ValueError(
+                "`fill_value` must not be None if `on_bad_vectors` is 'fill'"
+            )
+        fill_arr = pa.scalar([float(fill_value)] * ndims)
+        vec_arr = pc.if_else(correct_ndims, vec_arr, fill_arr)
+        data = data.set_column(
+            data.column_names.index(vector_column_name), vector_column_name, vec_arr
+        )
+    elif on_bad_vectors == "drop":
+        data = data.filter(correct_ndims)
+    return data
+
+
+def _sanitize_nans(data, fill_value, on_bad_vectors, vec_arr, vector_column_name):
+    """Sanitize NaNs in vectors"""
+    if on_bad_vectors == "error":
+        raise ValueError(
+            f"Vector column {vector_column_name} has NaNs. "
+            "Set on_bad_vectors='drop' to remove them, or "
+            "set on_bad_vectors='fill' and fill_value=<value> to replace them."
+        )
+    elif on_bad_vectors == "fill":
+        if fill_value is None:
+            raise ValueError(
+                "`fill_value` must not be None if `on_bad_vectors` is 'fill'"
+            )
+        fill_value = float(fill_value)
+        values = pc.if_else(pc.is_nan(vec_arr.values), fill_value, vec_arr.values)
+        ndims = len(vec_arr[0])
+        vec_arr = pa.FixedSizeListArray.from_arrays(values, ndims)
+        data = data.set_column(
+            data.column_names.index(vector_column_name), vector_column_name, vec_arr
+        )
+    elif on_bad_vectors == "drop":
+        is_value_nan = pc.is_nan(vec_arr.values).to_numpy(zero_copy_only=False)
+        is_full = np.any(~is_value_nan.reshape(-1, vec_arr.type.list_size), axis=1)
+        data = data.filter(is_full)
+    return data
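The sanitization helpers above boil down to a small amount of pyarrow work: cast the list values to float32, wrap them in a fixed-size list, and patch NaNs according to on_bad_vectors. Below is a minimal, self-contained sketch of that idea using only pyarrow; the column name, the fill value of 0.0, and the two-dimensional vectors are assumptions for illustration, not code from this changeset.

import pyarrow as pa
import pyarrow.compute as pc

# A variable-length list<double> column with one NaN, similar to what
# _sanitize_vector_column receives before normalization.
tbl = pa.table({"vector": [[1.0, 2.0], [float("nan"), 4.0]]})

vec = tbl["vector"].combine_chunks()
values = vec.values.cast(pa.float32())      # flatten and cast the list values
dim = len(values) // len(tbl)               # every row assumed to have the same length here
filled = pc.if_else(pc.is_nan(values), pa.scalar(0.0, pa.float32()), values)
fixed = pa.FixedSizeListArray.from_arrays(filled, dim)

tbl = tbl.set_column(0, "vector", fixed)
print(tbl.schema)  # vector: fixed_size_list<item: float>[2]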
@@ -11,9 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-from urllib.parse import ParseResult, urlparse
+from urllib.parse import urlparse

-from pyarrow import fs
-

def get_uri_scheme(uri: str) -> str:
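With the pyarrow.fs import gone, scheme detection in this module rests on urlparse alone. The snippet below is a simplified illustration of that pattern, not the repository's exact get_uri_scheme (which may handle extra cases such as Windows drive letters); the example URIs are made up.

from urllib.parse import urlparse

def get_scheme_or_file(uri: str) -> str:
    # Plain local paths have no scheme, so fall back to "file".
    scheme = urlparse(uri).scheme
    return scheme if scheme else "file"

print(get_scheme_or_file("s3://bucket/my-db"))  # s3
print(get_scheme_or_file("/tmp/my-db"))         # file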
@@ -1,7 +1,7 @@
[project]
name = "lancedb"
-version = "0.1.8"
+version = "0.1.10"
-dependencies = ["pylance>=0.4.20", "ratelimiter", "retry", "tqdm"]
+dependencies = ["pylance~=0.5.0", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr"]
description = "lancedb"
authors = [
    { name = "LanceDB Devs", email = "dev@lancedb.com" },
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"

[project.optional-dependencies]
tests = [
-    "pytest", "pytest-mock", "doctest"
+    "pytest", "pytest-mock", "pytest-asyncio"
]
dev = [
    "ruff", "pre-commit", "black"
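The test extras swap doctest for pytest-asyncio, which the new remote-client tests rely on. A minimal sketch of the pattern they use is below; the coroutine is a stand-in for an async client call and is not code from this repository.

import asyncio

import pytest


async def fetch_answer() -> int:
    # Stand-in for an awaitable client call such as a remote query.
    await asyncio.sleep(0)
    return 42


@pytest.mark.asyncio
async def test_fetch_answer():
    assert await fetch_answer() == 42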
77  python/tests/test_context.py  Normal file
@@ -0,0 +1,77 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pandas as pd
import pytest

from lancedb.context import contextualize


@pytest.fixture
def raw_df() -> pd.DataFrame:
    return pd.DataFrame(
        {
            "token": [
                "The",
                "quick",
                "brown",
                "fox",
                "jumped",
                "over",
                "the",
                "lazy",
                "dog",
                "I",
                "love",
                "sandwiches",
            ],
            "document_id": [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2],
        }
    )


def test_contextualizer(raw_df: pd.DataFrame):
    result = (
        contextualize(raw_df)
        .window(6)
        .stride(3)
        .text_col("token")
        .groupby("document_id")
        .to_df()["token"]
        .to_list()
    )

    assert result == [
        "The quick brown fox jumped over",
        "fox jumped over the lazy dog",
        "the lazy dog",
        "I love sandwiches",
    ]


def test_contextualizer_with_threshold(raw_df: pd.DataFrame):
    result = (
        contextualize(raw_df)
        .window(6)
        .stride(3)
        .text_col("token")
        .groupby("document_id")
        .min_window_size(4)
        .to_df()["token"]
        .to_list()
    )

    assert result == [
        "The quick brown fox jumped over",
        "fox jumped over the lazy dog",
    ]
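The expected strings in these tests follow directly from the window/stride arithmetic: within each document_id group a window of 6 tokens starts every 3 tokens, and min_window_size drops windows that end up shorter than the threshold. A standalone sketch of that windowing logic is below; it is an illustration of the arithmetic, not the implementation in lancedb.context.

def window_tokens(tokens, window, stride, min_size=1):
    # Start a window every `stride` tokens; keep it only if it is long enough.
    out = []
    for start in range(0, len(tokens), stride):
        chunk = tokens[start : start + window]
        if len(chunk) >= min_size:
            out.append(" ".join(chunk))
    return out


doc1 = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog"]
print(window_tokens(doc1, window=6, stride=3))
# ['The quick brown fox jumped over', 'fox jumped over the lazy dog', 'the lazy dog']
print(window_tokens(doc1, window=6, stride=3, min_size=4))
# ['The quick brown fox jumped over', 'fox jumped over the lazy dog']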
@@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

+import numpy as np
import pandas as pd
import pytest

@@ -120,3 +121,40 @@ def test_delete_table(tmp_path):

    db.create_table("test", data=data)
    assert db.table_names() == ["test"]
+
+
+def test_empty_or_nonexistent_table(tmp_path):
+    db = lancedb.connect(tmp_path)
+    with pytest.raises(Exception):
+        db.create_table("test_with_no_data")
+
+    with pytest.raises(Exception):
+        db.open_table("does_not_exist")
+
+
+def test_replace_index(tmp_path):
+    db = lancedb.connect(uri=tmp_path)
+    table = db.create_table(
+        "test",
+        [
+            {"vector": np.random.rand(128), "item": "foo", "price": float(i)}
+            for i in range(1000)
+        ],
+    )
+    table.create_index(
+        num_partitions=2,
+        num_sub_vectors=4,
+    )
+
+    with pytest.raises(Exception):
+        table.create_index(
+            num_partitions=2,
+            num_sub_vectors=4,
+            replace=False,
+        )
+
+    table.create_index(
+        num_partitions=2,
+        num_sub_vectors=4,
+        replace=True,
+    )
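For context on the replace flag exercised in test_replace_index: rebuilding an index over the same column now either raises (replace=False) or overwrites the existing index (replace=True). A short usage sketch mirroring the test is below; the database path and table contents are illustrative.

import numpy as np

import lancedb

db = lancedb.connect("/tmp/lancedb-demo")  # illustrative path
table = db.create_table(
    "items",
    [{"vector": np.random.rand(128), "price": float(i)} for i in range(1000)],
)

table.create_index(num_partitions=2, num_sub_vectors=4)

# Building again over the same column must opt in to replacement;
# with replace=False the second build is expected to raise.
table.create_index(num_partitions=2, num_sub_vectors=4, replace=True)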
27  python/tests/test_e2e_remote_db.py  Normal file
@@ -0,0 +1,27 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

from lancedb import LanceDBConnection

# TODO: setup integ test mark and script


@pytest.mark.skip(reason="Need to set up a local server")
def test_against_local_server():
    conn = LanceDBConnection("lancedb+http://localhost:10024")
    table = conn.open_table("sift1m_ivf1024_pq16")
    df = table.search(np.random.rand(128)).to_df()
    assert len(df) == 10
@@ -14,6 +14,7 @@ import sys

import numpy as np
import pyarrow as pa

from lancedb.embeddings import with_embeddings

@@ -13,13 +13,13 @@
import os
import random

-import lancedb.fts
import numpy as np
import pandas as pd
import pytest
import tantivy

import lancedb as ldb
+import lancedb.fts


@pytest.fixture
@@ -12,6 +12,7 @@
# limitations under the License.

import os

import pytest

import lancedb
|
|||||||
@@ -11,22 +11,42 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import unittest.mock as mock
|
||||||
|
|
||||||
import lance
|
import lance
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
|
||||||
import pandas.testing as tm
|
import pandas.testing as tm
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
from lancedb.query import LanceQueryBuilder
|
|
||||||
|
from lancedb.db import LanceDBConnection
|
||||||
|
from lancedb.query import LanceQueryBuilder, Query
|
||||||
|
from lancedb.table import LanceTable
|
||||||
|
|
||||||
|
|
||||||
class MockTable:
|
class MockTable:
|
||||||
def __init__(self, tmp_path):
|
def __init__(self, tmp_path):
|
||||||
self.uri = tmp_path
|
self.uri = tmp_path
|
||||||
|
self._conn = LanceDBConnection(self.uri)
|
||||||
|
|
||||||
def to_lance(self):
|
def to_lance(self):
|
||||||
return lance.dataset(self.uri)
|
return lance.dataset(self.uri)
|
||||||
|
|
||||||
|
def _execute_query(self, query):
|
||||||
|
ds = self.to_lance()
|
||||||
|
return ds.to_table(
|
||||||
|
columns=query.columns,
|
||||||
|
filter=query.filter,
|
||||||
|
nearest={
|
||||||
|
"column": query.vector_column,
|
||||||
|
"q": query.vector,
|
||||||
|
"k": query.k,
|
||||||
|
"metric": query.metric,
|
||||||
|
"nprobes": query.nprobes,
|
||||||
|
"refine_factor": query.refine_factor,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def table(tmp_path) -> MockTable:
|
def table(tmp_path) -> MockTable:
|
||||||
@@ -45,24 +65,30 @@ def table(tmp_path) -> MockTable:
|
|||||||
|
|
||||||
|
|
||||||
def test_query_builder(table):
|
def test_query_builder(table):
|
||||||
df = LanceQueryBuilder(table, [0, 0]).limit(1).select(["id"]).to_df()
|
df = LanceQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df()
|
||||||
assert df["id"].values[0] == 1
|
assert df["id"].values[0] == 1
|
||||||
assert all(df["vector"].values[0] == [1, 2])
|
assert all(df["vector"].values[0] == [1, 2])
|
||||||
|
|
||||||
|
|
||||||
def test_query_builder_with_filter(table):
|
def test_query_builder_with_filter(table):
|
||||||
df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df()
|
df = LanceQueryBuilder(table, [0, 0], "vector").where("id = 2").to_df()
|
||||||
assert df["id"].values[0] == 2
|
assert df["id"].values[0] == 2
|
||||||
assert all(df["vector"].values[0] == [3, 4])
|
assert all(df["vector"].values[0] == [3, 4])
|
||||||
|
|
||||||
|
|
||||||
def test_query_builder_with_metric(table):
|
def test_query_builder_with_metric(table):
|
||||||
query = [4, 8]
|
query = [4, 8]
|
||||||
df_default = LanceQueryBuilder(table, query).to_df()
|
vector_column_name = "vector"
|
||||||
df_l2 = LanceQueryBuilder(table, query).metric("L2").to_df()
|
df_default = LanceQueryBuilder(table, query, vector_column_name).to_df()
|
||||||
|
df_l2 = LanceQueryBuilder(table, query, vector_column_name).metric("L2").to_df()
|
||||||
tm.assert_frame_equal(df_default, df_l2)
|
tm.assert_frame_equal(df_default, df_l2)
|
||||||
|
|
||||||
df_cosine = LanceQueryBuilder(table, query).metric("cosine").limit(1).to_df()
|
df_cosine = (
|
||||||
|
LanceQueryBuilder(table, query, vector_column_name)
|
||||||
|
.metric("cosine")
|
||||||
|
.limit(1)
|
||||||
|
.to_df()
|
||||||
|
)
|
||||||
assert df_cosine.score[0] == pytest.approx(
|
assert df_cosine.score[0] == pytest.approx(
|
||||||
cosine_distance(query, df_cosine.vector[0]),
|
cosine_distance(query, df_cosine.vector[0]),
|
||||||
abs=1e-6,
|
abs=1e-6,
|
||||||
@@ -70,5 +96,32 @@ def test_query_builder_with_metric(table):
|
|||||||
assert 0 <= df_cosine.score[0] <= 1
|
assert 0 <= df_cosine.score[0] <= 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_query_builder_with_different_vector_column():
|
||||||
|
table = mock.MagicMock(spec=LanceTable)
|
||||||
|
query = [4, 8]
|
||||||
|
vector_column_name = "foo_vector"
|
||||||
|
builder = (
|
||||||
|
LanceQueryBuilder(table, query, vector_column_name)
|
||||||
|
.metric("cosine")
|
||||||
|
.where("b < 10")
|
||||||
|
.select(["b"])
|
||||||
|
.limit(2)
|
||||||
|
)
|
||||||
|
ds = mock.Mock()
|
||||||
|
table.to_lance.return_value = ds
|
||||||
|
builder.to_arrow()
|
||||||
|
table._execute_query.assert_called_once_with(
|
||||||
|
Query(
|
||||||
|
vector=query,
|
||||||
|
filter="b < 10",
|
||||||
|
k=2,
|
||||||
|
metric="cosine",
|
||||||
|
columns=["b"],
|
||||||
|
nprobes=20,
|
||||||
|
refine_factor=None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def cosine_distance(vec1, vec2):
|
def cosine_distance(vec1, vec2):
|
||||||
return 1 - np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
return 1 - np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
||||||
|
|||||||
95
python/tests/test_remote_client.py
Normal file
95
python/tests/test_remote_client.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
# Copyright 2023 LanceDB Developers
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import attr
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import pyarrow as pa
|
||||||
|
import pytest
|
||||||
|
from aiohttp import web
|
||||||
|
|
||||||
|
from lancedb.remote.client import RestfulLanceDBClient, VectorQuery
|
||||||
|
|
||||||
|
|
||||||
|
@attr.define
|
||||||
|
class MockLanceDBServer:
|
||||||
|
runner: web.AppRunner = attr.field(init=False)
|
||||||
|
site: web.TCPSite = attr.field(init=False)
|
||||||
|
|
||||||
|
async def query_handler(self, request: web.Request) -> web.Response:
|
||||||
|
table_name = request.match_info["table_name"]
|
||||||
|
assert table_name == "test_table"
|
||||||
|
|
||||||
|
await request.json()
|
||||||
|
# TODO: do some matching
|
||||||
|
|
||||||
|
vecs = pd.Series([np.random.rand(128) for x in range(10)], name="vector")
|
||||||
|
ids = pd.Series(range(10), name="id")
|
||||||
|
df = pd.DataFrame([vecs, ids]).T
|
||||||
|
|
||||||
|
batch = pa.RecordBatch.from_pandas(
|
||||||
|
df,
|
||||||
|
schema=pa.schema(
|
||||||
|
[
|
||||||
|
pa.field("vector", pa.list_(pa.float32(), 128)),
|
||||||
|
pa.field("id", pa.int64()),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
sink = pa.BufferOutputStream()
|
||||||
|
with pa.ipc.new_file(sink, batch.schema) as writer:
|
||||||
|
writer.write_batch(batch)
|
||||||
|
|
||||||
|
return web.Response(body=sink.getvalue().to_pybytes())
|
||||||
|
|
||||||
|
async def setup(self):
|
||||||
|
app = web.Application()
|
||||||
|
app.add_routes([web.post("/table/{table_name}", self.query_handler)])
|
||||||
|
self.runner = web.AppRunner(app)
|
||||||
|
await self.runner.setup()
|
||||||
|
self.site = web.TCPSite(self.runner, "localhost", 8111)
|
||||||
|
|
||||||
|
async def start(self):
|
||||||
|
await self.site.start()
|
||||||
|
|
||||||
|
async def stop(self):
|
||||||
|
await self.runner.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="flaky somehow, fix later")
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_e2e_with_mock_server():
|
||||||
|
mock_server = MockLanceDBServer()
|
||||||
|
await mock_server.setup()
|
||||||
|
await mock_server.start()
|
||||||
|
|
||||||
|
try:
|
||||||
|
client = RestfulLanceDBClient("lancedb+http://localhost:8111")
|
||||||
|
df = (
|
||||||
|
await client.query(
|
||||||
|
"test_table",
|
||||||
|
VectorQuery(
|
||||||
|
vector=np.random.rand(128).tolist(),
|
||||||
|
k=10,
|
||||||
|
_metric="L2",
|
||||||
|
columns=["id", "vector"],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
).to_df()
|
||||||
|
|
||||||
|
assert "vector" in df.columns
|
||||||
|
assert "id" in df.columns
|
||||||
|
finally:
|
||||||
|
# make sure we don't leak resources
|
||||||
|
await mock_server.stop()
|
||||||
35
python/tests/test_remote_db.py
Normal file
35
python/tests/test_remote_db.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Copyright 2023 LanceDB Developers
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
from lancedb.remote.client import VectorQuery, VectorQueryResult
|
||||||
|
|
||||||
|
|
||||||
|
class FakeLanceDBClient:
|
||||||
|
async def close(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
|
||||||
|
assert table_name == "test"
|
||||||
|
t = pa.schema([]).empty_table()
|
||||||
|
return VectorQueryResult(t)
|
||||||
|
|
||||||
|
|
||||||
|
def test_remote_db():
|
||||||
|
conn = lancedb.connect("db://client-will-be-injected", api_key="fake")
|
||||||
|
setattr(conn, "_client", FakeLanceDBClient())
|
||||||
|
|
||||||
|
table = conn["test"]
|
||||||
|
table.search([1.0, 2.0]).to_df()
|
||||||
@@ -11,11 +11,17 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import functools
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import PropertyMock, patch
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
|
from lance.vector import vec_to_table
|
||||||
|
|
||||||
|
from lancedb.db import LanceDBConnection
|
||||||
from lancedb.table import LanceTable
|
from lancedb.table import LanceTable
|
||||||
|
|
||||||
|
|
||||||
@@ -23,6 +29,10 @@ class MockDB:
|
|||||||
def __init__(self, uri: Path):
|
def __init__(self, uri: Path):
|
||||||
self.uri = uri
|
self.uri = uri
|
||||||
|
|
||||||
|
@functools.cached_property
|
||||||
|
def is_managed_remote(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def db(tmp_path) -> MockDB:
|
def db(tmp_path) -> MockDB:
|
||||||
@@ -80,7 +90,31 @@ def test_create_table(db):
|
|||||||
assert expected == tbl
|
assert expected == tbl
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_table(db):
|
||||||
|
schema = pa.schema(
|
||||||
|
[
|
||||||
|
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
|
pa.field("item", pa.string()),
|
||||||
|
pa.field("price", pa.float32()),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
tbl = LanceTable.create(db, "test", schema=schema)
|
||||||
|
data = [
|
||||||
|
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||||
|
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||||
|
]
|
||||||
|
tbl.add(data=data)
|
||||||
|
|
||||||
|
|
||||||
def test_add(db):
|
def test_add(db):
|
||||||
|
schema = pa.schema(
|
||||||
|
[
|
||||||
|
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
|
pa.field("item", pa.string()),
|
||||||
|
pa.field("price", pa.float64()),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
table = LanceTable.create(
|
table = LanceTable.create(
|
||||||
db,
|
db,
|
||||||
"test",
|
"test",
|
||||||
@@ -89,7 +123,19 @@ def test_add(db):
|
|||||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
_add(table, schema)
|
||||||
|
|
||||||
|
table = LanceTable.create(db, "test2", schema=schema)
|
||||||
|
table.add(
|
||||||
|
data=[
|
||||||
|
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||||
|
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
_add(table, schema)
|
||||||
|
|
||||||
|
|
||||||
|
def _add(table, schema):
|
||||||
# table = LanceTable(db, "test")
|
# table = LanceTable(db, "test")
|
||||||
assert len(table) == 2
|
assert len(table) == 2
|
||||||
|
|
||||||
@@ -104,13 +150,7 @@ def test_add(db):
|
|||||||
pa.array(["foo", "bar", "new"]),
|
pa.array(["foo", "bar", "new"]),
|
||||||
pa.array([10.0, 20.0, 30.0]),
|
pa.array([10.0, 20.0, 30.0]),
|
||||||
],
|
],
|
||||||
schema=pa.schema(
|
schema=schema,
|
||||||
[
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
pa.field("item", pa.string()),
|
|
||||||
pa.field("price", pa.float64()),
|
|
||||||
]
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
assert expected == table.to_arrow()
|
assert expected == table.to_arrow()
|
||||||
|
|
||||||
@@ -136,3 +176,83 @@ def test_versioning(db):
|
|||||||
table.checkout(1)
|
table.checkout(1)
|
||||||
assert table.version == 1
|
assert table.version == 1
|
||||||
assert len(table) == 2
|
assert len(table) == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_index_method():
|
||||||
|
with patch.object(LanceTable, "_reset_dataset", return_value=None):
|
||||||
|
with patch.object(
|
||||||
|
LanceTable, "_dataset", new_callable=PropertyMock
|
||||||
|
) as mock_dataset:
|
||||||
|
# Setup mock responses
|
||||||
|
mock_dataset.return_value.create_index.return_value = None
|
||||||
|
|
||||||
|
# Create a LanceTable object
|
||||||
|
connection = LanceDBConnection(uri="mock.uri")
|
||||||
|
table = LanceTable(connection, "test_table")
|
||||||
|
|
||||||
|
# Call the create_index method
|
||||||
|
table.create_index(
|
||||||
|
metric="L2",
|
||||||
|
num_partitions=256,
|
||||||
|
num_sub_vectors=96,
|
||||||
|
vector_column_name="vector",
|
||||||
|
replace=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check that the _dataset.create_index method was called
|
||||||
|
# with the right parameters
|
||||||
|
mock_dataset.return_value.create_index.assert_called_once_with(
|
||||||
|
column="vector",
|
||||||
|
index_type="IVF_PQ",
|
||||||
|
metric="L2",
|
||||||
|
num_partitions=256,
|
||||||
|
num_sub_vectors=96,
|
||||||
|
replace=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_with_nans(db):
|
||||||
|
# by default we raise an error on bad input vectors
|
||||||
|
bad_data = [
|
||||||
|
{"vector": [np.nan], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [5], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [np.nan, 5.0], "item": "bar", "price": 20.0},
|
||||||
|
]
|
||||||
|
for row in bad_data:
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
LanceTable.create(
|
||||||
|
db,
|
||||||
|
"error_test",
|
||||||
|
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, row],
|
||||||
|
)
|
||||||
|
|
||||||
|
table = LanceTable.create(
|
||||||
|
db,
|
||||||
|
"drop_test",
|
||||||
|
data=[
|
||||||
|
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||||
|
{"vector": [np.nan], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [5], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
|
||||||
|
],
|
||||||
|
on_bad_vectors="drop",
|
||||||
|
)
|
||||||
|
assert len(table) == 1
|
||||||
|
|
||||||
|
# We can fill bad input with some value
|
||||||
|
table = LanceTable.create(
|
||||||
|
db,
|
||||||
|
"fill_test",
|
||||||
|
data=[
|
||||||
|
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||||
|
{"vector": [np.nan], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
|
||||||
|
],
|
||||||
|
on_bad_vectors="fill",
|
||||||
|
fill_value=0.0,
|
||||||
|
)
|
||||||
|
assert len(table) == 3
|
||||||
|
arrow_tbl = table.to_lance().to_table(filter="item == 'bar'")
|
||||||
|
v = arrow_tbl["vector"].to_pylist()[0]
|
||||||
|
assert np.allclose(v, np.array([0.0, 0.0]))
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb-node"
|
name = "vectordb-node"
|
||||||
version = "0.1.0"
|
version = "0.1.10"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
@@ -10,12 +10,12 @@ exclude = ["index.node"]
|
|||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arrow-array = "37.0"
|
arrow-array = { workspace = true }
|
||||||
arrow-ipc = "37.0"
|
arrow-ipc = { workspace = true }
|
||||||
arrow-schema = "37.0"
|
arrow-schema = { workspace = true }
|
||||||
once_cell = "1"
|
once_cell = "1"
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
lance = "0.4.17"
|
lance = { workspace = true }
|
||||||
vectordb = { path = "../../vectordb" }
|
vectordb = { path = "../../vectordb" }
|
||||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||||
neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }
|
neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ fn get_index_params_builder(
|
|||||||
let ivf_params = IvfBuildParams {
|
let ivf_params = IvfBuildParams {
|
||||||
num_partitions: np,
|
num_partitions: np,
|
||||||
max_iters,
|
max_iters,
|
||||||
|
centroids: None,
|
||||||
};
|
};
|
||||||
index_builder.ivf_params(ivf_params)
|
index_builder.ivf_params(ivf_params)
|
||||||
});
|
});
|
||||||
@@ -121,6 +122,10 @@ fn get_index_params_builder(
|
|||||||
.map_err(|t| t.to_string())?
|
.map_err(|t| t.to_string())?
|
||||||
.map(|s| pq_params.max_opq_iters = s.value(cx) as usize);
|
.map(|s| pq_params.max_opq_iters = s.value(cx) as usize);
|
||||||
|
|
||||||
|
obj.get_opt::<JsBoolean, _, _>(cx, "replace")
|
||||||
|
.map_err(|t| t.to_string())?
|
||||||
|
.map(|s| index_builder.replace(s.value(cx)));
|
||||||
|
|
||||||
Ok(index_builder)
|
Ok(index_builder)
|
||||||
}
|
}
|
||||||
t => Err(format!("{} is not a valid index type", t).to_string()),
|
t => Err(format!("{} is not a valid index type", t).to_string()),
|
||||||
|
|||||||
@@ -17,11 +17,10 @@ use std::convert::TryFrom;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
use arrow_array::{Float32Array, RecordBatchReader};
|
use arrow_array::{Float32Array, RecordBatchIterator, RecordBatchReader};
|
||||||
use arrow_ipc::writer::FileWriter;
|
use arrow_ipc::writer::FileWriter;
|
||||||
use futures::{TryFutureExt, TryStreamExt};
|
use futures::{TryFutureExt, TryStreamExt};
|
||||||
use lance::arrow::RecordBatchBuffer;
|
use lance::dataset::{WriteMode, WriteParams};
|
||||||
use lance::dataset::WriteMode;
|
|
||||||
use lance::index::vector::MetricType;
|
use lance::index::vector::MetricType;
|
||||||
use neon::prelude::*;
|
use neon::prelude::*;
|
||||||
use neon::types::buffer::TypedArray;
|
use neon::types::buffer::TypedArray;
|
||||||
@@ -122,6 +121,27 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
Ok(promise)
|
Ok(promise)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn database_drop_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
|
let db = cx
|
||||||
|
.this()
|
||||||
|
.downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
|
||||||
|
let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
|
||||||
|
|
||||||
|
let rt = runtime(&mut cx)?;
|
||||||
|
let channel = cx.channel();
|
||||||
|
let database = db.database.clone();
|
||||||
|
|
||||||
|
let (deferred, promise) = cx.promise();
|
||||||
|
rt.spawn(async move {
|
||||||
|
let result = database.drop_table(&table_name).await;
|
||||||
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
|
result.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||||
|
Ok(cx.null())
|
||||||
|
});
|
||||||
|
});
|
||||||
|
Ok(promise)
|
||||||
|
}
|
||||||
|
|
||||||
fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||||
let query_obj = cx.argument::<JsObject>(0)?;
|
let query_obj = cx.argument::<JsObject>(0)?;
|
||||||
@@ -212,6 +232,17 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
|
let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
|
||||||
let buffer = cx.argument::<JsBuffer>(1)?;
|
let buffer = cx.argument::<JsBuffer>(1)?;
|
||||||
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
|
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
|
||||||
|
let schema = batches[0].schema();
|
||||||
|
|
||||||
|
// Write mode
|
||||||
|
let mode = match cx.argument::<JsString>(2)?.value(&mut cx).as_str() {
|
||||||
|
"overwrite" => WriteMode::Overwrite,
|
||||||
|
"append" => WriteMode::Append,
|
||||||
|
"create" => WriteMode::Create,
|
||||||
|
_ => return cx.throw_error("Table::create only supports 'overwrite' and 'create' modes"),
|
||||||
|
};
|
||||||
|
let mut params = WriteParams::default();
|
||||||
|
params.mode = mode;
|
||||||
|
|
||||||
let rt = runtime(&mut cx)?;
|
let rt = runtime(&mut cx)?;
|
||||||
let channel = cx.channel();
|
let channel = cx.channel();
|
||||||
@@ -220,8 +251,13 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let database = db.database.clone();
|
let database = db.database.clone();
|
||||||
|
|
||||||
rt.block_on(async move {
|
rt.block_on(async move {
|
||||||
let batch_reader: Box<dyn RecordBatchReader> = Box::new(RecordBatchBuffer::new(batches));
|
let batch_reader: Box<dyn RecordBatchReader> = Box::new(RecordBatchIterator::new(
|
||||||
let table_rst = database.create_table(&table_name, batch_reader).await;
|
batches.into_iter().map(Ok),
|
||||||
|
schema,
|
||||||
|
));
|
||||||
|
let table_rst = database
|
||||||
|
.create_table(&table_name, batch_reader, Some(params))
|
||||||
|
.await;
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let table = Arc::new(Mutex::new(
|
let table = Arc::new(Mutex::new(
|
||||||
@@ -244,6 +280,7 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let buffer = cx.argument::<JsBuffer>(0)?;
|
let buffer = cx.argument::<JsBuffer>(0)?;
|
||||||
let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
|
let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
|
||||||
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
|
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
|
||||||
|
let schema = batches[0].schema();
|
||||||
|
|
||||||
let rt = runtime(&mut cx)?;
|
let rt = runtime(&mut cx)?;
|
||||||
let channel = cx.channel();
|
let channel = cx.channel();
|
||||||
@@ -253,7 +290,10 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let write_mode = write_mode_map.get(write_mode.as_str()).cloned();
|
let write_mode = write_mode_map.get(write_mode.as_str()).cloned();
|
||||||
|
|
||||||
rt.block_on(async move {
|
rt.block_on(async move {
|
||||||
let batch_reader: Box<dyn RecordBatchReader> = Box::new(RecordBatchBuffer::new(batches));
|
let batch_reader: Box<dyn RecordBatchReader> = Box::new(RecordBatchIterator::new(
|
||||||
|
batches.into_iter().map(Ok),
|
||||||
|
schema,
|
||||||
|
));
|
||||||
let add_result = table.lock().unwrap().add(batch_reader, write_mode).await;
|
let add_result = table.lock().unwrap().add(batch_reader, write_mode).await;
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
@@ -264,14 +304,56 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
Ok(promise)
|
Ok(promise)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn table_count_rows(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
|
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||||
|
let rt = runtime(&mut cx)?;
|
||||||
|
let channel = cx.channel();
|
||||||
|
|
||||||
|
let (deferred, promise) = cx.promise();
|
||||||
|
let table = js_table.table.clone();
|
||||||
|
|
||||||
|
rt.block_on(async move {
|
||||||
|
let num_rows_result = table.lock().unwrap().count_rows().await;
|
||||||
|
|
||||||
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
|
let num_rows = num_rows_result.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||||
|
Ok(cx.number(num_rows as f64))
|
||||||
|
});
|
||||||
|
});
|
||||||
|
Ok(promise)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn table_delete(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
|
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||||
|
let rt = runtime(&mut cx)?;
|
||||||
|
let channel = cx.channel();
|
||||||
|
|
||||||
|
let (deferred, promise) = cx.promise();
|
||||||
|
let table = js_table.table.clone();
|
||||||
|
|
||||||
|
let predicate = cx.argument::<JsString>(0)?.value(&mut cx);
|
||||||
|
|
||||||
|
let delete_result = rt.block_on(async move { table.lock().unwrap().delete(&predicate).await });
|
||||||
|
|
||||||
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
|
delete_result.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||||
|
Ok(cx.undefined())
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(promise)
|
||||||
|
}
|
||||||
|
|
||||||
#[neon::main]
|
#[neon::main]
|
||||||
fn main(mut cx: ModuleContext) -> NeonResult<()> {
|
fn main(mut cx: ModuleContext) -> NeonResult<()> {
|
||||||
cx.export_function("databaseNew", database_new)?;
|
cx.export_function("databaseNew", database_new)?;
|
||||||
cx.export_function("databaseTableNames", database_table_names)?;
|
cx.export_function("databaseTableNames", database_table_names)?;
|
||||||
cx.export_function("databaseOpenTable", database_open_table)?;
|
cx.export_function("databaseOpenTable", database_open_table)?;
|
||||||
|
cx.export_function("databaseDropTable", database_drop_table)?;
|
||||||
cx.export_function("tableSearch", table_search)?;
|
cx.export_function("tableSearch", table_search)?;
|
||||||
cx.export_function("tableCreate", table_create)?;
|
cx.export_function("tableCreate", table_create)?;
|
||||||
cx.export_function("tableAdd", table_add)?;
|
cx.export_function("tableAdd", table_add)?;
|
||||||
|
cx.export_function("tableCountRows", table_count_rows)?;
|
||||||
|
cx.export_function("tableDelete", table_delete)?;
|
||||||
cx.export_function(
|
cx.export_function(
|
||||||
"tableCreateVectorIndex",
|
"tableCreateVectorIndex",
|
||||||
index::vector::table_create_vector_index,
|
index::vector::table_create_vector_index,
|
||||||
|
|||||||
@@ -1,20 +1,19 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb"
|
name = "vectordb"
|
||||||
version = "0.0.1"
|
version = "0.1.10"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
repository = "https://github.com/lancedb/lancedb"
|
repository = "https://github.com/lancedb/lancedb"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arrow-array = "37.0"
|
arrow-array = { workspace = true }
|
||||||
arrow-data = "37.0"
|
arrow-data = { workspace = true }
|
||||||
arrow-schema = "37.0"
|
arrow-schema = { workspace = true }
|
||||||
object_store = "0.5.6"
|
object_store = { workspace = true }
|
||||||
snafu = "0.7.4"
|
snafu = "0.7.4"
|
||||||
lance = "0.4.17"
|
lance = { workspace = true }
|
||||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// Copyright 2023 Lance Developers.
|
// Copyright 2023 LanceDB Developers.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -16,11 +16,12 @@ use std::fs::create_dir_all;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use arrow_array::RecordBatchReader;
|
use arrow_array::RecordBatchReader;
|
||||||
|
use lance::dataset::WriteParams;
|
||||||
use lance::io::object_store::ObjectStore;
|
use lance::io::object_store::ObjectStore;
|
||||||
use snafu::prelude::*;
|
use snafu::prelude::*;
|
||||||
|
|
||||||
use crate::error::{CreateDirSnafu, Result};
|
use crate::error::{CreateDirSnafu, Result};
|
||||||
use crate::table::Table;
|
use crate::table::{OpenTableParams, Table};
|
||||||
|
|
||||||
pub struct Database {
|
pub struct Database {
|
||||||
object_store: ObjectStore,
|
object_store: ObjectStore,
|
||||||
@@ -42,7 +43,7 @@ impl Database {
|
|||||||
///
|
///
|
||||||
/// * A [Database] object.
|
/// * A [Database] object.
|
||||||
pub async fn connect(uri: &str) -> Result<Database> {
|
pub async fn connect(uri: &str) -> Result<Database> {
|
||||||
let object_store = ObjectStore::new(uri).await?;
|
let (object_store, _) = ObjectStore::from_uri(uri).await?;
|
||||||
if object_store.is_local() {
|
if object_store.is_local() {
|
||||||
Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
|
Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
|
||||||
}
|
}
|
||||||
@@ -69,7 +70,7 @@ impl Database {
|
|||||||
pub async fn table_names(&self) -> Result<Vec<String>> {
|
pub async fn table_names(&self) -> Result<Vec<String>> {
|
||||||
let f = self
|
let f = self
|
||||||
.object_store
|
.object_store
|
||||||
.read_dir("/")
|
.read_dir(self.uri.as_str())
|
||||||
.await?
|
.await?
|
||||||
.iter()
|
.iter()
|
||||||
.map(|fname| Path::new(fname))
|
.map(|fname| Path::new(fname))
|
||||||
@@ -90,12 +91,19 @@ impl Database {
|
|||||||
Ok(f)
|
Ok(f)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create a new table in the database.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `name` - The name of the table.
|
||||||
|
/// * `batches` - The initial data to write to the table.
|
||||||
|
/// * `params` - Optional [`WriteParams`] to create the table.
|
||||||
pub async fn create_table(
|
pub async fn create_table(
|
||||||
&self,
|
&self,
|
||||||
name: &str,
|
name: &str,
|
||||||
batches: Box<dyn RecordBatchReader>,
|
batches: Box<dyn RecordBatchReader>,
|
||||||
|
params: Option<WriteParams>,
|
||||||
) -> Result<Table> {
|
) -> Result<Table> {
|
||||||
Table::create(&self.uri, name, batches).await
|
Table::create(&self.uri, name, batches, params).await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Open a table in the database.
|
/// Open a table in the database.
|
||||||
@@ -107,7 +115,35 @@ impl Database {
|
|||||||
///
|
///
|
||||||
/// * A [Table] object.
|
/// * A [Table] object.
|
||||||
pub async fn open_table(&self, name: &str) -> Result<Table> {
|
pub async fn open_table(&self, name: &str) -> Result<Table> {
|
||||||
Table::open(&self.uri, name).await
|
self.open_table_with_params(name, OpenTableParams::default())
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Open a table in the database.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `name` - The name of the table.
|
||||||
|
/// * `params` - The parameters to open the table.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// * A [Table] object.
|
||||||
|
pub async fn open_table_with_params(
|
||||||
|
&self,
|
||||||
|
name: &str,
|
||||||
|
params: OpenTableParams,
|
||||||
|
) -> Result<Table> {
|
||||||
|
Table::open_with_params(&self.uri, name, params).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drop a table in the database.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `name` - The name of the table.
|
||||||
|
pub async fn drop_table(&self, name: &str) -> Result<()> {
|
||||||
|
let dir_name = format!("{}/{}.{}", self.uri, name, LANCE_EXTENSION);
|
||||||
|
self.object_store.remove_dir_all(dir_name).await?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,4 +182,17 @@ mod tests {
|
|||||||
async fn test_connect_s3() {
|
async fn test_connect_s3() {
|
||||||
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
|
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn drop_table() {
|
||||||
|
let tmp_dir = tempdir().unwrap();
|
||||||
|
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
|
||||||
|
|
||||||
|
let uri = tmp_dir.path().to_str().unwrap();
|
||||||
|
let db = Database::connect(uri).await.unwrap();
|
||||||
|
db.drop_table("table1").await.unwrap();
|
||||||
|
|
||||||
|
let tables = db.table_names().await.unwrap();
|
||||||
|
assert_eq!(tables.len(), 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ pub trait VectorIndexBuilder {
|
|||||||
fn get_column(&self) -> Option<String>;
|
fn get_column(&self) -> Option<String>;
|
||||||
fn get_index_name(&self) -> Option<String>;
|
fn get_index_name(&self) -> Option<String>;
|
||||||
fn build(&self) -> VectorIndexParams;
|
fn build(&self) -> VectorIndexParams;
|
||||||
|
|
||||||
|
fn get_replace(&self) -> bool;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct IvfPQIndexBuilder {
|
pub struct IvfPQIndexBuilder {
|
||||||
@@ -28,6 +30,7 @@ pub struct IvfPQIndexBuilder {
|
|||||||
metric_type: Option<MetricType>,
|
metric_type: Option<MetricType>,
|
||||||
ivf_params: Option<IvfBuildParams>,
|
ivf_params: Option<IvfBuildParams>,
|
||||||
pq_params: Option<PQBuildParams>,
|
pq_params: Option<PQBuildParams>,
|
||||||
|
replace: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IvfPQIndexBuilder {
|
impl IvfPQIndexBuilder {
|
||||||
@@ -38,6 +41,7 @@ impl IvfPQIndexBuilder {
|
|||||||
metric_type: None,
|
metric_type: None,
|
||||||
ivf_params: None,
|
ivf_params: None,
|
||||||
pq_params: None,
|
pq_params: None,
|
||||||
|
replace: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -67,6 +71,11 @@ impl IvfPQIndexBuilder {
|
|||||||
self.pq_params = Some(pq_params);
|
self.pq_params = Some(pq_params);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn replace(&mut self, replace: bool) -> &mut IvfPQIndexBuilder {
|
||||||
|
self.replace = replace;
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VectorIndexBuilder for IvfPQIndexBuilder {
|
impl VectorIndexBuilder for IvfPQIndexBuilder {
|
||||||
@@ -84,6 +93,10 @@ impl VectorIndexBuilder for IvfPQIndexBuilder {
|
|||||||
|
|
||||||
VectorIndexParams::with_ivf_pq_params(pq_params.metric_type, ivf_params, pq_params)
|
VectorIndexParams::with_ivf_pq_params(pq_params.metric_type, ivf_params, pq_params)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_replace(&self) -> bool {
|
||||||
|
self.replace
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -17,3 +17,8 @@ pub mod error;
|
|||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod query;
|
pub mod query;
|
||||||
pub mod table;
|
pub mod table;
|
||||||
|
|
||||||
|
pub use database::Database;
|
||||||
|
pub use table::Table;
|
||||||
|
|
||||||
|
pub use lance::dataset::WriteMode;
|
||||||
|
|||||||
@@ -74,9 +74,7 @@ impl Query {
|
|||||||
)?;
|
)?;
|
||||||
scanner.nprobs(self.nprobes);
|
scanner.nprobs(self.nprobes);
|
||||||
scanner.use_index(self.use_index);
|
scanner.use_index(self.use_index);
|
||||||
self.select
|
self.select.as_ref().map(|p| scanner.project(p.as_slice()));
|
||||||
.as_ref()
|
|
||||||
.map(|p| scanner.project(p.as_slice()));
|
|
||||||
self.filter.as_ref().map(|f| scanner.filter(f));
|
self.filter.as_ref().map(|f| scanner.filter(f));
|
||||||
self.refine_factor.map(|rf| scanner.refine(rf));
|
self.refine_factor.map(|rf| scanner.refine(rf));
|
||||||
self.metric_type.map(|mt| scanner.distance_metric(mt));
|
self.metric_type.map(|mt| scanner.distance_metric(mt));
|
||||||
@@ -166,9 +164,8 @@ impl Query {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::{Float32Array, RecordBatch, RecordBatchReader};
|
use arrow_array::{Float32Array, RecordBatch, RecordBatchIterator, RecordBatchReader};
|
||||||
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
||||||
use lance::arrow::RecordBatchBuffer;
|
|
||||||
use lance::dataset::Dataset;
|
use lance::dataset::Dataset;
|
||||||
use lance::index::vector::MetricType;
|
use lance::index::vector::MetricType;
|
||||||
|
|
||||||
@@ -176,8 +173,8 @@ mod tests {
|
|||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_setters_getters() {
|
async fn test_setters_getters() {
|
||||||
let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
|
let mut batches: Box<dyn RecordBatchReader> = make_test_batches();
|
||||||
let ds = Dataset::write(&mut batches, ":memory:", None)
|
let ds = Dataset::write(&mut batches, "memory://foo", None)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@@ -205,8 +202,8 @@ mod tests {
|
|||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_execute() {
|
async fn test_execute() {
|
||||||
let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
|
let mut batches: Box<dyn RecordBatchReader> = make_test_batches();
|
||||||
let ds = Dataset::write(&mut batches, ":memory:", None)
|
let ds = Dataset::write(&mut batches, "memory://foo", None)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@@ -216,7 +213,7 @@ mod tests {
|
|||||||
assert_eq!(result.is_ok(), true);
|
assert_eq!(result.is_ok(), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn make_test_batches() -> RecordBatchBuffer {
|
fn make_test_batches() -> Box<dyn RecordBatchReader> {
|
||||||
let dim: usize = 128;
|
let dim: usize = 128;
|
||||||
let schema = Arc::new(ArrowSchema::new(vec![
|
let schema = Arc::new(ArrowSchema::new(vec![
|
||||||
ArrowField::new("key", DataType::Int32, false),
|
ArrowField::new("key", DataType::Int32, false),
|
||||||
@@ -230,7 +227,11 @@ mod tests {
|
|||||||
),
|
),
|
||||||
ArrowField::new("uri", DataType::Utf8, true),
|
ArrowField::new("uri", DataType::Utf8, true),
|
||||||
]));
|
]));
|
||||||
|
Box::new(RecordBatchIterator::new(
|
||||||
RecordBatchBuffer::new(vec![RecordBatch::new_empty(schema.clone())])
|
vec![RecordBatch::new_empty(schema.clone())]
|
||||||
|
.into_iter()
|
||||||
|
.map(Ok),
|
||||||
|
schema,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// Copyright 2023 Lance Developers.
|
// Copyright 2023 LanceDB Developers.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -16,19 +16,20 @@ use std::path::Path;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::{Float32Array, RecordBatchReader};
|
use arrow_array::{Float32Array, RecordBatchReader};
|
||||||
use lance::dataset::{Dataset, WriteMode, WriteParams};
|
use lance::dataset::{Dataset, ReadParams, WriteParams};
|
||||||
use lance::index::IndexType;
|
use lance::index::IndexType;
|
||||||
use snafu::prelude::*;
|
use snafu::prelude::*;
|
||||||
|
|
||||||
use crate::error::{Error, InvalidTableNameSnafu, Result};
|
use crate::error::{Error, InvalidTableNameSnafu, Result};
|
||||||
use crate::index::vector::VectorIndexBuilder;
|
use crate::index::vector::VectorIndexBuilder;
|
||||||
|
use crate::WriteMode;
|
||||||
use crate::query::Query;
|
use crate::query::Query;
|
||||||
|
|
||||||
pub const VECTOR_COLUMN_NAME: &str = "vector";
|
pub const VECTOR_COLUMN_NAME: &str = "vector";
|
||||||
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
||||||
|
|
||||||
/// A table in a LanceDB database.
|
/// A table in a LanceDB database.
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Table {
|
pub struct Table {
|
||||||
name: String,
|
name: String,
|
||||||
uri: String,
|
uri: String,
|
||||||
@@ -41,6 +42,11 @@ impl std::fmt::Display for Table {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct OpenTableParams {
|
||||||
|
pub open_table_params: ReadParams,
|
||||||
|
}
|
||||||
|
|
||||||
impl Table {
|
impl Table {
|
||||||
/// Opens an existing Table
|
/// Opens an existing Table
|
||||||
///
|
///
|
||||||
@@ -53,6 +59,25 @@ impl Table {
|
|||||||
///
|
///
|
||||||
/// * A [Table] object.
|
/// * A [Table] object.
|
||||||
pub async fn open(base_uri: &str, name: &str) -> Result<Self> {
|
pub async fn open(base_uri: &str, name: &str) -> Result<Self> {
|
||||||
|
Self::open_with_params(base_uri, name, OpenTableParams::default()).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Opens an existing Table
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `base_path` - The base path where the table is located
|
||||||
|
/// * `name` The Table name
|
||||||
|
/// * `params` The [OpenTableParams] to use when opening the table
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// * A [Table] object.
|
||||||
|
pub async fn open_with_params(
|
||||||
|
base_uri: &str,
|
||||||
|
name: &str,
|
||||||
|
params: OpenTableParams,
|
||||||
|
) -> Result<Self> {
|
||||||
let path = Path::new(base_uri);
|
let path = Path::new(base_uri);
|
||||||
|
|
||||||
let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
|
let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
|
||||||
@@ -61,14 +86,16 @@ impl Table {
|
|||||||
.to_str()
|
.to_str()
|
||||||
.context(InvalidTableNameSnafu { name })?;
|
.context(InvalidTableNameSnafu { name })?;
|
||||||
|
|
||||||
let dataset = Dataset::open(&uri).await.map_err(|e| match e {
|
let dataset = Dataset::open_with_params(uri, ¶ms.open_table_params)
|
||||||
lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
|
.await
|
||||||
name: name.to_string(),
|
.map_err(|e| match e {
|
||||||
},
|
lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
|
||||||
e => Error::Lance {
|
name: name.to_string(),
|
||||||
message: e.to_string(),
|
},
|
||||||
},
|
e => Error::Lance {
|
||||||
})?;
|
message: e.to_string(),
|
||||||
|
},
|
||||||
|
})?;
|
||||||
Ok(Table {
|
Ok(Table {
|
||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
uri: uri.to_string(),
|
uri: uri.to_string(),
|
||||||
@@ -91,6 +118,7 @@ impl Table {
         base_uri: &str,
         name: &str,
         mut batches: Box<dyn RecordBatchReader>,
+        params: Option<WriteParams>,
     ) -> Result<Self> {
         let base_path = Path::new(base_uri);
         let table_uri = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
@@ -99,7 +127,7 @@ impl Table {
             .to_str()
             .context(InvalidTableNameSnafu { name })?
             .to_string();
-        let dataset = Dataset::write(&mut batches, &uri, Some(WriteParams::default()))
+        let dataset = Dataset::write(&mut batches, &uri, params)
             .await
             .map_err(|e| match e {
                 lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
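
A hedged sketch of creating a table with explicit write parameters under the new signature; the `mode` field choice is an assumption about lance's `WriteParams`, the module paths are illustrative, and passing `None` keeps the previous default behaviour:

use arrow_array::RecordBatchReader;
use lance::dataset::{WriteMode, WriteParams};
use vectordb::table::Table;

async fn create_example(batches: Box<dyn RecordBatchReader>) -> vectordb::error::Result<()> {
    let write_params = WriteParams {
        // Assumed field: replace any existing dataset at this URI.
        mode: WriteMode::Overwrite,
        ..WriteParams::default()
    };
    let _table = Table::create("/tmp/lancedb", "my_table", batches, Some(write_params)).await?;
    Ok(())
}
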
@@ -130,6 +158,7 @@ impl Table {
                 IndexType::Vector,
                 index_builder.get_index_name(),
                 &index_builder.build(),
+                index_builder.get_replace(),
             )
             .await?;
         self.dataset = Arc::new(dataset);
@@ -174,21 +203,44 @@ impl Table {
     pub async fn count_rows(&self) -> Result<usize> {
         Ok(self.dataset.count_rows().await?)
     }
 
+    /// Merge new data into this table.
+    pub async fn merge(
+        &mut self,
+        mut batches: Box<dyn RecordBatchReader>,
+        left_on: &str,
+        right_on: &str,
+    ) -> Result<()> {
+        let mut dataset = self.dataset.as_ref().clone();
+        dataset.merge(&mut batches, left_on, right_on).await?;
+        self.dataset = Arc::new(dataset);
+        Ok(())
+    }
+
+    /// Delete rows from the table
+    pub async fn delete(&mut self, predicate: &str) -> Result<()> {
+        let mut dataset = self.dataset.as_ref().clone();
+        dataset.delete(predicate).await?;
+        self.dataset = Arc::new(dataset);
+        Ok(())
+    }
 }
 
 #[cfg(test)]
 mod tests {
+    use std::sync::atomic::{AtomicBool, Ordering};
     use std::sync::Arc;
 
     use arrow_array::{
-        Array, FixedSizeListArray, Float32Array, Int32Array, RecordBatch, RecordBatchReader,
+        Array, FixedSizeListArray, Float32Array, Int32Array, RecordBatch, RecordBatchIterator,
+        RecordBatchReader,
     };
     use arrow_data::ArrayDataBuilder;
     use arrow_schema::{DataType, Field, Schema};
-    use lance::arrow::RecordBatchBuffer;
     use lance::dataset::{Dataset, WriteMode};
     use lance::index::vector::ivf::IvfBuildParams;
     use lance::index::vector::pq::PQBuildParams;
+    use lance::io::object_store::{ObjectStoreParams, WrappingObjectStore};
    use rand::Rng;
     use tempfile::tempdir;
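
An illustrative sketch of calling the new `merge` and `delete` methods; the key column name and the predicate are invented for the example, and the `vectordb::` paths are assumptions:

use arrow_array::RecordBatchReader;
use vectordb::table::Table;

async fn mutate_example(
    table: &mut Table,
    new_data: Box<dyn RecordBatchReader>,
) -> vectordb::error::Result<()> {
    // Merge new_data into the table, matching rows on the "id" column.
    table.merge(new_data, "id", "id").await?;
    // Remove rows matching a SQL-style predicate.
    table.delete("id > 100").await?;
    Ok(())
}
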
@@ -201,7 +253,7 @@ mod tests {
         let dataset_path = tmp_dir.path().join("test.lance");
         let uri = tmp_dir.path().to_str().unwrap();
 
-        let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
+        let mut batches: Box<dyn RecordBatchReader> = make_test_batches();
         Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
             .await
             .unwrap();
@@ -232,12 +284,12 @@ mod tests {
         let tmp_dir = tempdir().unwrap();
         let uri = tmp_dir.path().to_str().unwrap();
 
-        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
-        let schema = batches.schema().clone();
-        Table::create(&uri, "test", batches).await.unwrap();
+        let batches: Box<dyn RecordBatchReader> = make_test_batches();
+        let _ = batches.schema().clone();
+        Table::create(&uri, "test", batches, None).await.unwrap();
 
-        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
-        let result = Table::create(&uri, "test", batches).await;
+        let batches: Box<dyn RecordBatchReader> = make_test_batches();
+        let result = Table::create(&uri, "test", batches, None).await;
         assert!(matches!(
             result.unwrap_err(),
             Error::TableAlreadyExists { .. }
@@ -249,17 +301,21 @@ mod tests {
         let tmp_dir = tempdir().unwrap();
         let uri = tmp_dir.path().to_str().unwrap();
 
-        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
+        let batches: Box<dyn RecordBatchReader> = make_test_batches();
         let schema = batches.schema().clone();
-        let mut table = Table::create(&uri, "test", batches).await.unwrap();
+        let mut table = Table::create(&uri, "test", batches, None).await.unwrap();
         assert_eq!(table.count_rows().await.unwrap(), 10);
 
-        let new_batches: Box<dyn RecordBatchReader> =
-            Box::new(RecordBatchBuffer::new(vec![RecordBatch::try_new(
-                schema,
+        let new_batches: Box<dyn RecordBatchReader> = Box::new(RecordBatchIterator::new(
+            vec![RecordBatch::try_new(
+                schema.clone(),
                 vec![Arc::new(Int32Array::from_iter_values(100..110))],
             )
-            .unwrap()]));
+            .unwrap()]
+            .into_iter()
+            .map(Ok),
+            schema.clone(),
+        ));
 
         table.add(new_batches, None).await.unwrap();
         assert_eq!(table.count_rows().await.unwrap(), 20);
@@ -271,17 +327,21 @@ mod tests {
         let tmp_dir = tempdir().unwrap();
         let uri = tmp_dir.path().to_str().unwrap();
 
-        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
+        let batches: Box<dyn RecordBatchReader> = make_test_batches();
         let schema = batches.schema().clone();
-        let mut table = Table::create(uri, "test", batches).await.unwrap();
+        let mut table = Table::create(uri, "test", batches, None).await.unwrap();
         assert_eq!(table.count_rows().await.unwrap(), 10);
 
-        let new_batches: Box<dyn RecordBatchReader> =
-            Box::new(RecordBatchBuffer::new(vec![RecordBatch::try_new(
-                schema,
+        let new_batches: Box<dyn RecordBatchReader> = Box::new(RecordBatchIterator::new(
+            vec![RecordBatch::try_new(
+                schema.clone(),
                 vec![Arc::new(Int32Array::from_iter_values(100..110))],
             )
-            .unwrap()]));
+            .unwrap()]
+            .into_iter()
+            .map(Ok),
+            schema.clone(),
+        ));
 
         table
             .add(new_batches, Some(WriteMode::Overwrite))
@@ -297,7 +357,7 @@ mod tests {
         let dataset_path = tmp_dir.path().join("test.lance");
         let uri = tmp_dir.path().to_str().unwrap();
 
-        let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
+        let mut batches: Box<dyn RecordBatchReader> = make_test_batches();
         Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
             .await
             .unwrap();
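
The test updates above all follow one migration: the removed `lance::arrow::RecordBatchBuffer` helper is replaced by arrow's `RecordBatchIterator`, which takes an iterator of `Result`-wrapped batches plus a schema and implements `RecordBatchReader`. A condensed sketch of that pattern (the helper name is illustrative):

use std::sync::Arc;

use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
use arrow_schema::Schema;

fn reader_from_batches(schema: Arc<Schema>, batches: Vec<RecordBatch>) -> Box<dyn RecordBatchReader> {
    // RecordBatchIterator expects Result items, hence the `.map(Ok)`.
    Box::new(RecordBatchIterator::new(batches.into_iter().map(Ok), schema))
}
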
@@ -309,13 +369,63 @@ mod tests {
         assert_eq!(vector, query.query_vector);
     }
 
-    fn make_test_batches() -> RecordBatchBuffer {
+    #[derive(Default)]
+    struct NoOpCacheWrapper {
+        called: AtomicBool,
+    }
+
+    impl NoOpCacheWrapper {
+        fn called(&self) -> bool {
+            self.called.load(Ordering::Relaxed)
+        }
+    }
+
+    impl WrappingObjectStore for NoOpCacheWrapper {
+        fn wrap(
+            &self,
+            original: Arc<dyn object_store::ObjectStore>,
+        ) -> Arc<dyn object_store::ObjectStore> {
+            self.called.store(true, Ordering::Relaxed);
+            return original;
+        }
+    }
+
+    #[tokio::test]
+    async fn test_open_table_options() {
+        let tmp_dir = tempdir().unwrap();
+        let dataset_path = tmp_dir.path().join("test.lance");
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let mut batches: Box<dyn RecordBatchReader> = make_test_batches();
+        Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
+            .await
+            .unwrap();
+
+        let wrapper = Arc::new(NoOpCacheWrapper::default());
+
+        let mut object_store_params = ObjectStoreParams::default();
+        object_store_params.object_store_wrapper = Some(wrapper.clone());
+        let param = OpenTableParams {
+            open_table_params: ReadParams {
+                store_options: Some(object_store_params),
+                ..ReadParams::default()
+            },
+        };
+
+        assert!(!wrapper.called());
+        let _ = Table::open_with_params(uri, "test", param).await.unwrap();
+        assert!(wrapper.called());
+    }
+
+    fn make_test_batches() -> Box<dyn RecordBatchReader> {
         let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, false)]));
-        RecordBatchBuffer::new(vec![RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(Int32Array::from_iter_values(0..10))],
-        )
-        .unwrap()])
+        Box::new(RecordBatchIterator::new(
+            vec![RecordBatch::try_new(
+                schema.clone(),
+                vec![Arc::new(Int32Array::from_iter_values(0..10))],
+            )],
+            schema,
+        ))
     }
 
     #[tokio::test]
@@ -348,14 +458,15 @@ mod tests {
         );
 
         let vectors = Arc::new(create_fixed_size_list(float_arr, dimension).unwrap());
-        let batches = RecordBatchBuffer::new(vec![RecordBatch::try_new(
-            schema.clone(),
-            vec![vectors.clone()],
-        )
-        .unwrap()]);
+        let batches = RecordBatchIterator::new(
+            vec![RecordBatch::try_new(schema.clone(), vec![vectors.clone()]).unwrap()]
+                .into_iter()
+                .map(Ok),
+            schema,
+        );
 
         let reader: Box<dyn RecordBatchReader + Send> = Box::new(batches);
-        let mut table = Table::create(uri, "test", reader).await.unwrap();
+        let mut table = Table::create(uri, "test", reader, None).await.unwrap();
 
         let mut i = IvfPQIndexBuilder::new();