mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
Compare commits
102 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1cbfc1bbf4 | ||
|
|
a2bb497135 | ||
|
|
0cf40c8da3 | ||
|
|
8233c689c3 | ||
|
|
6e24e731b8 | ||
|
|
f4ce86e12c | ||
|
|
0664eaec82 | ||
|
|
63acdc2069 | ||
|
|
a636bb1075 | ||
|
|
5e3167da83 | ||
|
|
f09db4a6d6 | ||
|
|
1d343edbd4 | ||
|
|
980f910f50 | ||
|
|
fb97b03a51 | ||
|
|
141b6647a8 | ||
|
|
b45ac4608f | ||
|
|
a86bc05131 | ||
|
|
3537afb2c3 | ||
|
|
23f5dddc7c | ||
|
|
9748406cba | ||
|
|
6271949d38 | ||
|
|
131ad09ab3 | ||
|
|
030f07e7f0 | ||
|
|
72afa06b7a | ||
|
|
088e745e1d | ||
|
|
7a57cddb2c | ||
|
|
8ff5f88916 | ||
|
|
028a6e433d | ||
|
|
04c6814fb1 | ||
|
|
c62e4ca1eb | ||
|
|
aecc5fc42b | ||
|
|
2fdcb307eb | ||
|
|
ad18826579 | ||
|
|
a8a50591d7 | ||
|
|
6dfe7fabc2 | ||
|
|
2b108e1c80 | ||
|
|
8c9edafccc | ||
|
|
0590413b96 | ||
|
|
bd2d40a927 | ||
|
|
08944bf4fd | ||
|
|
826dc90151 | ||
|
|
08cc483ec9 | ||
|
|
ff1d206182 | ||
|
|
c385c55629 | ||
|
|
0a03f7ca5a | ||
|
|
88be978e87 | ||
|
|
98b12caa06 | ||
|
|
091dffb171 | ||
|
|
ace6aa883a | ||
|
|
80c25f9896 | ||
|
|
caf22fdb71 | ||
|
|
0e7ae5dfbf | ||
|
|
b261e27222 | ||
|
|
9f603f73a9 | ||
|
|
9ef846929b | ||
|
|
97364a2514 | ||
|
|
e6c6da6104 | ||
|
|
a5eb665b7d | ||
|
|
e2325c634b | ||
|
|
507eeae9c8 | ||
|
|
bb3df62dce | ||
|
|
dc7146b2cb | ||
|
|
d701947f0b | ||
|
|
3c46d7f268 | ||
|
|
9600a38ff0 | ||
|
|
148ed82607 | ||
|
|
fc725c99f0 | ||
|
|
a6bdffd75b | ||
|
|
051c03c3c9 | ||
|
|
39479dcf8e | ||
|
|
b731a6aed9 | ||
|
|
0f58bd7af2 | ||
|
|
01abf82808 | ||
|
|
eb5bcda337 | ||
|
|
4bc676e26a | ||
|
|
c68c236f17 | ||
|
|
313e66c4c5 | ||
|
|
e850df56f1 | ||
|
|
8c5507075c | ||
|
|
0e4c52b8a6 | ||
|
|
c8bebf4776 | ||
|
|
c14ad91df0 | ||
|
|
ad48242ffb | ||
|
|
1a9a392e20 | ||
|
|
b489edc576 | ||
|
|
8708fde3ef | ||
|
|
cc7e54298b | ||
|
|
d1e8a97a2a | ||
|
|
01dadb0862 | ||
|
|
0724d41c4b | ||
|
|
cbb56e25ab | ||
|
|
78de8f5782 | ||
|
|
a6544c2a31 | ||
|
|
39ed70896a | ||
|
|
ae672df1b7 | ||
|
|
15c3f42387 | ||
|
|
f65d85efcc | ||
|
|
6b5c046c3b | ||
|
|
d00f4e51d0 | ||
|
|
fbc44d4243 | ||
|
|
b53eee42ce | ||
|
|
7e0d6088ca |
12
.bumpversion.cfg
Normal file
12
.bumpversion.cfg
Normal file
@@ -0,0 +1,12 @@
|
||||
[bumpversion]
|
||||
current_version = 0.1.15
|
||||
commit = True
|
||||
message = Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
tag_name = v{new_version}
|
||||
|
||||
[bumpversion:file:node/package.json]
|
||||
|
||||
[bumpversion:file:rust/ffi/node/Cargo.toml]
|
||||
|
||||
[bumpversion:file:rust/vectordb/Cargo.toml]
|
||||
29
.github/workflows/cargo-publish.yml
vendored
Normal file
29
.github/workflows/cargo-publish.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
name: Cargo Publish
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [ published ]
|
||||
|
||||
env:
|
||||
# This env var is used by Swatinem/rust-cache@v2 for the cache
|
||||
# key, so we set it to make sure it is always consistent.
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-22.04
|
||||
timeout-minutes: 30
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Publish the package
|
||||
run: |
|
||||
cargo publish -p vectordb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
22
.github/workflows/docs.yml
vendored
22
.github/workflows/docs.yml
vendored
@@ -39,6 +39,28 @@ jobs:
|
||||
run: |
|
||||
python -m pip install -e .
|
||||
python -m pip install -r ../docs/requirements.txt
|
||||
- name: Set up node
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
cache: 'npm'
|
||||
cache-dependency-path: node/package-lock.json
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install node dependencies
|
||||
working-directory: node
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Build node
|
||||
working-directory: node
|
||||
run: |
|
||||
npm ci
|
||||
npm run build
|
||||
npm run tsc
|
||||
- name: Create markdown files
|
||||
working-directory: node
|
||||
run: |
|
||||
npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
|
||||
- name: Build docs
|
||||
run: |
|
||||
PYTHONPATH=. mkdocs build -f docs/mkdocs.yml
|
||||
|
||||
93
.github/workflows/docs_test.yml
vendored
Normal file
93
.github/workflows/docs_test.yml
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
name: Documentation Code Testing
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- docs/**
|
||||
- .github/workflows/docs_test.yml
|
||||
pull_request:
|
||||
paths:
|
||||
- docs/**
|
||||
- .github/workflows/docs_test.yml
|
||||
|
||||
# Allows you to run this workflow manually from the Actions tab
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
RUSTFLAGS: "-C debuginfo=1"
|
||||
RUST_BACKTRACE: "1"
|
||||
|
||||
jobs:
|
||||
test-python:
|
||||
name: Test doc python code
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-minor-version: [ "11" ]
|
||||
os: ["ubuntu-22.04"]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.${{ matrix.python-minor-version }}
|
||||
cache: "pip"
|
||||
cache-dependency-path: "docs/test/requirements.txt"
|
||||
- name: Build Python
|
||||
working-directory: docs/test
|
||||
run:
|
||||
python -m pip install -r requirements.txt
|
||||
- name: Create test files
|
||||
run: |
|
||||
cd docs/test
|
||||
python md_testing.py
|
||||
- name: Test
|
||||
run: |
|
||||
cd docs/test/python
|
||||
for d in *; do cd "$d"; echo "$d".py; python "$d".py; cd ..; done
|
||||
test-node:
|
||||
name: Test doc nodejs code
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
node-version: [ "18" ]
|
||||
os: ["ubuntu-22.04"]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
- name: Install dependecies needed for ubuntu
|
||||
if: ${{ matrix.os == 'ubuntu-22.04' }}
|
||||
run: |
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Install node dependencies
|
||||
run: |
|
||||
cd docs/test
|
||||
npm install
|
||||
- name: Rust cache
|
||||
uses: swatinem/rust-cache@v2
|
||||
- name: Install LanceDB
|
||||
run: |
|
||||
cd docs/test/node_modules/vectordb
|
||||
npm ci
|
||||
npm run build-release
|
||||
npm run tsc
|
||||
- name: Create test files
|
||||
run: |
|
||||
cd docs/test
|
||||
node md_testing.js
|
||||
- name: Test
|
||||
run: |
|
||||
cd docs/test/node
|
||||
for d in *; do cd "$d"; echo "$d".js; node "$d".js; cd ..; done
|
||||
49
.github/workflows/make-release-commit.yml
vendored
Normal file
49
.github/workflows/make-release-commit.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: Create release commit
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: 'Dry run (create the local commit/tags but do not push it)'
|
||||
required: true
|
||||
default: "false"
|
||||
type: choice
|
||||
options:
|
||||
- "true"
|
||||
- "false"
|
||||
part:
|
||||
description: 'What kind of release is this?'
|
||||
required: true
|
||||
default: 'patch'
|
||||
type: choice
|
||||
options:
|
||||
- patch
|
||||
- minor
|
||||
- major
|
||||
|
||||
jobs:
|
||||
bump-version:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out main
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
ref: main
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set git configs for bumpversion
|
||||
shell: bash
|
||||
run: |
|
||||
git config user.name 'Lance Release'
|
||||
git config user.email 'lance-dev@lancedb.com'
|
||||
- name: Set up Python 3.10
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10"
|
||||
- name: Bump version, create tag and commit
|
||||
run: |
|
||||
pip install bump2version
|
||||
bumpversion --verbose ${{ inputs.part }}
|
||||
git push
|
||||
- uses: ./.github/workflows/update_package_lock
|
||||
|
||||
12
.github/workflows/node.yml
vendored
12
.github/workflows/node.yml
vendored
@@ -67,8 +67,12 @@ jobs:
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci
|
||||
npm run build
|
||||
npm run tsc
|
||||
npm run build
|
||||
npm run pack-build
|
||||
npm install --no-save ./dist/lancedb-vectordb-*.tgz
|
||||
# Remove index.node to test with dependency installed
|
||||
rm index.node
|
||||
- name: Test
|
||||
run: npm run test
|
||||
macos:
|
||||
@@ -94,8 +98,12 @@ jobs:
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci
|
||||
npm run build
|
||||
npm run tsc
|
||||
npm run build
|
||||
npm run pack-build
|
||||
npm install --no-save ./dist/lancedb-vectordb-*.tgz
|
||||
# Remove index.node to test with dependency installed
|
||||
rm index.node
|
||||
- name: Test
|
||||
run: |
|
||||
npm run test
|
||||
|
||||
180
.github/workflows/npm-publish.yml
vendored
Normal file
180
.github/workflows/npm-publish.yml
vendored
Normal file
@@ -0,0 +1,180 @@
|
||||
name: NPM Publish
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [ published ]
|
||||
|
||||
jobs:
|
||||
node:
|
||||
runs-on: ubuntu-latest
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: node
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: node/package-lock.json
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci
|
||||
npm run tsc
|
||||
npm pack
|
||||
- name: Upload Linux Artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: node-package
|
||||
path: |
|
||||
node/lancedb-vectordb-*.tgz
|
||||
|
||||
node-macos:
|
||||
runs-on: macos-12
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
target: [x86_64-apple-darwin, aarch64-apple-darwin]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Install system dependencies
|
||||
run: brew install protobuf
|
||||
- name: Install npm dependencies
|
||||
run: |
|
||||
cd node
|
||||
npm ci
|
||||
- name: Install rustup target
|
||||
if: ${{ matrix.target == 'aarch64-apple-darwin' }}
|
||||
run: rustup target add aarch64-apple-darwin
|
||||
- name: Build MacOS native node modules
|
||||
run: bash ci/build_macos_artifacts.sh ${{ matrix.target }}
|
||||
- name: Upload Darwin Artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: native-darwin
|
||||
path: |
|
||||
node/dist/lancedb-vectordb-darwin*.tgz
|
||||
|
||||
node-linux:
|
||||
name: node-linux (${{ matrix.arch}}-unknown-linux-${{ matrix.libc }})
|
||||
runs-on: ubuntu-latest
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
libc:
|
||||
- gnu
|
||||
# TODO: re-enable musl once we have refactored to pre-built containers
|
||||
# Right now we have to build node from source which is too expensive.
|
||||
# - musl
|
||||
arch:
|
||||
- x86_64
|
||||
# Building on aarch64 is too slow for now
|
||||
# - aarch64
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Change owner to root (for npm)
|
||||
# The docker container is run as root, so we need the files to be owned by root
|
||||
# Otherwise npm is a nightmare: https://github.com/npm/cli/issues/3773
|
||||
run: sudo chown -R root:root .
|
||||
- name: Set up QEMU
|
||||
if: ${{ matrix.arch == 'aarch64' }}
|
||||
uses: docker/setup-qemu-action@v2
|
||||
with:
|
||||
platforms: arm64
|
||||
- name: Build Linux GNU native node modules
|
||||
if: ${{ matrix.libc == 'gnu' }}
|
||||
run: |
|
||||
docker run \
|
||||
-v $(pwd):/io -w /io \
|
||||
rust:1.70-bookworm \
|
||||
bash ci/build_linux_artifacts.sh ${{ matrix.arch }}-unknown-linux-gnu
|
||||
- name: Build musl Linux native node modules
|
||||
if: ${{ matrix.libc == 'musl' }}
|
||||
run: |
|
||||
docker run --platform linux/arm64/v8 \
|
||||
-v $(pwd):/io -w /io \
|
||||
quay.io/pypa/musllinux_1_1_${{ matrix.arch }} \
|
||||
bash ci/build_linux_artifacts.sh ${{ matrix.arch }}-unknown-linux-musl
|
||||
- name: Upload Linux Artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: native-linux
|
||||
path: |
|
||||
node/dist/lancedb-vectordb-linux*.tgz
|
||||
|
||||
node-windows:
|
||||
runs-on: windows-2022
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
target: [x86_64-pc-windows-msvc]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Install Protoc v21.12
|
||||
working-directory: C:\
|
||||
run: |
|
||||
New-Item -Path 'C:\protoc' -ItemType Directory
|
||||
Set-Location C:\protoc
|
||||
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
||||
7z x protoc.zip
|
||||
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
||||
shell: powershell
|
||||
- name: Install npm dependencies
|
||||
run: |
|
||||
cd node
|
||||
npm ci
|
||||
- name: Build Windows native node modules
|
||||
run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
|
||||
- name: Upload Windows Artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: native-windows
|
||||
path: |
|
||||
node/dist/lancedb-vectordb-win32*.tgz
|
||||
|
||||
release:
|
||||
needs: [node, node-macos, node-linux, node-windows]
|
||||
runs-on: ubuntu-latest
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
steps:
|
||||
- uses: actions/download-artifact@v3
|
||||
- name: Display structure of downloaded files
|
||||
run: ls -R
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
registry-url: 'https://registry.npmjs.org'
|
||||
- name: Publish to NPM
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
|
||||
run: |
|
||||
mv */*.tgz .
|
||||
for filename in *.tgz; do
|
||||
npm publish $filename
|
||||
done
|
||||
|
||||
update-package-lock:
|
||||
needs: [release]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- uses: ./.github/workflows/update_package_lock
|
||||
4
.github/workflows/pypi-publish.yml
vendored
4
.github/workflows/pypi-publish.yml
vendored
@@ -3,12 +3,12 @@ name: PyPI Publish
|
||||
on:
|
||||
release:
|
||||
types: [ published ]
|
||||
tags:
|
||||
- 'python-v*' # Push events that matches the python-make-release action
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
# Only runs on tags that matches the python-make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
8
.github/workflows/python.yml
vendored
8
.github/workflows/python.yml
vendored
@@ -32,9 +32,11 @@ jobs:
|
||||
run: |
|
||||
pip install -e .
|
||||
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
||||
pip install pytest pytest-mock black
|
||||
pip install pytest pytest-mock black isort
|
||||
- name: Black
|
||||
run: black --check --diff --no-color --quiet .
|
||||
- name: isort
|
||||
run: isort --check --diff --quiet .
|
||||
- name: Run tests
|
||||
run: pytest -x -v --durations=30 tests
|
||||
- name: doctest
|
||||
@@ -59,6 +61,8 @@ jobs:
|
||||
run: |
|
||||
pip install -e .
|
||||
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
||||
pip install pytest pytest-mock
|
||||
pip install pytest pytest-mock black
|
||||
- name: Black
|
||||
run: black --check --diff --no-color --quiet .
|
||||
- name: Run tests
|
||||
run: pytest -x -v --durations=30 tests
|
||||
89
.github/workflows/rust.yml
vendored
Normal file
89
.github/workflows/rust.yml
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
name: Rust
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- rust/**
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
env:
|
||||
# This env var is used by Swatinem/rust-cache@v2 for the cache
|
||||
# key, so we set it to make sure it is always consistent.
|
||||
CARGO_TERM_COLOR: always
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
RUSTFLAGS: "-C debuginfo=1"
|
||||
RUST_BACKTRACE: "1"
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
timeout-minutes: 30
|
||||
runs-on: ubuntu-22.04
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: rust
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Build
|
||||
run: cargo build --all-features
|
||||
- name: Run tests
|
||||
run: cargo test --all-features
|
||||
macos:
|
||||
runs-on: macos-12
|
||||
timeout-minutes: 30
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: rust
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: CPU features
|
||||
run: sysctl -a | grep cpu
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: brew install protobuf
|
||||
- name: Build
|
||||
run: cargo build --all-features
|
||||
- name: Run tests
|
||||
run: cargo test --all-features
|
||||
windows:
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install Protoc v21.12
|
||||
working-directory: C:\
|
||||
run: |
|
||||
New-Item -Path 'C:\protoc' -ItemType Directory
|
||||
Set-Location C:\protoc
|
||||
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
||||
7z x protoc.zip
|
||||
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
||||
shell: powershell
|
||||
- name: Run tests
|
||||
run: |
|
||||
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
|
||||
cargo build
|
||||
cargo test
|
||||
22
.github/workflows/update_package_lock/action.yml
vendored
Normal file
22
.github/workflows/update_package_lock/action.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: update_package_lock
|
||||
description: "Update node's package.lock"
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
- name: Set git configs
|
||||
shell: bash
|
||||
run: |
|
||||
git config user.name 'Lance Release'
|
||||
git config user.email 'lance-dev@lancedb.com'
|
||||
- name: Update package-lock.json file
|
||||
working-directory: ./node
|
||||
run: |
|
||||
npm install
|
||||
git add package-lock.json
|
||||
git commit -m "Updating package-lock.json"
|
||||
git push
|
||||
shell: bash
|
||||
12
.github/workflows/update_package_lock_run.yml
vendored
Normal file
12
.github/workflows/update_package_lock_run.yml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
name: Update package-lock.json
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- uses: ./.github/workflows/update_package_lock
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -3,6 +3,9 @@
|
||||
*.egg-info
|
||||
**/__pycache__
|
||||
.DS_Store
|
||||
venv
|
||||
|
||||
.vscode
|
||||
|
||||
rust/target
|
||||
rust/Cargo.lock
|
||||
@@ -30,3 +33,4 @@ node/examples/**/dist
|
||||
## Rust
|
||||
target
|
||||
|
||||
Cargo.lock
|
||||
@@ -9,3 +9,13 @@ repos:
|
||||
rev: 22.12.0
|
||||
hooks:
|
||||
- id: black
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
# Ruff version.
|
||||
rev: v0.0.277
|
||||
hooks:
|
||||
- id: ruff
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.12.0
|
||||
hooks:
|
||||
- id: isort
|
||||
name: isort (python)
|
||||
3797
Cargo.lock
generated
3797
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
10
Cargo.toml
10
Cargo.toml
@@ -4,3 +4,13 @@ members = [
|
||||
"rust/ffi/node"
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = "=0.5.8"
|
||||
arrow-array = "42.0"
|
||||
arrow-data = "42.0"
|
||||
arrow-schema = "42.0"
|
||||
arrow-ipc = "42.0"
|
||||
half = { "version" = "=2.2.1", default-features = false }
|
||||
object_store = "0.6.1"
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ pip install lancedb
|
||||
```python
|
||||
import lancedb
|
||||
|
||||
uri = "/tmp/lancedb"
|
||||
uri = "data/sample-lancedb"
|
||||
db = lancedb.connect(uri)
|
||||
table = db.create_table("my_table",
|
||||
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
|
||||
72
ci/build_linux_artifacts.sh
Normal file
72
ci/build_linux_artifacts.sh
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/bin/bash
|
||||
# Builds the Linux artifacts (node binaries).
|
||||
# Usage: ./build_linux_artifacts.sh [target]
|
||||
# Targets supported:
|
||||
# - x86_64-unknown-linux-gnu:centos
|
||||
# - aarch64-unknown-linux-gnu:centos
|
||||
# - aarch64-unknown-linux-musl
|
||||
# - x86_64-unknown-linux-musl
|
||||
|
||||
# TODO: refactor this into a Docker container we can pull
|
||||
|
||||
set -e
|
||||
|
||||
setup_dependencies() {
|
||||
echo "Installing system dependencies..."
|
||||
if [[ $1 == *musl ]]; then
|
||||
# musllinux
|
||||
apk add openssl-dev
|
||||
else
|
||||
# rust / debian
|
||||
apt update
|
||||
apt install -y libssl-dev protobuf-compiler
|
||||
fi
|
||||
}
|
||||
|
||||
install_node() {
|
||||
echo "Installing node..."
|
||||
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
|
||||
source "$HOME"/.bashrc
|
||||
|
||||
if [[ $1 == *musl ]]; then
|
||||
# This node version is 15, we need 16 or higher:
|
||||
# apk add nodejs-current npm
|
||||
# So instead we install from source (nvm doesn't provide binaries for musl):
|
||||
nvm install -s --no-progress 17
|
||||
else
|
||||
nvm install --no-progress 17 # latest that supports glibc 2.17
|
||||
fi
|
||||
}
|
||||
|
||||
build_node_binary() {
|
||||
echo "Building node library for $1..."
|
||||
pushd node
|
||||
|
||||
npm ci
|
||||
|
||||
if [[ $1 == *musl ]]; then
|
||||
# This is needed for cargo to allow build cdylibs with musl
|
||||
export RUSTFLAGS="-C target-feature=-crt-static"
|
||||
fi
|
||||
|
||||
# Cargo can run out of memory while pulling dependencies, especially when running
|
||||
# in QEMU. This is a workaround for that.
|
||||
export CARGO_NET_GIT_FETCH_WITH_CLI=true
|
||||
|
||||
# We don't pass in target, since the native target here already matches
|
||||
# We need to pass OPENSSL_LIB_DIR and OPENSSL_INCLUDE_DIR for static build to work https://github.com/sfackler/rust-openssl/issues/877
|
||||
OPENSSL_STATIC=1 OPENSSL_LIB_DIR=/usr/lib/x86_64-linux-gnu OPENSSL_INCLUDE_DIR=/usr/include/openssl/ npm run build-release
|
||||
npm run pack-build
|
||||
|
||||
popd
|
||||
}
|
||||
|
||||
TARGET=${1:-x86_64-unknown-linux-gnu}
|
||||
# Others:
|
||||
# aarch64-unknown-linux-gnu
|
||||
# x86_64-unknown-linux-musl
|
||||
# aarch64-unknown-linux-musl
|
||||
|
||||
setup_dependencies $TARGET
|
||||
install_node $TARGET
|
||||
build_node_binary $TARGET
|
||||
33
ci/build_macos_artifacts.sh
Normal file
33
ci/build_macos_artifacts.sh
Normal file
@@ -0,0 +1,33 @@
|
||||
# Builds the macOS artifacts (node binaries).
|
||||
# Usage: ./ci/build_macos_artifacts.sh [target]
|
||||
# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
|
||||
|
||||
prebuild_rust() {
|
||||
# Building here for the sake of easier debugging.
|
||||
pushd rust/ffi/node
|
||||
echo "Building rust library for $1"
|
||||
export RUST_BACKTRACE=1
|
||||
cargo build --release --target $1
|
||||
popd
|
||||
}
|
||||
|
||||
build_node_binaries() {
|
||||
pushd node
|
||||
echo "Building node library for $1"
|
||||
npm run build-release -- --target $1
|
||||
npm run pack-build -- --target $1
|
||||
popd
|
||||
}
|
||||
|
||||
if [ -n "$1" ]; then
|
||||
targets=$1
|
||||
else
|
||||
targets="x86_64-apple-darwin aarch64-apple-darwin"
|
||||
fi
|
||||
|
||||
echo "Building artifacts for targets: $targets"
|
||||
for target in $targets
|
||||
do
|
||||
prebuild_rust $target
|
||||
build_node_binaries $target
|
||||
done
|
||||
41
ci/build_windows_artifacts.ps1
Normal file
41
ci/build_windows_artifacts.ps1
Normal file
@@ -0,0 +1,41 @@
|
||||
# Builds the Windows artifacts (node binaries).
|
||||
# Usage: .\ci\build_windows_artifacts.ps1 [target]
|
||||
# Targets supported:
|
||||
# - x86_64-pc-windows-msvc
|
||||
# - i686-pc-windows-msvc
|
||||
|
||||
function Prebuild-Rust {
|
||||
param (
|
||||
[string]$target
|
||||
)
|
||||
|
||||
# Building here for the sake of easier debugging.
|
||||
Push-Location -Path "rust/ffi/node"
|
||||
Write-Host "Building rust library for $target"
|
||||
$env:RUST_BACKTRACE=1
|
||||
cargo build --release --target $target
|
||||
Pop-Location
|
||||
}
|
||||
|
||||
function Build-NodeBinaries {
|
||||
param (
|
||||
[string]$target
|
||||
)
|
||||
|
||||
Push-Location -Path "node"
|
||||
Write-Host "Building node library for $target"
|
||||
npm run build-release -- --target $target
|
||||
npm run pack-build -- --target $target
|
||||
Pop-Location
|
||||
}
|
||||
|
||||
$targets = $args[0]
|
||||
if (-not $targets) {
|
||||
$targets = "x86_64-pc-windows-msvc"
|
||||
}
|
||||
|
||||
Write-Host "Building artifacts for targets: $targets"
|
||||
foreach ($target in $targets) {
|
||||
Prebuild-Rust $target
|
||||
Build-NodeBinaries $target
|
||||
}
|
||||
@@ -6,11 +6,13 @@ docs_dir: src
|
||||
theme:
|
||||
name: "material"
|
||||
logo: assets/logo.png
|
||||
favicon: assets/logo.png
|
||||
features:
|
||||
- content.code.copy
|
||||
- content.tabs.link
|
||||
icon:
|
||||
repo: fontawesome/brands/github
|
||||
custom_dir: overrides
|
||||
|
||||
plugins:
|
||||
- search
|
||||
@@ -36,6 +38,7 @@ plugins:
|
||||
|
||||
markdown_extensions:
|
||||
- admonition
|
||||
- footnotes
|
||||
- pymdownx.superfences
|
||||
- pymdownx.details
|
||||
- pymdownx.highlight:
|
||||
@@ -47,13 +50,19 @@ markdown_extensions:
|
||||
- pymdownx.superfences
|
||||
- pymdownx.tabbed:
|
||||
alternate_style: true
|
||||
- md_in_html
|
||||
|
||||
nav:
|
||||
- Home: index.md
|
||||
- Basics: basic.md
|
||||
- Embeddings: embedding.md
|
||||
- Python full-text search: fts.md
|
||||
- Python integrations: integrations.md
|
||||
- Python integrations:
|
||||
- Pandas and PyArrow: python/arrow.md
|
||||
- DuckDB: python/duckdb.md
|
||||
- LangChain 🦜️🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
|
||||
- LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
|
||||
- Pydantic: python/pydantic.md
|
||||
- Python examples:
|
||||
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
|
||||
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
|
||||
@@ -62,8 +71,10 @@ nav:
|
||||
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
|
||||
- Javascript examples:
|
||||
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
|
||||
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
|
||||
- References:
|
||||
- Vector Search: search.md
|
||||
- SQL filters: sql.md
|
||||
- Indexing: ann_indexes.md
|
||||
- API references:
|
||||
- Python API: python/python.md
|
||||
|
||||
176
docs/overrides/partials/header.html
Normal file
176
docs/overrides/partials/header.html
Normal file
@@ -0,0 +1,176 @@
|
||||
<!--
|
||||
Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
IN THE SOFTWARE.
|
||||
-->
|
||||
|
||||
{% set class = "md-header" %}
|
||||
{% if "navigation.tabs.sticky" in features %}
|
||||
{% set class = class ~ " md-header--shadow md-header--lifted" %}
|
||||
{% elif "navigation.tabs" not in features %}
|
||||
{% set class = class ~ " md-header--shadow" %}
|
||||
{% endif %}
|
||||
|
||||
<!-- Header -->
|
||||
<header class="{{ class }}" data-md-component="header">
|
||||
<nav
|
||||
class="md-header__inner md-grid"
|
||||
aria-label="{{ lang.t('header') }}"
|
||||
>
|
||||
|
||||
<!-- Link to home -->
|
||||
<a
|
||||
href="{{ config.extra.homepage | d(nav.homepage.url, true) | url }}"
|
||||
title="{{ config.site_name | e }}"
|
||||
class="md-header__button md-logo"
|
||||
aria-label="{{ config.site_name }}"
|
||||
data-md-component="logo"
|
||||
>
|
||||
{% include "partials/logo.html" %}
|
||||
</a>
|
||||
|
||||
<!-- Button to open drawer -->
|
||||
<label class="md-header__button md-icon" for="__drawer">
|
||||
{% include ".icons/material/menu" ~ ".svg" %}
|
||||
</label>
|
||||
|
||||
<!-- Header title -->
|
||||
<div class="md-header__title" style="width: auto !important;" data-md-component="header-title">
|
||||
<div class="md-header__ellipsis">
|
||||
<div class="md-header__topic">
|
||||
<span class="md-ellipsis">
|
||||
{{ config.site_name }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="md-header__topic" data-md-component="header-topic">
|
||||
<span class="md-ellipsis">
|
||||
{% if page.meta and page.meta.title %}
|
||||
{{ page.meta.title }}
|
||||
{% else %}
|
||||
{{ page.title }}
|
||||
{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Color palette -->
|
||||
{% if config.theme.palette %}
|
||||
{% if not config.theme.palette is mapping %}
|
||||
<form class="md-header__option" data-md-component="palette">
|
||||
{% for option in config.theme.palette %}
|
||||
{% set scheme = option.scheme | d("default", true) %}
|
||||
{% set primary = option.primary | d("indigo", true) %}
|
||||
{% set accent = option.accent | d("indigo", true) %}
|
||||
<input
|
||||
class="md-option"
|
||||
data-md-color-media="{{ option.media }}"
|
||||
data-md-color-scheme="{{ scheme | replace(' ', '-') }}"
|
||||
data-md-color-primary="{{ primary | replace(' ', '-') }}"
|
||||
data-md-color-accent="{{ accent | replace(' ', '-') }}"
|
||||
{% if option.toggle %}
|
||||
aria-label="{{ option.toggle.name }}"
|
||||
{% else %}
|
||||
aria-hidden="true"
|
||||
{% endif %}
|
||||
type="radio"
|
||||
name="__palette"
|
||||
id="__palette_{{ loop.index }}"
|
||||
/>
|
||||
{% if option.toggle %}
|
||||
<label
|
||||
class="md-header__button md-icon"
|
||||
title="{{ option.toggle.name }}"
|
||||
for="__palette_{{ loop.index0 or loop.length }}"
|
||||
hidden
|
||||
>
|
||||
{% include ".icons/" ~ option.toggle.icon ~ ".svg" %}
|
||||
</label>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</form>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
<!-- Site language selector -->
|
||||
{% if config.extra.alternate %}
|
||||
<div class="md-header__option">
|
||||
<div class="md-select">
|
||||
{% set icon = config.theme.icon.alternate or "material/translate" %}
|
||||
<button
|
||||
class="md-header__button md-icon"
|
||||
aria-label="{{ lang.t('select.language') }}"
|
||||
>
|
||||
{% include ".icons/" ~ icon ~ ".svg" %}
|
||||
</button>
|
||||
<div class="md-select__inner">
|
||||
<ul class="md-select__list">
|
||||
{% for alt in config.extra.alternate %}
|
||||
<li class="md-select__item">
|
||||
<a
|
||||
href="{{ alt.link | url }}"
|
||||
hreflang="{{ alt.lang }}"
|
||||
class="md-select__link"
|
||||
>
|
||||
{{ alt.name }}
|
||||
</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Button to open search modal -->
|
||||
{% if "material/search" in config.plugins %}
|
||||
<label class="md-header__button md-icon" for="__search">
|
||||
{% include ".icons/material/magnify.svg" %}
|
||||
</label>
|
||||
|
||||
<!-- Search interface -->
|
||||
{% include "partials/search.html" %}
|
||||
{% endif %}
|
||||
|
||||
<div style="margin-left: 10px; margin-right: 5px;">
|
||||
<a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
|
||||
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
|
||||
</a>
|
||||
</div>
|
||||
<div style="margin-left: 5px; margin-right: 5px;">
|
||||
<a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<!-- Repository information -->
|
||||
{% if config.repo_url %}
|
||||
<div class="md-header__source" style="margin-left: -5px !important;">
|
||||
{% include "partials/source.html" %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</nav>
|
||||
|
||||
<!-- Navigation tabs (sticky) -->
|
||||
{% if "navigation.tabs.sticky" in features %}
|
||||
{% if "navigation.tabs" in features %}
|
||||
{% include "partials/tabs.html" %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</header>
|
||||
@@ -1,7 +1,7 @@
|
||||
# ANN (Approximate Nearest Neighbor) Indexes
|
||||
|
||||
You can create an index over your vector data to make search faster.
|
||||
Vector indexes are faster but less accurate than exhaustive search.
|
||||
Vector indexes are faster but less accurate than exhaustive search (KNN or Flat Search).
|
||||
LanceDB provides many parameters to fine-tune the index's size, the speed of queries, and the accuracy of results.
|
||||
|
||||
Currently, LanceDB does *not* automatically create the ANN index.
|
||||
@@ -10,7 +10,18 @@ If you can live with <100ms latency, skipping index creation is a simpler workfl
|
||||
|
||||
In the future we will look to automatically create and configure the ANN index.
|
||||
|
||||
## Creating an ANN Index
|
||||
## Types of Index
|
||||
|
||||
Lance can support multiple index types, the most widely used one is `IVF_PQ`.
|
||||
|
||||
* `IVF_PQ`: use **Inverted File Index (IVF)** to first divide the dataset into `N` partitions,
|
||||
and then use **Product Quantization** to compress vectors in each partition.
|
||||
* `DISKANN` (**Experimental**): organize the vector as a on-disk graph, where the vertices approximately
|
||||
represent the nearest neighbors of each vector.
|
||||
|
||||
## Creating an IVF_PQ Index
|
||||
|
||||
Lance supports `IVF_PQ` index type by default.
|
||||
|
||||
=== "Python"
|
||||
Creating indexes is done via the [create_index](https://lancedb.github.io/lancedb/python/#lancedb.table.LanceTable.create_index) method.
|
||||
@@ -23,7 +34,7 @@ In the future we will look to automatically create and configure the ANN index.
|
||||
|
||||
# Create 10,000 sample vectors
|
||||
data = [{"vector": row, "item": f"item {i}"}
|
||||
for i, row in enumerate(np.random.random((10_000, 768)).astype('float32'))]
|
||||
for i, row in enumerate(np.random.random((10_000, 1536)).astype('float32'))]
|
||||
|
||||
# Add the vectors to a table
|
||||
tbl = db.create_table("my_vectors", data=data)
|
||||
@@ -41,19 +52,22 @@ In the future we will look to automatically create and configure the ANN index.
|
||||
for (let i = 0; i < 10_000; i++) {
|
||||
data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
|
||||
}
|
||||
const table = await db.createTable('vectors', data)
|
||||
await table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 256, num_sub_vectors: 96 })
|
||||
const table = await db.createTable('my_vectors', data)
|
||||
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 256, num_sub_vectors: 96 })
|
||||
```
|
||||
|
||||
Since `create_index` has a training step, it can take a few minutes to finish for large tables. You can control the index
|
||||
creation by providing the following parameters:
|
||||
- **metric** (default: "L2"): The distance metric to use. By default it uses euclidean distance "`L2`".
|
||||
We also support "cosine" and "dot" distance as well.
|
||||
- **num_partitions** (default: 256): The number of partitions of the index.
|
||||
- **num_sub_vectors** (default: 96): The number of sub-vectors (M) that will be created during Product Quantization (PQ).
|
||||
For D dimensional vector, it will be divided into `M` of `D/M` sub-vectors, each of which is presented by
|
||||
a single PQ code.
|
||||
|
||||
<figure markdown>
|
||||

|
||||
<figcaption>IVF_PQ index with <code>num_partitions=2, num_sub_vectors=4</code></figcaption>
|
||||
</figure>
|
||||
|
||||
- **metric** (default: "L2"): The distance metric to use. By default we use euclidean distance. We also support "cosine" distance.
|
||||
- **num_partitions** (default: 256): The number of partitions of the index. The number of partitions should be configured so each partition has 3-5K vectors. For example, a table
|
||||
with ~1M vectors should use 256 partitions. You can specify arbitrary number of partitions but powers of 2 is most conventional.
|
||||
A higher number leads to faster queries, but it makes index generation slower.
|
||||
- **num_sub_vectors** (default: 96): The number of subvectors (M) that will be created during Product Quantization (PQ). A larger number makes
|
||||
search more accurate, but also makes the index larger and slower to build.
|
||||
|
||||
## Querying an ANN Index
|
||||
|
||||
@@ -67,18 +81,19 @@ There are a couple of parameters that can be used to fine-tune the search:
|
||||
e.g., for 1M vectors divided up into 256 partitions, nprobes should be set to ~20-40.<br/>
|
||||
Note: nprobes is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
|
||||
- **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
|
||||
A higher number makes search more accurate but also slower. If you find the recall is less than idea, try refine_factor=10 to start.<br/>
|
||||
A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
|
||||
e.g., for 1M vectors divided into 256 partitions, if you're looking for top 20, then refine_factor=200 reranks the whole partition.<br/>
|
||||
Note: refine_factor is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
tbl.search(np.random.random((768))) \
|
||||
tbl.search(np.random.random((1536))) \
|
||||
.limit(2) \
|
||||
.nprobes(20) \
|
||||
.refine_factor(10) \
|
||||
.to_df()
|
||||
|
||||
```
|
||||
```
|
||||
vector item score
|
||||
0 [0.44949695, 0.8444449, 0.06281311, 0.23338133... item 1141 103.575333
|
||||
1 [0.48587373, 0.269207, 0.15095535, 0.65531915,... item 3953 108.393867
|
||||
@@ -86,8 +101,8 @@ There are a couple of parameters that can be used to fine-tune the search:
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
const results = await table
|
||||
.search(Array(768).fill(1.2))
|
||||
const results_1 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.limit(2)
|
||||
.nprobes(20)
|
||||
.refineFactor(10)
|
||||
@@ -104,14 +119,14 @@ You can further filter the elements returned by a search using a where clause.
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
tbl.search(np.random.random((768))).where("item != 'item 1141'").to_df()
|
||||
tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_df()
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
const results = await table
|
||||
const results_2 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.where("item != 'item 1141'")
|
||||
.where("id != '1141'")
|
||||
.execute()
|
||||
```
|
||||
|
||||
@@ -121,7 +136,9 @@ You can select the columns returned by the query using a select clause.
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
tbl.search(np.random.random((768))).select(["vector"]).to_df()
|
||||
tbl.search(np.random.random((1536))).select(["vector"]).to_df()
|
||||
```
|
||||
```
|
||||
vector score
|
||||
0 [0.30928212, 0.022668175, 0.1756372, 0.4911822... 93.971092
|
||||
1 [0.2525465, 0.01723831, 0.261568, 0.002007689,... 95.173485
|
||||
@@ -130,8 +147,36 @@ You can select the columns returned by the query using a select clause.
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
const results = await table
|
||||
const results_3 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.select(["id"])
|
||||
.execute()
|
||||
```
|
||||
|
||||
## FAQ
|
||||
|
||||
### When is it necessary to create an ANN vector index.
|
||||
|
||||
`LanceDB` has manually tuned SIMD code for computing vector distances.
|
||||
In our benchmarks, computing 100K pairs of 1K dimension vectors only take less than 20ms.
|
||||
For small dataset (<100K rows) or the applications which can accept 100ms latency, vector indices are usually not necessary.
|
||||
|
||||
For large-scale or higher dimension vectors, it is beneficial to create vector index.
|
||||
|
||||
### How big is my index, and how many memory will it take.
|
||||
|
||||
In LanceDB, all vector indices are disk-based, meaning that when responding to a vector query, only the relevant pages from the index file are loaded from disk and cached in memory. Additionally, each sub-vector is usually encoded into 1 byte PQ code.
|
||||
|
||||
For example, with a 1024-dimension dataset, if we choose `num_sub_vectors=64`, each sub-vector has `1024 / 64 = 16` float32 numbers.
|
||||
Product quantization can lead to approximately `16 * sizeof(float32) / 1 = 64` times of space reduction.
|
||||
|
||||
### How to choose `num_partitions` and `num_sub_vectors` for `IVF_PQ` index.
|
||||
|
||||
`num_partitions` is used to decide how many partitions the first level `IVF` index uses.
|
||||
Higher number of partitions could lead to more efficient I/O during queries and better accuracy, but it takes much more time to train.
|
||||
On `SIFT-1M` dataset, our benchmark shows that keeping each partition 1K-4K rows lead to a good latency / recall.
|
||||
|
||||
`num_sub_vectors` decides how many Product Quantization code to generate on each vector. Because
|
||||
Product Quantization is a lossy compression of the original vector, the more `num_sub_vectors` usually results to
|
||||
less space distortion, and thus yield better accuracy. However, similarly, more `num_sub_vectors` causes heavier I/O and
|
||||
more PQ computation, thus, higher latency. `dimension / num_sub_vectors` should be aligned with 8 for better SIMD efficiency.
|
||||
BIN
docs/src/assets/ivf_pq.png
Normal file
BIN
docs/src/assets/ivf_pq.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 266 KiB |
@@ -23,7 +23,7 @@ We'll cover the basics of using LanceDB on your local machine in this section.
|
||||
=== "Python"
|
||||
```python
|
||||
import lancedb
|
||||
uri = "~/.lancedb"
|
||||
uri = "data/sample-lancedb"
|
||||
db = lancedb.connect(uri)
|
||||
```
|
||||
|
||||
@@ -35,7 +35,7 @@ We'll cover the basics of using LanceDB on your local machine in this section.
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
|
||||
const uri = "~./lancedb";
|
||||
const uri = "data/sample-lancedb";
|
||||
const db = await lancedb.connect(uri);
|
||||
```
|
||||
|
||||
@@ -102,7 +102,7 @@ Once created, you can open a table using the following code:
|
||||
If you forget the name of your table, you can always get a listing of all table names:
|
||||
|
||||
```javascript
|
||||
console.log(db.tableNames());
|
||||
console.log(await db.tableNames());
|
||||
```
|
||||
|
||||
## How to add data to a table
|
||||
@@ -118,10 +118,39 @@ After a table has been created, you can always add more data to it using
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
await tbl.add([vector: [1.3, 1.4], item: "fizz", price: 100.0},
|
||||
await tbl.add([{vector: [1.3, 1.4], item: "fizz", price: 100.0},
|
||||
{vector: [9.5, 56.2], item: "buzz", price: 200.0}])
|
||||
```
|
||||
|
||||
## How to delete rows from a table
|
||||
|
||||
Use the `delete()` method on tables to delete rows from a table. To choose
|
||||
which rows to delete, provide a filter that matches on the metadata columns.
|
||||
This can delete any number of rows that match the filter.
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
tbl.delete('item = "fizz"')
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
await tbl.delete('item = "fizz"')
|
||||
```
|
||||
|
||||
The deletion predicate is a SQL expression that supports the same expressions
|
||||
as the `where()` clause on a search. They can be as simple or complex as needed.
|
||||
To see what expressions are supported, see the [SQL filters](sql.md) section.
|
||||
|
||||
|
||||
=== "Python"
|
||||
|
||||
Read more: [lancedb.table.Table.delete][]
|
||||
|
||||
=== "Javascript"
|
||||
|
||||
Read more: [vectordb.Table.delete](javascript/interfaces/Table.md#delete)
|
||||
|
||||
## How to search for (approximate) nearest neighbors
|
||||
|
||||
Once you've embedded the query, you can find its nearest neighbors using the following code:
|
||||
|
||||
@@ -98,7 +98,7 @@ You can also use an external API like OpenAI to generate embeddings
|
||||
embededings for your data.
|
||||
|
||||
```javascript
|
||||
const db = await lancedb.connect("/tmp/lancedb");
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
const data = [
|
||||
{ text: 'pepperoni' },
|
||||
{ text: 'pineapple' }
|
||||
@@ -126,7 +126,7 @@ belong in the same latent space and your results will be nonsensical.
|
||||
=== "Javascript"
|
||||
```javascript
|
||||
const results = await table
|
||||
.search('What's the best pizza topping?')
|
||||
.search("What's the best pizza topping?")
|
||||
.limit(10)
|
||||
.execute()
|
||||
```
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
import sys
|
||||
from modal import Secret, Stub, Image, web_endpoint
|
||||
import lancedb
|
||||
import re
|
||||
import pickle
|
||||
import requests
|
||||
import re
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from langchain.chains import RetrievalQA
|
||||
from langchain.document_loaders import UnstructuredHTMLLoader
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain.vectorstores import LanceDB
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import RetrievalQA
|
||||
from modal import Image, Secret, Stub, web_endpoint
|
||||
|
||||
import lancedb
|
||||
|
||||
lancedb_image = Image.debian_slim().pip_install(
|
||||
"lancedb", "langchain", "openai", "pandas", "tiktoken", "unstructured", "tabulate"
|
||||
@@ -78,10 +79,7 @@ def qanda_langchain(query):
|
||||
download_docs()
|
||||
docs = store_docs()
|
||||
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=1000,
|
||||
chunk_overlap=200,
|
||||
)
|
||||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200,)
|
||||
documents = text_splitter.split_documents(docs)
|
||||
embeddings = OpenAIEmbeddings()
|
||||
|
||||
|
||||
121
docs/src/examples/transformerjs_embedding_search_nodejs.md
Normal file
121
docs/src/examples/transformerjs_embedding_search_nodejs.md
Normal file
@@ -0,0 +1,121 @@
|
||||
# Vector embedding search using TransformersJS
|
||||
|
||||
## Embed and query data from LacneDB using TransformersJS
|
||||
|
||||
<img id="splash" width="400" alt="transformersjs" src="https://github.com/lancedb/lancedb/assets/43097991/88a31e30-3d6f-4eef-9216-4b7c688f1b4f">
|
||||
|
||||
This example shows how to use the [transformers.js](https://github.com/xenova/transformers.js) library to perform vector embedding search using LanceDB's Javascript API.
|
||||
|
||||
|
||||
### Setting up
|
||||
First, install the dependencies:
|
||||
```bash
|
||||
npm install vectordb
|
||||
npm i @xenova/transformers
|
||||
```
|
||||
|
||||
We will also be using the [all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) model to make it compatible with Transformers.js
|
||||
|
||||
Within our `index.js` file we will import the necessary libraries and define our model and database:
|
||||
|
||||
```javascript
|
||||
const lancedb = require('vectordb')
|
||||
const { pipeline } = await import('@xenova/transformers')
|
||||
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
||||
```
|
||||
|
||||
### Creating the embedding function
|
||||
|
||||
Next, we will create a function that will take in a string and return the vector embedding of that string. We will use the `pipe` function we defined earlier to get the vector embedding of the string.
|
||||
|
||||
```javascript
|
||||
// Define the function. `sourceColumn` is required for LanceDB to know
|
||||
// which column to use as input.
|
||||
const embed_fun = {}
|
||||
embed_fun.sourceColumn = 'text'
|
||||
embed_fun.embed = async function (batch) {
|
||||
let result = []
|
||||
// Given a batch of strings, we will use the `pipe` function to get
|
||||
// the vector embedding of each string.
|
||||
for (let text of batch) {
|
||||
// 'mean' pooling and normalizing allows the embeddings to share the
|
||||
// same length.
|
||||
const res = await pipe(text, { pooling: 'mean', normalize: true })
|
||||
result.push(Array.from(res['data']))
|
||||
}
|
||||
return (result)
|
||||
}
|
||||
```
|
||||
|
||||
### Creating the database
|
||||
|
||||
Now, we will create the LanceDB database and add the embedding function we defined earlier.
|
||||
|
||||
```javascript
|
||||
// Link a folder and create a table with data
|
||||
const db = await lancedb.connect('data/sample-lancedb')
|
||||
|
||||
// You can also import any other data, but make sure that you have a column
|
||||
// for the embedding function to use.
|
||||
const data = [
|
||||
{ id: 1, text: 'Cherry', type: 'fruit' },
|
||||
{ id: 2, text: 'Carrot', type: 'vegetable' },
|
||||
{ id: 3, text: 'Potato', type: 'vegetable' },
|
||||
{ id: 4, text: 'Apple', type: 'fruit' },
|
||||
{ id: 5, text: 'Banana', type: 'fruit' }
|
||||
]
|
||||
|
||||
// Create the table with the embedding function
|
||||
const table = await db.createTable('food_table', data, "create", embed_fun)
|
||||
```
|
||||
|
||||
### Performing the search
|
||||
|
||||
Now, we can perform the search using the `search` function. LanceDB automatically uses the embedding function we defined earlier to get the vector embedding of the query string.
|
||||
|
||||
```javascript
|
||||
// Query the table
|
||||
const results = await table
|
||||
.search("a sweet fruit to eat")
|
||||
.metricType("cosine")
|
||||
.limit(2)
|
||||
.execute()
|
||||
console.log(results.map(r => r.text))
|
||||
```
|
||||
```bash
|
||||
[ 'Banana', 'Cherry' ]
|
||||
```
|
||||
|
||||
Output of `results`:
|
||||
```bash
|
||||
[
|
||||
{
|
||||
vector: Float32Array(384) [
|
||||
-0.057455405592918396,
|
||||
0.03617725893855095,
|
||||
-0.0367760956287384,
|
||||
... 381 more items
|
||||
],
|
||||
id: 5,
|
||||
text: 'Banana',
|
||||
type: 'fruit',
|
||||
score: 0.4919965863227844
|
||||
},
|
||||
{
|
||||
vector: Float32Array(384) [
|
||||
0.0009714411571621895,
|
||||
0.008223623037338257,
|
||||
0.009571489877998829,
|
||||
... 381 more items
|
||||
],
|
||||
id: 1,
|
||||
text: 'Cherry',
|
||||
type: 'fruit',
|
||||
score: 0.5540297031402588
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Wrapping it up
|
||||
|
||||
In this example, we showed how to use the `transformers.js` library to perform vector embedding search using LanceDB's Javascript API. You can find the full code for this example on [Github](https://github.com/lancedb/lancedb/blob/main/node/examples/js-transformers/index.js)!
|
||||
@@ -18,6 +18,20 @@ Assume:
|
||||
1. `table` is a LanceDB Table
|
||||
2. `text` is the name of the Table column that we want to index
|
||||
|
||||
For example,
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
|
||||
uri = "data/sample-lancedb"
|
||||
db = lancedb.connect(uri)
|
||||
|
||||
table = db.create_table("my_table",
|
||||
data=[{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"},
|
||||
{"vector": [5.9, 26.5], "text": "There are several kittens playing"}])
|
||||
|
||||
```
|
||||
|
||||
To create the index:
|
||||
|
||||
```python
|
||||
|
||||
@@ -14,7 +14,7 @@ The key features of LanceDB include:
|
||||
|
||||
* Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.
|
||||
|
||||
* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lanecdb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
|
||||
* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
|
||||
|
||||
LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/lancedb/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.
|
||||
|
||||
@@ -28,7 +28,7 @@ LanceDB's core is written in Rust 🦀 and is built using <a href="https://githu
|
||||
```python
|
||||
import lancedb
|
||||
|
||||
uri = "/tmp/lancedb"
|
||||
uri = "data/sample-lancedb"
|
||||
db = lancedb.connect(uri)
|
||||
table = db.create_table("my_table",
|
||||
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
@@ -44,7 +44,7 @@ LanceDB's core is written in Rust 🦀 and is built using <a href="https://githu
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
|
||||
const uri = "/tmp/lancedb";
|
||||
const uri = "data/sample-lancedb";
|
||||
const db = await lancedb.connect(uri);
|
||||
const table = await db.createTable("my_table",
|
||||
[{ id: 1, vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
@@ -67,6 +67,6 @@ LanceDB's core is written in Rust 🦀 and is built using <a href="https://githu
|
||||
* [`Embedding Functions`](embedding.md) - functions for working with embeddings.
|
||||
* [`Indexing`](ann_indexes.md) - create vector indexes to speed up queries.
|
||||
* [`Full text search`](fts.md) - [EXPERIMENTAL] full-text search API
|
||||
* [`Ecosystem Integrations`](integrations.md) - integrating LanceDB with python data tooling ecosystem.
|
||||
* [`Ecosystem Integrations`](python/integration.md) - integrating LanceDB with python data tooling ecosystem.
|
||||
* [`Python API Reference`](python/python.md) - detailed documentation for the LanceDB Python SDK.
|
||||
* [`Node API Reference`](javascript/modules.md) - detailed documentation for the LanceDB Python SDK.
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
# Integrations
|
||||
|
||||
Built on top of Apache Arrow, `LanceDB` is easy to integrate with the Python ecosystem, including Pandas, PyArrow and DuckDB.
|
||||
|
||||
## Pandas and PyArrow
|
||||
|
||||
First, we need to connect to a `LanceDB` database.
|
||||
|
||||
``` py
|
||||
|
||||
import lancedb
|
||||
|
||||
db = lancedb.connect("/tmp/lancedb")
|
||||
```
|
||||
|
||||
And write a `Pandas DataFrame` to LanceDB directly.
|
||||
|
||||
```py
|
||||
import pandas as pd
|
||||
|
||||
data = pd.DataFrame({
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
"item": ["foo", "bar"],
|
||||
"price": [10.0, 20.0]
|
||||
})
|
||||
table = db.create_table("pd_table", data=data)
|
||||
```
|
||||
|
||||
You will find detailed instructions of creating dataset and index in [Basic Operations](basic.md) and [Indexing](indexing.md)
|
||||
sections.
|
||||
|
||||
|
||||
We can now perform similarity searches via `LanceDB`.
|
||||
|
||||
```py
|
||||
# Open the table previously created.
|
||||
table = db.open_table("pd_table")
|
||||
|
||||
query_vector = [100, 100]
|
||||
# Pandas DataFrame
|
||||
df = table.search(query_vector).limit(1).to_df()
|
||||
print(df)
|
||||
```
|
||||
|
||||
```
|
||||
vector item price score
|
||||
0 [5.9, 26.5] bar 20.0 14257.05957
|
||||
```
|
||||
|
||||
If you have a simple filter, it's faster to provide a where clause to `LanceDB`'s search query.
|
||||
If you have more complex criteria, you can always apply the filter to the resulting pandas `DataFrame` from the search query.
|
||||
|
||||
```python
|
||||
|
||||
# Apply the filter via LanceDB
|
||||
results = table.search([100, 100]).where("price < 15").to_df()
|
||||
assert len(results) == 1
|
||||
assert results["item"].iloc[0] == "foo"
|
||||
|
||||
# Apply the filter via Pandas
|
||||
df = results = table.search([100, 100]).to_df()
|
||||
results = df[df.price < 15]
|
||||
assert len(results) == 1
|
||||
assert results["item"].iloc[0] == "foo"
|
||||
```
|
||||
|
||||
## DuckDB
|
||||
|
||||
`LanceDB` works with `DuckDB` via [PyArrow integration](https://duckdb.org/docs/guides/python/sql_on_arrow).
|
||||
|
||||
Let us start with installing `duckdb` and `lancedb`.
|
||||
|
||||
```shell
|
||||
pip install duckdb lancedb
|
||||
```
|
||||
|
||||
We will re-use the dataset created previously
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
|
||||
db = lancedb.connect("/tmp/lancedb")
|
||||
table = db.open_table("pd_table")
|
||||
arrow_table = table.to_arrow()
|
||||
```
|
||||
|
||||
`DuckDB` can directly query the `arrow_table`:
|
||||
|
||||
```python
|
||||
In [15]: duckdb.query("SELECT * FROM t")
|
||||
Out[15]:
|
||||
┌─────────────┬─────────┬────────┐
|
||||
│ vector │ item │ price │
|
||||
│ float[] │ varchar │ double │
|
||||
├─────────────┼─────────┼────────┤
|
||||
│ [3.1, 4.1] │ foo │ 10.0 │
|
||||
│ [5.9, 26.5] │ bar │ 20.0 │
|
||||
└─────────────┴─────────┴────────┘
|
||||
|
||||
In [16]: duckdb.query("SELECT mean(price) FROM t")
|
||||
Out[16]:
|
||||
┌─────────────┐
|
||||
│ mean(price) │
|
||||
│ double │
|
||||
├─────────────┤
|
||||
│ 15.0 │
|
||||
└─────────────┘
|
||||
```
|
||||
@@ -10,15 +10,21 @@ A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb)
|
||||
npm install vectordb
|
||||
```
|
||||
|
||||
This will download the appropriate native library for your platform. We currently
|
||||
support x86_64 Linux, aarch64 Linux, Intel MacOS, and ARM (M1/M2) MacOS. We do not
|
||||
yet support Windows or musl-based Linux (such as Alpine Linux).
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Example
|
||||
|
||||
```javascript
|
||||
const lancedb = require('vectordb');
|
||||
const db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>');
|
||||
const table = await db.openTable('my_table');
|
||||
const query = await table.search([0.1, 0.3]).setLimit(20).execute();
|
||||
const db = await lancedb.connect('data/sample-lancedb');
|
||||
const table = await db.createTable("my_table",
|
||||
[{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
|
||||
{ id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
|
||||
const results = await table.search([0.1, 0.3]).limit(20).execute();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
@@ -26,17 +32,33 @@ The [examples](./examples) folder contains complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
The LanceDB javascript is built with npm:
|
||||
To build everything fresh:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
npm run tsc
|
||||
npm run build
|
||||
```
|
||||
|
||||
Then you should be able to run the tests with:
|
||||
|
||||
```bash
|
||||
npm test
|
||||
```
|
||||
|
||||
### Rebuilding Rust library
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
### Rebuilding Typescript
|
||||
|
||||
```bash
|
||||
npm run tsc
|
||||
```
|
||||
|
||||
Run the tests with
|
||||
|
||||
```bash
|
||||
npm test
|
||||
```
|
||||
### Fix lints
|
||||
|
||||
To run the linter and have it automatically fix all errors
|
||||
|
||||
|
||||
@@ -1,211 +0,0 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / Connection
|
||||
|
||||
# Class: Connection
|
||||
|
||||
A connection to a LanceDB database.
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Constructors
|
||||
|
||||
- [constructor](Connection.md#constructor)
|
||||
|
||||
### Properties
|
||||
|
||||
- [\_db](Connection.md#_db)
|
||||
- [\_uri](Connection.md#_uri)
|
||||
|
||||
### Accessors
|
||||
|
||||
- [uri](Connection.md#uri)
|
||||
|
||||
### Methods
|
||||
|
||||
- [createTable](Connection.md#createtable)
|
||||
- [createTableArrow](Connection.md#createtablearrow)
|
||||
- [openTable](Connection.md#opentable)
|
||||
- [tableNames](Connection.md#tablenames)
|
||||
|
||||
## Constructors
|
||||
|
||||
### constructor
|
||||
|
||||
• **new Connection**(`db`, `uri`)
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `db` | `any` |
|
||||
| `uri` | `string` |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:46](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L46)
|
||||
|
||||
## Properties
|
||||
|
||||
### \_db
|
||||
|
||||
• `Private` `Readonly` **\_db**: `any`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:44](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L44)
|
||||
|
||||
___
|
||||
|
||||
### \_uri
|
||||
|
||||
• `Private` `Readonly` **\_uri**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:43](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L43)
|
||||
|
||||
## Accessors
|
||||
|
||||
### uri
|
||||
|
||||
• `get` **uri**(): `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:51](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L51)
|
||||
|
||||
## Methods
|
||||
|
||||
### createTable
|
||||
|
||||
▸ **createTable**(`name`, `data`): `Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||
|
||||
Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:91](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L91)
|
||||
|
||||
▸ **createTable**<`T`\>(`name`, `data`, `embeddings`): `Promise`<[`Table`](Table.md)<`T`\>\>
|
||||
|
||||
Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table |
|
||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)<`T`\>\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:99](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L99)
|
||||
|
||||
___
|
||||
|
||||
### createTableArrow
|
||||
|
||||
▸ **createTableArrow**(`name`, `table`): `Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `name` | `string` |
|
||||
| `table` | `Table`<`any`\> |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:109](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L109)
|
||||
|
||||
___
|
||||
|
||||
### openTable
|
||||
|
||||
▸ **openTable**(`name`): `Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||
|
||||
Open a table in the database.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:67](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L67)
|
||||
|
||||
▸ **openTable**<`T`\>(`name`, `embeddings`): `Promise`<[`Table`](Table.md)<`T`\>\>
|
||||
|
||||
Open a table in the database.
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)<`T`\>\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:74](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L74)
|
||||
|
||||
___
|
||||
|
||||
### tableNames
|
||||
|
||||
▸ **tableNames**(): `Promise`<`string`[]\>
|
||||
|
||||
Get the names of all tables in the database.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`string`[]\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:58](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L58)
|
||||
350
docs/src/javascript/classes/LocalConnection.md
Normal file
350
docs/src/javascript/classes/LocalConnection.md
Normal file
@@ -0,0 +1,350 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / LocalConnection
|
||||
|
||||
# Class: LocalConnection
|
||||
|
||||
A connection to a LanceDB database.
|
||||
|
||||
## Implements
|
||||
|
||||
- [`Connection`](../interfaces/Connection.md)
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Constructors
|
||||
|
||||
- [constructor](LocalConnection.md#constructor)
|
||||
|
||||
### Properties
|
||||
|
||||
- [\_db](LocalConnection.md#_db)
|
||||
- [\_options](LocalConnection.md#_options)
|
||||
|
||||
### Accessors
|
||||
|
||||
- [uri](LocalConnection.md#uri)
|
||||
|
||||
### Methods
|
||||
|
||||
- [createTable](LocalConnection.md#createtable)
|
||||
- [createTableArrow](LocalConnection.md#createtablearrow)
|
||||
- [dropTable](LocalConnection.md#droptable)
|
||||
- [openTable](LocalConnection.md#opentable)
|
||||
- [tableNames](LocalConnection.md#tablenames)
|
||||
|
||||
## Constructors
|
||||
|
||||
### constructor
|
||||
|
||||
• **new LocalConnection**(`db`, `options`)
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `db` | `any` |
|
||||
| `options` | [`ConnectionOptions`](../interfaces/ConnectionOptions.md) |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:184](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L184)
|
||||
|
||||
## Properties
|
||||
|
||||
### \_db
|
||||
|
||||
• `Private` `Readonly` **\_db**: `any`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:182](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L182)
|
||||
|
||||
___
|
||||
|
||||
### \_options
|
||||
|
||||
• `Private` `Readonly` **\_options**: [`ConnectionOptions`](../interfaces/ConnectionOptions.md)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:181](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L181)
|
||||
|
||||
## Accessors
|
||||
|
||||
### uri
|
||||
|
||||
• `get` **uri**(): `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`string`
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[uri](../interfaces/Connection.md#uri)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:189](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L189)
|
||||
|
||||
## Methods
|
||||
|
||||
### createTable
|
||||
|
||||
▸ **createTable**(`name`, `data`, `mode?`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||
|
||||
Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table |
|
||||
| `mode?` | [`WriteMode`](../enums/WriteMode.md) | The write mode to use when creating the table. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[createTable](../interfaces/Connection.md#createtable)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:230](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L230)
|
||||
|
||||
▸ **createTable**(`name`, `data`, `mode`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `name` | `string` |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] |
|
||||
| `mode` | [`WriteMode`](../enums/WriteMode.md) |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
Connection.createTable
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:231](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L231)
|
||||
|
||||
▸ **createTable**<`T`\>(`name`, `data`, `mode`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||
|
||||
Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table |
|
||||
| `mode` | [`WriteMode`](../enums/WriteMode.md) | The write mode to use when creating the table. |
|
||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
Connection.createTable
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:241](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L241)
|
||||
|
||||
▸ **createTable**<`T`\>(`name`, `data`, `mode`, `embeddings?`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `name` | `string` |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] |
|
||||
| `mode` | [`WriteMode`](../enums/WriteMode.md) |
|
||||
| `embeddings?` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
Connection.createTable
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:242](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L242)
|
||||
|
||||
___
|
||||
|
||||
### createTableArrow
|
||||
|
||||
▸ **createTableArrow**(`name`, `table`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `name` | `string` |
|
||||
| `table` | `Table`<`any`\> |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[createTableArrow](../interfaces/Connection.md#createtablearrow)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:266](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L266)
|
||||
|
||||
___
|
||||
|
||||
### dropTable
|
||||
|
||||
▸ **dropTable**(`name`): `Promise`<`void`\>
|
||||
|
||||
Drop an existing table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table to drop. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[dropTable](../interfaces/Connection.md#droptable)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:276](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L276)
|
||||
|
||||
___
|
||||
|
||||
### openTable
|
||||
|
||||
▸ **openTable**(`name`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||
|
||||
Open a table in the database.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[openTable](../interfaces/Connection.md#opentable)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:205](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L205)
|
||||
|
||||
▸ **openTable**<`T`\>(`name`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||
|
||||
Open a table in the database.
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
Connection.openTable
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:212](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L212)
|
||||
|
||||
▸ **openTable**<`T`\>(`name`, `embeddings?`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `name` | `string` |
|
||||
| `embeddings?` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
Connection.openTable
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:213](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L213)
|
||||
|
||||
___
|
||||
|
||||
### tableNames
|
||||
|
||||
▸ **tableNames**(): `Promise`<`string`[]\>
|
||||
|
||||
Get the names of all tables in the database.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`string`[]\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[tableNames](../interfaces/Connection.md#tablenames)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:196](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L196)
|
||||
302
docs/src/javascript/classes/LocalTable.md
Normal file
302
docs/src/javascript/classes/LocalTable.md
Normal file
@@ -0,0 +1,302 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / LocalTable
|
||||
|
||||
# Class: LocalTable<T\>
|
||||
|
||||
A LanceDB Table is the collection of Records. Each Record has one or more vector fields.
|
||||
|
||||
## Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
## Implements
|
||||
|
||||
- [`Table`](../interfaces/Table.md)<`T`\>
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Constructors
|
||||
|
||||
- [constructor](LocalTable.md#constructor)
|
||||
|
||||
### Properties
|
||||
|
||||
- [\_embeddings](LocalTable.md#_embeddings)
|
||||
- [\_name](LocalTable.md#_name)
|
||||
- [\_options](LocalTable.md#_options)
|
||||
- [\_tbl](LocalTable.md#_tbl)
|
||||
|
||||
### Accessors
|
||||
|
||||
- [name](LocalTable.md#name)
|
||||
|
||||
### Methods
|
||||
|
||||
- [add](LocalTable.md#add)
|
||||
- [countRows](LocalTable.md#countrows)
|
||||
- [createIndex](LocalTable.md#createindex)
|
||||
- [delete](LocalTable.md#delete)
|
||||
- [overwrite](LocalTable.md#overwrite)
|
||||
- [search](LocalTable.md#search)
|
||||
|
||||
## Constructors
|
||||
|
||||
### constructor
|
||||
|
||||
• **new LocalTable**<`T`\>(`tbl`, `name`, `options`)
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `tbl` | `any` |
|
||||
| `name` | `string` |
|
||||
| `options` | [`ConnectionOptions`](../interfaces/ConnectionOptions.md) |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:287](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L287)
|
||||
|
||||
• **new LocalTable**<`T`\>(`tbl`, `name`, `options`, `embeddings`)
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `tbl` | `any` | |
|
||||
| `name` | `string` | |
|
||||
| `options` | [`ConnectionOptions`](../interfaces/ConnectionOptions.md) | |
|
||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use when interacting with this table |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:294](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L294)
|
||||
|
||||
## Properties
|
||||
|
||||
### \_embeddings
|
||||
|
||||
• `Private` `Optional` `Readonly` **\_embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:284](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L284)
|
||||
|
||||
___
|
||||
|
||||
### \_name
|
||||
|
||||
• `Private` `Readonly` **\_name**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:283](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L283)
|
||||
|
||||
___
|
||||
|
||||
### \_options
|
||||
|
||||
• `Private` `Readonly` **\_options**: [`ConnectionOptions`](../interfaces/ConnectionOptions.md)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:285](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L285)
|
||||
|
||||
___
|
||||
|
||||
### \_tbl
|
||||
|
||||
• `Private` `Readonly` **\_tbl**: `any`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:282](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L282)
|
||||
|
||||
## Accessors
|
||||
|
||||
### name
|
||||
|
||||
• `get` **name**(): `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`string`
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[name](../interfaces/Table.md#name)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:302](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L302)
|
||||
|
||||
## Methods
|
||||
|
||||
### add
|
||||
|
||||
▸ **add**(`data`): `Promise`<`number`\>
|
||||
|
||||
Insert records into this Table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`\>
|
||||
|
||||
The number of rows added to the table
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[add](../interfaces/Table.md#add)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:320](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L320)
|
||||
|
||||
___
|
||||
|
||||
### countRows
|
||||
|
||||
▸ **countRows**(): `Promise`<`number`\>
|
||||
|
||||
Returns the number of rows in this table.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[countRows](../interfaces/Table.md#countrows)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:362](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L362)
|
||||
|
||||
___
|
||||
|
||||
### createIndex
|
||||
|
||||
▸ **createIndex**(`indexParams`): `Promise`<`any`\>
|
||||
|
||||
Create an ANN index on this Table vector index.
|
||||
|
||||
**`See`**
|
||||
|
||||
VectorIndexParams.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `indexParams` | [`IvfPQIndexConfig`](../interfaces/IvfPQIndexConfig.md) | The parameters of this Index, |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`any`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[createIndex](../interfaces/Table.md#createindex)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:355](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L355)
|
||||
|
||||
___
|
||||
|
||||
### delete
|
||||
|
||||
▸ **delete**(`filter`): `Promise`<`void`\>
|
||||
|
||||
Delete rows from this table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `filter` | `string` | A filter in the same format used by a sql WHERE clause. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[delete](../interfaces/Table.md#delete)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:371](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L371)
|
||||
|
||||
___
|
||||
|
||||
### overwrite
|
||||
|
||||
▸ **overwrite**(`data`): `Promise`<`number`\>
|
||||
|
||||
Insert records into this Table, replacing its contents.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`\>
|
||||
|
||||
The number of rows added to the table
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[overwrite](../interfaces/Table.md#overwrite)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:338](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L338)
|
||||
|
||||
___
|
||||
|
||||
### search
|
||||
|
||||
▸ **search**(`query`): [`Query`](Query.md)<`T`\>
|
||||
|
||||
Creates a search query to find the nearest neighbors of the given search term
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `query` | `T` | The query search term |
|
||||
|
||||
#### Returns
|
||||
|
||||
[`Query`](Query.md)<`T`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[search](../interfaces/Table.md#search)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:310](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L310)
|
||||
@@ -40,7 +40,7 @@ An embedding function that automatically creates vector representation for a giv
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:21](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L21)
|
||||
[embedding/openai.ts:21](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L21)
|
||||
|
||||
## Properties
|
||||
|
||||
@@ -50,7 +50,7 @@ An embedding function that automatically creates vector representation for a giv
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L19)
|
||||
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L19)
|
||||
|
||||
___
|
||||
|
||||
@@ -60,7 +60,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:18](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L18)
|
||||
[embedding/openai.ts:18](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L18)
|
||||
|
||||
___
|
||||
|
||||
@@ -76,7 +76,7 @@ The name of the column that will be used as input for the Embedding Function.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:50](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L50)
|
||||
[embedding/openai.ts:50](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L50)
|
||||
|
||||
## Methods
|
||||
|
||||
@@ -102,4 +102,4 @@ Creates a vector representation for the given values.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:38](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/openai.ts#L38)
|
||||
[embedding/openai.ts:38](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L38)
|
||||
|
||||
@@ -18,7 +18,6 @@ A builder for nearest neighbor queries for LanceDB.
|
||||
|
||||
### Properties
|
||||
|
||||
- [\_columns](Query.md#_columns)
|
||||
- [\_embeddings](Query.md#_embeddings)
|
||||
- [\_filter](Query.md#_filter)
|
||||
- [\_limit](Query.md#_limit)
|
||||
@@ -27,7 +26,9 @@ A builder for nearest neighbor queries for LanceDB.
|
||||
- [\_query](Query.md#_query)
|
||||
- [\_queryVector](Query.md#_queryvector)
|
||||
- [\_refineFactor](Query.md#_refinefactor)
|
||||
- [\_select](Query.md#_select)
|
||||
- [\_tbl](Query.md#_tbl)
|
||||
- [where](Query.md#where)
|
||||
|
||||
### Methods
|
||||
|
||||
@@ -37,6 +38,7 @@ A builder for nearest neighbor queries for LanceDB.
|
||||
- [metricType](Query.md#metrictype)
|
||||
- [nprobes](Query.md#nprobes)
|
||||
- [refineFactor](Query.md#refinefactor)
|
||||
- [select](Query.md#select)
|
||||
|
||||
## Constructors
|
||||
|
||||
@@ -60,27 +62,17 @@ A builder for nearest neighbor queries for LanceDB.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:241](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L241)
|
||||
[index.ts:448](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L448)
|
||||
|
||||
## Properties
|
||||
|
||||
### \_columns
|
||||
|
||||
• `Private` `Optional` `Readonly` **\_columns**: `string`[]
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:236](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L236)
|
||||
|
||||
___
|
||||
|
||||
### \_embeddings
|
||||
|
||||
• `Private` `Optional` `Readonly` **\_embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:239](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L239)
|
||||
[index.ts:446](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L446)
|
||||
|
||||
___
|
||||
|
||||
@@ -90,7 +82,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:237](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L237)
|
||||
[index.ts:444](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L444)
|
||||
|
||||
___
|
||||
|
||||
@@ -100,7 +92,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:233](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L233)
|
||||
[index.ts:440](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L440)
|
||||
|
||||
___
|
||||
|
||||
@@ -110,7 +102,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:238](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L238)
|
||||
[index.ts:445](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L445)
|
||||
|
||||
___
|
||||
|
||||
@@ -120,7 +112,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:235](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L235)
|
||||
[index.ts:442](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L442)
|
||||
|
||||
___
|
||||
|
||||
@@ -130,7 +122,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:231](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L231)
|
||||
[index.ts:438](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L438)
|
||||
|
||||
___
|
||||
|
||||
@@ -140,7 +132,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:232](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L232)
|
||||
[index.ts:439](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L439)
|
||||
|
||||
___
|
||||
|
||||
@@ -150,7 +142,17 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:234](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L234)
|
||||
[index.ts:441](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L441)
|
||||
|
||||
___
|
||||
|
||||
### \_select
|
||||
|
||||
• `Private` `Optional` **\_select**: `string`[]
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:443](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L443)
|
||||
|
||||
___
|
||||
|
||||
@@ -160,7 +162,33 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:230](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L230)
|
||||
[index.ts:437](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L437)
|
||||
|
||||
___
|
||||
|
||||
### where
|
||||
|
||||
• **where**: (`value`: `string`) => [`Query`](Query.md)<`T`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
▸ (`value`): [`Query`](Query.md)<`T`\>
|
||||
|
||||
A filter statement to be applied to this query.
|
||||
|
||||
##### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `value` | `string` | A filter in the same format used by a sql WHERE clause. |
|
||||
|
||||
##### Returns
|
||||
|
||||
[`Query`](Query.md)<`T`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:496](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L496)
|
||||
|
||||
## Methods
|
||||
|
||||
@@ -182,7 +210,7 @@ Execute the query and return the results as an Array of Objects
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:301](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L301)
|
||||
[index.ts:519](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L519)
|
||||
|
||||
___
|
||||
|
||||
@@ -204,7 +232,7 @@ A filter statement to be applied to this query.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:284](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L284)
|
||||
[index.ts:491](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L491)
|
||||
|
||||
___
|
||||
|
||||
@@ -226,7 +254,7 @@ Sets the number of results that will be returned
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:257](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L257)
|
||||
[index.ts:464](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L464)
|
||||
|
||||
___
|
||||
|
||||
@@ -252,7 +280,7 @@ MetricType for the different options
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:293](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L293)
|
||||
[index.ts:511](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L511)
|
||||
|
||||
___
|
||||
|
||||
@@ -274,7 +302,7 @@ The number of probes used. A higher number makes search more accurate but also s
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:275](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L275)
|
||||
[index.ts:482](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L482)
|
||||
|
||||
___
|
||||
|
||||
@@ -296,4 +324,26 @@ Refine the results by reading extra elements and re-ranking them in memory.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:266](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L266)
|
||||
[index.ts:473](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L473)
|
||||
|
||||
___
|
||||
|
||||
### select
|
||||
|
||||
▸ **select**(`value`): [`Query`](Query.md)<`T`\>
|
||||
|
||||
Return only the specified columns.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `value` | `string`[] | Only select the specified columns. If not specified, all columns will be returned. |
|
||||
|
||||
#### Returns
|
||||
|
||||
[`Query`](Query.md)<`T`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:502](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L502)
|
||||
|
||||
@@ -1,215 +0,0 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / Table
|
||||
|
||||
# Class: Table<T\>
|
||||
|
||||
## Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Constructors
|
||||
|
||||
- [constructor](Table.md#constructor)
|
||||
|
||||
### Properties
|
||||
|
||||
- [\_embeddings](Table.md#_embeddings)
|
||||
- [\_name](Table.md#_name)
|
||||
- [\_tbl](Table.md#_tbl)
|
||||
|
||||
### Accessors
|
||||
|
||||
- [name](Table.md#name)
|
||||
|
||||
### Methods
|
||||
|
||||
- [add](Table.md#add)
|
||||
- [create\_index](Table.md#create_index)
|
||||
- [overwrite](Table.md#overwrite)
|
||||
- [search](Table.md#search)
|
||||
|
||||
## Constructors
|
||||
|
||||
### constructor
|
||||
|
||||
• **new Table**<`T`\>(`tbl`, `name`)
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `tbl` | `any` |
|
||||
| `name` | `string` |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:121](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L121)
|
||||
|
||||
• **new Table**<`T`\>(`tbl`, `name`, `embeddings`)
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `tbl` | `any` | |
|
||||
| `name` | `string` | |
|
||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use when interacting with this table |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:127](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L127)
|
||||
|
||||
## Properties
|
||||
|
||||
### \_embeddings
|
||||
|
||||
• `Private` `Optional` `Readonly` **\_embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:119](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L119)
|
||||
|
||||
___
|
||||
|
||||
### \_name
|
||||
|
||||
• `Private` `Readonly` **\_name**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:118](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L118)
|
||||
|
||||
___
|
||||
|
||||
### \_tbl
|
||||
|
||||
• `Private` `Readonly` **\_tbl**: `any`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:117](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L117)
|
||||
|
||||
## Accessors
|
||||
|
||||
### name
|
||||
|
||||
• `get` **name**(): `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:134](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L134)
|
||||
|
||||
## Methods
|
||||
|
||||
### add
|
||||
|
||||
▸ **add**(`data`): `Promise`<`number`\>
|
||||
|
||||
Insert records into this Table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`\>
|
||||
|
||||
The number of rows added to the table
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:152](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L152)
|
||||
|
||||
___
|
||||
|
||||
### create\_index
|
||||
|
||||
▸ **create_index**(`indexParams`): `Promise`<`any`\>
|
||||
|
||||
Create an ANN index on this Table vector index.
|
||||
|
||||
**`See`**
|
||||
|
||||
VectorIndexParams.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `indexParams` | `IvfPQIndexConfig` | The parameters of this Index, |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`any`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:171](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L171)
|
||||
|
||||
___
|
||||
|
||||
### overwrite
|
||||
|
||||
▸ **overwrite**(`data`): `Promise`<`number`\>
|
||||
|
||||
Insert records into this Table, replacing its contents.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`\>
|
||||
|
||||
The number of rows added to the table
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:162](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L162)
|
||||
|
||||
___
|
||||
|
||||
### search
|
||||
|
||||
▸ **search**(`query`): [`Query`](Query.md)<`T`\>
|
||||
|
||||
Creates a search query to find the nearest neighbors of the given search term
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `query` | `T` | The query search term |
|
||||
|
||||
#### Returns
|
||||
|
||||
[`Query`](Query.md)<`T`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:142](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L142)
|
||||
@@ -9,6 +9,7 @@ Distance metrics type.
|
||||
### Enumeration Members
|
||||
|
||||
- [Cosine](MetricType.md#cosine)
|
||||
- [Dot](MetricType.md#dot)
|
||||
- [L2](MetricType.md#l2)
|
||||
|
||||
## Enumeration Members
|
||||
@@ -21,7 +22,19 @@ Cosine distance
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:341](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L341)
|
||||
[index.ts:567](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L567)
|
||||
|
||||
___
|
||||
|
||||
### Dot
|
||||
|
||||
• **Dot** = ``"dot"``
|
||||
|
||||
Dot product
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:572](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L572)
|
||||
|
||||
___
|
||||
|
||||
@@ -33,4 +46,4 @@ Euclidean distance
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:336](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L336)
|
||||
[index.ts:562](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L562)
|
||||
|
||||
@@ -2,11 +2,14 @@
|
||||
|
||||
# Enumeration: WriteMode
|
||||
|
||||
Write mode for writing a table.
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Enumeration Members
|
||||
|
||||
- [Append](WriteMode.md#append)
|
||||
- [Create](WriteMode.md#create)
|
||||
- [Overwrite](WriteMode.md#overwrite)
|
||||
|
||||
## Enumeration Members
|
||||
@@ -15,9 +18,23 @@
|
||||
|
||||
• **Append** = ``"append"``
|
||||
|
||||
Append new data to the table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:326](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L326)
|
||||
[index.ts:552](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L552)
|
||||
|
||||
___
|
||||
|
||||
### Create
|
||||
|
||||
• **Create** = ``"create"``
|
||||
|
||||
Create a new [Table](../interfaces/Table.md).
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:548](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L548)
|
||||
|
||||
___
|
||||
|
||||
@@ -25,6 +42,8 @@ ___
|
||||
|
||||
• **Overwrite** = ``"overwrite"``
|
||||
|
||||
Overwrite the existing [Table](../interfaces/Table.md) if presented.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:325](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L325)
|
||||
[index.ts:550](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L550)
|
||||
|
||||
41
docs/src/javascript/interfaces/AwsCredentials.md
Normal file
41
docs/src/javascript/interfaces/AwsCredentials.md
Normal file
@@ -0,0 +1,41 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / AwsCredentials
|
||||
|
||||
# Interface: AwsCredentials
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Properties
|
||||
|
||||
- [accessKeyId](AwsCredentials.md#accesskeyid)
|
||||
- [secretKey](AwsCredentials.md#secretkey)
|
||||
- [sessionToken](AwsCredentials.md#sessiontoken)
|
||||
|
||||
## Properties
|
||||
|
||||
### accessKeyId
|
||||
|
||||
• **accessKeyId**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:31](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L31)
|
||||
|
||||
___
|
||||
|
||||
### secretKey
|
||||
|
||||
• **secretKey**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:33](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L33)
|
||||
|
||||
___
|
||||
|
||||
### sessionToken
|
||||
|
||||
• `Optional` **sessionToken**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:35](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L35)
|
||||
152
docs/src/javascript/interfaces/Connection.md
Normal file
152
docs/src/javascript/interfaces/Connection.md
Normal file
@@ -0,0 +1,152 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / Connection
|
||||
|
||||
# Interface: Connection
|
||||
|
||||
A LanceDB Connection that allows you to open tables and create new ones.
|
||||
|
||||
Connection could be local against filesystem or remote against a server.
|
||||
|
||||
## Implemented by
|
||||
|
||||
- [`LocalConnection`](../classes/LocalConnection.md)
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Properties
|
||||
|
||||
- [uri](Connection.md#uri)
|
||||
|
||||
### Methods
|
||||
|
||||
- [createTable](Connection.md#createtable)
|
||||
- [createTableArrow](Connection.md#createtablearrow)
|
||||
- [dropTable](Connection.md#droptable)
|
||||
- [openTable](Connection.md#opentable)
|
||||
- [tableNames](Connection.md#tablenames)
|
||||
|
||||
## Properties
|
||||
|
||||
### uri
|
||||
|
||||
• **uri**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:70](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L70)
|
||||
|
||||
## Methods
|
||||
|
||||
### createTable
|
||||
|
||||
▸ **createTable**<`T`\>(`name`, `data`, `mode?`, `embeddings?`): `Promise`<[`Table`](Table.md)<`T`\>\>
|
||||
|
||||
Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
| `mode?` | [`WriteMode`](../enums/WriteMode.md) | The write mode to use when creating the table. |
|
||||
| `embeddings?` | [`EmbeddingFunction`](EmbeddingFunction.md)<`T`\> | An embedding function to use on this table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)<`T`\>\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:90](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L90)
|
||||
|
||||
___
|
||||
|
||||
### createTableArrow
|
||||
|
||||
▸ **createTableArrow**(`name`, `table`): `Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `name` | `string` |
|
||||
| `table` | `Table`<`any`\> |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)<`number`[]\>\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:92](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L92)
|
||||
|
||||
___
|
||||
|
||||
### dropTable
|
||||
|
||||
▸ **dropTable**(`name`): `Promise`<`void`\>
|
||||
|
||||
Drop an existing table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table to drop. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:98](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L98)
|
||||
|
||||
___
|
||||
|
||||
### openTable
|
||||
|
||||
▸ **openTable**<`T`\>(`name`, `embeddings?`): `Promise`<[`Table`](Table.md)<`T`\>\>
|
||||
|
||||
Open a table in the database.
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `embeddings?` | [`EmbeddingFunction`](EmbeddingFunction.md)<`T`\> | An embedding function to use on this table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)<`T`\>\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:80](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L80)
|
||||
|
||||
___
|
||||
|
||||
### tableNames
|
||||
|
||||
▸ **tableNames**(): `Promise`<`string`[]\>
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`string`[]\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:72](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L72)
|
||||
30
docs/src/javascript/interfaces/ConnectionOptions.md
Normal file
30
docs/src/javascript/interfaces/ConnectionOptions.md
Normal file
@@ -0,0 +1,30 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / ConnectionOptions
|
||||
|
||||
# Interface: ConnectionOptions
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Properties
|
||||
|
||||
- [awsCredentials](ConnectionOptions.md#awscredentials)
|
||||
- [uri](ConnectionOptions.md#uri)
|
||||
|
||||
## Properties
|
||||
|
||||
### awsCredentials
|
||||
|
||||
• `Optional` **awsCredentials**: [`AwsCredentials`](AwsCredentials.md)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:40](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L40)
|
||||
|
||||
___
|
||||
|
||||
### uri
|
||||
|
||||
• **uri**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:39](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L39)
|
||||
@@ -45,7 +45,7 @@ Creates a vector representation for the given values.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/embedding_function.ts#L27)
|
||||
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/embedding_function.ts#L27)
|
||||
|
||||
___
|
||||
|
||||
@@ -57,4 +57,4 @@ The name of the column that will be used as input for the Embedding Function.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/31dab97/node/src/embedding/embedding_function.ts#L22)
|
||||
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/embedding_function.ts#L22)
|
||||
|
||||
149
docs/src/javascript/interfaces/IvfPQIndexConfig.md
Normal file
149
docs/src/javascript/interfaces/IvfPQIndexConfig.md
Normal file
@@ -0,0 +1,149 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / IvfPQIndexConfig
|
||||
|
||||
# Interface: IvfPQIndexConfig
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Properties
|
||||
|
||||
- [column](IvfPQIndexConfig.md#column)
|
||||
- [index\_name](IvfPQIndexConfig.md#index_name)
|
||||
- [max\_iters](IvfPQIndexConfig.md#max_iters)
|
||||
- [max\_opq\_iters](IvfPQIndexConfig.md#max_opq_iters)
|
||||
- [metric\_type](IvfPQIndexConfig.md#metric_type)
|
||||
- [num\_bits](IvfPQIndexConfig.md#num_bits)
|
||||
- [num\_partitions](IvfPQIndexConfig.md#num_partitions)
|
||||
- [num\_sub\_vectors](IvfPQIndexConfig.md#num_sub_vectors)
|
||||
- [replace](IvfPQIndexConfig.md#replace)
|
||||
- [type](IvfPQIndexConfig.md#type)
|
||||
- [use\_opq](IvfPQIndexConfig.md#use_opq)
|
||||
|
||||
## Properties
|
||||
|
||||
### column
|
||||
|
||||
• `Optional` **column**: `string`
|
||||
|
||||
The column to be indexed
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:382](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L382)
|
||||
|
||||
___
|
||||
|
||||
### index\_name
|
||||
|
||||
• `Optional` **index\_name**: `string`
|
||||
|
||||
A unique name for the index
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:387](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L387)
|
||||
|
||||
___
|
||||
|
||||
### max\_iters
|
||||
|
||||
• `Optional` **max\_iters**: `number`
|
||||
|
||||
The max number of iterations for kmeans training.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:402](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L402)
|
||||
|
||||
___
|
||||
|
||||
### max\_opq\_iters
|
||||
|
||||
• `Optional` **max\_opq\_iters**: `number`
|
||||
|
||||
Max number of iterations to train OPQ, if `use_opq` is true.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:421](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L421)
|
||||
|
||||
___
|
||||
|
||||
### metric\_type
|
||||
|
||||
• `Optional` **metric\_type**: [`MetricType`](../enums/MetricType.md)
|
||||
|
||||
Metric type, L2 or Cosine
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:392](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L392)
|
||||
|
||||
___
|
||||
|
||||
### num\_bits
|
||||
|
||||
• `Optional` **num\_bits**: `number`
|
||||
|
||||
The number of bits to present one PQ centroid.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:416](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L416)
|
||||
|
||||
___
|
||||
|
||||
### num\_partitions
|
||||
|
||||
• `Optional` **num\_partitions**: `number`
|
||||
|
||||
The number of partitions this index
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:397](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L397)
|
||||
|
||||
___
|
||||
|
||||
### num\_sub\_vectors
|
||||
|
||||
• `Optional` **num\_sub\_vectors**: `number`
|
||||
|
||||
Number of subvectors to build PQ code
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:412](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L412)
|
||||
|
||||
___
|
||||
|
||||
### replace
|
||||
|
||||
• `Optional` **replace**: `boolean`
|
||||
|
||||
Replace an existing index with the same name if it exists.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:426](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L426)
|
||||
|
||||
___
|
||||
|
||||
### type
|
||||
|
||||
• **type**: ``"ivf_pq"``
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:428](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L428)
|
||||
|
||||
___
|
||||
|
||||
### use\_opq
|
||||
|
||||
• `Optional` **use\_opq**: `boolean`
|
||||
|
||||
Train as optimized product quantization.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:407](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L407)
|
||||
221
docs/src/javascript/interfaces/Table.md
Normal file
221
docs/src/javascript/interfaces/Table.md
Normal file
@@ -0,0 +1,221 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / Table
|
||||
|
||||
# Interface: Table<T\>
|
||||
|
||||
A LanceDB Table is the collection of Records. Each Record has one or more vector fields.
|
||||
|
||||
## Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
## Implemented by
|
||||
|
||||
- [`LocalTable`](../classes/LocalTable.md)
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Properties
|
||||
|
||||
- [add](Table.md#add)
|
||||
- [countRows](Table.md#countrows)
|
||||
- [createIndex](Table.md#createindex)
|
||||
- [delete](Table.md#delete)
|
||||
- [name](Table.md#name)
|
||||
- [overwrite](Table.md#overwrite)
|
||||
- [search](Table.md#search)
|
||||
|
||||
## Properties
|
||||
|
||||
### add
|
||||
|
||||
• **add**: (`data`: `Record`<`string`, `unknown`\>[]) => `Promise`<`number`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
▸ (`data`): `Promise`<`number`\>
|
||||
|
||||
Insert records into this Table.
|
||||
|
||||
##### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
##### Returns
|
||||
|
||||
`Promise`<`number`\>
|
||||
|
||||
The number of rows added to the table
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:120](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L120)
|
||||
|
||||
___
|
||||
|
||||
### countRows
|
||||
|
||||
• **countRows**: () => `Promise`<`number`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
▸ (): `Promise`<`number`\>
|
||||
|
||||
Returns the number of rows in this table.
|
||||
|
||||
##### Returns
|
||||
|
||||
`Promise`<`number`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:140](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L140)
|
||||
|
||||
___
|
||||
|
||||
### createIndex
|
||||
|
||||
• **createIndex**: (`indexParams`: [`IvfPQIndexConfig`](IvfPQIndexConfig.md)) => `Promise`<`any`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
▸ (`indexParams`): `Promise`<`any`\>
|
||||
|
||||
Create an ANN index on this Table vector index.
|
||||
|
||||
**`See`**
|
||||
|
||||
VectorIndexParams.
|
||||
|
||||
##### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `indexParams` | [`IvfPQIndexConfig`](IvfPQIndexConfig.md) | The parameters of this Index, |
|
||||
|
||||
##### Returns
|
||||
|
||||
`Promise`<`any`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:135](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L135)
|
||||
|
||||
___
|
||||
|
||||
### delete
|
||||
|
||||
• **delete**: (`filter`: `string`) => `Promise`<`void`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
▸ (`filter`): `Promise`<`void`\>
|
||||
|
||||
Delete rows from this table.
|
||||
|
||||
This can be used to delete a single row, many rows, all rows, or
|
||||
sometimes no rows (if your predicate matches nothing).
|
||||
|
||||
**`Examples`**
|
||||
|
||||
```ts
|
||||
const con = await lancedb.connect("./.lancedb")
|
||||
const data = [
|
||||
{id: 1, vector: [1, 2]},
|
||||
{id: 2, vector: [3, 4]},
|
||||
{id: 3, vector: [5, 6]},
|
||||
];
|
||||
const tbl = await con.createTable("my_table", data)
|
||||
await tbl.delete("id = 2")
|
||||
await tbl.countRows() // Returns 2
|
||||
```
|
||||
|
||||
If you have a list of values to delete, you can combine them into a
|
||||
stringified list and use the `IN` operator:
|
||||
|
||||
```ts
|
||||
const to_remove = [1, 5];
|
||||
await tbl.delete(`id IN (${to_remove.join(",")})`)
|
||||
await tbl.countRows() // Returns 1
|
||||
```
|
||||
|
||||
##### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `filter` | `string` | A filter in the same format used by a sql WHERE clause. The filter must not be empty. |
|
||||
|
||||
##### Returns
|
||||
|
||||
`Promise`<`void`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:174](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L174)
|
||||
|
||||
___
|
||||
|
||||
### name
|
||||
|
||||
• **name**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:106](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L106)
|
||||
|
||||
___
|
||||
|
||||
### overwrite
|
||||
|
||||
• **overwrite**: (`data`: `Record`<`string`, `unknown`\>[]) => `Promise`<`number`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
▸ (`data`): `Promise`<`number`\>
|
||||
|
||||
Insert records into this Table, replacing its contents.
|
||||
|
||||
##### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
##### Returns
|
||||
|
||||
`Promise`<`number`\>
|
||||
|
||||
The number of rows added to the table
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:128](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L128)
|
||||
|
||||
___
|
||||
|
||||
### search
|
||||
|
||||
• **search**: (`query`: `T`) => [`Query`](../classes/Query.md)<`T`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
▸ (`query`): [`Query`](../classes/Query.md)<`T`\>
|
||||
|
||||
Creates a search query to find the nearest neighbors of the given search term
|
||||
|
||||
##### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `query` | `T` | The query search term |
|
||||
|
||||
##### Returns
|
||||
|
||||
[`Query`](../classes/Query.md)<`T`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:112](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L112)
|
||||
@@ -11,14 +11,19 @@
|
||||
|
||||
### Classes
|
||||
|
||||
- [Connection](classes/Connection.md)
|
||||
- [LocalConnection](classes/LocalConnection.md)
|
||||
- [LocalTable](classes/LocalTable.md)
|
||||
- [OpenAIEmbeddingFunction](classes/OpenAIEmbeddingFunction.md)
|
||||
- [Query](classes/Query.md)
|
||||
- [Table](classes/Table.md)
|
||||
|
||||
### Interfaces
|
||||
|
||||
- [AwsCredentials](interfaces/AwsCredentials.md)
|
||||
- [Connection](interfaces/Connection.md)
|
||||
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||
- [EmbeddingFunction](interfaces/EmbeddingFunction.md)
|
||||
- [IvfPQIndexConfig](interfaces/IvfPQIndexConfig.md)
|
||||
- [Table](interfaces/Table.md)
|
||||
|
||||
### Type Aliases
|
||||
|
||||
@@ -32,17 +37,17 @@
|
||||
|
||||
### VectorIndexParams
|
||||
|
||||
Ƭ **VectorIndexParams**: `IvfPQIndexConfig`
|
||||
Ƭ **VectorIndexParams**: [`IvfPQIndexConfig`](interfaces/IvfPQIndexConfig.md)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:224](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L224)
|
||||
[index.ts:431](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L431)
|
||||
|
||||
## Functions
|
||||
|
||||
### connect
|
||||
|
||||
▸ **connect**(`uri`): `Promise`<[`Connection`](classes/Connection.md)\>
|
||||
▸ **connect**(`uri`): `Promise`<[`Connection`](interfaces/Connection.md)\>
|
||||
|
||||
Connect to a LanceDB instance at the given URI
|
||||
|
||||
@@ -54,8 +59,24 @@ Connect to a LanceDB instance at the given URI
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Connection`](classes/Connection.md)\>
|
||||
`Promise`<[`Connection`](interfaces/Connection.md)\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:34](https://github.com/lancedb/lancedb/blob/31dab97/node/src/index.ts#L34)
|
||||
[index.ts:47](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L47)
|
||||
|
||||
▸ **connect**(`opts`): `Promise`<[`Connection`](interfaces/Connection.md)\>
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `opts` | `Partial`<[`ConnectionOptions`](interfaces/ConnectionOptions.md)\> |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Connection`](interfaces/Connection.md)\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:48](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L48)
|
||||
|
||||
@@ -21,12 +21,13 @@ from argparse import ArgumentParser
|
||||
from multiprocessing import Pool
|
||||
|
||||
import lance
|
||||
import lancedb
|
||||
import pyarrow as pa
|
||||
from datasets import load_dataset
|
||||
from PIL import Image
|
||||
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizerFast
|
||||
|
||||
import lancedb
|
||||
|
||||
MODEL_ID = "openai/clip-vit-base-patch32"
|
||||
|
||||
device = "cuda"
|
||||
|
||||
101
docs/src/python/arrow.md
Normal file
101
docs/src/python/arrow.md
Normal file
@@ -0,0 +1,101 @@
|
||||
# Pandas and PyArrow
|
||||
|
||||
|
||||
Built on top of [Apache Arrow](https://arrow.apache.org/),
|
||||
`LanceDB` is easy to integrate with the Python ecosystem, including [Pandas](https://pandas.pydata.org/)
|
||||
and PyArrow.
|
||||
|
||||
## Create dataset
|
||||
|
||||
First, we need to connect to a `LanceDB` database.
|
||||
|
||||
```py
|
||||
|
||||
import lancedb
|
||||
|
||||
db = lancedb.connect("data/sample-lancedb")
|
||||
```
|
||||
|
||||
Afterwards, we write a `Pandas DataFrame` to LanceDB directly.
|
||||
|
||||
```py
|
||||
import pandas as pd
|
||||
|
||||
data = pd.DataFrame({
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
"item": ["foo", "bar"],
|
||||
"price": [10.0, 20.0]
|
||||
})
|
||||
table = db.create_table("pd_table", data=data)
|
||||
```
|
||||
|
||||
Similar to [`pyarrow.write_dataset()`](https://arrow.apache.org/docs/python/generated/pyarrow.dataset.write_dataset.html),
|
||||
[db.create_table()](../python/#lancedb.db.DBConnection.create_table) accepts a wide-range of forms of data.
|
||||
|
||||
For example, if you have a dataset that is larger than memory size, you can create table with `Iterator[pyarrow.RecordBatch]`,
|
||||
to lazily generate data:
|
||||
|
||||
```py
|
||||
|
||||
from typing import Iterable
|
||||
import pyarrow as pa
|
||||
import lancedb
|
||||
|
||||
def make_batches() -> Iterable[pa.RecordBatch]:
|
||||
for i in range(5):
|
||||
yield pa.RecordBatch.from_arrays(
|
||||
[
|
||||
pa.array([[3.1, 4.1], [5.9, 26.5]]),
|
||||
pa.array(["foo", "bar"]),
|
||||
pa.array([10.0, 20.0]),
|
||||
],
|
||||
["vector", "item", "price"])
|
||||
|
||||
schema=pa.schema([
|
||||
pa.field("vector", pa.list_(pa.float32())),
|
||||
pa.field("item", pa.utf8()),
|
||||
pa.field("price", pa.float32()),
|
||||
])
|
||||
|
||||
table = db.create_table("iterable_table", data=make_batches(), schema=schema)
|
||||
```
|
||||
|
||||
You will find detailed instructions of creating dataset in
|
||||
[Basic Operations](../basic.md) and [API](../python/#lancedb.db.DBConnection.create_table)
|
||||
sections.
|
||||
|
||||
## Vector Search
|
||||
|
||||
We can now perform similarity search via `LanceDB` Python API.
|
||||
|
||||
```py
|
||||
# Open the table previously created.
|
||||
table = db.open_table("pd_table")
|
||||
|
||||
query_vector = [100, 100]
|
||||
# Pandas DataFrame
|
||||
df = table.search(query_vector).limit(1).to_df()
|
||||
print(df)
|
||||
```
|
||||
|
||||
```
|
||||
vector item price score
|
||||
0 [5.9, 26.5] bar 20.0 14257.05957
|
||||
```
|
||||
|
||||
If you have a simple filter, it's faster to provide a `where clause` to `LanceDB`'s search query.
|
||||
If you have more complex criteria, you can always apply the filter to the resulting Pandas `DataFrame`.
|
||||
|
||||
```python
|
||||
|
||||
# Apply the filter via LanceDB
|
||||
results = table.search([100, 100]).where("price < 15").to_df()
|
||||
assert len(results) == 1
|
||||
assert results["item"].iloc[0] == "foo"
|
||||
|
||||
# Apply the filter via Pandas
|
||||
df = results = table.search([100, 100]).to_df()
|
||||
results = df[df.price < 15]
|
||||
assert len(results) == 1
|
||||
assert results["item"].iloc[0] == "foo"
|
||||
```
|
||||
56
docs/src/python/duckdb.md
Normal file
56
docs/src/python/duckdb.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# DuckDB
|
||||
|
||||
`LanceDB` works with `DuckDB` via [PyArrow integration](https://duckdb.org/docs/guides/python/sql_on_arrow).
|
||||
|
||||
Let us start with installing `duckdb` and `lancedb`.
|
||||
|
||||
```shell
|
||||
pip install duckdb lancedb
|
||||
```
|
||||
|
||||
We will re-use [the dataset created previously](./arrow.md):
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import lancedb
|
||||
|
||||
db = lancedb.connect("data/sample-lancedb")
|
||||
data = pd.DataFrame({
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
"item": ["foo", "bar"],
|
||||
"price": [10.0, 20.0]
|
||||
})
|
||||
table = db.create_table("pd_table", data=data)
|
||||
arrow_table = table.to_arrow()
|
||||
```
|
||||
|
||||
`DuckDB` can directly query the `arrow_table`:
|
||||
|
||||
```python
|
||||
import duckdb
|
||||
|
||||
duckdb.query("SELECT * FROM arrow_table")
|
||||
```
|
||||
|
||||
```
|
||||
┌─────────────┬─────────┬────────┐
|
||||
│ vector │ item │ price │
|
||||
│ float[] │ varchar │ double │
|
||||
├─────────────┼─────────┼────────┤
|
||||
│ [3.1, 4.1] │ foo │ 10.0 │
|
||||
│ [5.9, 26.5] │ bar │ 20.0 │
|
||||
└─────────────┴─────────┴────────┘
|
||||
```
|
||||
|
||||
```py
|
||||
duckdb.query("SELECT mean(price) FROM arrow_table")
|
||||
```
|
||||
|
||||
```
|
||||
┌─────────────┐
|
||||
│ mean(price) │
|
||||
│ double │
|
||||
├─────────────┤
|
||||
│ 15.0 │
|
||||
└─────────────┘
|
||||
```
|
||||
7
docs/src/python/integration.md
Normal file
7
docs/src/python/integration.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Integration
|
||||
|
||||
Built on top of [Apache Arrow](https://arrow.apache.org/),
|
||||
`LanceDB` is very easy to be integrate with Python ecosystems.
|
||||
|
||||
* [Pandas and Arrow Integration](./arrow.md)
|
||||
* [DuckDB Integration](./duckdb.md)
|
||||
35
docs/src/python/pydantic.md
Normal file
35
docs/src/python/pydantic.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Pydantic
|
||||
|
||||
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
|
||||
|
||||
## Schema
|
||||
|
||||
LanceDB supports to create Apache Arrow Schema from a
|
||||
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
|
||||
via [pydantic_to_schema()](python.md##lancedb.pydantic.pydantic_to_schema) method.
|
||||
|
||||
::: lancedb.pydantic.pydantic_to_schema
|
||||
|
||||
## Vector Field
|
||||
|
||||
LanceDB provides a [`vector(dim)`](python.md#lancedb.pydantic.vector) method to define a
|
||||
vector Field in a Pydantic Model.
|
||||
|
||||
::: lancedb.pydantic.vector
|
||||
|
||||
## Type Conversion
|
||||
|
||||
LanceDB automatically convert Pydantic fields to
|
||||
[Apache Arrow DataType](https://arrow.apache.org/docs/python/generated/pyarrow.DataType.html#pyarrow.DataType).
|
||||
|
||||
Current supported type conversions:
|
||||
|
||||
| Pydantic Field Type | PyArrow Data Type |
|
||||
| ------------------- | ----------------- |
|
||||
| `int` | `pyarrow.int64` |
|
||||
| `float` | `pyarrow.float64` |
|
||||
| `bool` | `pyarrow.bool` |
|
||||
| `str` | `pyarrow.utf8()` |
|
||||
| `list` | `pyarrow.List` |
|
||||
| `BaseModel` | `pyarrow.Struct` |
|
||||
| `vector(n)` | `pyarrow.FixedSizeList(float32, n)` |
|
||||
@@ -10,14 +10,16 @@ pip install lancedb
|
||||
|
||||
::: lancedb.connect
|
||||
|
||||
::: lancedb.LanceDBConnection
|
||||
::: lancedb.db.DBConnection
|
||||
|
||||
## Table
|
||||
|
||||
::: lancedb.table.LanceTable
|
||||
::: lancedb.table.Table
|
||||
|
||||
## Querying
|
||||
|
||||
::: lancedb.query.Query
|
||||
|
||||
::: lancedb.query.LanceQueryBuilder
|
||||
|
||||
::: lancedb.query.LanceFtsQueryBuilder
|
||||
@@ -41,3 +43,17 @@ pip install lancedb
|
||||
::: lancedb.fts.populate_index
|
||||
|
||||
::: lancedb.fts.search_index
|
||||
|
||||
## Utilities
|
||||
|
||||
::: lancedb.vector
|
||||
|
||||
## Integrations
|
||||
|
||||
### Pydantic
|
||||
|
||||
::: lancedb.pydantic.pydantic_to_schema
|
||||
|
||||
::: lancedb.pydantic.vector
|
||||
|
||||
|
||||
|
||||
@@ -18,26 +18,55 @@ Currently, we support the following metrics:
|
||||
| ----------- | ------------------------------------ |
|
||||
| `L2` | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
|
||||
| `Cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity)|
|
||||
| `Dot` | [Dot Production](https://en.wikipedia.org/wiki/Dot_product) |
|
||||
|
||||
|
||||
## Search
|
||||
|
||||
### Flat Search
|
||||
|
||||
If LanceDB does not create a vector index, LanceDB would need to scan (`Flat Search`) the entire vector column
|
||||
and compute the distance for each vector in order to find the closest matches.
|
||||
|
||||
If there is no [vector index is created](ann_indexes.md), LanceDB will just brute-force scan
|
||||
the vector column and compute the distance.
|
||||
|
||||
<!-- Setup Code
|
||||
```python
|
||||
import lancedb
|
||||
import numpy as np
|
||||
uri = "data/sample-lancedb"
|
||||
db = lancedb.connect(uri)
|
||||
|
||||
data = [{"vector": row, "item": f"item {i}"}
|
||||
for i, row in enumerate(np.random.random((10_000, 1536)).astype('float32'))]
|
||||
|
||||
db.create_table("my_vectors", data=data)
|
||||
```
|
||||
-->
|
||||
<!-- Setup Code
|
||||
```javascript
|
||||
const vectordb_setup = require('vectordb')
|
||||
const db_setup = await vectordb_setup.connect('data/sample-lancedb')
|
||||
|
||||
let data = []
|
||||
for (let i = 0; i < 10_000; i++) {
|
||||
data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
|
||||
}
|
||||
await db_setup.createTable('my_vectors', data)
|
||||
```
|
||||
-->
|
||||
=== "Python"
|
||||
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
import numpy as np
|
||||
|
||||
db = lancedb.connect("data/sample-lancedb")
|
||||
|
||||
tbl = db.open_table("my_vectors")
|
||||
|
||||
df = tbl.search(np.random.random((768)))
|
||||
.limit(10)
|
||||
df = tbl.search(np.random.random((1536))) \
|
||||
.limit(10) \
|
||||
.to_df()
|
||||
```
|
||||
|
||||
@@ -47,10 +76,10 @@ the vector column and compute the distance.
|
||||
const vectordb = require('vectordb')
|
||||
const db = await vectordb.connect('data/sample-lancedb')
|
||||
|
||||
tbl = db.open_table("my_vectors")
|
||||
const tbl = await db.openTable("my_vectors")
|
||||
|
||||
const results = await tbl.search(Array(768))
|
||||
.limit(20)
|
||||
const results_1 = await tbl.search(Array(1536).fill(1.2))
|
||||
.limit(10)
|
||||
.execute()
|
||||
```
|
||||
|
||||
@@ -60,26 +89,33 @@ as well.
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
df = tbl.search(np.random.random((768)))
|
||||
.metric("cosine")
|
||||
.limit(10)
|
||||
df = tbl.search(np.random.random((1536))) \
|
||||
.metric("cosine") \
|
||||
.limit(10) \
|
||||
.to_df()
|
||||
```
|
||||
|
||||
|
||||
=== "JavaScript"
|
||||
|
||||
```javascript
|
||||
const vectordb = require('vectordb')
|
||||
const db = await vectordb.connect('data/sample-lancedb')
|
||||
|
||||
tbl = db.open_table("my_vectors")
|
||||
|
||||
const results = await tbl.search(Array(768))
|
||||
.metric("cosine")
|
||||
.limit(20)
|
||||
const results_2 = await tbl.search(Array(1536).fill(1.2))
|
||||
.metricType("cosine")
|
||||
.limit(10)
|
||||
.execute()
|
||||
```
|
||||
|
||||
### Search with Vector Index.
|
||||
|
||||
### Approximate Nearest Neighbor (ANN) Search with Vector Index.
|
||||
|
||||
To accelerate vector retrievals, it is common to build vector indices.
|
||||
A vector index is a data structure specifically designed to efficiently organize and
|
||||
search vector data based on their similarity or distance metrics.
|
||||
By constructing a vector index, you can reduce the search space and avoid the need
|
||||
for brute-force scanning of the entire vector column.
|
||||
|
||||
However, fast vector search using indices often entails making a trade-off with accuracy to some extent.
|
||||
This is why it is often called **Approximate Nearest Neighbors (ANN)** search, while the Flat Search (KNN)
|
||||
always returns 100% recall.
|
||||
|
||||
See [ANN Index](ann_indexes.md) for more details.
|
||||
120
docs/src/sql.md
Normal file
120
docs/src/sql.md
Normal file
@@ -0,0 +1,120 @@
|
||||
# SQL filters
|
||||
|
||||
LanceDB embraces the utilization of standard SQL expressions as predicates for hybrid
|
||||
filters. It can be used during hybrid vector search and deletion operations.
|
||||
|
||||
Currently, Lance supports a growing list of expressions.
|
||||
|
||||
* ``>``, ``>=``, ``<``, ``<=``, ``=``
|
||||
* ``AND``, ``OR``, ``NOT``
|
||||
* ``IS NULL``, ``IS NOT NULL``
|
||||
* ``IS TRUE``, ``IS NOT TRUE``, ``IS FALSE``, ``IS NOT FALSE``
|
||||
* ``IN``
|
||||
* ``LIKE``, ``NOT LIKE``
|
||||
* ``CAST``
|
||||
* ``regexp_match(column, pattern)``
|
||||
|
||||
For example, the following filter string is acceptable:
|
||||
<!-- Setup Code
|
||||
```python
|
||||
import lancedb
|
||||
import numpy as np
|
||||
uri = "data/sample-lancedb"
|
||||
db = lancedb.connect(uri)
|
||||
|
||||
data = [{"vector": row, "item": f"item {i}"}
|
||||
for i, row in enumerate(np.random.random((10_000, 2)).astype('int'))]
|
||||
|
||||
tbl = db.create_table("my_vectors", data=data)
|
||||
```
|
||||
-->
|
||||
<!-- Setup Code
|
||||
```javascript
|
||||
const vectordb = require('vectordb')
|
||||
const db = await vectordb.connect('data/sample-lancedb')
|
||||
|
||||
let data = []
|
||||
for (let i = 0; i < 10_000; i++) {
|
||||
data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
|
||||
}
|
||||
const tbl = await db.createTable('my_vectors', data)
|
||||
```
|
||||
-->
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
tbl.search([100, 102]) \
|
||||
.where("""(
|
||||
(label IN [10, 20])
|
||||
AND
|
||||
(note.email IS NOT NULL)
|
||||
) OR NOT note.created
|
||||
""")
|
||||
|
||||
```
|
||||
=== "Javascript"
|
||||
|
||||
```javascript
|
||||
tbl.search([100, 102])
|
||||
.where(`(
|
||||
(label IN [10, 20])
|
||||
AND
|
||||
(note.email IS NOT NULL)
|
||||
) OR NOT note.created
|
||||
`)
|
||||
```
|
||||
|
||||
|
||||
If your column name contains special characters or is a [SQL Keyword](https://docs.rs/sqlparser/latest/sqlparser/keywords/index.html),
|
||||
you can use backtick (`` ` ``) to escape it. For nested fields, each segment of the
|
||||
path must be wrapped in backticks.
|
||||
|
||||
=== "SQL"
|
||||
```sql
|
||||
`CUBE` = 10 AND `column name with space` IS NOT NULL
|
||||
AND `nested with space`.`inner with space` < 2
|
||||
```
|
||||
|
||||
!!! warning
|
||||
Field names containing periods (``.``) are not supported.
|
||||
|
||||
Literals for dates, timestamps, and decimals can be written by writing the string
|
||||
value after the type name. For example
|
||||
|
||||
=== "SQL"
|
||||
```sql
|
||||
date_col = date '2021-01-01'
|
||||
and timestamp_col = timestamp '2021-01-01 00:00:00'
|
||||
and decimal_col = decimal(8,3) '1.000'
|
||||
```
|
||||
|
||||
For timestamp columns, the precision can be specified as a number in the type
|
||||
parameter. Microsecond precision (6) is the default.
|
||||
|
||||
| SQL | Time unit |
|
||||
|------------------|--------------|
|
||||
| ``timestamp(0)`` | Seconds |
|
||||
| ``timestamp(3)`` | Milliseconds |
|
||||
| ``timestamp(6)`` | Microseconds |
|
||||
| ``timestamp(9)`` | Nanoseconds |
|
||||
|
||||
LanceDB internally stores data in [Apache Arrow](https://arrow.apache.org/) format.
|
||||
The mapping from SQL types to Arrow types is:
|
||||
|
||||
| SQL type | Arrow type |
|
||||
|----------|------------|
|
||||
| ``boolean`` | ``Boolean`` |
|
||||
| ``tinyint`` / ``tinyint unsigned`` | ``Int8`` / ``UInt8`` |
|
||||
| ``smallint`` / ``smallint unsigned`` | ``Int16`` / ``UInt16`` |
|
||||
| ``int`` or ``integer`` / ``int unsigned`` or ``integer unsigned`` | ``Int32`` / ``UInt32`` |
|
||||
| ``bigint`` / ``bigint unsigned`` | ``Int64`` / ``UInt64`` |
|
||||
| ``float`` | ``Float32`` |
|
||||
| ``double`` | ``Float64`` |
|
||||
| ``decimal(precision, scale)`` | ``Decimal128`` |
|
||||
| ``date`` | ``Date32`` |
|
||||
| ``timestamp`` | ``Timestamp`` [^1] |
|
||||
| ``string`` | ``Utf8`` |
|
||||
| ``binary`` | ``Binary`` |
|
||||
|
||||
[^1]: See precision mapping in previous table.
|
||||
|
||||
52
docs/test/md_testing.js
Normal file
52
docs/test/md_testing.js
Normal file
@@ -0,0 +1,52 @@
|
||||
const glob = require("glob");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
const excludedFiles = [
|
||||
"../src/fts.md",
|
||||
"../src/embedding.md",
|
||||
"../src/examples/serverless_lancedb_with_s3_and_lambda.md",
|
||||
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
|
||||
"../src/examples/transformerjs_embedding_search_nodejs.md",
|
||||
"../src/examples/youtube_transcript_bot_with_nodejs.md",
|
||||
];
|
||||
const nodePrefix = "javascript";
|
||||
const nodeFile = ".js";
|
||||
const nodeFolder = "node";
|
||||
const globString = "../src/**/*.md";
|
||||
const asyncPrefix = "(async () => {\n";
|
||||
const asyncSuffix = "})();";
|
||||
|
||||
function* yieldLines(lines, prefix, suffix) {
|
||||
let inCodeBlock = false;
|
||||
for (const line of lines) {
|
||||
if (line.trim().startsWith(prefix + nodePrefix)) {
|
||||
inCodeBlock = true;
|
||||
} else if (inCodeBlock && line.trim().startsWith(suffix)) {
|
||||
inCodeBlock = false;
|
||||
yield "\n";
|
||||
} else if (inCodeBlock) {
|
||||
yield line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const files = glob.sync(globString, { recursive: true });
|
||||
|
||||
for (const file of files.filter((file) => !excludedFiles.includes(file))) {
|
||||
const lines = [];
|
||||
const data = fs.readFileSync(file, "utf-8");
|
||||
const fileLines = data.split("\n");
|
||||
|
||||
for (const line of yieldLines(fileLines, "```", "```")) {
|
||||
lines.push(line);
|
||||
}
|
||||
|
||||
if (lines.length > 0) {
|
||||
const fileName = path.basename(file, ".md");
|
||||
const outPath = path.join(nodeFolder, fileName, `${fileName}${nodeFile}`);
|
||||
console.log(outPath)
|
||||
fs.mkdirSync(path.dirname(outPath), { recursive: true });
|
||||
fs.writeFileSync(outPath, asyncPrefix + "\n" + lines.join("\n") + asyncSuffix);
|
||||
}
|
||||
}
|
||||
41
docs/test/md_testing.py
Normal file
41
docs/test/md_testing.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import glob
|
||||
from typing import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
excluded_files = [
|
||||
"../src/fts.md",
|
||||
"../src/embedding.md",
|
||||
"../src/examples/serverless_lancedb_with_s3_and_lambda.md",
|
||||
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
|
||||
"../src/examples/youtube_transcript_bot_with_nodejs.md"
|
||||
]
|
||||
|
||||
python_prefix = "py"
|
||||
python_file = ".py"
|
||||
python_folder = "python"
|
||||
glob_string = "../src/**/*.md"
|
||||
|
||||
def yield_lines(lines: Iterator[str], prefix: str, suffix: str):
|
||||
in_code_block = False
|
||||
# Python code has strict indentation
|
||||
strip_length = 0
|
||||
for line in lines:
|
||||
if line.strip().startswith(prefix + python_prefix):
|
||||
in_code_block = True
|
||||
strip_length = len(line) - len(line.lstrip())
|
||||
elif in_code_block and line.strip().startswith(suffix):
|
||||
in_code_block = False
|
||||
yield "\n"
|
||||
elif in_code_block:
|
||||
yield line[strip_length:]
|
||||
|
||||
for file in filter(lambda file: file not in excluded_files, glob.glob(glob_string, recursive=True)):
|
||||
with open(file, "r") as f:
|
||||
lines = list(yield_lines(iter(f), "```", "```"))
|
||||
|
||||
if len(lines) > 0:
|
||||
out_path = Path(python_folder) / Path(file).name.strip(".md") / (Path(file).name.strip(".md") + python_file)
|
||||
print(out_path)
|
||||
out_path.parent.mkdir(exist_ok=True, parents=True)
|
||||
with open(out_path, "w") as out:
|
||||
out.writelines(lines)
|
||||
13
docs/test/package.json
Normal file
13
docs/test/package.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "lancedb-docs-test",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"fs": "^0.0.1-security",
|
||||
"glob": "^10.2.7",
|
||||
"path": "^0.12.7",
|
||||
"vectordb": "https://gitpkg.now.sh/lancedb/lancedb/node?main"
|
||||
}
|
||||
}
|
||||
5
docs/test/requirements.txt
Normal file
5
docs/test/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
lancedb @ git+https://github.com/lancedb/lancedb.git#egg=subdir&subdirectory=python
|
||||
numpy
|
||||
pandas
|
||||
pylance
|
||||
duckdb
|
||||
@@ -12,5 +12,6 @@ module.exports = {
|
||||
sourceType: 'module'
|
||||
},
|
||||
rules: {
|
||||
"@typescript-eslint/method-signature-style": "off",
|
||||
}
|
||||
}
|
||||
|
||||
4
node/.npmignore
Normal file
4
node/.npmignore
Normal file
@@ -0,0 +1,4 @@
|
||||
gen_test_data.py
|
||||
index.node
|
||||
dist/lancedb*.tgz
|
||||
vectordb*.tgz
|
||||
@@ -8,15 +8,21 @@ A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb)
|
||||
npm install vectordb
|
||||
```
|
||||
|
||||
This will download the appropriate native library for your platform. We currently
|
||||
support x86_64 Linux, aarch64 Linux, Intel MacOS, and ARM (M1/M2) MacOS. We do not
|
||||
yet support Windows or musl-based Linux (such as Alpine Linux).
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Example
|
||||
|
||||
```javascript
|
||||
const lancedb = require('vectordb');
|
||||
const db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>');
|
||||
const table = await db.openTable('my_table');
|
||||
const query = await table.search([0.1, 0.3]).setLimit(20).execute();
|
||||
const db = await lancedb.connect('data/sample-lancedb');
|
||||
const table = await db.createTable("my_table",
|
||||
[{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
|
||||
{ id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
|
||||
const results = await table.search([0.1, 0.3]).limit(20).execute();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
@@ -24,17 +30,33 @@ The [examples](./examples) folder contains complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
The LanceDB javascript is built with npm:
|
||||
To build everything fresh:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
npm run tsc
|
||||
npm run build
|
||||
```
|
||||
|
||||
Then you should be able to run the tests with:
|
||||
|
||||
```bash
|
||||
npm test
|
||||
```
|
||||
|
||||
### Rebuilding Rust library
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
### Rebuilding Typescript
|
||||
|
||||
```bash
|
||||
npm run tsc
|
||||
```
|
||||
|
||||
Run the tests with
|
||||
|
||||
```bash
|
||||
npm test
|
||||
```
|
||||
### Fix lints
|
||||
|
||||
To run the linter and have it automatically fix all errors
|
||||
|
||||
|
||||
66
node/examples/js-transformers/index.js
Normal file
66
node/examples/js-transformers/index.js
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright 2023 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
'use strict'
|
||||
|
||||
|
||||
async function example() {
|
||||
|
||||
const lancedb = require('vectordb')
|
||||
|
||||
// Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
|
||||
const { pipeline } = await import('@xenova/transformers')
|
||||
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
||||
|
||||
|
||||
// Create embedding function from pipeline which returns a list of vectors from batch
|
||||
// sourceColumn is the name of the column in the data to be embedded
|
||||
//
|
||||
// Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
|
||||
const embed_fun = {}
|
||||
embed_fun.sourceColumn = 'text'
|
||||
embed_fun.embed = async function (batch) {
|
||||
let result = []
|
||||
for (let text of batch) {
|
||||
const res = await pipe(text, { pooling: 'mean', normalize: true })
|
||||
result.push(Array.from(res['data']))
|
||||
}
|
||||
return (result)
|
||||
}
|
||||
|
||||
// Link a folder and create a table with data
|
||||
const db = await lancedb.connect('data/sample-lancedb')
|
||||
|
||||
const data = [
|
||||
{ id: 1, text: 'Cherry', type: 'fruit' },
|
||||
{ id: 2, text: 'Carrot', type: 'vegetable' },
|
||||
{ id: 3, text: 'Potato', type: 'vegetable' },
|
||||
{ id: 4, text: 'Apple', type: 'fruit' },
|
||||
{ id: 5, text: 'Banana', type: 'fruit' }
|
||||
]
|
||||
|
||||
const table = await db.createTable('food_table', data, "create", embed_fun)
|
||||
|
||||
|
||||
// Query the table
|
||||
const results = await table
|
||||
.search("a sweet fruit to eat")
|
||||
.metricType("cosine")
|
||||
.limit(2)
|
||||
.execute()
|
||||
console.log(results.map(r => r.text))
|
||||
|
||||
}
|
||||
|
||||
example().then(_ => { console.log("Done!") })
|
||||
16
node/examples/js-transformers/package.json
Normal file
16
node/examples/js-transformers/package.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"name": "vectordb-example-js-transformers",
|
||||
"version": "1.0.0",
|
||||
"description": "Example for using transformers.js with lancedb",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "Lance Devs",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@xenova/transformers": "^2.4.1",
|
||||
"vectordb": "^0.1.12"
|
||||
}
|
||||
|
||||
}
|
||||
@@ -12,29 +12,26 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
const { currentTarget } = require('@neon-rs/load');
|
||||
|
||||
let nativeLib;
|
||||
|
||||
function getPlatformLibrary() {
|
||||
if (process.platform === "darwin" && process.arch == "arm64") {
|
||||
return require('./aarch64-apple-darwin.node');
|
||||
} else if (process.platform === "darwin" && process.arch == "x64") {
|
||||
return require('./x86_64-apple-darwin.node');
|
||||
} else if (process.platform === "linux" && process.arch == "x64") {
|
||||
return require('./x86_64-unknown-linux-gnu.node');
|
||||
} else {
|
||||
throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`)
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
nativeLib = require('./index.node')
|
||||
nativeLib = require(`@lancedb/vectordb-${currentTarget()}`);
|
||||
} catch (e) {
|
||||
if (e.code === "MODULE_NOT_FOUND") {
|
||||
nativeLib = getPlatformLibrary();
|
||||
} else {
|
||||
throw new Error('vectordb: failed to load native library. Please file a bug report at https://github.com/lancedb/lancedb/issues');
|
||||
try {
|
||||
// Might be developing locally, so try that. But don't expose that error
|
||||
// to the user.
|
||||
nativeLib = require("./index.node");
|
||||
} catch {
|
||||
throw new Error(`vectordb: failed to load native library.
|
||||
You may need to run \`npm install @lancedb/vectordb-${currentTarget()}\`.
|
||||
|
||||
If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
|
||||
|
||||
Source error: ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = nativeLib
|
||||
|
||||
// Dynamic require for runtime.
|
||||
module.exports = nativeLib;
|
||||
|
||||
296
node/package-lock.json
generated
296
node/package-lock.json
generated
@@ -1,19 +1,32 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.1.5",
|
||||
"version": "0.1.15",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.1.5",
|
||||
"version": "0.1.15",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"dependencies": {
|
||||
"@apache-arrow/ts": "^12.0.0",
|
||||
"apache-arrow": "^12.0.0"
|
||||
"@neon-rs/load": "^0.0.74",
|
||||
"apache-arrow": "^12.0.0",
|
||||
"axios": "^1.4.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@neon-rs/cli": "^0.0.74",
|
||||
"@types/chai": "^4.3.4",
|
||||
"@types/chai-as-promised": "^7.1.5",
|
||||
"@types/mocha": "^10.0.1",
|
||||
"@types/node": "^18.16.2",
|
||||
"@types/sinon": "^10.0.15",
|
||||
@@ -21,9 +34,10 @@
|
||||
"@typescript-eslint/eslint-plugin": "^5.59.1",
|
||||
"cargo-cp-artifact": "^0.1",
|
||||
"chai": "^4.3.7",
|
||||
"chai-as-promised": "^7.1.1",
|
||||
"eslint": "^8.39.0",
|
||||
"eslint-config-standard-with-typescript": "^34.0.1",
|
||||
"eslint-plugin-import": "^2.27.5",
|
||||
"eslint-plugin-import": "^2.26.0",
|
||||
"eslint-plugin-n": "^15.7.0",
|
||||
"eslint-plugin-promise": "^6.1.1",
|
||||
"mocha": "^10.2.0",
|
||||
@@ -35,6 +49,13 @@
|
||||
"typedoc": "^0.24.7",
|
||||
"typedoc-plugin-markdown": "^3.15.3",
|
||||
"typescript": "*"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.1.15",
|
||||
"@lancedb/vectordb-darwin-x64": "0.1.15",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.1.15",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.1.15",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.1.15"
|
||||
}
|
||||
},
|
||||
"node_modules/@apache-arrow/ts": {
|
||||
@@ -202,6 +223,20 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@neon-rs/cli": {
|
||||
"version": "0.0.74",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
|
||||
"integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
|
||||
"dev": true,
|
||||
"bin": {
|
||||
"neon": "index.js"
|
||||
}
|
||||
},
|
||||
"node_modules/@neon-rs/load": {
|
||||
"version": "0.0.74",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/load/-/load-0.0.74.tgz",
|
||||
"integrity": "sha512-/cPZD907UNz55yrc/ud4wDgQKtU1TvkD9jeqZWG6J4IMmZkp6zgjkQcKA8UvpkZlcpPHvc8J17sGzLFbP/LUYg=="
|
||||
},
|
||||
"node_modules/@nodelib/fs.scandir": {
|
||||
"version": "2.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
|
||||
@@ -311,6 +346,15 @@
|
||||
"integrity": "sha512-KnRanxnpfpjUTqTCXslZSEdLfXExwgNxYPdiO2WGUj8+HDjFi8R3k5RVKPeSCzLjCcshCAtVO2QBbVuAV4kTnw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/chai-as-promised": {
|
||||
"version": "7.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/chai-as-promised/-/chai-as-promised-7.1.5.tgz",
|
||||
"integrity": "sha512-jStwss93SITGBwt/niYrkf2C+/1KTeZCZl1LaeezTlqppAKeoQC7jxyqYuP72sxBGKCIbw7oHgbYssIRzT5FCQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@types/chai": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/command-line-args": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.0.tgz",
|
||||
@@ -787,24 +831,6 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/array.prototype.flatmap": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.1.tgz",
|
||||
"integrity": "sha512-8UGn9O1FDVvMNB0UlLv4voxRMze7+FpHyF5mSMRjWHUMlpoDViniy05870VlxhfgTnLbpuwTzvD76MTtWxB/mQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"call-bind": "^1.0.2",
|
||||
"define-properties": "^1.1.4",
|
||||
"es-abstract": "^1.20.4",
|
||||
"es-shim-unscopables": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/assertion-error": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz",
|
||||
@@ -817,8 +843,7 @@
|
||||
"node_modules/asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
||||
"dev": true
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
},
|
||||
"node_modules/available-typed-arrays": {
|
||||
"version": "1.0.5",
|
||||
@@ -833,12 +858,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "0.26.1",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
||||
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
||||
"dev": true,
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz",
|
||||
"integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.14.8"
|
||||
"follow-redirects": "^1.15.0",
|
||||
"form-data": "^4.0.0",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/balanced-match": {
|
||||
@@ -960,6 +986,18 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/chai-as-promised": {
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/chai-as-promised/-/chai-as-promised-7.1.1.tgz",
|
||||
"integrity": "sha512-azL6xMoi+uxu6z4rhWQ1jbdUhOMhis2PvscD/xjLqNMkv3BPPp2JyyuTHOrf9BOosGpNQ11v6BKv/g57RXbiaA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"check-error": "^1.0.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"chai": ">= 2.1.2 < 5"
|
||||
}
|
||||
},
|
||||
"node_modules/chalk": {
|
||||
"version": "4.1.2",
|
||||
"resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
|
||||
@@ -1057,7 +1095,6 @@
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"delayed-stream": "~1.0.0"
|
||||
},
|
||||
@@ -1280,7 +1317,6 @@
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
@@ -1633,25 +1669,23 @@
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-import": {
|
||||
"version": "2.27.5",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz",
|
||||
"integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==",
|
||||
"version": "2.26.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.26.0.tgz",
|
||||
"integrity": "sha512-hYfi3FXaM8WPLf4S1cikh/r4IxnO6zrhZbEGz2b660EJRbuxgpDS5gkCuYgGWg2xxh2rBuIr4Pvhve/7c31koA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"array-includes": "^3.1.6",
|
||||
"array.prototype.flat": "^1.3.1",
|
||||
"array.prototype.flatmap": "^1.3.1",
|
||||
"debug": "^3.2.7",
|
||||
"array-includes": "^3.1.4",
|
||||
"array.prototype.flat": "^1.2.5",
|
||||
"debug": "^2.6.9",
|
||||
"doctrine": "^2.1.0",
|
||||
"eslint-import-resolver-node": "^0.3.7",
|
||||
"eslint-module-utils": "^2.7.4",
|
||||
"eslint-import-resolver-node": "^0.3.6",
|
||||
"eslint-module-utils": "^2.7.3",
|
||||
"has": "^1.0.3",
|
||||
"is-core-module": "^2.11.0",
|
||||
"is-core-module": "^2.8.1",
|
||||
"is-glob": "^4.0.3",
|
||||
"minimatch": "^3.1.2",
|
||||
"object.values": "^1.1.6",
|
||||
"resolve": "^1.22.1",
|
||||
"semver": "^6.3.0",
|
||||
"object.values": "^1.1.5",
|
||||
"resolve": "^1.22.0",
|
||||
"tsconfig-paths": "^3.14.1"
|
||||
},
|
||||
"engines": {
|
||||
@@ -1662,12 +1696,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-import/node_modules/debug": {
|
||||
"version": "3.2.7",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
|
||||
"integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
|
||||
"version": "2.6.9",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
|
||||
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"ms": "^2.1.1"
|
||||
"ms": "2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-import/node_modules/doctrine": {
|
||||
@@ -1682,14 +1716,11 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-import/node_modules/semver": {
|
||||
"version": "6.3.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz",
|
||||
"integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==",
|
||||
"dev": true,
|
||||
"bin": {
|
||||
"semver": "bin/semver.js"
|
||||
}
|
||||
"node_modules/eslint-plugin-import/node_modules/ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
|
||||
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/eslint-plugin-n": {
|
||||
"version": "15.7.0",
|
||||
@@ -2052,7 +2083,6 @@
|
||||
"version": "1.15.2",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
||||
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
"type": "individual",
|
||||
@@ -2081,7 +2111,6 @@
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
@@ -2955,7 +2984,6 @@
|
||||
"version": "1.52.0",
|
||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
@@ -2964,7 +2992,6 @@
|
||||
"version": "2.1.35",
|
||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"mime-db": "1.52.0"
|
||||
},
|
||||
@@ -3258,6 +3285,15 @@
|
||||
"form-data": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/openai/node_modules/axios": {
|
||||
"version": "0.26.1",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
||||
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.14.8"
|
||||
}
|
||||
},
|
||||
"node_modules/optionator": {
|
||||
"version": "0.9.1",
|
||||
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz",
|
||||
@@ -3409,6 +3445,11 @@
|
||||
"node": ">= 0.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||
},
|
||||
"node_modules/punycode": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
|
||||
@@ -3619,9 +3660,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.5.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.0.tgz",
|
||||
"integrity": "sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA==",
|
||||
"version": "7.5.3",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.3.tgz",
|
||||
"integrity": "sha512-QBlUtyVk/5EeHbi7X0fw6liDZc7BBmEaSYn01fMU1OUYbf6GPsbTtd8WmnqbI20SeycoHSeiybkE/q1Q+qlThQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"lru-cache": "^6.0.0"
|
||||
@@ -4601,6 +4642,17 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"@neon-rs/cli": {
|
||||
"version": "0.0.74",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
|
||||
"integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
|
||||
"dev": true
|
||||
},
|
||||
"@neon-rs/load": {
|
||||
"version": "0.0.74",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/load/-/load-0.0.74.tgz",
|
||||
"integrity": "sha512-/cPZD907UNz55yrc/ud4wDgQKtU1TvkD9jeqZWG6J4IMmZkp6zgjkQcKA8UvpkZlcpPHvc8J17sGzLFbP/LUYg=="
|
||||
},
|
||||
"@nodelib/fs.scandir": {
|
||||
"version": "2.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
|
||||
@@ -4703,6 +4755,15 @@
|
||||
"integrity": "sha512-KnRanxnpfpjUTqTCXslZSEdLfXExwgNxYPdiO2WGUj8+HDjFi8R3k5RVKPeSCzLjCcshCAtVO2QBbVuAV4kTnw==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/chai-as-promised": {
|
||||
"version": "7.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/chai-as-promised/-/chai-as-promised-7.1.5.tgz",
|
||||
"integrity": "sha512-jStwss93SITGBwt/niYrkf2C+/1KTeZCZl1LaeezTlqppAKeoQC7jxyqYuP72sxBGKCIbw7oHgbYssIRzT5FCQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"@types/chai": "*"
|
||||
}
|
||||
},
|
||||
"@types/command-line-args": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.0.tgz",
|
||||
@@ -5038,18 +5099,6 @@
|
||||
"es-shim-unscopables": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"array.prototype.flatmap": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.1.tgz",
|
||||
"integrity": "sha512-8UGn9O1FDVvMNB0UlLv4voxRMze7+FpHyF5mSMRjWHUMlpoDViniy05870VlxhfgTnLbpuwTzvD76MTtWxB/mQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"call-bind": "^1.0.2",
|
||||
"define-properties": "^1.1.4",
|
||||
"es-abstract": "^1.20.4",
|
||||
"es-shim-unscopables": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"assertion-error": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz",
|
||||
@@ -5059,8 +5108,7 @@
|
||||
"asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
||||
"dev": true
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
},
|
||||
"available-typed-arrays": {
|
||||
"version": "1.0.5",
|
||||
@@ -5069,12 +5117,13 @@
|
||||
"dev": true
|
||||
},
|
||||
"axios": {
|
||||
"version": "0.26.1",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
||||
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
||||
"dev": true,
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz",
|
||||
"integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==",
|
||||
"requires": {
|
||||
"follow-redirects": "^1.14.8"
|
||||
"follow-redirects": "^1.15.0",
|
||||
"form-data": "^4.0.0",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"balanced-match": {
|
||||
@@ -5172,6 +5221,15 @@
|
||||
"type-detect": "^4.0.5"
|
||||
}
|
||||
},
|
||||
"chai-as-promised": {
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/chai-as-promised/-/chai-as-promised-7.1.1.tgz",
|
||||
"integrity": "sha512-azL6xMoi+uxu6z4rhWQ1jbdUhOMhis2PvscD/xjLqNMkv3BPPp2JyyuTHOrf9BOosGpNQ11v6BKv/g57RXbiaA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"check-error": "^1.0.2"
|
||||
}
|
||||
},
|
||||
"chalk": {
|
||||
"version": "4.1.2",
|
||||
"resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
|
||||
@@ -5245,7 +5303,6 @@
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"delayed-stream": "~1.0.0"
|
||||
}
|
||||
@@ -5412,8 +5469,7 @@
|
||||
"delayed-stream": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||
"dev": true
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="
|
||||
},
|
||||
"diff": {
|
||||
"version": "4.0.2",
|
||||
@@ -5707,35 +5763,33 @@
|
||||
}
|
||||
},
|
||||
"eslint-plugin-import": {
|
||||
"version": "2.27.5",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz",
|
||||
"integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==",
|
||||
"version": "2.26.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.26.0.tgz",
|
||||
"integrity": "sha512-hYfi3FXaM8WPLf4S1cikh/r4IxnO6zrhZbEGz2b660EJRbuxgpDS5gkCuYgGWg2xxh2rBuIr4Pvhve/7c31koA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"array-includes": "^3.1.6",
|
||||
"array.prototype.flat": "^1.3.1",
|
||||
"array.prototype.flatmap": "^1.3.1",
|
||||
"debug": "^3.2.7",
|
||||
"array-includes": "^3.1.4",
|
||||
"array.prototype.flat": "^1.2.5",
|
||||
"debug": "^2.6.9",
|
||||
"doctrine": "^2.1.0",
|
||||
"eslint-import-resolver-node": "^0.3.7",
|
||||
"eslint-module-utils": "^2.7.4",
|
||||
"eslint-import-resolver-node": "^0.3.6",
|
||||
"eslint-module-utils": "^2.7.3",
|
||||
"has": "^1.0.3",
|
||||
"is-core-module": "^2.11.0",
|
||||
"is-core-module": "^2.8.1",
|
||||
"is-glob": "^4.0.3",
|
||||
"minimatch": "^3.1.2",
|
||||
"object.values": "^1.1.6",
|
||||
"resolve": "^1.22.1",
|
||||
"semver": "^6.3.0",
|
||||
"object.values": "^1.1.5",
|
||||
"resolve": "^1.22.0",
|
||||
"tsconfig-paths": "^3.14.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"debug": {
|
||||
"version": "3.2.7",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
|
||||
"integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
|
||||
"version": "2.6.9",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
|
||||
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"ms": "^2.1.1"
|
||||
"ms": "2.0.0"
|
||||
}
|
||||
},
|
||||
"doctrine": {
|
||||
@@ -5747,10 +5801,10 @@
|
||||
"esutils": "^2.0.2"
|
||||
}
|
||||
},
|
||||
"semver": {
|
||||
"version": "6.3.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz",
|
||||
"integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==",
|
||||
"ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
|
||||
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
@@ -5985,8 +6039,7 @@
|
||||
"follow-redirects": {
|
||||
"version": "1.15.2",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
||||
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
|
||||
"dev": true
|
||||
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA=="
|
||||
},
|
||||
"for-each": {
|
||||
"version": "0.3.3",
|
||||
@@ -6001,7 +6054,6 @@
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
@@ -6615,14 +6667,12 @@
|
||||
"mime-db": {
|
||||
"version": "1.52.0",
|
||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||
"dev": true
|
||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="
|
||||
},
|
||||
"mime-types": {
|
||||
"version": "2.1.35",
|
||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"mime-db": "1.52.0"
|
||||
}
|
||||
@@ -6848,6 +6898,17 @@
|
||||
"requires": {
|
||||
"axios": "^0.26.0",
|
||||
"form-data": "^4.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"axios": {
|
||||
"version": "0.26.1",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
||||
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"follow-redirects": "^1.14.8"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"optionator": {
|
||||
@@ -6956,6 +7017,11 @@
|
||||
"integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==",
|
||||
"dev": true
|
||||
},
|
||||
"proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||
},
|
||||
"punycode": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
|
||||
@@ -7078,9 +7144,9 @@
|
||||
}
|
||||
},
|
||||
"semver": {
|
||||
"version": "7.5.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.0.tgz",
|
||||
"integrity": "sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA==",
|
||||
"version": "7.5.3",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.3.tgz",
|
||||
"integrity": "sha512-QBlUtyVk/5EeHbi7X0fw6liDZc7BBmEaSYn01fMU1OUYbf6GPsbTtd8WmnqbI20SeycoHSeiybkE/q1Q+qlThQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"lru-cache": "^6.0.0"
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.1.5",
|
||||
"version": "0.1.15",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"scripts": {
|
||||
"tsc": "tsc -b",
|
||||
"build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json-render-diagnostics",
|
||||
"build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json",
|
||||
"build-release": "npm run build -- --release",
|
||||
"test": "mocha -recursive dist/test",
|
||||
"test": "npm run tsc && mocha -recursive dist/test",
|
||||
"lint": "eslint src --ext .js,.ts",
|
||||
"clean": "rm -rf node_modules *.node dist/"
|
||||
"clean": "rm -rf node_modules *.node dist/",
|
||||
"pack-build": "neon pack-build",
|
||||
"check-npm": "printenv && which node && which npm && npm --version"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@@ -25,7 +27,9 @@
|
||||
"author": "Lance Devs",
|
||||
"license": "Apache-2.0",
|
||||
"devDependencies": {
|
||||
"@neon-rs/cli": "^0.0.74",
|
||||
"@types/chai": "^4.3.4",
|
||||
"@types/chai-as-promised": "^7.1.5",
|
||||
"@types/mocha": "^10.0.1",
|
||||
"@types/node": "^18.16.2",
|
||||
"@types/sinon": "^10.0.15",
|
||||
@@ -33,9 +37,10 @@
|
||||
"@typescript-eslint/eslint-plugin": "^5.59.1",
|
||||
"cargo-cp-artifact": "^0.1",
|
||||
"chai": "^4.3.7",
|
||||
"chai-as-promised": "^7.1.1",
|
||||
"eslint": "^8.39.0",
|
||||
"eslint-config-standard-with-typescript": "^34.0.1",
|
||||
"eslint-plugin-import": "^2.27.5",
|
||||
"eslint-plugin-import": "^2.26.0",
|
||||
"eslint-plugin-n": "^15.7.0",
|
||||
"eslint-plugin-promise": "^6.1.1",
|
||||
"mocha": "^10.2.0",
|
||||
@@ -50,6 +55,33 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@apache-arrow/ts": "^12.0.0",
|
||||
"apache-arrow": "^12.0.0"
|
||||
"@neon-rs/load": "^0.0.74",
|
||||
"apache-arrow": "^12.0.0",
|
||||
"axios": "^1.4.0"
|
||||
},
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"neon": {
|
||||
"targets": {
|
||||
"x86_64-apple-darwin": "@lancedb/vectordb-darwin-x64",
|
||||
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
|
||||
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
|
||||
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
|
||||
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.1.15",
|
||||
"@lancedb/vectordb-darwin-x64": "0.1.15",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.1.15",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.1.15",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.1.15"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,42 +14,196 @@
|
||||
|
||||
import {
|
||||
RecordBatchFileWriter,
|
||||
type Table as ArrowTable,
|
||||
tableFromIPC,
|
||||
Vector
|
||||
type Table as ArrowTable
|
||||
} from 'apache-arrow'
|
||||
import { fromRecordsToBuffer } from './arrow'
|
||||
import type { EmbeddingFunction } from './embedding/embedding_function'
|
||||
import { RemoteConnection } from './remote'
|
||||
import { Query } from './query'
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js')
|
||||
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete } = require('../native.js')
|
||||
|
||||
export type { EmbeddingFunction }
|
||||
export { OpenAIEmbeddingFunction } from './embedding/openai'
|
||||
|
||||
export interface AwsCredentials {
|
||||
accessKeyId: string
|
||||
|
||||
secretKey: string
|
||||
|
||||
sessionToken?: string
|
||||
}
|
||||
|
||||
export interface ConnectionOptions {
|
||||
uri: string
|
||||
|
||||
awsCredentials?: AwsCredentials
|
||||
|
||||
// API key for the remote connections
|
||||
apiKey?: string
|
||||
// Region to connect
|
||||
region?: string
|
||||
|
||||
// override the host for the remote connections
|
||||
hostOverride?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI
|
||||
* @param uri The uri of the database.
|
||||
*/
|
||||
export async function connect (uri: string): Promise<Connection> {
|
||||
const db = await databaseNew(uri)
|
||||
return new Connection(db, uri)
|
||||
export async function connect (uri: string): Promise<Connection>
|
||||
export async function connect (opts: Partial<ConnectionOptions>): Promise<Connection>
|
||||
export async function connect (arg: string | Partial<ConnectionOptions>): Promise<Connection> {
|
||||
let opts: ConnectionOptions
|
||||
if (typeof arg === 'string') {
|
||||
opts = { uri: arg }
|
||||
} else {
|
||||
// opts = { uri: arg.uri, awsCredentials = arg.awsCredentials }
|
||||
opts = Object.assign({
|
||||
uri: '',
|
||||
awsCredentials: undefined,
|
||||
apiKey: undefined,
|
||||
region: 'us-west-2'
|
||||
}, arg)
|
||||
}
|
||||
|
||||
if (opts.uri.startsWith('db://')) {
|
||||
// Remote connection
|
||||
return new RemoteConnection(opts)
|
||||
}
|
||||
const db = await databaseNew(opts.uri)
|
||||
return new LocalConnection(db, opts)
|
||||
}
|
||||
|
||||
/**
|
||||
* A LanceDB Connection that allows you to open tables and create new ones.
|
||||
*
|
||||
* Connection could be local against filesystem or remote against a server.
|
||||
*/
|
||||
export interface Connection {
|
||||
uri: string
|
||||
|
||||
tableNames(): Promise<string[]>
|
||||
|
||||
/**
|
||||
* Open a table in the database.
|
||||
*
|
||||
* @param name The name of the table.
|
||||
* @param embeddings An embedding function to use on this table
|
||||
*/
|
||||
openTable<T>(name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
|
||||
/**
|
||||
* Creates a new Table and initialize it with new data.
|
||||
*
|
||||
* @param {string} name - The name of the table.
|
||||
* @param data - Non-empty Array of Records to be inserted into the table
|
||||
* @param {WriteMode} mode - The write mode to use when creating the table.
|
||||
* @param {EmbeddingFunction} embeddings - An embedding function to use on this table
|
||||
*/
|
||||
createTable<T>(name: string, data: Array<Record<string, unknown>>, mode?: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
|
||||
createTableArrow(name: string, table: ArrowTable): Promise<Table>
|
||||
|
||||
/**
|
||||
* Drop an existing table.
|
||||
* @param name The name of the table to drop.
|
||||
*/
|
||||
dropTable(name: string): Promise<void>
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* A LanceDB Table is the collection of Records. Each Record has one or more vector fields.
|
||||
*/
|
||||
export interface Table<T = number[]> {
|
||||
name: string
|
||||
|
||||
/**
|
||||
* Creates a search query to find the nearest neighbors of the given search term
|
||||
* @param query The query search term
|
||||
*/
|
||||
search: (query: T) => Query<T>
|
||||
|
||||
/**
|
||||
* Insert records into this Table.
|
||||
*
|
||||
* @param data Records to be inserted into the Table
|
||||
* @return The number of rows added to the table
|
||||
*/
|
||||
add: (data: Array<Record<string, unknown>>) => Promise<number>
|
||||
|
||||
/**
|
||||
* Insert records into this Table, replacing its contents.
|
||||
*
|
||||
* @param data Records to be inserted into the Table
|
||||
* @return The number of rows added to the table
|
||||
*/
|
||||
overwrite: (data: Array<Record<string, unknown>>) => Promise<number>
|
||||
|
||||
/**
|
||||
* Create an ANN index on this Table vector index.
|
||||
*
|
||||
* @param indexParams The parameters of this Index, @see VectorIndexParams.
|
||||
*/
|
||||
createIndex: (indexParams: VectorIndexParams) => Promise<any>
|
||||
|
||||
/**
|
||||
* Returns the number of rows in this table.
|
||||
*/
|
||||
countRows: () => Promise<number>
|
||||
|
||||
/**
|
||||
* Delete rows from this table.
|
||||
*
|
||||
* This can be used to delete a single row, many rows, all rows, or
|
||||
* sometimes no rows (if your predicate matches nothing).
|
||||
*
|
||||
* @param filter A filter in the same format used by a sql WHERE clause. The
|
||||
* filter must not be empty.
|
||||
*
|
||||
* @examples
|
||||
*
|
||||
* ```ts
|
||||
* const con = await lancedb.connect("./.lancedb")
|
||||
* const data = [
|
||||
* {id: 1, vector: [1, 2]},
|
||||
* {id: 2, vector: [3, 4]},
|
||||
* {id: 3, vector: [5, 6]},
|
||||
* ];
|
||||
* const tbl = await con.createTable("my_table", data)
|
||||
* await tbl.delete("id = 2")
|
||||
* await tbl.countRows() // Returns 2
|
||||
* ```
|
||||
*
|
||||
* If you have a list of values to delete, you can combine them into a
|
||||
* stringified list and use the `IN` operator:
|
||||
*
|
||||
* ```ts
|
||||
* const to_remove = [1, 5];
|
||||
* await tbl.delete(`id IN (${to_remove.join(",")})`)
|
||||
* await tbl.countRows() // Returns 1
|
||||
* ```
|
||||
*/
|
||||
delete: (filter: string) => Promise<void>
|
||||
}
|
||||
|
||||
/**
|
||||
* A connection to a LanceDB database.
|
||||
*/
|
||||
export class Connection {
|
||||
private readonly _uri: string
|
||||
export class LocalConnection implements Connection {
|
||||
private readonly _options: ConnectionOptions
|
||||
private readonly _db: any
|
||||
|
||||
constructor (db: any, uri: string) {
|
||||
this._uri = uri
|
||||
constructor (db: any, options: ConnectionOptions) {
|
||||
this._options = options
|
||||
this._db = db
|
||||
}
|
||||
|
||||
get uri (): string {
|
||||
return this._uri
|
||||
return this._options.uri
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -65,6 +219,7 @@ export class Connection {
|
||||
* @param name The name of the table.
|
||||
*/
|
||||
async openTable (name: string): Promise<Table>
|
||||
|
||||
/**
|
||||
* Open a table in the database.
|
||||
*
|
||||
@@ -72,12 +227,13 @@ export class Connection {
|
||||
* @param embeddings An embedding function to use on this Table
|
||||
*/
|
||||
async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
|
||||
const tbl = await databaseOpenTable.call(this._db, name)
|
||||
if (embeddings !== undefined) {
|
||||
return new Table(tbl, name, embeddings)
|
||||
return new LocalTable(tbl, name, this._options, embeddings)
|
||||
} else {
|
||||
return new Table(tbl, name)
|
||||
return new LocalTable(tbl, name, this._options)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,23 +242,41 @@ export class Connection {
|
||||
*
|
||||
* @param name The name of the table.
|
||||
* @param data Non-empty Array of Records to be inserted into the Table
|
||||
* @param mode The write mode to use when creating the table.
|
||||
*/
|
||||
async createTable (name: string, data: Array<Record<string, unknown>>, mode?: WriteMode): Promise<Table>
|
||||
async createTable (name: string, data: Array<Record<string, unknown>>, mode: WriteMode): Promise<Table>
|
||||
|
||||
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
|
||||
/**
|
||||
* Creates a new Table and initialize it with new data.
|
||||
*
|
||||
* @param name The name of the table.
|
||||
* @param data Non-empty Array of Records to be inserted into the Table
|
||||
* @param mode The write mode to use when creating the table.
|
||||
* @param embeddings An embedding function to use on this Table
|
||||
*/
|
||||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
|
||||
const tbl = await tableCreate.call(this._db, name, await fromRecordsToBuffer(data, embeddings))
|
||||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
|
||||
if (mode === undefined) {
|
||||
mode = WriteMode.Create
|
||||
}
|
||||
|
||||
const createArgs = [this._db, name, await fromRecordsToBuffer(data, embeddings), mode.toLowerCase()]
|
||||
if (this._options.awsCredentials !== undefined) {
|
||||
createArgs.push(this._options.awsCredentials.accessKeyId)
|
||||
createArgs.push(this._options.awsCredentials.secretKey)
|
||||
if (this._options.awsCredentials.sessionToken !== undefined) {
|
||||
createArgs.push(this._options.awsCredentials.sessionToken)
|
||||
}
|
||||
}
|
||||
|
||||
const tbl = await tableCreate.call(...createArgs)
|
||||
|
||||
if (embeddings !== undefined) {
|
||||
return new Table(tbl, name, embeddings)
|
||||
return new LocalTable(tbl, name, this._options, embeddings)
|
||||
} else {
|
||||
return new Table(tbl, name)
|
||||
return new LocalTable(tbl, name, this._options)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,24 +285,35 @@ export class Connection {
|
||||
await tableCreate.call(this._db, name, Buffer.from(await writer.toUint8Array()))
|
||||
return await this.openTable(name)
|
||||
}
|
||||
|
||||
/**
|
||||
* Drop an existing table.
|
||||
* @param name The name of the table to drop.
|
||||
*/
|
||||
async dropTable (name: string): Promise<void> {
|
||||
await databaseDropTable.call(this._db, name)
|
||||
}
|
||||
}
|
||||
|
||||
export class Table<T = number[]> {
|
||||
export class LocalTable<T = number[]> implements Table<T> {
|
||||
private readonly _tbl: any
|
||||
private readonly _name: string
|
||||
private readonly _embeddings?: EmbeddingFunction<T>
|
||||
private readonly _options: ConnectionOptions
|
||||
|
||||
constructor (tbl: any, name: string)
|
||||
constructor (tbl: any, name: string, options: ConnectionOptions)
|
||||
/**
|
||||
* @param tbl
|
||||
* @param name
|
||||
* @param options
|
||||
* @param embeddings An embedding function to use when interacting with this table
|
||||
*/
|
||||
constructor (tbl: any, name: string, embeddings: EmbeddingFunction<T>)
|
||||
constructor (tbl: any, name: string, embeddings?: EmbeddingFunction<T>) {
|
||||
constructor (tbl: any, name: string, options: ConnectionOptions, embeddings: EmbeddingFunction<T>)
|
||||
constructor (tbl: any, name: string, options: ConnectionOptions, embeddings?: EmbeddingFunction<T>) {
|
||||
this._tbl = tbl
|
||||
this._name = name
|
||||
this._embeddings = embeddings
|
||||
this._options = options
|
||||
}
|
||||
|
||||
get name (): string {
|
||||
@@ -140,7 +325,7 @@ export class Table<T = number[]> {
|
||||
* @param query The query search term
|
||||
*/
|
||||
search (query: T): Query<T> {
|
||||
return new Query(this._tbl, query, this._embeddings)
|
||||
return new Query(query, this._tbl, this._embeddings)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -150,7 +335,15 @@ export class Table<T = number[]> {
|
||||
* @return The number of rows added to the table
|
||||
*/
|
||||
async add (data: Array<Record<string, unknown>>): Promise<number> {
|
||||
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Append.toString())
|
||||
const callArgs = [this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Append.toString()]
|
||||
if (this._options.awsCredentials !== undefined) {
|
||||
callArgs.push(this._options.awsCredentials.accessKeyId)
|
||||
callArgs.push(this._options.awsCredentials.secretKey)
|
||||
if (this._options.awsCredentials.sessionToken !== undefined) {
|
||||
callArgs.push(this._options.awsCredentials.sessionToken)
|
||||
}
|
||||
}
|
||||
return tableAdd.call(...callArgs)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -160,6 +353,14 @@ export class Table<T = number[]> {
|
||||
* @return The number of rows added to the table
|
||||
*/
|
||||
async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
|
||||
const callArgs = [this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Overwrite.toString()]
|
||||
if (this._options.awsCredentials !== undefined) {
|
||||
callArgs.push(this._options.awsCredentials.accessKeyId)
|
||||
callArgs.push(this._options.awsCredentials.secretKey)
|
||||
if (this._options.awsCredentials.sessionToken !== undefined) {
|
||||
callArgs.push(this._options.awsCredentials.sessionToken)
|
||||
}
|
||||
}
|
||||
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Overwrite.toString())
|
||||
}
|
||||
|
||||
@@ -173,14 +374,25 @@ export class Table<T = number[]> {
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use [Table.createIndex]
|
||||
* Returns the number of rows in this table.
|
||||
*/
|
||||
async create_index (indexParams: VectorIndexParams): Promise<any> {
|
||||
return await this.createIndex(indexParams)
|
||||
async countRows (): Promise<number> {
|
||||
return tableCountRows.call(this._tbl)
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete rows from this table.
|
||||
*
|
||||
* @param filter A filter in the same format used by a sql WHERE clause.
|
||||
*/
|
||||
async delete (filter: string): Promise<void> {
|
||||
return tableDelete.call(this._tbl, filter)
|
||||
}
|
||||
}
|
||||
|
||||
interface IvfPQIndexConfig {
|
||||
/// Config to build IVF_PQ index.
|
||||
///
|
||||
export interface IvfPQIndexConfig {
|
||||
/**
|
||||
* The column to be indexed
|
||||
*/
|
||||
@@ -225,120 +437,25 @@ interface IvfPQIndexConfig {
|
||||
*/
|
||||
max_opq_iters?: number
|
||||
|
||||
/**
|
||||
* Replace an existing index with the same name if it exists.
|
||||
*/
|
||||
replace?: boolean
|
||||
|
||||
type: 'ivf_pq'
|
||||
}
|
||||
|
||||
export type VectorIndexParams = IvfPQIndexConfig
|
||||
|
||||
/**
|
||||
* A builder for nearest neighbor queries for LanceDB.
|
||||
* Write mode for writing a table.
|
||||
*/
|
||||
export class Query<T = number[]> {
|
||||
private readonly _tbl: any
|
||||
private readonly _query: T
|
||||
private _queryVector?: number[]
|
||||
private _limit: number
|
||||
private _refineFactor?: number
|
||||
private _nprobes: number
|
||||
private _select?: string[]
|
||||
private _filter?: string
|
||||
private _metricType?: MetricType
|
||||
private readonly _embeddings?: EmbeddingFunction<T>
|
||||
|
||||
constructor (tbl: any, query: T, embeddings?: EmbeddingFunction<T>) {
|
||||
this._tbl = tbl
|
||||
this._query = query
|
||||
this._limit = 10
|
||||
this._nprobes = 20
|
||||
this._refineFactor = undefined
|
||||
this._select = undefined
|
||||
this._filter = undefined
|
||||
this._metricType = undefined
|
||||
this._embeddings = embeddings
|
||||
}
|
||||
|
||||
/***
|
||||
* Sets the number of results that will be returned
|
||||
* @param value number of results
|
||||
*/
|
||||
limit (value: number): Query<T> {
|
||||
this._limit = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Refine the results by reading extra elements and re-ranking them in memory.
|
||||
* @param value refine factor to use in this query.
|
||||
*/
|
||||
refineFactor (value: number): Query<T> {
|
||||
this._refineFactor = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of probes used. A higher number makes search more accurate but also slower.
|
||||
* @param value The number of probes used.
|
||||
*/
|
||||
nprobes (value: number): Query<T> {
|
||||
this._nprobes = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* A filter statement to be applied to this query.
|
||||
* @param value A filter in the same format used by a sql WHERE clause.
|
||||
*/
|
||||
filter (value: string): Query<T> {
|
||||
this._filter = value
|
||||
return this
|
||||
}
|
||||
|
||||
/** Return only the specified columns.
|
||||
*
|
||||
* @param value Only select the specified columns. If not specified, all columns will be returned.
|
||||
*/
|
||||
select (value: string[]): Query<T> {
|
||||
this._select = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* The MetricType used for this Query.
|
||||
* @param value The metric to the. @see MetricType for the different options
|
||||
*/
|
||||
metricType (value: MetricType): Query<T> {
|
||||
this._metricType = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the query and return the results as an Array of Objects
|
||||
*/
|
||||
async execute<T = Record<string, unknown>> (): Promise<T[]> {
|
||||
if (this._embeddings !== undefined) {
|
||||
this._queryVector = (await this._embeddings.embed([this._query]))[0]
|
||||
} else {
|
||||
this._queryVector = this._query as number[]
|
||||
}
|
||||
|
||||
const buffer = await tableSearch.call(this._tbl, this)
|
||||
const data = tableFromIPC(buffer)
|
||||
return data.toArray().map((entry: Record<string, unknown>) => {
|
||||
const newObject: Record<string, unknown> = {}
|
||||
Object.keys(entry).forEach((key: string) => {
|
||||
if (entry[key] instanceof Vector) {
|
||||
newObject[key] = (entry[key] as Vector).toArray()
|
||||
} else {
|
||||
newObject[key] = entry[key]
|
||||
}
|
||||
})
|
||||
return newObject as unknown as T
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
export enum WriteMode {
|
||||
/** Create a new {@link Table}. */
|
||||
Create = 'create',
|
||||
/** Overwrite the existing {@link Table} if presented. */
|
||||
Overwrite = 'overwrite',
|
||||
/** Append new data to the table. */
|
||||
Append = 'append'
|
||||
}
|
||||
|
||||
@@ -354,5 +471,10 @@ export enum MetricType {
|
||||
/**
|
||||
* Cosine distance
|
||||
*/
|
||||
Cosine = 'cosine'
|
||||
Cosine = 'cosine',
|
||||
|
||||
/**
|
||||
* Dot product
|
||||
*/
|
||||
Dot = 'dot'
|
||||
}
|
||||
|
||||
130
node/src/query.ts
Normal file
130
node/src/query.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
// Copyright 2023 LanceDB Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { Vector, tableFromIPC } from 'apache-arrow'
|
||||
import { type EmbeddingFunction } from './embedding/embedding_function'
|
||||
import { type MetricType } from '.'
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
const { tableSearch } = require('../native.js')
|
||||
|
||||
/**
|
||||
* A builder for nearest neighbor queries for LanceDB.
|
||||
*/
|
||||
export class Query<T = number[]> {
|
||||
private readonly _query: T
|
||||
private readonly _tbl?: any
|
||||
private _queryVector?: number[]
|
||||
private _limit: number
|
||||
private _refineFactor?: number
|
||||
private _nprobes: number
|
||||
private _select?: string[]
|
||||
private _filter?: string
|
||||
private _metricType?: MetricType
|
||||
protected readonly _embeddings?: EmbeddingFunction<T>
|
||||
|
||||
constructor (query: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
|
||||
this._tbl = tbl
|
||||
this._query = query
|
||||
this._limit = 10
|
||||
this._nprobes = 20
|
||||
this._refineFactor = undefined
|
||||
this._select = undefined
|
||||
this._filter = undefined
|
||||
this._metricType = undefined
|
||||
this._embeddings = embeddings
|
||||
}
|
||||
|
||||
/***
|
||||
* Sets the number of results that will be returned
|
||||
* @param value number of results
|
||||
*/
|
||||
limit (value: number): Query<T> {
|
||||
this._limit = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Refine the results by reading extra elements and re-ranking them in memory.
|
||||
* @param value refine factor to use in this query.
|
||||
*/
|
||||
refineFactor (value: number): Query<T> {
|
||||
this._refineFactor = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of probes used. A higher number makes search more accurate but also slower.
|
||||
* @param value The number of probes used.
|
||||
*/
|
||||
nprobes (value: number): Query<T> {
|
||||
this._nprobes = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* A filter statement to be applied to this query.
|
||||
* @param value A filter in the same format used by a sql WHERE clause.
|
||||
*/
|
||||
filter (value: string): Query<T> {
|
||||
this._filter = value
|
||||
return this
|
||||
}
|
||||
|
||||
where = this.filter
|
||||
|
||||
/** Return only the specified columns.
|
||||
*
|
||||
* @param value Only select the specified columns. If not specified, all columns will be returned.
|
||||
*/
|
||||
select (value: string[]): Query<T> {
|
||||
this._select = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* The MetricType used for this Query.
|
||||
* @param value The metric to the. @see MetricType for the different options
|
||||
*/
|
||||
metricType (value: MetricType): Query<T> {
|
||||
this._metricType = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the query and return the results as an Array of Objects
|
||||
*/
|
||||
async execute<T = Record<string, unknown>> (): Promise<T[]> {
|
||||
if (this._embeddings !== undefined) {
|
||||
this._queryVector = (await this._embeddings.embed([this._query]))[0]
|
||||
} else {
|
||||
this._queryVector = this._query as number[]
|
||||
}
|
||||
|
||||
const buffer = await tableSearch.call(this._tbl, this)
|
||||
const data = tableFromIPC(buffer)
|
||||
|
||||
return data.toArray().map((entry: Record<string, unknown>) => {
|
||||
const newObject: Record<string, unknown> = {}
|
||||
Object.keys(entry).forEach((key: string) => {
|
||||
if (entry[key] instanceof Vector) {
|
||||
newObject[key] = (entry[key] as Vector).toArray()
|
||||
} else {
|
||||
newObject[key] = entry[key]
|
||||
}
|
||||
})
|
||||
return newObject as unknown as T
|
||||
})
|
||||
}
|
||||
}
|
||||
105
node/src/remote/client.ts
Normal file
105
node/src/remote/client.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright 2023 LanceDB Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import axios, { type AxiosResponse } from 'axios'
|
||||
|
||||
import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
|
||||
|
||||
export class HttpLancedbClient {
|
||||
private readonly _url: string
|
||||
|
||||
public constructor (
|
||||
url: string,
|
||||
private readonly _apiKey: string,
|
||||
private readonly _dbName?: string
|
||||
) {
|
||||
this._url = url
|
||||
}
|
||||
|
||||
get uri (): string {
|
||||
return this._url
|
||||
}
|
||||
|
||||
public async search (
|
||||
tableName: string,
|
||||
vector: number[],
|
||||
k: number,
|
||||
nprobes: number,
|
||||
refineFactor?: number,
|
||||
columns?: string[],
|
||||
filter?: string
|
||||
): Promise<ArrowTable<any>> {
|
||||
const response = await axios.post(
|
||||
`${this._url}/v1/table/${tableName}/query/`,
|
||||
{
|
||||
vector,
|
||||
k,
|
||||
nprobes,
|
||||
refineFactor,
|
||||
columns,
|
||||
filter
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'x-api-key': this._apiKey,
|
||||
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
|
||||
},
|
||||
responseType: 'arraybuffer',
|
||||
timeout: 10000
|
||||
}
|
||||
).catch((err) => {
|
||||
console.error('error: ', err)
|
||||
return err.response
|
||||
})
|
||||
if (response.status !== 200) {
|
||||
const errorData = new TextDecoder().decode(response.data)
|
||||
throw new Error(
|
||||
`Server Error, status: ${response.status as number}, ` +
|
||||
`message: ${response.statusText as string}: ${errorData}`
|
||||
)
|
||||
}
|
||||
|
||||
const table = tableFromIPC(response.data)
|
||||
return table
|
||||
}
|
||||
|
||||
/**
|
||||
* Sent GET request.
|
||||
*/
|
||||
public async get (path: string, params?: Record<string, string | number>): Promise<AxiosResponse> {
|
||||
const response = await axios.get(
|
||||
`${this._url}${path}`,
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'x-api-key': this._apiKey
|
||||
},
|
||||
params,
|
||||
timeout: 10000
|
||||
}
|
||||
).catch((err) => {
|
||||
console.error('error: ', err)
|
||||
return err.response
|
||||
})
|
||||
if (response.status !== 200) {
|
||||
const errorData = new TextDecoder().decode(response.data)
|
||||
throw new Error(
|
||||
`Server Error, status: ${response.status as number}, ` +
|
||||
`message: ${response.statusText as string}: ${errorData}`
|
||||
)
|
||||
}
|
||||
return response
|
||||
}
|
||||
}
|
||||
168
node/src/remote/index.ts
Normal file
168
node/src/remote/index.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
// Copyright 2023 LanceDB Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import {
|
||||
type EmbeddingFunction, type Table, type VectorIndexParams, type Connection,
|
||||
type ConnectionOptions
|
||||
} from '../index'
|
||||
import { Query } from '../query'
|
||||
|
||||
import { type Table as ArrowTable, Vector } from 'apache-arrow'
|
||||
import { HttpLancedbClient } from './client'
|
||||
|
||||
/**
|
||||
* Remote connection.
|
||||
*/
|
||||
export class RemoteConnection implements Connection {
|
||||
private readonly _client: HttpLancedbClient
|
||||
private readonly _dbName: string
|
||||
|
||||
constructor (opts: ConnectionOptions) {
|
||||
if (!opts.uri.startsWith('db://')) {
|
||||
throw new Error(`Invalid remote DB URI: ${opts.uri}`)
|
||||
}
|
||||
if (opts.apiKey === undefined || opts.region === undefined) {
|
||||
throw new Error('API key and region are not supported for remote connections')
|
||||
}
|
||||
|
||||
this._dbName = opts.uri.slice('db://'.length)
|
||||
let server: string
|
||||
if (opts.hostOverride === undefined) {
|
||||
server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
|
||||
} else {
|
||||
server = opts.hostOverride
|
||||
}
|
||||
this._client = new HttpLancedbClient(server, opts.apiKey, opts.hostOverride === undefined ? undefined : this._dbName)
|
||||
}
|
||||
|
||||
get uri (): string {
|
||||
// add the lancedb+ prefix back
|
||||
return 'db://' + this._client.uri
|
||||
}
|
||||
|
||||
async tableNames (): Promise<string[]> {
|
||||
const response = await this._client.get('/v1/table/')
|
||||
return response.data.tables
|
||||
}
|
||||
|
||||
async openTable (name: string): Promise<Table>
|
||||
async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
|
||||
if (embeddings !== undefined) {
|
||||
return new RemoteTable(this._client, name, embeddings)
|
||||
} else {
|
||||
return new RemoteTable(this._client, name)
|
||||
}
|
||||
}
|
||||
|
||||
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
|
||||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
|
||||
throw new Error('Not implemented')
|
||||
}
|
||||
|
||||
async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
|
||||
throw new Error('Not implemented')
|
||||
}
|
||||
|
||||
async dropTable (name: string): Promise<void> {
|
||||
throw new Error('Not implemented')
|
||||
}
|
||||
}
|
||||
|
||||
export class RemoteQuery<T = number[]> extends Query<T> {
|
||||
constructor (query: T, private readonly _client: HttpLancedbClient,
|
||||
private readonly _name: string, embeddings?: EmbeddingFunction<T>) {
|
||||
super(query, undefined, embeddings)
|
||||
}
|
||||
|
||||
// TODO: refactor this to a base class + queryImpl pattern
|
||||
async execute<T = Record<string, unknown>>(): Promise<T[]> {
|
||||
const embeddings = this._embeddings
|
||||
const query = (this as any)._query
|
||||
let queryVector: number[]
|
||||
|
||||
if (embeddings !== undefined) {
|
||||
queryVector = (await embeddings.embed([query]))[0]
|
||||
} else {
|
||||
queryVector = query as number[]
|
||||
}
|
||||
|
||||
const data = await this._client.search(
|
||||
this._name,
|
||||
queryVector,
|
||||
(this as any)._limit,
|
||||
(this as any)._nprobes,
|
||||
(this as any)._refineFactor,
|
||||
(this as any)._select,
|
||||
(this as any)._filter
|
||||
)
|
||||
|
||||
return data.toArray().map((entry: Record<string, unknown>) => {
|
||||
const newObject: Record<string, unknown> = {}
|
||||
Object.keys(entry).forEach((key: string) => {
|
||||
if (entry[key] instanceof Vector) {
|
||||
newObject[key] = (entry[key] as Vector).toArray()
|
||||
} else {
|
||||
newObject[key] = entry[key]
|
||||
}
|
||||
})
|
||||
return newObject as unknown as T
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// we are using extend until we have next next version release
|
||||
// Table and Connection has both been refactored to interfaces
|
||||
export class RemoteTable<T = number[]> implements Table<T> {
|
||||
private readonly _client: HttpLancedbClient
|
||||
private readonly _embeddings?: EmbeddingFunction<T>
|
||||
private readonly _name: string
|
||||
|
||||
constructor (client: HttpLancedbClient, name: string)
|
||||
constructor (client: HttpLancedbClient, name: string, embeddings: EmbeddingFunction<T>)
|
||||
constructor (client: HttpLancedbClient, name: string, embeddings?: EmbeddingFunction<T>) {
|
||||
this._client = client
|
||||
this._name = name
|
||||
this._embeddings = embeddings
|
||||
}
|
||||
|
||||
get name (): string {
|
||||
return this._name
|
||||
}
|
||||
|
||||
search (query: T): Query<T> {
|
||||
return new RemoteQuery(query, this._client, this._name)//, this._embeddings_new)
|
||||
}
|
||||
|
||||
async add (data: Array<Record<string, unknown>>): Promise<number> {
|
||||
throw new Error('Not implemented')
|
||||
}
|
||||
|
||||
async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
|
||||
throw new Error('Not implemented')
|
||||
}
|
||||
|
||||
async createIndex (indexParams: VectorIndexParams): Promise<any> {
|
||||
throw new Error('Not implemented')
|
||||
}
|
||||
|
||||
async countRows (): Promise<number> {
|
||||
throw new Error('Not implemented')
|
||||
}
|
||||
|
||||
async delete (filter: string): Promise<void> {
|
||||
throw new Error('Not implemented')
|
||||
}
|
||||
}
|
||||
@@ -18,26 +18,48 @@ import { describe } from 'mocha'
|
||||
import { assert } from 'chai'
|
||||
|
||||
import * as lancedb from '../index'
|
||||
import { type ConnectionOptions } from '../index'
|
||||
|
||||
describe('LanceDB S3 client', function () {
|
||||
if (process.env.TEST_S3_BASE_URL != null) {
|
||||
const baseUri = process.env.TEST_S3_BASE_URL
|
||||
it('should have a valid url', async function () {
|
||||
const uri = `${baseUri}/valid_url`
|
||||
const table = await createTestDB(uri, 2, 20)
|
||||
const con = await lancedb.connect(uri)
|
||||
assert.equal(con.uri, uri)
|
||||
const opts = { uri: `${baseUri}/valid_url` }
|
||||
const table = await createTestDB(opts, 2, 20)
|
||||
const con = await lancedb.connect(opts)
|
||||
assert.equal(con.uri, opts.uri)
|
||||
|
||||
const results = await table.search([0.1, 0.3]).limit(5).execute()
|
||||
assert.equal(results.length, 5)
|
||||
})
|
||||
}).timeout(10_000)
|
||||
} else {
|
||||
describe.skip('Skip S3 test', function () {})
|
||||
}
|
||||
|
||||
if (process.env.TEST_S3_BASE_URL != null && process.env.TEST_AWS_ACCESS_KEY_ID != null && process.env.TEST_AWS_SECRET_ACCESS_KEY != null) {
|
||||
const baseUri = process.env.TEST_S3_BASE_URL
|
||||
it('use custom credentials', async function () {
|
||||
const opts: ConnectionOptions = {
|
||||
uri: `${baseUri}/custom_credentials`,
|
||||
awsCredentials: {
|
||||
accessKeyId: process.env.TEST_AWS_ACCESS_KEY_ID as string,
|
||||
secretKey: process.env.TEST_AWS_SECRET_ACCESS_KEY as string
|
||||
}
|
||||
}
|
||||
const table = await createTestDB(opts, 2, 20)
|
||||
const con = await lancedb.connect(opts)
|
||||
assert.equal(con.uri, opts.uri)
|
||||
|
||||
const results = await table.search([0.1, 0.3]).limit(5).execute()
|
||||
assert.equal(results.length, 5)
|
||||
}).timeout(10_000)
|
||||
} else {
|
||||
describe.skip('Skip S3 test', function () {})
|
||||
}
|
||||
})
|
||||
|
||||
async function createTestDB (uri: string, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
|
||||
const con = await lancedb.connect(uri)
|
||||
async function createTestDB (opts: ConnectionOptions, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
|
||||
const con = await lancedb.connect(opts)
|
||||
|
||||
const data = []
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Lance Developers.
|
||||
// Copyright 2023 LanceDB Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -13,11 +13,17 @@
|
||||
// limitations under the License.
|
||||
|
||||
import { describe } from 'mocha'
|
||||
import { assert } from 'chai'
|
||||
import { track } from 'temp'
|
||||
import * as chai from 'chai'
|
||||
import * as chaiAsPromised from 'chai-as-promised'
|
||||
|
||||
import * as lancedb from '../index'
|
||||
import { type EmbeddingFunction, MetricType, Query } from '../index'
|
||||
import { type AwsCredentials, type EmbeddingFunction, MetricType, WriteMode } from '../index'
|
||||
import { Query } from '../query'
|
||||
|
||||
const expect = chai.expect
|
||||
const assert = chai.assert
|
||||
chai.use(chaiAsPromised)
|
||||
|
||||
describe('LanceDB client', function () {
|
||||
describe('when creating a connection to lancedb', function () {
|
||||
@@ -27,6 +33,22 @@ describe('LanceDB client', function () {
|
||||
assert.equal(con.uri, uri)
|
||||
})
|
||||
|
||||
it('should accept an options object', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect({ uri })
|
||||
assert.equal(con.uri, uri)
|
||||
})
|
||||
|
||||
it('should accept custom aws credentials', async function () {
|
||||
const uri = await createTestDB()
|
||||
const awsCredentials: AwsCredentials = {
|
||||
accessKeyId: '',
|
||||
secretKey: ''
|
||||
}
|
||||
const con = await lancedb.connect({ uri, awsCredentials })
|
||||
assert.equal(con.uri, uri)
|
||||
})
|
||||
|
||||
it('should return the existing table names', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
@@ -64,13 +86,20 @@ describe('LanceDB client', function () {
|
||||
assert.equal(results[0].id, 1)
|
||||
})
|
||||
|
||||
it('uses a filter', async function () {
|
||||
it('uses a filter / where clause', async function () {
|
||||
// eslint-disable-next-line @typescript-eslint/explicit-function-return-type
|
||||
const assertResults = (results: Array<Record<string, unknown>>) => {
|
||||
assert.equal(results.length, 1)
|
||||
assert.equal(results[0].id, 2)
|
||||
}
|
||||
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
const results = await table.search([0.1, 0.1]).filter('id == 2').execute()
|
||||
assert.equal(results.length, 1)
|
||||
assert.equal(results[0].id, 2)
|
||||
let results = await table.search([0.1, 0.1]).filter('id == 2').execute()
|
||||
assertResults(results)
|
||||
results = await table.search([0.1, 0.1]).where('id == 2').execute()
|
||||
assertResults(results)
|
||||
})
|
||||
|
||||
it('select only a subset of columns', async function () {
|
||||
@@ -103,9 +132,32 @@ describe('LanceDB client', function () {
|
||||
const tableName = `vectors_${Math.floor(Math.random() * 100)}`
|
||||
const table = await con.createTable(tableName, data)
|
||||
assert.equal(table.name, tableName)
|
||||
assert.equal(await table.countRows(), 2)
|
||||
})
|
||||
|
||||
const results = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(results.length, 2)
|
||||
it('use overwrite flag to overwrite existing table', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
|
||||
const data = [
|
||||
{ id: 1, vector: [0.1, 0.2], price: 10 },
|
||||
{ id: 2, vector: [1.1, 1.2], price: 50 }
|
||||
]
|
||||
|
||||
const tableName = 'overwrite'
|
||||
await con.createTable(tableName, data, WriteMode.Create)
|
||||
|
||||
const newData = [
|
||||
{ id: 1, vector: [0.1, 0.2], price: 10 },
|
||||
{ id: 2, vector: [1.1, 1.2], price: 50 },
|
||||
{ id: 3, vector: [1.1, 1.2], price: 50 }
|
||||
]
|
||||
|
||||
await expect(con.createTable(tableName, newData)).to.be.rejectedWith(Error, 'already exists')
|
||||
|
||||
const table = await con.createTable(tableName, newData, WriteMode.Overwrite)
|
||||
assert.equal(table.name, tableName)
|
||||
assert.equal(await table.countRows(), 3)
|
||||
})
|
||||
|
||||
it('appends records to an existing table ', async function () {
|
||||
@@ -118,16 +170,14 @@ describe('LanceDB client', function () {
|
||||
]
|
||||
|
||||
const table = await con.createTable('vectors', data)
|
||||
const results = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(results.length, 2)
|
||||
assert.equal(await table.countRows(), 2)
|
||||
|
||||
const dataAdd = [
|
||||
{ id: 3, vector: [2.1, 2.2], price: 10, name: 'c' },
|
||||
{ id: 4, vector: [3.1, 3.2], price: 50, name: 'd' }
|
||||
]
|
||||
await table.add(dataAdd)
|
||||
const resultsAdd = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(resultsAdd.length, 4)
|
||||
assert.equal(await table.countRows(), 4)
|
||||
})
|
||||
|
||||
it('overwrite all records in a table', async function () {
|
||||
@@ -135,16 +185,25 @@ describe('LanceDB client', function () {
|
||||
const con = await lancedb.connect(uri)
|
||||
|
||||
const table = await con.openTable('vectors')
|
||||
const results = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(results.length, 2)
|
||||
assert.equal(await table.countRows(), 2)
|
||||
|
||||
const dataOver = [
|
||||
{ vector: [2.1, 2.2], price: 10, name: 'foo' },
|
||||
{ vector: [3.1, 3.2], price: 50, name: 'bar' }
|
||||
]
|
||||
await table.overwrite(dataOver)
|
||||
const resultsAdd = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(resultsAdd.length, 2)
|
||||
assert.equal(await table.countRows(), 2)
|
||||
})
|
||||
|
||||
it('can delete records from a table', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
|
||||
const table = await con.openTable('vectors')
|
||||
assert.equal(await table.countRows(), 2)
|
||||
|
||||
await table.delete('price = 10')
|
||||
assert.equal(await table.countRows(), 1)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -153,8 +212,25 @@ describe('LanceDB client', function () {
|
||||
const uri = await createTestDB(32, 300)
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 })
|
||||
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
||||
}).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow
|
||||
|
||||
it('replace an existing index', async function () {
|
||||
const uri = await createTestDB(16, 300)
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
|
||||
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
||||
|
||||
// Replace should fail if the index already exists
|
||||
await expect(table.createIndex({
|
||||
type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2, replace: false
|
||||
})
|
||||
).to.be.rejectedWith('LanceError(Index)')
|
||||
|
||||
// Default replace = true
|
||||
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
||||
}).timeout(50_000)
|
||||
})
|
||||
|
||||
describe('when using a custom embedding function', function () {
|
||||
@@ -184,7 +260,7 @@ describe('LanceDB client', function () {
|
||||
{ price: 10, name: 'foo' },
|
||||
{ price: 50, name: 'bar' }
|
||||
]
|
||||
const table = await con.createTable('vectors', data, embeddings)
|
||||
const table = await con.createTable('vectors', data, WriteMode.Create, embeddings)
|
||||
const results = await table.search('foo').execute()
|
||||
assert.equal(results.length, 2)
|
||||
})
|
||||
@@ -193,7 +269,7 @@ describe('LanceDB client', function () {
|
||||
|
||||
describe('Query object', function () {
|
||||
it('sets custom parameters', async function () {
|
||||
const query = new Query(undefined, [0.1, 0.3])
|
||||
const query = new Query([0.1, 0.3])
|
||||
.limit(1)
|
||||
.metricType(MetricType.Cosine)
|
||||
.refineFactor(100)
|
||||
@@ -223,3 +299,22 @@ async function createTestDB (numDimensions: number = 2, numRows: number = 2): Pr
|
||||
await con.createTable('vectors', data)
|
||||
return dir
|
||||
}
|
||||
|
||||
describe('Drop table', function () {
|
||||
it('drop a table', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
|
||||
const data = [
|
||||
{ price: 10, name: 'foo', vector: [1, 2, 3] },
|
||||
{ price: 50, name: 'bar', vector: [4, 5, 6] }
|
||||
]
|
||||
await con.createTable('t1', data)
|
||||
await con.createTable('t2', data)
|
||||
|
||||
assert.deepEqual(await con.tableNames(), ['t1', 't2'])
|
||||
|
||||
await con.dropTable('t1')
|
||||
assert.deepEqual(await con.tableNames(), ['t2'])
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.1.8
|
||||
current_version = 0.1.14
|
||||
commit = True
|
||||
message = [python] Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
|
||||
85
python/README.md
Normal file
85
python/README.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# LanceDB
|
||||
|
||||
A Python library for [LanceDB](https://github.com/lancedb/lancedb).
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install lancedb
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Example
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>')
|
||||
table = db.open_table('my_table')
|
||||
results = table.search([0.1, 0.3]).limit(20).to_df()
|
||||
print(results)
|
||||
```
|
||||
|
||||
|
||||
## Development
|
||||
|
||||
Create a virtual environment and activate it:
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
. ./venv/bin/activate
|
||||
```
|
||||
|
||||
Install the necessary packages:
|
||||
|
||||
```bash
|
||||
python -m pip install .
|
||||
```
|
||||
|
||||
To run the unit tests:
|
||||
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
|
||||
To run linter and automatically fix all errors:
|
||||
|
||||
```bash
|
||||
black .
|
||||
isort .
|
||||
```
|
||||
|
||||
If any packages are missing, install them with:
|
||||
|
||||
```bash
|
||||
pip install <PACKAGE_NAME>
|
||||
```
|
||||
|
||||
|
||||
___
|
||||
For **Windows** users, there may be errors when installing packages, so these commands may be helpful:
|
||||
|
||||
Activate the virtual environment:
|
||||
```bash
|
||||
. .\venv\Scripts\activate
|
||||
```
|
||||
|
||||
You may need to run the installs separately:
|
||||
```bash
|
||||
pip install -e .[tests]
|
||||
pip install -e .[dev]
|
||||
```
|
||||
|
||||
|
||||
`tantivy` requires `rust` to be installed, so install it with `conda`, as it doesn't support windows installation:
|
||||
```bash
|
||||
pip install wheel
|
||||
pip install cargo
|
||||
conda install rust
|
||||
pip install tantivy
|
||||
```
|
||||
|
||||
To run the unit tests:
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
@@ -11,16 +11,25 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from .db import URI, LanceDBConnection
|
||||
from typing import Optional
|
||||
|
||||
from .db import URI, DBConnection, LanceDBConnection
|
||||
from .remote.db import RemoteDBConnection
|
||||
from .schema import vector
|
||||
|
||||
|
||||
def connect(uri: URI) -> LanceDBConnection:
|
||||
"""Connect to a LanceDB instance at the given URI
|
||||
def connect(
|
||||
uri: URI, *, api_key: Optional[str] = None, region: str = "us-west-2"
|
||||
) -> DBConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri: str or Path
|
||||
The uri of the database.
|
||||
api_token: str, optional
|
||||
If presented, connect to LanceDB cloud.
|
||||
Otherwise, connect to a database on file system or cloud storage.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -34,9 +43,17 @@ def connect(uri: URI) -> LanceDBConnection:
|
||||
|
||||
>>> db = lancedb.connect("s3://my-bucket/lancedb")
|
||||
|
||||
Connect to LancdDB cloud:
|
||||
|
||||
>>> db = lancedb.connect("db://my_database", api_key="ldb_...")
|
||||
|
||||
Returns
|
||||
-------
|
||||
conn : LanceDBConnection
|
||||
conn : DBConnection
|
||||
A connection to a LanceDB database.
|
||||
"""
|
||||
if isinstance(uri, str) and uri.startswith("db://"):
|
||||
if api_key is None:
|
||||
raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
|
||||
return RemoteDBConnection(uri, api_key, region)
|
||||
return LanceDBConnection(uri)
|
||||
|
||||
@@ -23,3 +23,13 @@ URI = Union[str, Path]
|
||||
# TODO support generator
|
||||
DATA = Union[List[dict], dict, pd.DataFrame]
|
||||
VECTOR_COLUMN_NAME = "vector"
|
||||
|
||||
|
||||
class Credential(str):
|
||||
"""Credential field"""
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "********"
|
||||
|
||||
def __str__(self) -> str:
|
||||
return "********"
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import builtins
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
# import lancedb so we don't have to in every example
|
||||
import lancedb
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
|
||||
@@ -13,7 +13,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
from .exceptions import MissingValueError, MissingColumnError
|
||||
|
||||
from .exceptions import MissingColumnError, MissingValueError
|
||||
|
||||
|
||||
def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
|
||||
@@ -52,14 +53,16 @@ def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
|
||||
6 the lazy dog 1
|
||||
7 lazy dog I 1
|
||||
8 dog I love 1
|
||||
>>> contextualize(data).window(7).stride(1).text_col('token').to_df()
|
||||
9 I love sandwiches 2
|
||||
10 love sandwiches 2
|
||||
>>> contextualize(data).window(7).stride(1).min_window_size(7).text_col('token').to_df()
|
||||
token document_id
|
||||
0 The quick brown fox jumped over the 1
|
||||
1 quick brown fox jumped over the lazy 1
|
||||
2 brown fox jumped over the lazy dog 1
|
||||
3 fox jumped over the lazy dog I 1
|
||||
4 jumped over the lazy dog I love 1
|
||||
|
||||
5 over the lazy dog I love sandwiches 1
|
||||
|
||||
``stride`` determines how many rows to skip between each window start. This can
|
||||
be used to reduce the total number of windows generated.
|
||||
@@ -70,6 +73,8 @@ def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
|
||||
2 brown fox jumped over 1
|
||||
4 jumped over the lazy 1
|
||||
6 the lazy dog I 1
|
||||
8 dog I love sandwiches 1
|
||||
10 love sandwiches 2
|
||||
|
||||
``groupby`` determines how to group the rows. For example, we would like to have
|
||||
context windows that don't cross document boundaries. In this case, we can
|
||||
@@ -80,6 +85,25 @@ def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
|
||||
0 The quick brown fox 1
|
||||
2 brown fox jumped over 1
|
||||
4 jumped over the lazy 1
|
||||
6 the lazy dog 1
|
||||
9 I love sandwiches 2
|
||||
|
||||
``min_window_size`` determines the minimum size of the context windows that are generated
|
||||
This can be used to trim the last few context windows which have size less than
|
||||
``min_window_size``. By default context windows of size 1 are skipped.
|
||||
|
||||
>>> contextualize(data).window(6).stride(3).text_col('token').groupby('document_id').to_df()
|
||||
token document_id
|
||||
0 The quick brown fox jumped over 1
|
||||
3 fox jumped over the lazy dog 1
|
||||
6 the lazy dog 1
|
||||
9 I love sandwiches 2
|
||||
|
||||
>>> contextualize(data).window(6).stride(3).min_window_size(4).text_col('token').groupby('document_id').to_df()
|
||||
token document_id
|
||||
0 The quick brown fox jumped over 1
|
||||
3 fox jumped over the lazy dog 1
|
||||
|
||||
"""
|
||||
return Contextualizer(raw_df)
|
||||
|
||||
@@ -92,6 +116,7 @@ class Contextualizer:
|
||||
self._groupby = None
|
||||
self._stride = None
|
||||
self._window = None
|
||||
self._min_window_size = 2
|
||||
self._raw_df = raw_df
|
||||
|
||||
def window(self, window: int) -> Contextualizer:
|
||||
@@ -139,6 +164,17 @@ class Contextualizer:
|
||||
self._text_col = text_col
|
||||
return self
|
||||
|
||||
def min_window_size(self, min_window_size: int) -> Contextualizer:
|
||||
"""Set the (optional) min_window_size size for the context window.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
min_window_size: int
|
||||
The min_window_size.
|
||||
"""
|
||||
self._min_window_size = min_window_size
|
||||
return self
|
||||
|
||||
def to_df(self) -> pd.DataFrame:
|
||||
"""Create the context windows and return a DataFrame."""
|
||||
|
||||
@@ -159,12 +195,19 @@ class Contextualizer:
|
||||
|
||||
def process_group(grp):
|
||||
# For each group, create the text rolling window
|
||||
# with values of size >= min_window_size
|
||||
text = grp[self._text_col].values
|
||||
contexts = grp.iloc[: -self._window : self._stride, :].copy()
|
||||
contexts[self._text_col] = [
|
||||
" ".join(text[start_i : start_i + self._window])
|
||||
for start_i in range(0, len(grp) - self._window, self._stride)
|
||||
contexts = grp.iloc[:: self._stride, :].copy()
|
||||
windows = [
|
||||
" ".join(text[start_i : min(start_i + self._window, len(grp))])
|
||||
for start_i in range(0, len(grp), self._stride)
|
||||
if start_i + self._window <= len(grp)
|
||||
or len(grp) - start_i >= self._min_window_size
|
||||
]
|
||||
# if last few rows dropped
|
||||
if len(windows) < len(contexts):
|
||||
contexts = contexts.iloc[: len(windows)]
|
||||
contexts[self._text_col] = windows
|
||||
return contexts
|
||||
|
||||
if self._groupby is None:
|
||||
|
||||
@@ -14,128 +14,69 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
import os
|
||||
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
from pyarrow import fs
|
||||
|
||||
from .common import DATA, URI
|
||||
from .table import LanceTable
|
||||
from .util import get_uri_scheme, get_uri_location
|
||||
from .table import LanceTable, Table
|
||||
from .util import fs_from_uri, get_uri_location, get_uri_scheme
|
||||
|
||||
|
||||
class LanceDBConnection:
|
||||
"""
|
||||
A connection to a LanceDB database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri: str or Path
|
||||
The root uri of the database.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
>>> db = lancedb.connect("./.lancedb")
|
||||
>>> db.create_table("my_table", data=[{"vector": [1.1, 1.2], "b": 2},
|
||||
... {"vector": [0.5, 1.3], "b": 4}])
|
||||
LanceTable(my_table)
|
||||
>>> db.create_table("another_table", data=[{"vector": [0.4, 0.4], "b": 6}])
|
||||
LanceTable(another_table)
|
||||
>>> db.table_names()
|
||||
['another_table', 'my_table']
|
||||
>>> len(db)
|
||||
2
|
||||
>>> db["my_table"]
|
||||
LanceTable(my_table)
|
||||
>>> "my_table" in db
|
||||
True
|
||||
>>> db.drop_table("my_table")
|
||||
>>> db.drop_table("another_table")
|
||||
"""
|
||||
|
||||
def __init__(self, uri: URI):
|
||||
is_local = isinstance(uri, Path) or get_uri_scheme(uri) == "file"
|
||||
if is_local:
|
||||
if isinstance(uri, str):
|
||||
uri = Path(uri)
|
||||
uri = uri.expanduser().absolute()
|
||||
Path(uri).mkdir(parents=True, exist_ok=True)
|
||||
self._uri = str(uri)
|
||||
|
||||
@property
|
||||
def uri(self) -> str:
|
||||
return self._uri
|
||||
class DBConnection(ABC):
|
||||
"""An active LanceDB connection interface."""
|
||||
|
||||
@abstractmethod
|
||||
def table_names(self) -> list[str]:
|
||||
"""Get the names of all tables in the database.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list of str
|
||||
A list of table names.
|
||||
"""
|
||||
try:
|
||||
filesystem, path = fs.FileSystem.from_uri(self.uri)
|
||||
except pa.ArrowInvalid:
|
||||
raise NotImplementedError("Unsupported scheme: " + self.uri)
|
||||
|
||||
try:
|
||||
paths = filesystem.get_file_info(
|
||||
fs.FileSelector(get_uri_location(self.uri))
|
||||
)
|
||||
except FileNotFoundError:
|
||||
# It is ok if the file does not exist since it will be created
|
||||
paths = []
|
||||
tables = [
|
||||
os.path.splitext(file_info.base_name)[0]
|
||||
for file_info in paths
|
||||
if file_info.extension == "lance"
|
||||
]
|
||||
return tables
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.table_names())
|
||||
|
||||
def __contains__(self, name: str) -> bool:
|
||||
return name in self.table_names()
|
||||
|
||||
def __getitem__(self, name: str) -> LanceTable:
|
||||
return self.open_table(name)
|
||||
"""List all table names in the database."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def create_table(
|
||||
self,
|
||||
name: str,
|
||||
data: DATA = None,
|
||||
schema: pa.Schema = None,
|
||||
data: Optional[
|
||||
Union[List[dict], dict, pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]],
|
||||
] = None,
|
||||
schema: Optional[pa.Schema] = None,
|
||||
mode: str = "create",
|
||||
) -> LanceTable:
|
||||
"""Create a table in the database.
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> Table:
|
||||
"""Create a [Table][lancedb.table.Table] in the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
data: list, tuple, dict, pd.DataFrame; optional
|
||||
The data to insert into the table.
|
||||
The data to initialize the table. User must provide at least one of `data` or `schema`.
|
||||
schema: pyarrow.Schema; optional
|
||||
The schema of the table.
|
||||
mode: str; default "create"
|
||||
The mode to use when creating the table.
|
||||
The mode to use when creating the table. Can be either "create" or "overwrite".
|
||||
By default, if the table already exists, an exception is raised.
|
||||
If you want to overwrite the table, use mode="overwrite".
|
||||
|
||||
Note
|
||||
----
|
||||
The vector index won't be created by default.
|
||||
To create the index, call the `create_index` method on the table.
|
||||
on_bad_vectors: str, default "error"
|
||||
What to do if any of the vectors are not the same size or contains NaNs.
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceTable
|
||||
A reference to the newly created table.
|
||||
|
||||
!!! note
|
||||
|
||||
The vector index won't be created by default.
|
||||
To create the index, call the `create_index` method on the table.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
@@ -181,7 +122,7 @@ class LanceDBConnection:
|
||||
|
||||
Data is converted to Arrow before being written to disk. For maximum
|
||||
control over how data is saved, either provide the PyArrow schema to
|
||||
convert to or else provide a PyArrow table directly.
|
||||
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
||||
|
||||
>>> custom_schema = pa.schema([
|
||||
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
@@ -200,11 +141,168 @@ class LanceDBConnection:
|
||||
vector: [[[1.1,1.2],[0.2,1.8]]]
|
||||
lat: [[45.5,40.1]]
|
||||
long: [[-122.7,-74.1]]
|
||||
|
||||
|
||||
It is also possible to create an table from `[Iterable[pa.RecordBatch]]`:
|
||||
|
||||
|
||||
>>> import pyarrow as pa
|
||||
>>> def make_batches():
|
||||
... for i in range(5):
|
||||
... yield pa.RecordBatch.from_arrays(
|
||||
... [
|
||||
... pa.array([[3.1, 4.1], [5.9, 26.5]]),
|
||||
... pa.array(["foo", "bar"]),
|
||||
... pa.array([10.0, 20.0]),
|
||||
... ],
|
||||
... ["vector", "item", "price"],
|
||||
... )
|
||||
>>> schema=pa.schema([
|
||||
... pa.field("vector", pa.list_(pa.float32())),
|
||||
... pa.field("item", pa.utf8()),
|
||||
... pa.field("price", pa.float32()),
|
||||
... ])
|
||||
>>> db.create_table("table4", make_batches(), schema=schema)
|
||||
LanceTable(table4)
|
||||
|
||||
"""
|
||||
if data is not None:
|
||||
tbl = LanceTable.create(self, name, data, schema, mode=mode)
|
||||
else:
|
||||
tbl = LanceTable(self, name)
|
||||
raise NotImplementedError
|
||||
|
||||
def __getitem__(self, name: str) -> LanceTable:
|
||||
return self.open_table(name)
|
||||
|
||||
def open_table(self, name: str) -> Table:
|
||||
"""Open a Lance Table in the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
|
||||
Returns
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def drop_table(self, name: str):
|
||||
"""Drop a table from the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
A connection to a LanceDB database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri: str or Path
|
||||
The root uri of the database.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
>>> db = lancedb.connect("./.lancedb")
|
||||
>>> db.create_table("my_table", data=[{"vector": [1.1, 1.2], "b": 2},
|
||||
... {"vector": [0.5, 1.3], "b": 4}])
|
||||
LanceTable(my_table)
|
||||
>>> db.create_table("another_table", data=[{"vector": [0.4, 0.4], "b": 6}])
|
||||
LanceTable(another_table)
|
||||
>>> sorted(db.table_names())
|
||||
['another_table', 'my_table']
|
||||
>>> len(db)
|
||||
2
|
||||
>>> db["my_table"]
|
||||
LanceTable(my_table)
|
||||
>>> "my_table" in db
|
||||
True
|
||||
>>> db.drop_table("my_table")
|
||||
>>> db.drop_table("another_table")
|
||||
"""
|
||||
|
||||
def __init__(self, uri: URI):
|
||||
if not isinstance(uri, Path):
|
||||
scheme = get_uri_scheme(uri)
|
||||
is_local = isinstance(uri, Path) or scheme == "file"
|
||||
if is_local:
|
||||
if isinstance(uri, str):
|
||||
uri = Path(uri)
|
||||
uri = uri.expanduser().absolute()
|
||||
Path(uri).mkdir(parents=True, exist_ok=True)
|
||||
self._uri = str(uri)
|
||||
|
||||
self._entered = False
|
||||
|
||||
@property
|
||||
def uri(self) -> str:
|
||||
return self._uri
|
||||
|
||||
def table_names(self) -> list[str]:
|
||||
"""Get the names of all tables in the database.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list of str
|
||||
A list of table names.
|
||||
"""
|
||||
try:
|
||||
filesystem, path = fs_from_uri(self.uri)
|
||||
except pa.ArrowInvalid:
|
||||
raise NotImplementedError("Unsupported scheme: " + self.uri)
|
||||
|
||||
try:
|
||||
paths = filesystem.get_file_info(
|
||||
fs.FileSelector(get_uri_location(self.uri))
|
||||
)
|
||||
except FileNotFoundError:
|
||||
# It is ok if the file does not exist since it will be created
|
||||
paths = []
|
||||
tables = [
|
||||
os.path.splitext(file_info.base_name)[0]
|
||||
for file_info in paths
|
||||
if file_info.extension == "lance"
|
||||
]
|
||||
return tables
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.table_names())
|
||||
|
||||
def __contains__(self, name: str) -> bool:
|
||||
return name in self.table_names()
|
||||
|
||||
def create_table(
|
||||
self,
|
||||
name: str,
|
||||
data: Optional[Union[List[dict], dict, pd.DataFrame]] = None,
|
||||
schema: pa.Schema = None,
|
||||
mode: str = "create",
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> LanceTable:
|
||||
"""Create a table in the database.
|
||||
|
||||
See
|
||||
---
|
||||
DBConnection.create_table
|
||||
"""
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
|
||||
tbl = LanceTable.create(
|
||||
self,
|
||||
name,
|
||||
data,
|
||||
schema,
|
||||
mode=mode,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
)
|
||||
return tbl
|
||||
|
||||
def open_table(self, name: str) -> LanceTable:
|
||||
@@ -219,7 +317,7 @@ class LanceDBConnection:
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
return LanceTable(self, name)
|
||||
return LanceTable.open(self, name)
|
||||
|
||||
def drop_table(self, name: str):
|
||||
"""Drop a table from the database.
|
||||
@@ -229,6 +327,6 @@ class LanceDBConnection:
|
||||
name: str
|
||||
The name of the table.
|
||||
"""
|
||||
filesystem, path = pa.fs.FileSystem.from_uri(self.uri)
|
||||
filesystem, path = fs_from_uri(self.uri)
|
||||
table_path = os.path.join(path, name + ".lance")
|
||||
filesystem.delete_dir(table_path)
|
||||
|
||||
251
python/lancedb/pydantic.py
Normal file
251
python/lancedb/pydantic.py
Normal file
@@ -0,0 +1,251 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Pydantic (v1 / v2) adapter for LanceDB"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
import sys
|
||||
import types
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Callable, Dict, Generator, List, Type, Union, _GenericAlias
|
||||
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
import pydantic
|
||||
import semver
|
||||
|
||||
PYDANTIC_VERSION = semver.Version.parse(pydantic.__version__)
|
||||
try:
|
||||
from pydantic_core import CoreSchema, core_schema
|
||||
except ImportError:
|
||||
if PYDANTIC_VERSION >= (2,):
|
||||
raise
|
||||
|
||||
|
||||
class FixedSizeListMixin(ABC):
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def dim() -> int:
|
||||
raise NotImplementedError
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def value_arrow_type() -> pa.DataType:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def vector(
|
||||
dim: int, value_type: pa.DataType = pa.float32()
|
||||
) -> Type[FixedSizeListMixin]:
|
||||
"""Pydantic Vector Type.
|
||||
|
||||
!!! warning
|
||||
Experimental feature.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
dim : int
|
||||
The dimension of the vector.
|
||||
value_type : pyarrow.DataType, optional
|
||||
The value type of the vector, by default pa.float32()
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import pydantic
|
||||
>>> from lancedb.pydantic import vector
|
||||
...
|
||||
>>> class MyModel(pydantic.BaseModel):
|
||||
... id: int
|
||||
... url: str
|
||||
... embeddings: vector(768)
|
||||
>>> schema = pydantic_to_schema(MyModel)
|
||||
>>> assert schema == pa.schema([
|
||||
... pa.field("id", pa.int64(), False),
|
||||
... pa.field("url", pa.utf8(), False),
|
||||
... pa.field("embeddings", pa.list_(pa.float32(), 768), False)
|
||||
... ])
|
||||
"""
|
||||
|
||||
# TODO: make a public parameterized type.
|
||||
class FixedSizeList(list, FixedSizeListMixin):
|
||||
def __repr__(self):
|
||||
return f"FixedSizeList(dim={dim})"
|
||||
|
||||
@staticmethod
|
||||
def dim() -> int:
|
||||
return dim
|
||||
|
||||
@staticmethod
|
||||
def value_arrow_type() -> pa.DataType:
|
||||
return value_type
|
||||
|
||||
@classmethod
|
||||
def __get_pydantic_core_schema__(
|
||||
cls, _source_type: Any, _handler: pydantic.GetCoreSchemaHandler
|
||||
) -> CoreSchema:
|
||||
return core_schema.no_info_after_validator_function(
|
||||
cls,
|
||||
core_schema.list_schema(
|
||||
min_length=dim,
|
||||
max_length=dim,
|
||||
items_schema=core_schema.float_schema(),
|
||||
),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def __get_validators__(cls) -> Generator[Callable, None, None]:
|
||||
yield cls.validate
|
||||
|
||||
# For pydantic v1
|
||||
@classmethod
|
||||
def validate(cls, v):
|
||||
if not isinstance(v, (list, range, np.ndarray)) or len(v) != dim:
|
||||
raise TypeError("A list of numbers or numpy.ndarray is needed")
|
||||
return v
|
||||
|
||||
if PYDANTIC_VERSION < (2, 0):
|
||||
|
||||
@classmethod
|
||||
def __modify_schema__(cls, field_schema: Dict[str, Any]):
|
||||
field_schema["items"] = {"type": "number"}
|
||||
field_schema["maxItems"] = dim
|
||||
field_schema["minItems"] = dim
|
||||
|
||||
return FixedSizeList
|
||||
|
||||
|
||||
def _py_type_to_arrow_type(py_type: Type[Any]) -> pa.DataType:
|
||||
"""Convert Python Type to Arrow DataType.
|
||||
|
||||
Raises
|
||||
------
|
||||
TypeError
|
||||
If the type is not supported.
|
||||
"""
|
||||
if py_type == int:
|
||||
return pa.int64()
|
||||
elif py_type == float:
|
||||
return pa.float64()
|
||||
elif py_type == str:
|
||||
return pa.utf8()
|
||||
elif py_type == bool:
|
||||
return pa.bool_()
|
||||
elif py_type == bytes:
|
||||
return pa.binary()
|
||||
raise TypeError(
|
||||
f"Converting Pydantic type to Arrow Type: unsupported type {py_type}"
|
||||
)
|
||||
|
||||
|
||||
if PYDANTIC_VERSION.major < 2:
|
||||
|
||||
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
|
||||
return [
|
||||
_pydantic_to_field(name, field) for name, field in model.__fields__.items()
|
||||
]
|
||||
|
||||
else:
|
||||
|
||||
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
|
||||
return [
|
||||
_pydantic_to_field(name, field)
|
||||
for name, field in model.model_fields.items()
|
||||
]
|
||||
|
||||
|
||||
def _pydantic_to_arrow_type(field: pydantic.fields.FieldInfo) -> pa.DataType:
|
||||
"""Convert a Pydantic FieldInfo to Arrow DataType"""
|
||||
if isinstance(field.annotation, _GenericAlias) or (
|
||||
sys.version_info > (3, 9) and isinstance(field.annotation, types.GenericAlias)
|
||||
):
|
||||
origin = field.annotation.__origin__
|
||||
args = field.annotation.__args__
|
||||
if origin == list:
|
||||
child = args[0]
|
||||
return pa.list_(_py_type_to_arrow_type(child))
|
||||
elif origin == Union:
|
||||
if len(args) == 2 and args[1] == type(None):
|
||||
return _py_type_to_arrow_type(args[0])
|
||||
elif inspect.isclass(field.annotation):
|
||||
if issubclass(field.annotation, pydantic.BaseModel):
|
||||
# Struct
|
||||
fields = _pydantic_model_to_fields(field.annotation)
|
||||
return pa.struct(fields)
|
||||
elif issubclass(field.annotation, FixedSizeListMixin):
|
||||
return pa.list_(field.annotation.value_arrow_type(), field.annotation.dim())
|
||||
return _py_type_to_arrow_type(field.annotation)
|
||||
|
||||
|
||||
def is_nullable(field: pydantic.fields.FieldInfo) -> bool:
|
||||
"""Check if a Pydantic FieldInfo is nullable."""
|
||||
if isinstance(field.annotation, _GenericAlias):
|
||||
origin = field.annotation.__origin__
|
||||
args = field.annotation.__args__
|
||||
if origin == Union:
|
||||
if len(args) == 2 and args[1] == type(None):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _pydantic_to_field(name: str, field: pydantic.fields.FieldInfo) -> pa.Field:
|
||||
"""Convert a Pydantic field to a PyArrow Field."""
|
||||
dt = _pydantic_to_arrow_type(field)
|
||||
return pa.field(name, dt, is_nullable(field))
|
||||
|
||||
|
||||
def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
|
||||
"""Convert a Pydantic model to a PyArrow Schema.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model : Type[pydantic.BaseModel]
|
||||
The Pydantic BaseModel to convert to Arrow Schema.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pyarrow.Schema
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> from typing import List, Optional
|
||||
>>> import pydantic
|
||||
>>> from lancedb.pydantic import pydantic_to_schema
|
||||
...
|
||||
>>> class InnerModel(pydantic.BaseModel):
|
||||
... a: str
|
||||
... b: Optional[float]
|
||||
>>>
|
||||
>>> class FooModel(pydantic.BaseModel):
|
||||
... id: int
|
||||
... s: Optional[str] = None
|
||||
... vec: List[float]
|
||||
... li: List[int]
|
||||
... inner: InnerModel
|
||||
>>> schema = pydantic_to_schema(FooModel)
|
||||
>>> assert schema == pa.schema([
|
||||
... pa.field("id", pa.int64(), False),
|
||||
... pa.field("s", pa.utf8(), True),
|
||||
... pa.field("vec", pa.list_(pa.float64()), False),
|
||||
... pa.field("li", pa.list_(pa.int64()), False),
|
||||
... pa.field("inner", pa.struct([
|
||||
... pa.field("a", pa.utf8(), False),
|
||||
... pa.field("b", pa.float64(), True),
|
||||
... ]), False),
|
||||
... ])
|
||||
"""
|
||||
fields = _pydantic_model_to_fields(model)
|
||||
return pa.schema(fields)
|
||||
@@ -10,16 +10,47 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import Literal
|
||||
|
||||
from typing import List, Literal, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .common import VECTOR_COLUMN_NAME
|
||||
|
||||
|
||||
class Query(BaseModel):
|
||||
"""A Query"""
|
||||
|
||||
vector_column: str = VECTOR_COLUMN_NAME
|
||||
|
||||
# vector to search for
|
||||
vector: List[float]
|
||||
|
||||
# sql filter to refine the query with
|
||||
filter: Optional[str] = None
|
||||
|
||||
# top k results to return
|
||||
k: int
|
||||
|
||||
# # metrics
|
||||
metric: str = "L2"
|
||||
|
||||
# which columns to return in the results
|
||||
columns: Optional[List[str]] = None
|
||||
|
||||
# optional query parameters for tuning the results,
|
||||
# e.g. `{"nprobes": "10", "refine_factor": "10"}`
|
||||
nprobes: int = 10
|
||||
|
||||
# Refine factor.
|
||||
refine_factor: Optional[int] = None
|
||||
|
||||
|
||||
class LanceQueryBuilder:
|
||||
"""
|
||||
A builder for nearest neighbor queries for LanceDB.
|
||||
@@ -43,7 +74,12 @@ class LanceQueryBuilder:
|
||||
0 6 [0.4, 0.4] 0.0
|
||||
"""
|
||||
|
||||
def __init__(self, table: "lancedb.table.LanceTable", query: np.ndarray):
|
||||
def __init__(
|
||||
self,
|
||||
table: "lancedb.table.Table",
|
||||
query: Union[np.ndarray, str],
|
||||
vector_column: str = VECTOR_COLUMN_NAME,
|
||||
):
|
||||
self._metric = "L2"
|
||||
self._nprobes = 20
|
||||
self._refine_factor = None
|
||||
@@ -52,6 +88,7 @@ class LanceQueryBuilder:
|
||||
self._limit = 10
|
||||
self._columns = None
|
||||
self._where = None
|
||||
self._vector_column = vector_column
|
||||
|
||||
def limit(self, limit: int) -> LanceQueryBuilder:
|
||||
"""Set the maximum number of results to return.
|
||||
@@ -168,24 +205,34 @@ class LanceQueryBuilder:
|
||||
and also the "score" column which is the distance between the query
|
||||
vector and the returned vector.
|
||||
"""
|
||||
ds = self._table.to_lance()
|
||||
tbl = ds.to_table(
|
||||
columns=self._columns,
|
||||
|
||||
return self.to_arrow().to_pandas()
|
||||
|
||||
def to_arrow(self) -> pa.Table:
|
||||
"""
|
||||
Execute the query and return the results as an
|
||||
[Apache Arrow Table](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table).
|
||||
|
||||
In addition to the selected columns, LanceDB also returns a vector
|
||||
and also the "score" column which is the distance between the query
|
||||
vector and the returned vectors.
|
||||
"""
|
||||
vector = self._query if isinstance(self._query, list) else self._query.tolist()
|
||||
query = Query(
|
||||
vector=vector,
|
||||
filter=self._where,
|
||||
nearest={
|
||||
"column": VECTOR_COLUMN_NAME,
|
||||
"q": self._query,
|
||||
"k": self._limit,
|
||||
"metric": self._metric,
|
||||
"nprobes": self._nprobes,
|
||||
"refine_factor": self._refine_factor,
|
||||
},
|
||||
k=self._limit,
|
||||
metric=self._metric,
|
||||
columns=self._columns,
|
||||
nprobes=self._nprobes,
|
||||
refine_factor=self._refine_factor,
|
||||
vector_column=self._vector_column,
|
||||
)
|
||||
return tbl.to_pandas()
|
||||
return self._table._execute_query(query)
|
||||
|
||||
|
||||
class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
def to_df(self) -> pd.DataFrame:
|
||||
def to_arrow(self) -> pd.Table:
|
||||
try:
|
||||
import tantivy
|
||||
except ImportError:
|
||||
@@ -202,8 +249,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
# get the scores and doc ids
|
||||
row_ids, scores = search_index(index, self._query, self._limit)
|
||||
if len(row_ids) == 0:
|
||||
return pd.DataFrame()
|
||||
empty_schema = pa.schema([pa.field("score", pa.float32())])
|
||||
return pa.Table.from_pylist([], schema=empty_schema)
|
||||
scores = pa.array(scores)
|
||||
output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
|
||||
output_tbl = output_tbl.append_column("score", scores)
|
||||
return output_tbl.to_pandas()
|
||||
return output_tbl
|
||||
|
||||
60
python/lancedb/remote/__init__.py
Normal file
60
python/lancedb/remote/__init__.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import abc
|
||||
from typing import List, Optional
|
||||
|
||||
import attr
|
||||
import pyarrow as pa
|
||||
from pydantic import BaseModel
|
||||
|
||||
__all__ = ["LanceDBClient", "VectorQuery", "VectorQueryResult"]
|
||||
|
||||
|
||||
class VectorQuery(BaseModel):
|
||||
# vector to search for
|
||||
vector: List[float]
|
||||
|
||||
# sql filter to refine the query with
|
||||
filter: Optional[str] = None
|
||||
|
||||
# top k results to return
|
||||
k: int
|
||||
|
||||
# # metrics
|
||||
_metric: str = "L2"
|
||||
|
||||
# which columns to return in the results
|
||||
columns: Optional[List[str]] = None
|
||||
|
||||
# optional query parameters for tuning the results,
|
||||
# e.g. `{"nprobes": "10", "refine_factor": "10"}`
|
||||
nprobes: int = 10
|
||||
|
||||
refine_factor: Optional[int] = None
|
||||
|
||||
|
||||
@attr.define
|
||||
class VectorQueryResult:
|
||||
# for now the response is directly seralized into a pandas dataframe
|
||||
tbl: pa.Table
|
||||
|
||||
def to_arrow(self) -> pa.Table:
|
||||
return self.tbl
|
||||
|
||||
|
||||
class LanceDBClient(abc.ABC):
|
||||
@abc.abstractmethod
|
||||
def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
|
||||
"""Query the LanceDB server for the given table and query."""
|
||||
pass
|
||||
22
python/lancedb/remote/arrow.py
Normal file
22
python/lancedb/remote/arrow.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
|
||||
def to_ipc_binary(table: pa.Table) -> bytes:
|
||||
"""Serialize a PyArrow Table to IPC binary."""
|
||||
sink = pa.BufferOutputStream()
|
||||
with pa.ipc.new_stream(sink, table.schema) as writer:
|
||||
writer.write_table(table)
|
||||
return sink.getvalue().to_pybytes()
|
||||
147
python/lancedb/remote/client.py
Normal file
147
python/lancedb/remote/client.py
Normal file
@@ -0,0 +1,147 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import functools
|
||||
from typing import Any, Callable, Dict, Optional, Union
|
||||
|
||||
import aiohttp
|
||||
import attr
|
||||
import pyarrow as pa
|
||||
from pydantic import BaseModel
|
||||
|
||||
from lancedb.common import Credential
|
||||
from lancedb.remote import VectorQuery, VectorQueryResult
|
||||
from lancedb.remote.errors import LanceDBClientError
|
||||
|
||||
ARROW_STREAM_CONTENT_TYPE = "application/vnd.apache.arrow.stream"
|
||||
|
||||
|
||||
def _check_not_closed(f):
|
||||
@functools.wraps(f)
|
||||
def wrapped(self, *args, **kwargs):
|
||||
if self.closed:
|
||||
raise ValueError("Connection is closed")
|
||||
return f(self, *args, **kwargs)
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
async def _read_ipc(resp: aiohttp.ClientResponse) -> pa.Table:
|
||||
resp_body = await resp.read()
|
||||
with pa.ipc.open_file(pa.BufferReader(resp_body)) as reader:
|
||||
return reader.read_all()
|
||||
|
||||
|
||||
@attr.define(slots=False)
|
||||
class RestfulLanceDBClient:
|
||||
db_name: str
|
||||
region: str
|
||||
api_key: Credential
|
||||
closed: bool = attr.field(default=False, init=False)
|
||||
|
||||
@functools.cached_property
|
||||
def session(self) -> aiohttp.ClientSession:
|
||||
url = f"https://{self.db_name}.{self.region}.api.lancedb.com"
|
||||
return aiohttp.ClientSession(url)
|
||||
|
||||
async def close(self):
|
||||
await self.session.close()
|
||||
self.closed = True
|
||||
|
||||
@functools.cached_property
|
||||
def headers(self) -> Dict[str, str]:
|
||||
headers = {
|
||||
"x-api-key": self.api_key,
|
||||
}
|
||||
if self.region == "local": # Local test mode
|
||||
headers["Host"] = f"{self.db_name}.{self.region}.api.lancedb.com"
|
||||
return headers
|
||||
|
||||
@staticmethod
|
||||
async def _check_status(resp: aiohttp.ClientResponse):
|
||||
if resp.status == 404:
|
||||
raise LanceDBClientError(f"Not found: {await resp.text()}")
|
||||
elif 400 <= resp.status < 500:
|
||||
raise LanceDBClientError(
|
||||
f"Bad Request: {resp.status}, error: {await resp.text()}"
|
||||
)
|
||||
elif 500 <= resp.status < 600:
|
||||
raise LanceDBClientError(
|
||||
f"Internal Server Error: {resp.status}, error: {await resp.text()}"
|
||||
)
|
||||
elif resp.status != 200:
|
||||
raise LanceDBClientError(
|
||||
f"Unknown Error: {resp.status}, error: {await resp.text()}"
|
||||
)
|
||||
|
||||
@_check_not_closed
|
||||
async def get(self, uri: str, params: Union[Dict[str, Any], BaseModel] = None):
|
||||
"""Send a GET request and returns the deserialized response payload."""
|
||||
if isinstance(params, BaseModel):
|
||||
params: Dict[str, Any] = params.dict(exclude_none=True)
|
||||
async with self.session.get(uri, params=params, headers=self.headers) as resp:
|
||||
await self._check_status(resp)
|
||||
return await resp.json()
|
||||
|
||||
@_check_not_closed
|
||||
async def post(
|
||||
self,
|
||||
uri: str,
|
||||
data: Union[Dict[str, Any], BaseModel, bytes],
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
content_type: Optional[str] = None,
|
||||
deserialize: Callable = lambda resp: resp.json(),
|
||||
) -> Dict[str, Any]:
|
||||
"""Send a POST request and returns the deserialized response payload.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri : str
|
||||
The uri to send the POST request to.
|
||||
data: Union[Dict[str, Any], BaseModel]
|
||||
|
||||
"""
|
||||
if isinstance(data, BaseModel):
|
||||
data: Dict[str, Any] = data.dict(exclude_none=True)
|
||||
if isinstance(data, bytes):
|
||||
req_kwargs = {"data": data}
|
||||
else:
|
||||
req_kwargs = {"json": data}
|
||||
|
||||
headers = self.headers.copy()
|
||||
if content_type is not None:
|
||||
headers["content-type"] = content_type
|
||||
async with self.session.post(
|
||||
uri,
|
||||
headers=headers,
|
||||
params=params,
|
||||
**req_kwargs,
|
||||
) as resp:
|
||||
resp: aiohttp.ClientResponse = resp
|
||||
await self._check_status(resp)
|
||||
return await deserialize(resp)
|
||||
|
||||
@_check_not_closed
|
||||
async def list_tables(self):
|
||||
"""List all tables in the database."""
|
||||
json = await self.get("/v1/table/", {})
|
||||
return json["tables"]
|
||||
|
||||
@_check_not_closed
|
||||
async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
|
||||
"""Query a table."""
|
||||
tbl = await self.post(
|
||||
f"/v1/table/{table_name}/query/", query, deserialize=_read_ipc
|
||||
)
|
||||
return VectorQueryResult(tbl)
|
||||
104
python/lancedb/remote/db.py
Normal file
104
python/lancedb/remote/db.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
from typing import List
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
from lancedb.common import DATA
|
||||
from lancedb.db import DBConnection
|
||||
from lancedb.schema import schema_to_json
|
||||
from lancedb.table import Table, _sanitize_data
|
||||
|
||||
from .arrow import to_ipc_binary
|
||||
from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient
|
||||
|
||||
|
||||
class RemoteDBConnection(DBConnection):
|
||||
"""A connection to a remote LanceDB database."""
|
||||
|
||||
def __init__(self, db_url: str, api_key: str, region: str):
|
||||
"""Connect to a remote LanceDB database."""
|
||||
parsed = urlparse(db_url)
|
||||
if parsed.scheme != "db":
|
||||
raise ValueError(f"Invalid scheme: {parsed.scheme}, only accepts db://")
|
||||
self.db_name = parsed.netloc
|
||||
self.api_key = api_key
|
||||
self._client = RestfulLanceDBClient(self.db_name, region, api_key)
|
||||
try:
|
||||
self._loop = asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
self._loop = asyncio.get_event_loop()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"RemoveConnect(name={self.db_name})"
|
||||
|
||||
def table_names(self) -> List[str]:
|
||||
"""List the names of all tables in the database."""
|
||||
result = self._loop.run_until_complete(self._client.list_tables())
|
||||
return result
|
||||
|
||||
def open_table(self, name: str) -> Table:
|
||||
"""Open a Lance Table in the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
|
||||
Returns
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
from .table import RemoteTable
|
||||
|
||||
# TODO: check if table exists
|
||||
|
||||
return RemoteTable(self, name)
|
||||
|
||||
def create_table(
|
||||
self,
|
||||
name: str,
|
||||
data: DATA = None,
|
||||
schema: pa.Schema = None,
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> Table:
|
||||
if data is None and schema is None:
|
||||
raise ValueError("Either data or schema must be provided.")
|
||||
if data is not None:
|
||||
data = _sanitize_data(
|
||||
data, schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
else:
|
||||
if schema is None:
|
||||
raise ValueError("Either data or schema must be provided")
|
||||
data = pa.Table.from_pylist([], schema=schema)
|
||||
|
||||
from .table import RemoteTable
|
||||
|
||||
data = to_ipc_binary(data)
|
||||
request_id = uuid.uuid4().hex
|
||||
|
||||
self._loop.run_until_complete(
|
||||
self._client.post(
|
||||
f"/v1/table/{name}/create",
|
||||
data=data,
|
||||
params={"request_id": request_id},
|
||||
content_type=ARROW_STREAM_CONTENT_TYPE,
|
||||
)
|
||||
)
|
||||
return RemoteTable(self, name)
|
||||
16
python/lancedb/remote/errors.py
Normal file
16
python/lancedb/remote/errors.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
class LanceDBClientError(RuntimeError):
|
||||
pass
|
||||
93
python/lancedb/remote/table.py
Normal file
93
python/lancedb/remote/table.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import uuid
|
||||
from functools import cached_property
|
||||
from typing import Union
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
|
||||
from ..query import LanceQueryBuilder, Query
|
||||
from ..schema import json_to_schema
|
||||
from ..table import Query, Table, _sanitize_data
|
||||
from .arrow import to_ipc_binary
|
||||
from .client import ARROW_STREAM_CONTENT_TYPE
|
||||
from .db import RemoteDBConnection
|
||||
|
||||
|
||||
class RemoteTable(Table):
|
||||
def __init__(self, conn: RemoteDBConnection, name: str):
|
||||
self._conn = conn
|
||||
self._name = name
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"RemoteTable({self._conn.db_name}.{self._name})"
|
||||
|
||||
@cached_property
|
||||
def schema(self) -> pa.Schema:
|
||||
"""Return the schema of the table."""
|
||||
resp = self._conn._loop.run_until_complete(
|
||||
self._conn._client.post(f"/v1/table/{self._name}/describe/")
|
||||
)
|
||||
schema = json_to_schema(resp["schema"])
|
||||
return schema
|
||||
|
||||
def to_arrow(self) -> pa.Table:
|
||||
raise NotImplementedError
|
||||
|
||||
def create_index(
|
||||
self,
|
||||
metric="L2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
replace: bool = True,
|
||||
):
|
||||
raise NotImplementedError
|
||||
|
||||
def add(
|
||||
self,
|
||||
data: DATA,
|
||||
mode: str = "append",
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> int:
|
||||
data = _sanitize_data(
|
||||
data, self.schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
payload = to_ipc_binary(data)
|
||||
|
||||
request_id = uuid.uuid4().hex
|
||||
|
||||
self._conn._loop.run_until_complete(
|
||||
self._conn._client.post(
|
||||
f"/v1/table/{self._name}/insert/",
|
||||
data=payload,
|
||||
params={"request_id": request_id, "mode": mode},
|
||||
content_type=ARROW_STREAM_CONTENT_TYPE,
|
||||
)
|
||||
)
|
||||
|
||||
def search(
|
||||
self, query: Union[VEC, str], vector_column: str = VECTOR_COLUMN_NAME
|
||||
) -> LanceQueryBuilder:
|
||||
return LanceQueryBuilder(self, query, vector_column)
|
||||
|
||||
def _execute_query(self, query: Query) -> pa.Table:
|
||||
result = self._conn._client.query(self._name, query)
|
||||
return self._conn._loop.run_until_complete(result).to_arrow()
|
||||
|
||||
def delete(self, predicate: str):
|
||||
raise NotImplementedError
|
||||
45
python/lancedb/schema.py
Normal file
45
python/lancedb/schema.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Schema related utilities."""
|
||||
|
||||
from typing import Any, Dict, Type
|
||||
|
||||
import pyarrow as pa
|
||||
from lance import json_to_schema, schema_to_json
|
||||
|
||||
|
||||
def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:
|
||||
"""A help function to create a vector type.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
dimension: The dimension of the vector.
|
||||
value_type: pa.DataType, optional
|
||||
The type of the value in the vector.
|
||||
|
||||
Returns
|
||||
-------
|
||||
A PyArrow DataType for vectors.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import pyarrow as pa
|
||||
>>> import lancedb
|
||||
>>> schema = pa.schema([
|
||||
... pa.field("id", pa.int64()),
|
||||
... pa.field("vector", lancedb.vector(756)),
|
||||
... ])
|
||||
"""
|
||||
return pa.list_(value_type, dimension)
|
||||
@@ -14,44 +14,49 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from abc import ABC, abstractmethod
|
||||
from functools import cached_property
|
||||
from typing import List, Union
|
||||
from typing import Iterable, List, Union
|
||||
|
||||
import lance
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pyarrow.compute as pc
|
||||
from lance import LanceDataset
|
||||
from lance.vector import vec_to_table
|
||||
|
||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
from .query import LanceFtsQueryBuilder, LanceQueryBuilder
|
||||
from .util import get_uri_scheme
|
||||
from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query
|
||||
from .util import fs_from_uri
|
||||
|
||||
|
||||
def _sanitize_data(data, schema):
|
||||
def _sanitize_data(data, schema, on_bad_vectors, fill_value):
|
||||
if isinstance(data, list):
|
||||
data = pa.Table.from_pylist(data)
|
||||
data = _sanitize_schema(data, schema=schema)
|
||||
data = _sanitize_schema(
|
||||
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
if isinstance(data, dict):
|
||||
data = vec_to_table(data)
|
||||
if isinstance(data, pd.DataFrame):
|
||||
data = pa.Table.from_pandas(data)
|
||||
data = _sanitize_schema(data, schema=schema)
|
||||
if not isinstance(data, pa.Table):
|
||||
data = _sanitize_schema(
|
||||
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
if not isinstance(data, (pa.Table, Iterable)):
|
||||
raise TypeError(f"Unsupported data type: {type(data)}")
|
||||
return data
|
||||
|
||||
|
||||
class LanceTable:
|
||||
class Table(ABC):
|
||||
"""
|
||||
A table in a LanceDB database.
|
||||
A [Table](Table) is a collection of Records in a LanceDB [Database](Database).
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Create using [LanceDBConnection.create_table][lancedb.LanceDBConnection.create_table]
|
||||
Create using [DBConnection.create_table][lancedb.DBConnection.create_table]
|
||||
(more examples in that method's documentation).
|
||||
|
||||
>>> import lancedb
|
||||
@@ -66,12 +71,11 @@ class LanceTable:
|
||||
vector: [[[1.1,1.2]]]
|
||||
b: [[2]]
|
||||
|
||||
Can append new data with [LanceTable.add][lancedb.table.LanceTable.add].
|
||||
Can append new data with [Table.add()][lancedb.table.Table.add].
|
||||
|
||||
>>> table.add([{"vector": [0.5, 1.3], "b": 4}])
|
||||
2
|
||||
|
||||
Can query the table with [LanceTable.search][lancedb.table.LanceTable.search].
|
||||
Can query the table with [Table.search][lancedb.table.Table.search].
|
||||
|
||||
>>> table.search([0.4, 0.4]).select(["b"]).to_df()
|
||||
b vector score
|
||||
@@ -79,8 +83,169 @@ class LanceTable:
|
||||
1 2 [1.1, 1.2] 1.13
|
||||
|
||||
Search queries are much faster when an index is created. See
|
||||
[LanceTable.create_index][lancedb.table.LanceTable.create_index].
|
||||
[Table.create_index][lancedb.table.Table.create_index].
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def schema(self) -> pa.Schema:
|
||||
"""Return the [Arrow Schema](https://arrow.apache.org/docs/python/api/datatypes.html#) of
|
||||
this [Table](Table)
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def to_pandas(self) -> pd.DataFrame:
|
||||
"""Return the table as a pandas DataFrame.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pd.DataFrame
|
||||
"""
|
||||
return self.to_arrow().to_pandas()
|
||||
|
||||
@abstractmethod
|
||||
def to_arrow(self) -> pa.Table:
|
||||
"""Return the table as a pyarrow Table.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pa.Table
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def create_index(
|
||||
self,
|
||||
metric="L2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
replace: bool = True,
|
||||
):
|
||||
"""Create an index on the table.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: str, default "L2"
|
||||
The distance metric to use when creating the index.
|
||||
Valid values are "L2", "cosine", or "dot".
|
||||
L2 is euclidean distance.
|
||||
num_partitions: int
|
||||
The number of IVF partitions to use when creating the index.
|
||||
Default is 256.
|
||||
num_sub_vectors: int
|
||||
The number of PQ sub-vectors to use when creating the index.
|
||||
Default is 96.
|
||||
vector_column_name: str, default "vector"
|
||||
The vector column name to create the index.
|
||||
replace: bool, default True
|
||||
If True, replace the existing index if it exists.
|
||||
If False, raise an error if duplicate index exists.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def add(
|
||||
self,
|
||||
data: DATA,
|
||||
mode: str = "append",
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
):
|
||||
"""Add more data to the [Table](Table).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data: list-of-dict, dict, pd.DataFrame
|
||||
The data to insert into the table.
|
||||
mode: str
|
||||
The mode to use when writing the data. Valid values are
|
||||
"append" and "overwrite".
|
||||
on_bad_vectors: str, default "error"
|
||||
What to do if any of the vectors are not the same size or contains NaNs.
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def search(
|
||||
self, query: Union[VEC, str], vector_column: str = VECTOR_COLUMN_NAME
|
||||
) -> LanceQueryBuilder:
|
||||
"""Create a search query to find the nearest neighbors
|
||||
of the given query vector.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
query: list, np.ndarray
|
||||
The query vector.
|
||||
vector_column: str, default "vector"
|
||||
The name of the vector column to search.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
A query builder object representing the query.
|
||||
Once executed, the query returns selected columns, the vector,
|
||||
and also the "score" column which is the distance between the query
|
||||
vector and the returned vector.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def _execute_query(self, query: Query) -> pa.Table:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def delete(self, where: str):
|
||||
"""Delete rows from the table.
|
||||
|
||||
This can be used to delete a single row, many rows, all rows, or
|
||||
sometimes no rows (if your predicate matches nothing).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
where: str
|
||||
The SQL where clause to use when deleting rows. For example, 'x = 2'
|
||||
or 'x IN (1, 2, 3)'. The filter must not be empty, or it will error.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
>>> import pandas as pd
|
||||
>>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
|
||||
>>> db = lancedb.connect("./.lancedb")
|
||||
>>> table = db.create_table("my_table", data)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
1 2 [3.0, 4.0]
|
||||
2 3 [5.0, 6.0]
|
||||
>>> table.delete("x = 2")
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
1 3 [5.0, 6.0]
|
||||
|
||||
If you have a list of values to delete, you can combine them into a
|
||||
stringified list and use the `IN` operator:
|
||||
|
||||
>>> to_remove = [1, 5]
|
||||
>>> to_remove = ", ".join([str(v) for v in to_remove])
|
||||
>>> to_remove
|
||||
'1, 5'
|
||||
>>> table.delete(f"x IN ({to_remove})")
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 3 [5.0, 6.0]
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class LanceTable(Table):
|
||||
"""
|
||||
A table in a LanceDB database.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -92,6 +257,7 @@ class LanceTable:
|
||||
|
||||
def _reset_dataset(self):
|
||||
try:
|
||||
if "_dataset" in self.__dict__:
|
||||
del self.__dict__["_dataset"]
|
||||
except AttributeError:
|
||||
pass
|
||||
@@ -136,7 +302,6 @@ class LanceTable:
|
||||
vector type
|
||||
0 [1.1, 0.9] vector
|
||||
>>> table.add([{"vector": [0.5, 0.2], "type": "vector"}])
|
||||
2
|
||||
>>> table.version
|
||||
2
|
||||
>>> table.checkout(1)
|
||||
@@ -184,27 +349,22 @@ class LanceTable:
|
||||
def _dataset_uri(self) -> str:
|
||||
return os.path.join(self._conn.uri, f"{self.name}.lance")
|
||||
|
||||
def create_index(self, metric="L2", num_partitions=256, num_sub_vectors=96):
|
||||
"""Create an index on the table.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: str, default "L2"
|
||||
The distance metric to use when creating the index. Valid values are "L2" or "cosine".
|
||||
L2 is euclidean distance.
|
||||
num_partitions: int
|
||||
The number of IVF partitions to use when creating the index.
|
||||
Default is 256.
|
||||
num_sub_vectors: int
|
||||
The number of PQ sub-vectors to use when creating the index.
|
||||
Default is 96.
|
||||
"""
|
||||
def create_index(
|
||||
self,
|
||||
metric="L2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
vector_column_name=VECTOR_COLUMN_NAME,
|
||||
replace: bool = True,
|
||||
):
|
||||
"""Create an index on the table."""
|
||||
self._dataset.create_index(
|
||||
column=VECTOR_COLUMN_NAME,
|
||||
column=vector_column_name,
|
||||
index_type="IVF_PQ",
|
||||
metric=metric,
|
||||
num_partitions=num_partitions,
|
||||
num_sub_vectors=num_sub_vectors,
|
||||
replace=replace,
|
||||
)
|
||||
self._reset_dataset()
|
||||
|
||||
@@ -237,7 +397,13 @@ class LanceTable:
|
||||
"""Return the LanceDataset backing this table."""
|
||||
return self._dataset
|
||||
|
||||
def add(self, data: DATA, mode: str = "append") -> int:
|
||||
def add(
|
||||
self,
|
||||
data: DATA,
|
||||
mode: str = "append",
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
):
|
||||
"""Add data to the table.
|
||||
|
||||
Parameters
|
||||
@@ -247,18 +413,27 @@ class LanceTable:
|
||||
mode: str
|
||||
The mode to use when writing the data. Valid values are
|
||||
"append" and "overwrite".
|
||||
on_bad_vectors: str, default "error"
|
||||
What to do if any of the vectors are not the same size or contains NaNs.
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
The number of vectors in the table.
|
||||
"""
|
||||
data = _sanitize_data(data, self.schema)
|
||||
# TODO: manage table listing and metadata separately
|
||||
data = _sanitize_data(
|
||||
data, self.schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
lance.write_dataset(data, self._dataset_uri, mode=mode)
|
||||
self._reset_dataset()
|
||||
return len(self)
|
||||
|
||||
def search(self, query: Union[VEC, str]) -> LanceQueryBuilder:
|
||||
def search(
|
||||
self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME
|
||||
) -> LanceQueryBuilder:
|
||||
"""Create a search query to find the nearest neighbors
|
||||
of the given query vector.
|
||||
|
||||
@@ -266,6 +441,8 @@ class LanceTable:
|
||||
----------
|
||||
query: list, np.ndarray
|
||||
The query vector.
|
||||
vector_column_name: str, default "vector"
|
||||
The name of the vector column to search.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -277,7 +454,7 @@ class LanceTable:
|
||||
"""
|
||||
if isinstance(query, str):
|
||||
# fts
|
||||
return LanceFtsQueryBuilder(self, query)
|
||||
return LanceFtsQueryBuilder(self, query, vector_column_name)
|
||||
|
||||
if isinstance(query, list):
|
||||
query = np.array(query)
|
||||
@@ -285,17 +462,104 @@ class LanceTable:
|
||||
query = query.astype(np.float32)
|
||||
else:
|
||||
raise TypeError(f"Unsupported query type: {type(query)}")
|
||||
return LanceQueryBuilder(self, query)
|
||||
return LanceQueryBuilder(self, query, vector_column_name)
|
||||
|
||||
@classmethod
|
||||
def create(cls, db, name, data, schema=None, mode="create"):
|
||||
def create(
|
||||
cls,
|
||||
db,
|
||||
name,
|
||||
data=None,
|
||||
schema=None,
|
||||
mode="create",
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
):
|
||||
"""
|
||||
Create a new table.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
>>> import pandas as pd
|
||||
>>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
|
||||
>>> db = lancedb.connect("./.lancedb")
|
||||
>>> table = db.create_table("my_table", data)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
1 2 [3.0, 4.0]
|
||||
2 3 [5.0, 6.0]
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db: LanceDB
|
||||
The LanceDB instance to create the table in.
|
||||
name: str
|
||||
The name of the table to create.
|
||||
data: list-of-dict, dict, pd.DataFrame, default None
|
||||
The data to insert into the table.
|
||||
At least one of `data` or `schema` must be provided.
|
||||
schema: dict, optional
|
||||
The schema of the table. If not provided, the schema is inferred from the data.
|
||||
At least one of `data` or `schema` must be provided.
|
||||
mode: str, default "create"
|
||||
The mode to use when writing the data. Valid values are
|
||||
"create", "overwrite", and "append".
|
||||
on_bad_vectors: str, default "error"
|
||||
What to do if any of the vectors are not the same size or contains NaNs.
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
"""
|
||||
tbl = LanceTable(db, name)
|
||||
data = _sanitize_data(data, schema)
|
||||
lance.write_dataset(data, tbl._dataset_uri, mode=mode)
|
||||
if data is not None:
|
||||
data = _sanitize_data(
|
||||
data, schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
else:
|
||||
if schema is None:
|
||||
raise ValueError("Either data or schema must be provided")
|
||||
data = pa.Table.from_pylist([], schema=schema)
|
||||
lance.write_dataset(data, tbl._dataset_uri, schema=schema, mode=mode)
|
||||
return LanceTable(db, name)
|
||||
|
||||
@classmethod
|
||||
def open(cls, db, name):
|
||||
tbl = cls(db, name)
|
||||
fs, path = fs_from_uri(tbl._dataset_uri)
|
||||
file_info = fs.get_file_info(path)
|
||||
if file_info.type != pa.fs.FileType.Directory:
|
||||
raise FileNotFoundError(
|
||||
f"Table {name} does not exist. Please first call db.create_table({name}, data)"
|
||||
)
|
||||
return tbl
|
||||
|
||||
def delete(self, where: str):
|
||||
self._dataset.delete(where)
|
||||
|
||||
def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
|
||||
def _execute_query(self, query: Query) -> pa.Table:
|
||||
ds = self.to_lance()
|
||||
return ds.to_table(
|
||||
columns=query.columns,
|
||||
filter=query.filter,
|
||||
nearest={
|
||||
"column": query.vector_column,
|
||||
"q": query.vector,
|
||||
"k": query.k,
|
||||
"metric": query.metric,
|
||||
"nprobes": query.nprobes,
|
||||
"refine_factor": query.refine_factor,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def _sanitize_schema(
|
||||
data: pa.Table,
|
||||
schema: pa.Schema = None,
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> pa.Table:
|
||||
"""Ensure that the table has the expected schema.
|
||||
|
||||
Parameters
|
||||
@@ -305,21 +569,41 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
|
||||
schema: pa.Schema; optional
|
||||
The expected schema. If not provided, this just converts the
|
||||
vector column to fixed_size_list(float32) if necessary.
|
||||
on_bad_vectors: str, default "error"
|
||||
What to do if any of the vectors are not the same size or contains NaNs.
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
"""
|
||||
if schema is not None:
|
||||
if data.schema == schema:
|
||||
return data
|
||||
# cast the columns to the expected types
|
||||
data = data.combine_chunks()
|
||||
data = _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)
|
||||
data = _sanitize_vector_column(
|
||||
data,
|
||||
vector_column_name=VECTOR_COLUMN_NAME,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
)
|
||||
return pa.Table.from_arrays(
|
||||
[data[name] for name in schema.names], schema=schema
|
||||
)
|
||||
# just check the vector column
|
||||
return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)
|
||||
return _sanitize_vector_column(
|
||||
data,
|
||||
vector_column_name=VECTOR_COLUMN_NAME,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
)
|
||||
|
||||
|
||||
def _sanitize_vector_column(data: pa.Table, vector_column_name: str) -> pa.Table:
|
||||
def _sanitize_vector_column(
|
||||
data: pa.Table,
|
||||
vector_column_name: str,
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
) -> pa.Table:
|
||||
"""
|
||||
Ensure that the vector column exists and has type fixed_size_list(float32)
|
||||
|
||||
@@ -329,19 +613,103 @@ def _sanitize_vector_column(data: pa.Table, vector_column_name: str) -> pa.Table
|
||||
The table to sanitize.
|
||||
vector_column_name: str
|
||||
The name of the vector column.
|
||||
on_bad_vectors: str, default "error"
|
||||
What to do if any of the vectors are not the same size or contains NaNs.
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.0
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
"""
|
||||
if vector_column_name not in data.column_names:
|
||||
raise ValueError(f"Missing vector column: {vector_column_name}")
|
||||
# ChunkedArray is annoying to work with, so we combine chunks here
|
||||
vec_arr = data[vector_column_name].combine_chunks()
|
||||
if pa.types.is_fixed_size_list(vec_arr.type):
|
||||
return data
|
||||
if not pa.types.is_list(vec_arr.type):
|
||||
if pa.types.is_list(data[vector_column_name].type):
|
||||
# if it's a variable size list array we make sure the dimensions are all the same
|
||||
has_jagged_ndims = len(vec_arr.values) % len(data) != 0
|
||||
if has_jagged_ndims:
|
||||
data = _sanitize_jagged(
|
||||
data, fill_value, on_bad_vectors, vec_arr, vector_column_name
|
||||
)
|
||||
vec_arr = data[vector_column_name].combine_chunks()
|
||||
elif not pa.types.is_fixed_size_list(vec_arr.type):
|
||||
raise TypeError(f"Unsupported vector column type: {vec_arr.type}")
|
||||
|
||||
vec_arr = ensure_fixed_size_list_of_f32(vec_arr)
|
||||
data = data.set_column(
|
||||
data.column_names.index(vector_column_name), vector_column_name, vec_arr
|
||||
)
|
||||
|
||||
has_nans = pc.any(pc.is_nan(vec_arr.values)).as_py()
|
||||
if has_nans:
|
||||
data = _sanitize_nans(
|
||||
data, fill_value, on_bad_vectors, vec_arr, vector_column_name
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def ensure_fixed_size_list_of_f32(vec_arr):
|
||||
values = vec_arr.values
|
||||
if not pa.types.is_float32(values.type):
|
||||
values = values.cast(pa.float32())
|
||||
list_size = len(values) / len(data)
|
||||
if pa.types.is_fixed_size_list(vec_arr.type):
|
||||
list_size = vec_arr.type.list_size
|
||||
else:
|
||||
list_size = len(values) / len(vec_arr)
|
||||
vec_arr = pa.FixedSizeListArray.from_arrays(values, list_size)
|
||||
return data.set_column(
|
||||
return vec_arr
|
||||
|
||||
|
||||
def _sanitize_jagged(data, fill_value, on_bad_vectors, vec_arr, vector_column_name):
|
||||
"""Sanitize jagged vectors."""
|
||||
if on_bad_vectors == "error":
|
||||
raise ValueError(
|
||||
f"Vector column {vector_column_name} has variable length vectors "
|
||||
"Set on_bad_vectors='drop' to remove them, or "
|
||||
"set on_bad_vectors='fill' and fill_value=<value> to replace them."
|
||||
)
|
||||
|
||||
lst_lengths = pc.list_value_length(vec_arr)
|
||||
ndims = pc.max(lst_lengths).as_py()
|
||||
correct_ndims = pc.equal(lst_lengths, ndims)
|
||||
|
||||
if on_bad_vectors == "fill":
|
||||
if fill_value is None:
|
||||
raise ValueError(
|
||||
"`fill_value` must not be None if `on_bad_vectors` is 'fill'"
|
||||
)
|
||||
fill_arr = pa.scalar([float(fill_value)] * ndims)
|
||||
vec_arr = pc.if_else(correct_ndims, vec_arr, fill_arr)
|
||||
data = data.set_column(
|
||||
data.column_names.index(vector_column_name), vector_column_name, vec_arr
|
||||
)
|
||||
elif on_bad_vectors == "drop":
|
||||
data = data.filter(correct_ndims)
|
||||
return data
|
||||
|
||||
|
||||
def _sanitize_nans(data, fill_value, on_bad_vectors, vec_arr, vector_column_name):
|
||||
"""Sanitize NaNs in vectors"""
|
||||
if on_bad_vectors == "error":
|
||||
raise ValueError(
|
||||
f"Vector column {vector_column_name} has NaNs. "
|
||||
"Set on_bad_vectors='drop' to remove them, or "
|
||||
"set on_bad_vectors='fill' and fill_value=<value> to replace them."
|
||||
)
|
||||
elif on_bad_vectors == "fill":
|
||||
if fill_value is None:
|
||||
raise ValueError(
|
||||
"`fill_value` must not be None if `on_bad_vectors` is 'fill'"
|
||||
)
|
||||
fill_value = float(fill_value)
|
||||
values = pc.if_else(pc.is_nan(vec_arr.values), fill_value, vec_arr.values)
|
||||
ndims = len(vec_arr[0])
|
||||
vec_arr = pa.FixedSizeListArray.from_arrays(values, ndims)
|
||||
data = data.set_column(
|
||||
data.column_names.index(vector_column_name), vector_column_name, vec_arr
|
||||
)
|
||||
elif on_bad_vectors == "drop":
|
||||
is_value_nan = pc.is_nan(vec_arr.values).to_numpy(zero_copy_only=False)
|
||||
is_full = np.any(~is_value_nan.reshape(-1, vec_arr.type.list_size), axis=1)
|
||||
data = data.filter(is_full)
|
||||
return data
|
||||
|
||||
@@ -11,9 +11,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from urllib.parse import ParseResult, urlparse
|
||||
import os
|
||||
from typing import Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from pyarrow import fs
|
||||
import pyarrow as pa
|
||||
import pyarrow.fs as pa_fs
|
||||
|
||||
|
||||
def get_uri_scheme(uri: str) -> str:
|
||||
@@ -61,3 +64,15 @@ def get_uri_location(uri: str) -> str:
|
||||
return parsed.path
|
||||
else:
|
||||
return parsed.netloc + parsed.path
|
||||
|
||||
|
||||
def fs_from_uri(uri: str) -> Tuple[pa_fs.FileSystem, str]:
|
||||
"""
|
||||
Get a PyArrow FileSystem from a URI, handling extra environment variables.
|
||||
"""
|
||||
if get_uri_scheme(uri) == "s3":
|
||||
fs = pa_fs.S3FileSystem(endpoint_override=os.environ.get("AWS_ENDPOINT"))
|
||||
path = get_uri_location(uri)
|
||||
return fs, path
|
||||
|
||||
return pa_fs.FileSystem.from_uri(uri)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[project]
|
||||
name = "lancedb"
|
||||
version = "0.1.8"
|
||||
dependencies = ["pylance>=0.4.20", "ratelimiter", "retry", "tqdm"]
|
||||
version = "0.1.14"
|
||||
dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
|
||||
description = "lancedb"
|
||||
authors = [
|
||||
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
|
||||
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
tests = [
|
||||
"pytest", "pytest-mock", "doctest"
|
||||
"pytest", "pytest-mock", "pytest-asyncio"
|
||||
]
|
||||
dev = [
|
||||
"ruff", "pre-commit", "black"
|
||||
@@ -52,3 +52,6 @@ requires = [
|
||||
"wheel",
|
||||
]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
|
||||
77
python/tests/test_context.py
Normal file
77
python/tests/test_context.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from lancedb.context import contextualize
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def raw_df() -> pd.DataFrame:
|
||||
return pd.DataFrame(
|
||||
{
|
||||
"token": [
|
||||
"The",
|
||||
"quick",
|
||||
"brown",
|
||||
"fox",
|
||||
"jumped",
|
||||
"over",
|
||||
"the",
|
||||
"lazy",
|
||||
"dog",
|
||||
"I",
|
||||
"love",
|
||||
"sandwiches",
|
||||
],
|
||||
"document_id": [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2],
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def test_contextualizer(raw_df: pd.DataFrame):
|
||||
result = (
|
||||
contextualize(raw_df)
|
||||
.window(6)
|
||||
.stride(3)
|
||||
.text_col("token")
|
||||
.groupby("document_id")
|
||||
.to_df()["token"]
|
||||
.to_list()
|
||||
)
|
||||
|
||||
assert result == [
|
||||
"The quick brown fox jumped over",
|
||||
"fox jumped over the lazy dog",
|
||||
"the lazy dog",
|
||||
"I love sandwiches",
|
||||
]
|
||||
|
||||
|
||||
def test_contextualizer_with_threshold(raw_df: pd.DataFrame):
|
||||
result = (
|
||||
contextualize(raw_df)
|
||||
.window(6)
|
||||
.stride(3)
|
||||
.text_col("token")
|
||||
.groupby("document_id")
|
||||
.min_window_size(4)
|
||||
.to_df()["token"]
|
||||
.to_list()
|
||||
)
|
||||
|
||||
assert result == [
|
||||
"The quick brown fox jumped over",
|
||||
"fox jumped over the lazy dog",
|
||||
]
|
||||
@@ -11,7 +11,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
|
||||
import lancedb
|
||||
@@ -74,6 +76,32 @@ def test_ingest_pd(tmp_path):
|
||||
assert db.open_table("test").name == db["test"].name
|
||||
|
||||
|
||||
def test_ingest_record_batch_iterator(tmp_path):
|
||||
def batch_reader():
|
||||
for i in range(5):
|
||||
yield pa.RecordBatch.from_arrays(
|
||||
[
|
||||
pa.array([[3.1, 4.1], [5.9, 26.5]]),
|
||||
pa.array(["foo", "bar"]),
|
||||
pa.array([10.0, 20.0]),
|
||||
],
|
||||
["vector", "item", "price"],
|
||||
)
|
||||
|
||||
db = lancedb.connect(tmp_path)
|
||||
tbl = db.create_table(
|
||||
"test",
|
||||
batch_reader(),
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32())),
|
||||
pa.field("item", pa.utf8()),
|
||||
pa.field("price", pa.float32()),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def test_create_mode(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
data = pd.DataFrame(
|
||||
@@ -120,3 +148,43 @@ def test_delete_table(tmp_path):
|
||||
|
||||
db.create_table("test", data=data)
|
||||
assert db.table_names() == ["test"]
|
||||
|
||||
|
||||
def test_empty_or_nonexistent_table(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
with pytest.raises(Exception):
|
||||
db.create_table("test_with_no_data")
|
||||
|
||||
with pytest.raises(Exception):
|
||||
db.open_table("does_not_exist")
|
||||
|
||||
schema = pa.schema([pa.field("a", pa.int32())])
|
||||
db.create_table("test", schema=schema)
|
||||
|
||||
|
||||
def test_replace_index(tmp_path):
|
||||
db = lancedb.connect(uri=tmp_path)
|
||||
table = db.create_table(
|
||||
"test",
|
||||
[
|
||||
{"vector": np.random.rand(128), "item": "foo", "price": float(i)}
|
||||
for i in range(1000)
|
||||
],
|
||||
)
|
||||
table.create_index(
|
||||
num_partitions=2,
|
||||
num_sub_vectors=4,
|
||||
)
|
||||
|
||||
with pytest.raises(Exception):
|
||||
table.create_index(
|
||||
num_partitions=2,
|
||||
num_sub_vectors=4,
|
||||
replace=False,
|
||||
)
|
||||
|
||||
table.create_index(
|
||||
num_partitions=2,
|
||||
num_sub_vectors=4,
|
||||
replace=True,
|
||||
)
|
||||
|
||||
27
python/tests/test_e2e_remote_db.py
Normal file
27
python/tests/test_e2e_remote_db.py
Normal file
@@ -0,0 +1,27 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from lancedb import LanceDBConnection
|
||||
|
||||
# TODO: setup integ test mark and script
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Need to set up a local server")
|
||||
def test_against_local_server():
|
||||
conn = LanceDBConnection("lancedb+http://localhost:10024")
|
||||
table = conn.open_table("sift1m_ivf1024_pq16")
|
||||
df = table.search(np.random.rand(128)).to_df()
|
||||
assert len(df) == 10
|
||||
@@ -14,6 +14,7 @@ import sys
|
||||
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
|
||||
from lancedb.embeddings import with_embeddings
|
||||
|
||||
|
||||
|
||||
@@ -13,13 +13,13 @@
|
||||
import os
|
||||
import random
|
||||
|
||||
import lancedb.fts
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import tantivy
|
||||
|
||||
import lancedb as ldb
|
||||
import lancedb.fts
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
import lancedb
|
||||
|
||||
165
python/tests/test_pydantic.py
Normal file
165
python/tests/test_pydantic.py
Normal file
@@ -0,0 +1,165 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import json
|
||||
import sys
|
||||
from typing import List, Optional
|
||||
|
||||
import pyarrow as pa
|
||||
import pydantic
|
||||
import pytest
|
||||
|
||||
from lancedb.pydantic import PYDANTIC_VERSION, pydantic_to_schema, vector
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.version_info < (3, 9),
|
||||
reason="using native type alias requires python3.9 or higher",
|
||||
)
|
||||
def test_pydantic_to_arrow():
|
||||
class StructModel(pydantic.BaseModel):
|
||||
a: str
|
||||
b: Optional[float]
|
||||
|
||||
class TestModel(pydantic.BaseModel):
|
||||
id: int
|
||||
s: str
|
||||
vec: list[float]
|
||||
li: List[int]
|
||||
opt: Optional[str] = None
|
||||
st: StructModel
|
||||
# d: dict
|
||||
|
||||
m = TestModel(
|
||||
id=1, s="hello", vec=[1.0, 2.0, 3.0], li=[2, 3, 4], st=StructModel(a="a", b=1.0)
|
||||
)
|
||||
|
||||
schema = pydantic_to_schema(TestModel)
|
||||
|
||||
expect_schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64(), False),
|
||||
pa.field("s", pa.utf8(), False),
|
||||
pa.field("vec", pa.list_(pa.float64()), False),
|
||||
pa.field("li", pa.list_(pa.int64()), False),
|
||||
pa.field("opt", pa.utf8(), True),
|
||||
pa.field(
|
||||
"st",
|
||||
pa.struct(
|
||||
[pa.field("a", pa.utf8(), False), pa.field("b", pa.float64(), True)]
|
||||
),
|
||||
False,
|
||||
),
|
||||
]
|
||||
)
|
||||
assert schema == expect_schema
|
||||
|
||||
|
||||
def test_pydantic_to_arrow_py38():
|
||||
class StructModel(pydantic.BaseModel):
|
||||
a: str
|
||||
b: Optional[float]
|
||||
|
||||
class TestModel(pydantic.BaseModel):
|
||||
id: int
|
||||
s: str
|
||||
vec: List[float]
|
||||
li: List[int]
|
||||
opt: Optional[str] = None
|
||||
st: StructModel
|
||||
# d: dict
|
||||
|
||||
m = TestModel(
|
||||
id=1, s="hello", vec=[1.0, 2.0, 3.0], li=[2, 3, 4], st=StructModel(a="a", b=1.0)
|
||||
)
|
||||
|
||||
schema = pydantic_to_schema(TestModel)
|
||||
|
||||
expect_schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64(), False),
|
||||
pa.field("s", pa.utf8(), False),
|
||||
pa.field("vec", pa.list_(pa.float64()), False),
|
||||
pa.field("li", pa.list_(pa.int64()), False),
|
||||
pa.field("opt", pa.utf8(), True),
|
||||
pa.field(
|
||||
"st",
|
||||
pa.struct(
|
||||
[pa.field("a", pa.utf8(), False), pa.field("b", pa.float64(), True)]
|
||||
),
|
||||
False,
|
||||
),
|
||||
]
|
||||
)
|
||||
assert schema == expect_schema
|
||||
|
||||
|
||||
def test_fixed_size_list_field():
|
||||
class TestModel(pydantic.BaseModel):
|
||||
vec: vector(16)
|
||||
li: List[int]
|
||||
|
||||
data = TestModel(vec=list(range(16)), li=[1, 2, 3])
|
||||
if PYDANTIC_VERSION >= (2,):
|
||||
assert json.loads(data.model_dump_json()) == {
|
||||
"vec": list(range(16)),
|
||||
"li": [1, 2, 3],
|
||||
}
|
||||
else:
|
||||
assert data.dict() == {
|
||||
"vec": list(range(16)),
|
||||
"li": [1, 2, 3],
|
||||
}
|
||||
|
||||
schema = pydantic_to_schema(TestModel)
|
||||
assert schema == pa.schema(
|
||||
[
|
||||
pa.field("vec", pa.list_(pa.float32(), 16), False),
|
||||
pa.field("li", pa.list_(pa.int64()), False),
|
||||
]
|
||||
)
|
||||
|
||||
if PYDANTIC_VERSION >= (2,):
|
||||
json_schema = TestModel.model_json_schema()
|
||||
else:
|
||||
json_schema = TestModel.schema()
|
||||
|
||||
assert json_schema == {
|
||||
"properties": {
|
||||
"vec": {
|
||||
"items": {"type": "number"},
|
||||
"maxItems": 16,
|
||||
"minItems": 16,
|
||||
"title": "Vec",
|
||||
"type": "array",
|
||||
},
|
||||
"li": {"items": {"type": "integer"}, "title": "Li", "type": "array"},
|
||||
},
|
||||
"required": ["vec", "li"],
|
||||
"title": "TestModel",
|
||||
"type": "object",
|
||||
}
|
||||
|
||||
|
||||
def test_fixed_size_list_validation():
|
||||
class TestModel(pydantic.BaseModel):
|
||||
vec: vector(8)
|
||||
|
||||
with pytest.raises(pydantic.ValidationError):
|
||||
TestModel(vec=range(9))
|
||||
|
||||
with pytest.raises(pydantic.ValidationError):
|
||||
TestModel(vec=range(7))
|
||||
|
||||
TestModel(vec=range(8))
|
||||
@@ -11,22 +11,42 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest.mock as mock
|
||||
|
||||
import lance
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas.testing as tm
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
from lancedb.query import LanceQueryBuilder
|
||||
|
||||
from lancedb.db import LanceDBConnection
|
||||
from lancedb.query import LanceQueryBuilder, Query
|
||||
from lancedb.table import LanceTable
|
||||
|
||||
|
||||
class MockTable:
|
||||
def __init__(self, tmp_path):
|
||||
self.uri = tmp_path
|
||||
self._conn = LanceDBConnection(self.uri)
|
||||
|
||||
def to_lance(self):
|
||||
return lance.dataset(self.uri)
|
||||
|
||||
def _execute_query(self, query):
|
||||
ds = self.to_lance()
|
||||
return ds.to_table(
|
||||
columns=query.columns,
|
||||
filter=query.filter,
|
||||
nearest={
|
||||
"column": query.vector_column,
|
||||
"q": query.vector,
|
||||
"k": query.k,
|
||||
"metric": query.metric,
|
||||
"nprobes": query.nprobes,
|
||||
"refine_factor": query.refine_factor,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def table(tmp_path) -> MockTable:
|
||||
@@ -45,24 +65,30 @@ def table(tmp_path) -> MockTable:
|
||||
|
||||
|
||||
def test_query_builder(table):
|
||||
df = LanceQueryBuilder(table, [0, 0]).limit(1).select(["id"]).to_df()
|
||||
df = LanceQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df()
|
||||
assert df["id"].values[0] == 1
|
||||
assert all(df["vector"].values[0] == [1, 2])
|
||||
|
||||
|
||||
def test_query_builder_with_filter(table):
|
||||
df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df()
|
||||
df = LanceQueryBuilder(table, [0, 0], "vector").where("id = 2").to_df()
|
||||
assert df["id"].values[0] == 2
|
||||
assert all(df["vector"].values[0] == [3, 4])
|
||||
|
||||
|
||||
def test_query_builder_with_metric(table):
|
||||
query = [4, 8]
|
||||
df_default = LanceQueryBuilder(table, query).to_df()
|
||||
df_l2 = LanceQueryBuilder(table, query).metric("L2").to_df()
|
||||
vector_column_name = "vector"
|
||||
df_default = LanceQueryBuilder(table, query, vector_column_name).to_df()
|
||||
df_l2 = LanceQueryBuilder(table, query, vector_column_name).metric("L2").to_df()
|
||||
tm.assert_frame_equal(df_default, df_l2)
|
||||
|
||||
df_cosine = LanceQueryBuilder(table, query).metric("cosine").limit(1).to_df()
|
||||
df_cosine = (
|
||||
LanceQueryBuilder(table, query, vector_column_name)
|
||||
.metric("cosine")
|
||||
.limit(1)
|
||||
.to_df()
|
||||
)
|
||||
assert df_cosine.score[0] == pytest.approx(
|
||||
cosine_distance(query, df_cosine.vector[0]),
|
||||
abs=1e-6,
|
||||
@@ -70,5 +96,33 @@ def test_query_builder_with_metric(table):
|
||||
assert 0 <= df_cosine.score[0] <= 1
|
||||
|
||||
|
||||
def test_query_builder_with_different_vector_column():
|
||||
table = mock.MagicMock(spec=LanceTable)
|
||||
query = [4, 8]
|
||||
vector_column_name = "foo_vector"
|
||||
builder = (
|
||||
LanceQueryBuilder(table, query, vector_column_name)
|
||||
.metric("cosine")
|
||||
.where("b < 10")
|
||||
.select(["b"])
|
||||
.limit(2)
|
||||
)
|
||||
ds = mock.Mock()
|
||||
table.to_lance.return_value = ds
|
||||
builder.to_arrow()
|
||||
table._execute_query.assert_called_once_with(
|
||||
Query(
|
||||
vector=query,
|
||||
filter="b < 10",
|
||||
k=2,
|
||||
metric="cosine",
|
||||
columns=["b"],
|
||||
nprobes=20,
|
||||
refine_factor=None,
|
||||
vector_column="foo_vector",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def cosine_distance(vec1, vec2):
|
||||
return 1 - np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user