mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 21:39:57 +00:00
Compare commits
71 Commits
python-v0.
...
add-async-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5f187d4db6 | ||
|
|
162880140e | ||
|
|
99d9ced6d5 | ||
|
|
96933d7df8 | ||
|
|
d369233b3d | ||
|
|
43a670ed4b | ||
|
|
cb9a00a28d | ||
|
|
72af977a73 | ||
|
|
7cecb71df0 | ||
|
|
285071e5c8 | ||
|
|
114866fbcf | ||
|
|
5387c0e243 | ||
|
|
53d1535de1 | ||
|
|
b2f88f0b29 | ||
|
|
f2e3989831 | ||
|
|
83ae52938a | ||
|
|
267aa83bf8 | ||
|
|
cc72050206 | ||
|
|
72543c8b9d | ||
|
|
97d6210c33 | ||
|
|
a3d0c27b0a | ||
|
|
b23d8abcdd | ||
|
|
e3ea5cf9b9 | ||
|
|
4f8b086175 | ||
|
|
72330fb759 | ||
|
|
e3b2c5f438 | ||
|
|
66a881b33a | ||
|
|
a7515d6ee2 | ||
|
|
587c0824af | ||
|
|
b38a4269d0 | ||
|
|
119d88b9db | ||
|
|
74f660d223 | ||
|
|
b2b0979b90 | ||
|
|
ee2a40b182 | ||
|
|
4ca0b15354 | ||
|
|
d8c217b47d | ||
|
|
b724b1a01f | ||
|
|
abd75e0ead | ||
|
|
0fd8a50bd7 | ||
|
|
9f228feb0e | ||
|
|
90e9c52d0a | ||
|
|
68974a4e06 | ||
|
|
4c9bab0d92 | ||
|
|
5117aecc38 | ||
|
|
729718cb09 | ||
|
|
b1c84e0bda | ||
|
|
cbbc07d0f5 | ||
|
|
21021f94ca | ||
|
|
0ed77fa990 | ||
|
|
4372c231cd | ||
|
|
fa9ca8f7a6 | ||
|
|
2a35d24ee6 | ||
|
|
dd9ce337e2 | ||
|
|
b9921d56cc | ||
|
|
0cfd9ed18e | ||
|
|
975398c3a8 | ||
|
|
08d5f93f34 | ||
|
|
91cab3b556 | ||
|
|
c61bfc3af8 | ||
|
|
4e8c7b0adf | ||
|
|
26f4a80e10 | ||
|
|
3604d20ad3 | ||
|
|
9708d829a9 | ||
|
|
059c9794b5 | ||
|
|
15ed7f75a0 | ||
|
|
96181ab421 | ||
|
|
f3fc339ef6 | ||
|
|
113cd6995b | ||
|
|
02535bdc88 | ||
|
|
facc7d61c0 | ||
|
|
f947259f16 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.11.1-beta.1"
|
||||
current_version = "0.13.1-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
@@ -87,11 +87,26 @@ glob = "node/package.json"
|
||||
replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
|
||||
search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
|
||||
|
||||
[[tool.bumpversion.files]]
|
||||
glob = "node/package.json"
|
||||
replace = "\"@lancedb/vectordb-linux-arm64-musl\": \"{new_version}\""
|
||||
search = "\"@lancedb/vectordb-linux-arm64-musl\": \"{current_version}\""
|
||||
|
||||
[[tool.bumpversion.files]]
|
||||
glob = "node/package.json"
|
||||
replace = "\"@lancedb/vectordb-linux-x64-musl\": \"{new_version}\""
|
||||
search = "\"@lancedb/vectordb-linux-x64-musl\": \"{current_version}\""
|
||||
|
||||
[[tool.bumpversion.files]]
|
||||
glob = "node/package.json"
|
||||
replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
|
||||
search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
|
||||
|
||||
[[tool.bumpversion.files]]
|
||||
glob = "node/package.json"
|
||||
replace = "\"@lancedb/vectordb-win32-arm64-msvc\": \"{new_version}\""
|
||||
search = "\"@lancedb/vectordb-win32-arm64-msvc\": \"{current_version}\""
|
||||
|
||||
# Cargo files
|
||||
# ------------
|
||||
[[tool.bumpversion.files]]
|
||||
|
||||
@@ -31,6 +31,9 @@ rustflags = [
|
||||
[target.x86_64-unknown-linux-gnu]
|
||||
rustflags = ["-C", "target-cpu=haswell", "-C", "target-feature=+avx2,+fma,+f16c"]
|
||||
|
||||
[target.x86_64-unknown-linux-musl]
|
||||
rustflags = ["-C", "target-cpu=haswell", "-C", "target-feature=-crt-static,+avx2,+fma,+f16c"]
|
||||
|
||||
[target.aarch64-apple-darwin]
|
||||
rustflags = ["-C", "target-cpu=apple-m1", "-C", "target-feature=+neon,+fp16,+fhm,+dotprod"]
|
||||
|
||||
@@ -38,3 +41,7 @@ rustflags = ["-C", "target-cpu=apple-m1", "-C", "target-feature=+neon,+fp16,+fhm
|
||||
# not found errors on systems that are missing it.
|
||||
[target.x86_64-pc-windows-msvc]
|
||||
rustflags = ["-Ctarget-feature=+crt-static"]
|
||||
|
||||
# Experimental target for Arm64 Windows
|
||||
[target.aarch64-pc-windows-msvc]
|
||||
rustflags = ["-Ctarget-feature=+crt-static"]
|
||||
6
.github/workflows/docs.yml
vendored
6
.github/workflows/docs.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
- name: Install dependecies needed for ubuntu
|
||||
run: |
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
rustup update && rustup default
|
||||
rustup update && rustup default
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
@@ -41,8 +41,8 @@ jobs:
|
||||
- name: Build Python
|
||||
working-directory: python
|
||||
run: |
|
||||
python -m pip install -e .
|
||||
python -m pip install -r ../docs/requirements.txt
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
- name: Set up node
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
|
||||
2
.github/workflows/docs_test.yml
vendored
2
.github/workflows/docs_test.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
||||
- name: Build Python
|
||||
working-directory: docs/test
|
||||
run:
|
||||
python -m pip install -r requirements.txt
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r requirements.txt
|
||||
- name: Create test files
|
||||
run: |
|
||||
cd docs/test
|
||||
|
||||
15
.github/workflows/nodejs.yml
vendored
15
.github/workflows/nodejs.yml
vendored
@@ -53,6 +53,9 @@ jobs:
|
||||
cargo clippy --all --all-features -- -D warnings
|
||||
npm ci
|
||||
npm run lint-ci
|
||||
- name: Lint examples
|
||||
working-directory: nodejs/examples
|
||||
run: npm ci && npm run lint-ci
|
||||
linux:
|
||||
name: Linux (NodeJS ${{ matrix.node-version }})
|
||||
timeout-minutes: 30
|
||||
@@ -91,6 +94,18 @@ jobs:
|
||||
env:
|
||||
S3_TEST: "1"
|
||||
run: npm run test
|
||||
- name: Setup examples
|
||||
working-directory: nodejs/examples
|
||||
run: npm ci
|
||||
- name: Test examples
|
||||
working-directory: ./
|
||||
env:
|
||||
OPENAI_API_KEY: test
|
||||
OPENAI_BASE_URL: http://0.0.0.0:8000
|
||||
run: |
|
||||
python ci/mock_openai.py &
|
||||
cd nodejs/examples
|
||||
npm test
|
||||
macos:
|
||||
timeout-minutes: 30
|
||||
runs-on: "macos-14"
|
||||
|
||||
320
.github/workflows/npm-publish.yml
vendored
320
.github/workflows/npm-publish.yml
vendored
@@ -101,7 +101,7 @@ jobs:
|
||||
path: |
|
||||
nodejs/dist/*.node
|
||||
|
||||
node-linux:
|
||||
node-linux-gnu:
|
||||
name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
# Only runs on tags that matches the make-release action
|
||||
@@ -137,11 +137,63 @@ jobs:
|
||||
- name: Upload Linux Artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: node-native-linux-${{ matrix.config.arch }}
|
||||
name: node-native-linux-${{ matrix.config.arch }}-gnu
|
||||
path: |
|
||||
node/dist/lancedb-vectordb-linux*.tgz
|
||||
|
||||
nodejs-linux:
|
||||
node-linux-musl:
|
||||
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
|
||||
runs-on: ubuntu-latest
|
||||
container: alpine:edge
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
config:
|
||||
- arch: x86_64
|
||||
- arch: aarch64
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install common dependencies
|
||||
run: |
|
||||
apk add protobuf-dev curl clang mold grep npm bash
|
||||
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
|
||||
echo "source $HOME/.cargo/env" >> saved_env
|
||||
echo "export CC=clang" >> saved_env
|
||||
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=-crt-static,+avx2,+fma,+f16c -Clinker=clang -Clink-arg=-fuse-ld=mold'" >> saved_env
|
||||
- name: Configure aarch64 build
|
||||
if: ${{ matrix.config.arch == 'aarch64' }}
|
||||
run: |
|
||||
source "$HOME/.cargo/env"
|
||||
rustup target add aarch64-unknown-linux-musl --toolchain 1.80.0
|
||||
crt=$(realpath $(dirname $(rustup which rustc))/../lib/rustlib/aarch64-unknown-linux-musl/lib/self-contained)
|
||||
sysroot_lib=/usr/aarch64-unknown-linux-musl/usr/lib
|
||||
apk_url=https://dl-cdn.alpinelinux.org/alpine/latest-stable/main/aarch64/
|
||||
curl -sSf $apk_url > apk_list
|
||||
for pkg in gcc libgcc musl; do curl -sSf $apk_url$(cat apk_list | grep -oP '(?<=")'$pkg'-\d.*?(?=")') | tar zxf -; done
|
||||
mkdir -p $sysroot_lib
|
||||
echo 'GROUP ( libgcc_s.so.1 -lgcc )' > $sysroot_lib/libgcc_s.so
|
||||
cp usr/lib/libgcc_s.so.1 $sysroot_lib
|
||||
cp usr/lib/gcc/aarch64-alpine-linux-musl/*/libgcc.a $sysroot_lib
|
||||
cp lib/ld-musl-aarch64.so.1 $sysroot_lib/libc.so
|
||||
echo '!<arch>' > $sysroot_lib/libdl.a
|
||||
(cd $crt && cp crti.o crtbeginS.o crtendS.o crtn.o -t $sysroot_lib)
|
||||
echo "export CARGO_BUILD_TARGET=aarch64-unknown-linux-musl" >> saved_env
|
||||
echo "export RUSTFLAGS='-Ctarget-cpu=apple-m1 -Ctarget-feature=-crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=--target=aarch64-unknown-linux-musl -Clink-arg=--sysroot=/usr/aarch64-unknown-linux-musl -Clink-arg=-lc'" >> saved_env
|
||||
- name: Build Linux Artifacts
|
||||
run: |
|
||||
source ./saved_env
|
||||
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }}
|
||||
- name: Upload Linux Artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: node-native-linux-${{ matrix.config.arch }}-musl
|
||||
path: |
|
||||
node/dist/lancedb-vectordb-linux*.tgz
|
||||
|
||||
nodejs-linux-gnu:
|
||||
name: lancedb (${{ matrix.config.arch}}-unknown-linux-gnu
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
# Only runs on tags that matches the make-release action
|
||||
@@ -178,7 +230,7 @@ jobs:
|
||||
- name: Upload Linux Artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: nodejs-native-linux-${{ matrix.config.arch }}
|
||||
name: nodejs-native-linux-${{ matrix.config.arch }}-gnu
|
||||
path: |
|
||||
nodejs/dist/*.node
|
||||
# The generic files are the same in all distros so we just pick
|
||||
@@ -192,6 +244,62 @@ jobs:
|
||||
nodejs/dist/*
|
||||
!nodejs/dist/*.node
|
||||
|
||||
nodejs-linux-musl:
|
||||
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
|
||||
runs-on: ubuntu-latest
|
||||
container: alpine:edge
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
config:
|
||||
- arch: x86_64
|
||||
- arch: aarch64
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install common dependencies
|
||||
run: |
|
||||
apk add protobuf-dev curl clang mold grep npm bash openssl-dev openssl-libs-static
|
||||
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
|
||||
echo "source $HOME/.cargo/env" >> saved_env
|
||||
echo "export CC=clang" >> saved_env
|
||||
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=-crt-static,+avx2,+fma,+f16c -Clinker=clang -Clink-arg=-fuse-ld=mold'" >> saved_env
|
||||
echo "export X86_64_UNKNOWN_LINUX_MUSL_OPENSSL_INCLUDE_DIR=/usr/include" >> saved_env
|
||||
echo "export X86_64_UNKNOWN_LINUX_MUSL_OPENSSL_LIB_DIR=/usr/lib" >> saved_env
|
||||
- name: Configure aarch64 build
|
||||
if: ${{ matrix.config.arch == 'aarch64' }}
|
||||
run: |
|
||||
source "$HOME/.cargo/env"
|
||||
rustup target add aarch64-unknown-linux-musl --toolchain 1.80.0
|
||||
crt=$(realpath $(dirname $(rustup which rustc))/../lib/rustlib/aarch64-unknown-linux-musl/lib/self-contained)
|
||||
sysroot_lib=/usr/aarch64-unknown-linux-musl/usr/lib
|
||||
apk_url=https://dl-cdn.alpinelinux.org/alpine/latest-stable/main/aarch64/
|
||||
curl -sSf $apk_url > apk_list
|
||||
for pkg in gcc libgcc musl openssl-dev openssl-libs-static; do curl -sSf $apk_url$(cat apk_list | grep -oP '(?<=")'$pkg'-\d.*?(?=")') | tar zxf -; done
|
||||
mkdir -p $sysroot_lib
|
||||
echo 'GROUP ( libgcc_s.so.1 -lgcc )' > $sysroot_lib/libgcc_s.so
|
||||
cp usr/lib/libgcc_s.so.1 $sysroot_lib
|
||||
cp usr/lib/gcc/aarch64-alpine-linux-musl/*/libgcc.a $sysroot_lib
|
||||
cp lib/ld-musl-aarch64.so.1 $sysroot_lib/libc.so
|
||||
echo '!<arch>' > $sysroot_lib/libdl.a
|
||||
(cd $crt && cp crti.o crtbeginS.o crtendS.o crtn.o -t $sysroot_lib)
|
||||
echo "export CARGO_BUILD_TARGET=aarch64-unknown-linux-musl" >> saved_env
|
||||
echo "export RUSTFLAGS='-Ctarget-feature=-crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=--target=aarch64-unknown-linux-musl -Clink-arg=--sysroot=/usr/aarch64-unknown-linux-musl -Clink-arg=-lc'" >> saved_env
|
||||
echo "export AARCH64_UNKNOWN_LINUX_MUSL_OPENSSL_INCLUDE_DIR=$(realpath usr/include)" >> saved_env
|
||||
echo "export AARCH64_UNKNOWN_LINUX_MUSL_OPENSSL_LIB_DIR=$(realpath usr/lib)" >> saved_env
|
||||
- name: Build Linux Artifacts
|
||||
run: |
|
||||
source ./saved_env
|
||||
bash ci/manylinux_node/build_lancedb.sh ${{ matrix.config.arch }}
|
||||
- name: Upload Linux Artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: nodejs-native-linux-${{ matrix.config.arch }}-musl
|
||||
path: |
|
||||
nodejs/dist/*.node
|
||||
|
||||
node-windows:
|
||||
name: vectordb ${{ matrix.target }}
|
||||
runs-on: windows-2022
|
||||
@@ -226,6 +334,110 @@ jobs:
|
||||
path: |
|
||||
node/dist/lancedb-vectordb-win32*.tgz
|
||||
|
||||
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
|
||||
# node-windows-arm64:
|
||||
# name: vectordb win32-arm64-msvc
|
||||
# runs-on: windows-4x-arm
|
||||
# if: startsWith(github.ref, 'refs/tags/v')
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
# - name: Install Git
|
||||
# run: |
|
||||
# Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
|
||||
# Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
|
||||
# shell: powershell
|
||||
# - name: Add Git to PATH
|
||||
# run: |
|
||||
# Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
|
||||
# $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
|
||||
# shell: powershell
|
||||
# - name: Configure Git symlinks
|
||||
# run: git config --global core.symlinks true
|
||||
# - uses: actions/checkout@v4
|
||||
# - uses: actions/setup-python@v5
|
||||
# with:
|
||||
# python-version: "3.13"
|
||||
# - name: Install Visual Studio Build Tools
|
||||
# run: |
|
||||
# Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
|
||||
# Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
|
||||
# "--installPath", "C:\BuildTools", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
|
||||
# shell: powershell
|
||||
# - name: Add Visual Studio Build Tools to PATH
|
||||
# run: |
|
||||
# $vsPath = "C:\BuildTools\VC\Tools\MSVC"
|
||||
# $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
|
||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
|
||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
|
||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
|
||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
|
||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
|
||||
|
||||
# # Add MSVC runtime libraries to LIB
|
||||
# $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
|
||||
# "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
|
||||
# "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
|
||||
# Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
|
||||
|
||||
# # Add INCLUDE paths
|
||||
# $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
|
||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
|
||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
|
||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
|
||||
# Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
|
||||
# shell: powershell
|
||||
# - name: Install Rust
|
||||
# run: |
|
||||
# Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
||||
# .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
|
||||
# shell: powershell
|
||||
# - name: Add Rust to PATH
|
||||
# run: |
|
||||
# Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
|
||||
# shell: powershell
|
||||
|
||||
# - uses: Swatinem/rust-cache@v2
|
||||
# with:
|
||||
# workspaces: rust
|
||||
# - name: Install 7-Zip ARM
|
||||
# run: |
|
||||
# New-Item -Path 'C:\7zip' -ItemType Directory
|
||||
# Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
|
||||
# Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
|
||||
# shell: powershell
|
||||
# - name: Add 7-Zip to PATH
|
||||
# run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
|
||||
# shell: powershell
|
||||
# - name: Install Protoc v21.12
|
||||
# working-directory: C:\
|
||||
# run: |
|
||||
# if (Test-Path 'C:\protoc') {
|
||||
# Write-Host "Protoc directory exists, skipping installation"
|
||||
# return
|
||||
# }
|
||||
# New-Item -Path 'C:\protoc' -ItemType Directory
|
||||
# Set-Location C:\protoc
|
||||
# Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
||||
# & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
|
||||
# shell: powershell
|
||||
# - name: Add Protoc to PATH
|
||||
# run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
||||
# shell: powershell
|
||||
# - name: Build Windows native node modules
|
||||
# run: .\ci\build_windows_artifacts.ps1 aarch64-pc-windows-msvc
|
||||
# - name: Upload Windows ARM64 Artifacts
|
||||
# uses: actions/upload-artifact@v4
|
||||
# with:
|
||||
# name: node-native-windows-arm64
|
||||
# path: |
|
||||
# node/dist/*.node
|
||||
|
||||
nodejs-windows:
|
||||
name: lancedb ${{ matrix.target }}
|
||||
runs-on: windows-2022
|
||||
@@ -260,9 +472,103 @@ jobs:
|
||||
path: |
|
||||
nodejs/dist/*.node
|
||||
|
||||
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
|
||||
# nodejs-windows-arm64:
|
||||
# name: lancedb win32-arm64-msvc
|
||||
# runs-on: windows-4x-arm
|
||||
# if: startsWith(github.ref, 'refs/tags/v')
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
# - name: Install Git
|
||||
# run: |
|
||||
# Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
|
||||
# Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
|
||||
# shell: powershell
|
||||
# - name: Add Git to PATH
|
||||
# run: |
|
||||
# Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
|
||||
# $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
|
||||
# shell: powershell
|
||||
# - name: Configure Git symlinks
|
||||
# run: git config --global core.symlinks true
|
||||
# - uses: actions/checkout@v4
|
||||
# - uses: actions/setup-python@v5
|
||||
# with:
|
||||
# python-version: "3.13"
|
||||
# - name: Install Visual Studio Build Tools
|
||||
# run: |
|
||||
# Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
|
||||
# Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
|
||||
# "--installPath", "C:\BuildTools", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
|
||||
# "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
|
||||
# shell: powershell
|
||||
# - name: Add Visual Studio Build Tools to PATH
|
||||
# run: |
|
||||
# $vsPath = "C:\BuildTools\VC\Tools\MSVC"
|
||||
# $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
|
||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
|
||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
|
||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
|
||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
|
||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
|
||||
|
||||
# $env:LIB = ""
|
||||
# Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
|
||||
# shell: powershell
|
||||
# - name: Install Rust
|
||||
# run: |
|
||||
# Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
||||
# .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
|
||||
# shell: powershell
|
||||
# - name: Add Rust to PATH
|
||||
# run: |
|
||||
# Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
|
||||
# shell: powershell
|
||||
|
||||
# - uses: Swatinem/rust-cache@v2
|
||||
# with:
|
||||
# workspaces: rust
|
||||
# - name: Install 7-Zip ARM
|
||||
# run: |
|
||||
# New-Item -Path 'C:\7zip' -ItemType Directory
|
||||
# Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
|
||||
# Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
|
||||
# shell: powershell
|
||||
# - name: Add 7-Zip to PATH
|
||||
# run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
|
||||
# shell: powershell
|
||||
# - name: Install Protoc v21.12
|
||||
# working-directory: C:\
|
||||
# run: |
|
||||
# if (Test-Path 'C:\protoc') {
|
||||
# Write-Host "Protoc directory exists, skipping installation"
|
||||
# return
|
||||
# }
|
||||
# New-Item -Path 'C:\protoc' -ItemType Directory
|
||||
# Set-Location C:\protoc
|
||||
# Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
||||
# & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
|
||||
# shell: powershell
|
||||
# - name: Add Protoc to PATH
|
||||
# run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
||||
# shell: powershell
|
||||
# - name: Build Windows native node modules
|
||||
# run: .\ci\build_windows_artifacts_nodejs.ps1 aarch64-pc-windows-msvc
|
||||
# - name: Upload Windows ARM64 Artifacts
|
||||
# uses: actions/upload-artifact@v4
|
||||
# with:
|
||||
# name: nodejs-native-windows-arm64
|
||||
# path: |
|
||||
# nodejs/dist/*.node
|
||||
|
||||
release:
|
||||
name: vectordb NPM Publish
|
||||
needs: [node, node-macos, node-linux, node-windows]
|
||||
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows]
|
||||
runs-on: ubuntu-latest
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
@@ -280,7 +586,7 @@ jobs:
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
|
||||
run: |
|
||||
# Tag beta as "preview" instead of default "latest". See lancedb
|
||||
# Tag beta as "preview" instead of default "latest". See lancedb
|
||||
# npm publish step for more info.
|
||||
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
|
||||
PUBLISH_ARGS="--tag preview"
|
||||
@@ -302,7 +608,7 @@ jobs:
|
||||
|
||||
release-nodejs:
|
||||
name: lancedb NPM Publish
|
||||
needs: [nodejs-macos, nodejs-linux, nodejs-windows]
|
||||
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows]
|
||||
runs-on: ubuntu-latest
|
||||
# Only runs on tags that matches the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
|
||||
2
.github/workflows/python.yml
vendored
2
.github/workflows/python.yml
vendored
@@ -138,7 +138,7 @@ jobs:
|
||||
run: rm -rf target/wheels
|
||||
windows:
|
||||
name: "Windows: ${{ matrix.config.name }}"
|
||||
timeout-minutes: 30
|
||||
timeout-minutes: 60
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
|
||||
169
.github/workflows/rust.yml
vendored
169
.github/workflows/rust.yml
vendored
@@ -35,21 +35,22 @@ jobs:
|
||||
CC: clang-18
|
||||
CXX: clang++-18
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Run format
|
||||
run: cargo fmt --all -- --check
|
||||
- name: Run clippy
|
||||
run: cargo clippy --workspace --tests --all-features -- -D warnings
|
||||
- name: Run format
|
||||
run: cargo fmt --all -- --check
|
||||
- name: Run clippy
|
||||
run: cargo clippy --workspace --tests --all-features -- -D warnings
|
||||
|
||||
linux:
|
||||
timeout-minutes: 30
|
||||
# To build all features, we need more disk space than is available
|
||||
@@ -65,37 +66,38 @@ jobs:
|
||||
CC: clang-18
|
||||
CXX: clang++-18
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Make Swap
|
||||
run: |
|
||||
sudo fallocate -l 16G /swapfile
|
||||
sudo chmod 600 /swapfile
|
||||
sudo mkswap /swapfile
|
||||
sudo swapon /swapfile
|
||||
- name: Start S3 integration test environment
|
||||
working-directory: .
|
||||
run: docker compose up --detach --wait
|
||||
- name: Build
|
||||
run: cargo build --all-features
|
||||
- name: Run tests
|
||||
run: cargo test --all-features
|
||||
- name: Run examples
|
||||
run: cargo run --example simple
|
||||
- name: Make Swap
|
||||
run: |
|
||||
sudo fallocate -l 16G /swapfile
|
||||
sudo chmod 600 /swapfile
|
||||
sudo mkswap /swapfile
|
||||
sudo swapon /swapfile
|
||||
- name: Start S3 integration test environment
|
||||
working-directory: .
|
||||
run: docker compose up --detach --wait
|
||||
- name: Build
|
||||
run: cargo build --all-features
|
||||
- name: Run tests
|
||||
run: cargo test --all-features
|
||||
- name: Run examples
|
||||
run: cargo run --example simple
|
||||
|
||||
macos:
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
mac-runner: [ "macos-13", "macos-14" ]
|
||||
mac-runner: ["macos-13", "macos-14"]
|
||||
runs-on: "${{ matrix.mac-runner }}"
|
||||
defaults:
|
||||
run:
|
||||
@@ -104,8 +106,8 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: CPU features
|
||||
run: sysctl -a | grep cpu
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
@@ -118,6 +120,7 @@ jobs:
|
||||
- name: Run tests
|
||||
# Run with everything except the integration tests.
|
||||
run: cargo test --features remote,fp16kernels
|
||||
|
||||
windows:
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
@@ -139,3 +142,99 @@ jobs:
|
||||
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
|
||||
cargo build
|
||||
cargo test
|
||||
|
||||
windows-arm64:
|
||||
runs-on: windows-4x-arm
|
||||
steps:
|
||||
- name: Install Git
|
||||
run: |
|
||||
Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
|
||||
Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
|
||||
shell: powershell
|
||||
- name: Add Git to PATH
|
||||
run: |
|
||||
Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
|
||||
$env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
|
||||
shell: powershell
|
||||
- name: Configure Git symlinks
|
||||
run: git config --global core.symlinks true
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
- name: Install Visual Studio Build Tools
|
||||
run: |
|
||||
Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
|
||||
Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
|
||||
"--installPath", "C:\BuildTools", `
|
||||
"--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
|
||||
"--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
|
||||
"--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
|
||||
"--add", "Microsoft.VisualStudio.Component.VC.ATL", `
|
||||
"--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
|
||||
"--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
|
||||
shell: powershell
|
||||
- name: Add Visual Studio Build Tools to PATH
|
||||
run: |
|
||||
$vsPath = "C:\BuildTools\VC\Tools\MSVC"
|
||||
$latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
|
||||
Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
|
||||
Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
|
||||
Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
|
||||
Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
|
||||
Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
|
||||
|
||||
# Add MSVC runtime libraries to LIB
|
||||
$env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
|
||||
"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
|
||||
"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
|
||||
Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
|
||||
|
||||
# Add INCLUDE paths
|
||||
$env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
|
||||
"C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
|
||||
"C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
|
||||
"C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
|
||||
Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
|
||||
shell: powershell
|
||||
- name: Install Rust
|
||||
run: |
|
||||
Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
||||
.\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
|
||||
shell: powershell
|
||||
- name: Add Rust to PATH
|
||||
run: |
|
||||
Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
|
||||
shell: powershell
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install 7-Zip ARM
|
||||
run: |
|
||||
New-Item -Path 'C:\7zip' -ItemType Directory
|
||||
Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
|
||||
Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
|
||||
shell: powershell
|
||||
- name: Add 7-Zip to PATH
|
||||
run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
|
||||
shell: powershell
|
||||
- name: Install Protoc v21.12
|
||||
working-directory: C:\
|
||||
run: |
|
||||
if (Test-Path 'C:\protoc') {
|
||||
Write-Host "Protoc directory exists, skipping installation"
|
||||
return
|
||||
}
|
||||
New-Item -Path 'C:\protoc' -ItemType Directory
|
||||
Set-Location C:\protoc
|
||||
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
||||
& 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
|
||||
shell: powershell
|
||||
- name: Add Protoc to PATH
|
||||
run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
||||
shell: powershell
|
||||
- name: Run tests
|
||||
run: |
|
||||
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
|
||||
cargo build --target aarch64-pc-windows-msvc
|
||||
cargo test --target aarch64-pc-windows-msvc
|
||||
|
||||
18
Cargo.toml
18
Cargo.toml
@@ -18,16 +18,18 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||
categories = ["database-implementations"]
|
||||
rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again.
|
||||
rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again.
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.19.1", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.19.1" }
|
||||
lance-linalg = { "version" = "=0.19.1" }
|
||||
lance-table = { "version" = "=0.19.1" }
|
||||
lance-testing = { "version" = "=0.19.1" }
|
||||
lance-datafusion = { "version" = "=0.19.1" }
|
||||
lance-encoding = { "version" = "=0.19.1" }
|
||||
lance = { "version" = "=0.20.0", "features" = [
|
||||
"dynamodb",
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "52.2", optional = false }
|
||||
arrow-array = "52.2"
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
[](https://blog.lancedb.com/)
|
||||
[](https://discord.gg/zMM32dvNtd)
|
||||
[](https://twitter.com/lancedb)
|
||||
[](https://gurubase.io/g/lancedb)
|
||||
|
||||
</p>
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# Targets supported:
|
||||
# - x86_64-pc-windows-msvc
|
||||
# - i686-pc-windows-msvc
|
||||
# - aarch64-pc-windows-msvc
|
||||
|
||||
function Prebuild-Rust {
|
||||
param (
|
||||
@@ -31,7 +32,7 @@ function Build-NodeBinaries {
|
||||
|
||||
$targets = $args[0]
|
||||
if (-not $targets) {
|
||||
$targets = "x86_64-pc-windows-msvc"
|
||||
$targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
|
||||
}
|
||||
|
||||
Write-Host "Building artifacts for targets: $targets"
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# Targets supported:
|
||||
# - x86_64-pc-windows-msvc
|
||||
# - i686-pc-windows-msvc
|
||||
# - aarch64-pc-windows-msvc
|
||||
|
||||
function Prebuild-Rust {
|
||||
param (
|
||||
@@ -31,7 +32,7 @@ function Build-NodeBinaries {
|
||||
|
||||
$targets = $args[0]
|
||||
if (-not $targets) {
|
||||
$targets = "x86_64-pc-windows-msvc"
|
||||
$targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
|
||||
}
|
||||
|
||||
Write-Host "Building artifacts for targets: $targets"
|
||||
|
||||
@@ -11,7 +11,8 @@ fi
|
||||
export OPENSSL_STATIC=1
|
||||
export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl
|
||||
|
||||
source $HOME/.bashrc
|
||||
#Alpine doesn't have .bashrc
|
||||
FILE=$HOME/.bashrc && test -f $FILE && source $FILE
|
||||
|
||||
cd nodejs
|
||||
npm ci
|
||||
|
||||
@@ -5,13 +5,14 @@ ARCH=${1:-x86_64}
|
||||
|
||||
if [ "$ARCH" = "x86_64" ]; then
|
||||
export OPENSSL_LIB_DIR=/usr/local/lib64/
|
||||
else
|
||||
else
|
||||
export OPENSSL_LIB_DIR=/usr/local/lib/
|
||||
fi
|
||||
export OPENSSL_STATIC=1
|
||||
export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl
|
||||
|
||||
source $HOME/.bashrc
|
||||
#Alpine doesn't have .bashrc
|
||||
FILE=$HOME/.bashrc && test -f $FILE && source $FILE
|
||||
|
||||
cd node
|
||||
npm ci
|
||||
|
||||
57
ci/mock_openai.py
Normal file
57
ci/mock_openai.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
"""A zero-dependency mock OpenAI embeddings API endpoint for testing purposes."""
|
||||
import argparse
|
||||
import json
|
||||
import http.server
|
||||
|
||||
|
||||
class MockOpenAIRequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
def do_POST(self):
|
||||
content_length = int(self.headers["Content-Length"])
|
||||
post_data = self.rfile.read(content_length)
|
||||
post_data = json.loads(post_data.decode("utf-8"))
|
||||
# See: https://platform.openai.com/docs/api-reference/embeddings/create
|
||||
|
||||
if isinstance(post_data["input"], str):
|
||||
num_inputs = 1
|
||||
else:
|
||||
num_inputs = len(post_data["input"])
|
||||
|
||||
model = post_data.get("model", "text-embedding-ada-002")
|
||||
|
||||
data = []
|
||||
for i in range(num_inputs):
|
||||
data.append({
|
||||
"object": "embedding",
|
||||
"embedding": [0.1] * 1536,
|
||||
"index": i,
|
||||
})
|
||||
|
||||
response = {
|
||||
"object": "list",
|
||||
"data": data,
|
||||
"model": model,
|
||||
"usage": {
|
||||
"prompt_tokens": 0,
|
||||
"total_tokens": 0,
|
||||
}
|
||||
}
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header("Content-type", "application/json")
|
||||
self.end_headers()
|
||||
self.wfile.write(json.dumps(response).encode("utf-8"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Mock OpenAI embeddings API endpoint")
|
||||
parser.add_argument("--port", type=int, default=8000, help="Port to listen on")
|
||||
args = parser.parse_args()
|
||||
port = args.port
|
||||
|
||||
print(f"server started on port {port}. Press Ctrl-C to stop.")
|
||||
print(f"To use, set OPENAI_BASE_URL=http://localhost:{port} in your environment.")
|
||||
|
||||
with http.server.HTTPServer(("0.0.0.0", port), MockOpenAIRequestHandler) as server:
|
||||
server.serve_forever()
|
||||
@@ -138,6 +138,7 @@ nav:
|
||||
- Jina Reranker: reranking/jina.md
|
||||
- OpenAI Reranker: reranking/openai.md
|
||||
- AnswerDotAi Rerankers: reranking/answerdotai.md
|
||||
- Voyage AI Rerankers: reranking/voyageai.md
|
||||
- Building Custom Rerankers: reranking/custom_reranker.md
|
||||
- Example: notebooks/lancedb_reranking.ipynb
|
||||
- Filtering: sql.md
|
||||
@@ -165,6 +166,7 @@ nav:
|
||||
- Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
|
||||
- AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
|
||||
- IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
|
||||
- Voyage AI Embeddings: embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
|
||||
- Multimodal Embedding Functions:
|
||||
- OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
|
||||
- Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
|
||||
|
||||
21
docs/package-lock.json
generated
21
docs/package-lock.json
generated
@@ -19,7 +19,7 @@
|
||||
},
|
||||
"../node": {
|
||||
"name": "vectordb",
|
||||
"version": "0.4.6",
|
||||
"version": "0.12.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -31,9 +31,7 @@
|
||||
"win32"
|
||||
],
|
||||
"dependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
"@neon-rs/load": "^0.0.74",
|
||||
"apache-arrow": "^14.0.2",
|
||||
"axios": "^1.4.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -46,6 +44,7 @@
|
||||
"@types/temp": "^0.9.1",
|
||||
"@types/uuid": "^9.0.3",
|
||||
"@typescript-eslint/eslint-plugin": "^5.59.1",
|
||||
"apache-arrow-old": "npm:apache-arrow@13.0.0",
|
||||
"cargo-cp-artifact": "^0.1",
|
||||
"chai": "^4.3.7",
|
||||
"chai-as-promised": "^7.1.1",
|
||||
@@ -62,15 +61,19 @@
|
||||
"ts-node-dev": "^2.0.0",
|
||||
"typedoc": "^0.24.7",
|
||||
"typedoc-plugin-markdown": "^3.15.3",
|
||||
"typescript": "*",
|
||||
"typescript": "^5.1.0",
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.4.6",
|
||||
"@lancedb/vectordb-darwin-x64": "0.4.6",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.6",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.4.6",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.4.6"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.12.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.12.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.12.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.12.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
"apache-arrow": "^14.0.2"
|
||||
}
|
||||
},
|
||||
"../node/node_modules/apache-arrow": {
|
||||
|
||||
@@ -45,9 +45,9 @@ Lance supports `IVF_PQ` index type by default.
|
||||
Creating indexes is done via the [lancedb.Table.createIndex](../js/classes/Table.md/#createIndex) method.
|
||||
|
||||
```typescript
|
||||
--8<--- "nodejs/examples/ann_indexes.ts:import"
|
||||
--8<--- "nodejs/examples/ann_indexes.test.ts:import"
|
||||
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:ingest"
|
||||
--8<-- "nodejs/examples/ann_indexes.test.ts:ingest"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -140,13 +140,15 @@ There are a couple of parameters that can be used to fine-tune the search:
|
||||
|
||||
- **limit** (default: 10): The amount of results that will be returned
|
||||
- **nprobes** (default: 20): The number of probes used. A higher number makes search more accurate but also slower.<br/>
|
||||
Most of the time, setting nprobes to cover 5-10% of the dataset should achieve high recall with low latency.<br/>
|
||||
e.g., for 1M vectors divided up into 256 partitions, nprobes should be set to ~20-40.<br/>
|
||||
Note: nprobes is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
|
||||
Most of the time, setting nprobes to cover 5-15% of the dataset should achieve high recall with low latency.<br/>
|
||||
- _For example_, For a dataset of 1 million vectors divided into 256 partitions, `nprobes` should be set to ~20-40. This value can be adjusted to achieve the optimal balance between search latency and search quality. <br/>
|
||||
|
||||
- **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
|
||||
A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
|
||||
e.g., for 1M vectors divided into 256 partitions, if you're looking for top 20, then refine_factor=200 reranks the whole partition.<br/>
|
||||
Note: refine_factor is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
|
||||
- _For example_, For a dataset of 1 million vectors divided into 256 partitions, setting the `refine_factor` to 200 will initially retrieve the top 4,000 candidates (top k * refine_factor) from all searched partitions. These candidates are then reranked to determine the final top 20 results.<br/>
|
||||
!!! note
|
||||
Both `nprobes` and `refine_factor` are only applicable if an ANN index is present. If specified on a table without an ANN index, those parameters are ignored.
|
||||
|
||||
|
||||
=== "Python"
|
||||
|
||||
@@ -169,7 +171,7 @@ There are a couple of parameters that can be used to fine-tune the search:
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search1"
|
||||
--8<-- "nodejs/examples/ann_indexes.test.ts:search1"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -203,7 +205,7 @@ You can further filter the elements returned by a search using a where clause.
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search2"
|
||||
--8<-- "nodejs/examples/ann_indexes.test.ts:search2"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -235,7 +237,7 @@ You can select the columns returned by the query using a select clause.
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search3"
|
||||
--8<-- "nodejs/examples/ann_indexes.test.ts:search3"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -275,7 +277,15 @@ Product quantization can lead to approximately `16 * sizeof(float32) / 1 = 64` t
|
||||
Higher number of partitions could lead to more efficient I/O during queries and better accuracy, but it takes much more time to train.
|
||||
On `SIFT-1M` dataset, our benchmark shows that keeping each partition 1K-4K rows lead to a good latency / recall.
|
||||
|
||||
`num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. Because
|
||||
`num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. The number should be a factor of the vector dimension. Because
|
||||
PQ is a lossy compression of the original vector, a higher `num_sub_vectors` usually results in
|
||||
less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and
|
||||
more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.
|
||||
less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.
|
||||
|
||||
!!! note
|
||||
if `num_sub_vectors` is set to be greater than the vector dimension, you will see errors like `attempt to divide by zero`
|
||||
|
||||
### How to choose `m` and `ef_construction` for `IVF_HNSW_*` index?
|
||||
|
||||
`m` determines the number of connections a new node establishes with its closest neighbors upon entering the graph. Typically, `m` falls within the range of 5 to 48. Lower `m` values are suitable for low-dimensional data or scenarios where recall is less critical. Conversely, higher `m` values are beneficial for high-dimensional data or when high recall is required. In essence, a larger `m` results in a denser graph with increased connectivity, but at the expense of higher memory consumption.
|
||||
|
||||
`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase
|
||||
@@ -157,7 +157,7 @@ recommend switching to stable releases.
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
|
||||
--8<-- "nodejs/examples/basic.ts:connect"
|
||||
--8<-- "nodejs/examples/basic.test.ts:connect"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -212,7 +212,7 @@ table.
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_table"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -268,7 +268,7 @@ similar to a `CREATE TABLE` statement in SQL.
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_empty_table"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_empty_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -298,7 +298,7 @@ Once created, you can open a table as follows:
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:open_table"
|
||||
--8<-- "nodejs/examples/basic.test.ts:open_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -327,7 +327,7 @@ If you forget the name of your table, you can always get a listing of all table
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:table_names"
|
||||
--8<-- "nodejs/examples/basic.test.ts:table_names"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -357,7 +357,7 @@ After a table has been created, you can always add more data to it as follows:
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:add_data"
|
||||
--8<-- "nodejs/examples/basic.test.ts:add_data"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -389,7 +389,7 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:vector_search"
|
||||
--8<-- "nodejs/examples/basic.test.ts:vector_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -429,7 +429,7 @@ LanceDB allows you to create an ANN index on a table as follows:
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_index"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_index"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -469,7 +469,7 @@ This can delete any number of rows that match the filter.
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:delete_rows"
|
||||
--8<-- "nodejs/examples/basic.test.ts:delete_rows"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -527,7 +527,7 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:drop_table"
|
||||
--8<-- "nodejs/examples/basic.test.ts:drop_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -561,8 +561,8 @@ You can use the embedding API when working with embedding models. It automatical
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/embedding.ts:imports"
|
||||
--8<-- "nodejs/examples/embedding.ts:openai_embeddings"
|
||||
--8<-- "nodejs/examples/embedding.test.ts:imports"
|
||||
--8<-- "nodejs/examples/embedding.test.ts:openai_embeddings"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -57,6 +57,13 @@ Then the greedy search routine operates as follows:
|
||||
|
||||
## Usage
|
||||
|
||||
There are three key parameters to set when constructing an HNSW index:
|
||||
|
||||
* `metric`: Use an `L2` euclidean distance metric. We also support `dot` and `cosine` distance.
|
||||
* `m`: The number of neighbors to select for each vector in the HNSW graph.
|
||||
* `ef_construction`: The number of candidates to evaluate during the construction of the HNSW graph.
|
||||
|
||||
|
||||
We can combine the above concepts to understand how to build and query an HNSW index in LanceDB.
|
||||
|
||||
### Construct index
|
||||
|
||||
@@ -58,8 +58,10 @@ In Python, the index can be created as follows:
|
||||
# Make sure you have enough data in the table for an effective training step
|
||||
tbl.create_index(metric="L2", num_partitions=256, num_sub_vectors=96)
|
||||
```
|
||||
!!! note
|
||||
`num_partitions`=256 and `num_sub_vectors`=96 does not work for every dataset. Those values needs to be adjusted for your particular dataset.
|
||||
|
||||
The `num_partitions` is usually chosen to target a particular number of vectors per partition. `num_sub_vectors` is typically chosen based on the desired recall and the dimensionality of the vector. See the [FAQs](#faq) below for best practices on choosing these parameters.
|
||||
The `num_partitions` is usually chosen to target a particular number of vectors per partition. `num_sub_vectors` is typically chosen based on the desired recall and the dimensionality of the vector. See [here](../ann_indexes.md/#how-to-choose-num_partitions-and-num_sub_vectors-for-ivf_pq-index) for best practices on choosing these parameters.
|
||||
|
||||
|
||||
### Query the index
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
# VoyageAI Embeddings
|
||||
|
||||
Voyage AI provides cutting-edge embedding and rerankers.
|
||||
|
||||
|
||||
Using voyageai API requires voyageai package, which can be installed using `pip install voyageai`. Voyage AI embeddings are used to generate embeddings for text data. The embeddings can be used for various tasks like semantic search, clustering, and classification.
|
||||
You also need to set the `VOYAGE_API_KEY` environment variable to use the VoyageAI API.
|
||||
|
||||
Supported models are:
|
||||
|
||||
- voyage-3
|
||||
- voyage-3-lite
|
||||
- voyage-finance-2
|
||||
- voyage-multilingual-2
|
||||
- voyage-law-2
|
||||
- voyage-code-2
|
||||
|
||||
|
||||
Supported parameters (to be passed in `create` method) are:
|
||||
|
||||
| Parameter | Type | Default Value | Description |
|
||||
|---|---|--------|---------|
|
||||
| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
|
||||
| `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
|
||||
| `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |
|
||||
|
||||
|
||||
Usage Example:
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||
|
||||
voyageai = EmbeddingFunctionRegistry
|
||||
.get_instance()
|
||||
.get("voyageai")
|
||||
.create(name="voyage-3")
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
data = [ { "text": "hello world" },
|
||||
{ "text": "goodbye world" }]
|
||||
|
||||
db = lancedb.connect("~/.lancedb")
|
||||
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
|
||||
|
||||
tbl.add(data)
|
||||
```
|
||||
@@ -47,9 +47,9 @@ Let's implement `SentenceTransformerEmbeddings` class. All you need to do is imp
|
||||
=== "TypeScript"
|
||||
|
||||
```ts
|
||||
--8<--- "nodejs/examples/custom_embedding_function.ts:imports"
|
||||
--8<--- "nodejs/examples/custom_embedding_function.test.ts:imports"
|
||||
|
||||
--8<--- "nodejs/examples/custom_embedding_function.ts:embedding_impl"
|
||||
--8<--- "nodejs/examples/custom_embedding_function.test.ts:embedding_impl"
|
||||
```
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ Now you can use this embedding function to create your table schema and that's i
|
||||
=== "TypeScript"
|
||||
|
||||
```ts
|
||||
--8<--- "nodejs/examples/custom_embedding_function.ts:call_custom_function"
|
||||
--8<--- "nodejs/examples/custom_embedding_function.test.ts:call_custom_function"
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
@@ -53,6 +53,7 @@ These functions are registered by default to handle text embeddings.
|
||||
| [**Jina Embeddings**](available_embedding_models/text_embedding_functions/jina_embedding.md "jina") | 🔗 World-class embedding models to improve your search and RAG systems. You will need **jina api key**. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/jina.png" alt="Jina Icon" width="90" height="35">](available_embedding_models/text_embedding_functions/jina_embedding.md) |
|
||||
| [ **AWS Bedrock Functions**](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md "bedrock-text") | ☁️ AWS Bedrock supports multiple base models for generating text embeddings. You need to setup the AWS credentials to use this embedding function. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/aws_bedrock.png" alt="AWS Bedrock Icon" width="120" height="35">](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md) |
|
||||
| [**IBM Watsonx.ai**](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md "watsonx") | 💡 Generate text embeddings using IBM's watsonx.ai platform. **Note**: watsonx.ai library is an optional dependency. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/watsonx.png" alt="Watsonx Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md) |
|
||||
| [**VoyageAI Embeddings**](available_embedding_models/text_embedding_functions/voyageai_embedding.md "voyageai") | 🌕 Voyage AI provides cutting-edge embedding and rerankers. This will help you get started with **VoyageAI** embedding models using LanceDB. Using voyageai API requires voyageai package. Install it via `pip`. | [<img src="https://www.voyageai.com/logo.svg" alt="VoyageAI Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/voyageai_embedding.md) |
|
||||
|
||||
|
||||
|
||||
@@ -66,6 +67,7 @@ These functions are registered by default to handle text embeddings.
|
||||
[jina-key]: "jina"
|
||||
[aws-key]: "bedrock-text"
|
||||
[watsonx-key]: "watsonx"
|
||||
[voyageai-key]: "voyageai"
|
||||
|
||||
|
||||
## Multi-modal Embedding Functions🖼️
|
||||
|
||||
@@ -94,8 +94,8 @@ the embeddings at all:
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/embedding.ts:imports"
|
||||
--8<-- "nodejs/examples/embedding.ts:embedding_function"
|
||||
--8<-- "nodejs/examples/embedding.test.ts:imports"
|
||||
--8<-- "nodejs/examples/embedding.test.ts:embedding_function"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -150,7 +150,7 @@ need to worry about it when you query the table:
|
||||
.toArray()
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const results = await table
|
||||
|
||||
@@ -51,8 +51,8 @@ LanceDB registers the OpenAI embeddings function in the registry as `openai`. Yo
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
--8<--- "nodejs/examples/embedding.ts:imports"
|
||||
--8<--- "nodejs/examples/embedding.ts:openai_embeddings"
|
||||
--8<--- "nodejs/examples/embedding.test.ts:imports"
|
||||
--8<--- "nodejs/examples/embedding.test.ts:openai_embeddings"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
@@ -121,12 +121,10 @@ class Words(LanceModel):
|
||||
vector: Vector(func.ndims()) = func.VectorField()
|
||||
|
||||
table = db.create_table("words", schema=Words)
|
||||
table.add(
|
||||
[
|
||||
{"text": "hello world"},
|
||||
{"text": "goodbye world"}
|
||||
]
|
||||
)
|
||||
table.add([
|
||||
{"text": "hello world"},
|
||||
{"text": "goodbye world"}
|
||||
])
|
||||
|
||||
query = "greetings"
|
||||
actual = table.search(query).limit(1).to_pydantic(Words)[0]
|
||||
|
||||
@@ -114,12 +114,45 @@ table.create_fts_index("text",
|
||||
|
||||
LanceDB full text search supports to filter the search results by a condition, both pre-filtering and post-filtering are supported.
|
||||
|
||||
This can be invoked via the familiar `where` syntax:
|
||||
|
||||
This can be invoked via the familiar `where` syntax.
|
||||
|
||||
With pre-filtering:
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
table.search("puppy").limit(10).where("meta='foo'").to_list()
|
||||
table.search("puppy").limit(10).where("meta='foo'", prefilte=True).to_list()
|
||||
```
|
||||
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
await tbl
|
||||
.search("puppy")
|
||||
.select(["id", "doc"])
|
||||
.limit(10)
|
||||
.where("meta='foo'")
|
||||
.prefilter(true)
|
||||
.toArray();
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
table
|
||||
.query()
|
||||
.full_text_search(FullTextSearchQuery::new("puppy".to_owned()))
|
||||
.select(lancedb::query::Select::Columns(vec!["doc".to_owned()]))
|
||||
.limit(10)
|
||||
.only_if("meta='foo'")
|
||||
.execute()
|
||||
.await?;
|
||||
```
|
||||
|
||||
With post-filtering:
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
table.search("puppy").limit(10).where("meta='foo'", prefilte=False).to_list()
|
||||
```
|
||||
|
||||
=== "TypeScript"
|
||||
@@ -130,6 +163,7 @@ This can be invoked via the familiar `where` syntax:
|
||||
.select(["id", "doc"])
|
||||
.limit(10)
|
||||
.where("meta='foo'")
|
||||
.prefilter(false)
|
||||
.toArray();
|
||||
```
|
||||
|
||||
@@ -140,6 +174,7 @@ This can be invoked via the familiar `where` syntax:
|
||||
.query()
|
||||
.full_text_search(FullTextSearchQuery::new(words[0].to_owned()))
|
||||
.select(lancedb::query::Select::Columns(vec!["doc".to_owned()]))
|
||||
.postfilter()
|
||||
.limit(10)
|
||||
.only_if("meta='foo'")
|
||||
.execute()
|
||||
@@ -160,3 +195,35 @@ To search for a phrase, the index must be created with `with_position=True`:
|
||||
table.create_fts_index("text", use_tantivy=False, with_position=True)
|
||||
```
|
||||
This will allow you to search for phrases, but it will also significantly increase the index size and indexing time.
|
||||
|
||||
|
||||
## Incremental indexing
|
||||
|
||||
LanceDB supports incremental indexing, which means you can add new records to the table without reindexing the entire table.
|
||||
|
||||
This can make the query more efficient, especially when the table is large and the new records are relatively small.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
|
||||
table.optimize()
|
||||
```
|
||||
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
await tbl.add([{ vector: [3.1, 4.1], text: "Frodo was a happy puppy" }]);
|
||||
await tbl.optimize();
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
let more_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
|
||||
tbl.add(more_data).execute().await?;
|
||||
tbl.optimize(OptimizeAction::All).execute().await?;
|
||||
```
|
||||
!!! note
|
||||
|
||||
New data added after creating the FTS index will appear in search results while incremental index is still progress, but with increased latency due to a flat search on the unindexed portion. LanceDB Cloud automates this merging process, minimizing the impact on search speed.
|
||||
@@ -153,9 +153,7 @@ table.create_fts_index(["title", "content"], use_tantivy=True, writer_heap_size=
|
||||
|
||||
## Current limitations
|
||||
|
||||
1. Currently we do not yet support incremental writes.
|
||||
If you add data after FTS index creation, it won't be reflected
|
||||
in search results until you do a full reindex.
|
||||
1. New data added after creating the FTS index will appear in search results, but with increased latency due to a flat search on the unindexed portion. Re-indexing with `create_fts_index` will reduce latency. LanceDB Cloud automates this merging process, minimizing the impact on search speed.
|
||||
|
||||
2. We currently only support local filesystem paths for the FTS index.
|
||||
This is a tantivy limitation. We've implemented an object store plugin
|
||||
|
||||
@@ -85,13 +85,13 @@ Initialize a LanceDB connection and create a table
|
||||
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_table"
|
||||
```
|
||||
|
||||
This will infer the schema from the provided data. If you want to explicitly provide a schema, you can use `apache-arrow` to declare a schema
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_with_schema"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_table_with_schema"
|
||||
```
|
||||
|
||||
!!! info "Note"
|
||||
@@ -100,14 +100,14 @@ Initialize a LanceDB connection and create a table
|
||||
passed in will NOT be appended to the table in that case.
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_exists_ok"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_table_exists_ok"
|
||||
```
|
||||
|
||||
Sometimes you want to make sure that you start fresh. If you want to
|
||||
overwrite the table, you can pass in mode: "overwrite" to the createTable function.
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_overwrite"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_table_overwrite"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -227,7 +227,7 @@ LanceDB supports float16 data type!
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_f16_table"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_f16_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -274,7 +274,7 @@ table = db.create_table(table_name, schema=Content)
|
||||
|
||||
Sometimes your data model may contain nested objects.
|
||||
For example, you may want to store the document string
|
||||
and the document soure name as a nested Document object:
|
||||
and the document source name as a nested Document object:
|
||||
|
||||
```python
|
||||
class Document(BaseModel):
|
||||
@@ -455,7 +455,7 @@ You can create an empty table for scenarios where you want to add data to the ta
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_empty_table"
|
||||
--8<-- "nodejs/examples/basic.test.ts:create_empty_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -466,7 +466,7 @@ You can create an empty table for scenarios where you want to add data to the ta
|
||||
|
||||
## Adding to a table
|
||||
|
||||
After a table has been created, you can always add more data to it usind the `add` method
|
||||
After a table has been created, you can always add more data to it using the `add` method
|
||||
|
||||
=== "Python"
|
||||
You can add any of the valid data structures accepted by LanceDB table, i.e, `dict`, `list[dict]`, `pd.DataFrame`, or `Iterator[pa.RecordBatch]`. Below are some examples.
|
||||
@@ -535,7 +535,7 @@ After a table has been created, you can always add more data to it usind the `ad
|
||||
```
|
||||
|
||||
??? "Ingesting Pydantic models with LanceDB embedding API"
|
||||
When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` feild as None to allow LanceDB to automatically vectorize the data.
|
||||
When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` field as None to allow LanceDB to automatically vectorize the data.
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
@@ -790,6 +790,27 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
This permanently removes the table and is not recoverable, unlike deleting rows.
|
||||
If the table does not exist an exception is raised.
|
||||
|
||||
## Handling bad vectors
|
||||
|
||||
In LanceDB Python, you can use the `on_bad_vectors` parameter to choose how
|
||||
invalid vector values are handled. Invalid vectors are vectors that are not valid
|
||||
because:
|
||||
|
||||
1. They are the wrong dimension
|
||||
2. They contain NaN values
|
||||
3. They are null but are on a non-nullable field
|
||||
|
||||
By default, LanceDB will raise an error if it encounters a bad vector. You can
|
||||
also choose one of the following options:
|
||||
|
||||
* `drop`: Ignore rows with bad vectors
|
||||
* `fill`: Replace bad values (NaNs) or missing values (too few dimensions) with
|
||||
the fill value specified in the `fill_value` parameter. An input like
|
||||
`[1.0, NaN, 3.0]` will be replaced with `[1.0, 0.0, 3.0]` if `fill_value=0.0`.
|
||||
* `null`: Replace bad vectors with null (only works if the column is nullable).
|
||||
A bad vector `[1.0, NaN, 3.0]` will be replaced with `null` if the column is
|
||||
nullable. If the vector column is non-nullable, then bad vectors will cause an
|
||||
error
|
||||
|
||||
## Consistency
|
||||
|
||||
@@ -859,4 +880,4 @@ There are three possible settings for `read_consistency_interval`:
|
||||
|
||||
Learn the best practices on creating an ANN index and getting the most out of it.
|
||||
|
||||
[^1]: The `vectordb` package is a legacy package that is deprecated in favor of `@lancedb/lancedb`. The `vectordb` package will continue to receive bug fixes and security updates until September 2024. We recommend all new projects use `@lancedb/lancedb`. See the [migration guide](migration.md) for more information.
|
||||
[^1]: The `vectordb` package is a legacy package that is deprecated in favor of `@lancedb/lancedb`. The `vectordb` package will continue to receive bug fixes and security updates until September 2024. We recommend all new projects use `@lancedb/lancedb`. See the [migration guide](../migration.md) for more information.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -117,8 +117,14 @@ lists the indices that LanceDb supports.
|
||||
|
||||
::: lancedb.index.LabelList
|
||||
|
||||
::: lancedb.index.FTS
|
||||
|
||||
::: lancedb.index.IvfPq
|
||||
|
||||
::: lancedb.index.HnswPq
|
||||
|
||||
::: lancedb.index.HnswSq
|
||||
|
||||
## Querying (Asynchronous)
|
||||
|
||||
Queries allow you to return data from your database. Basic queries can be
|
||||
|
||||
@@ -6,6 +6,9 @@ This re-ranker uses the [Cohere](https://cohere.ai/) API to rerank the search re
|
||||
!!! note
|
||||
Supported Query Types: Hybrid, Vector, FTS
|
||||
|
||||
```shell
|
||||
pip install cohere
|
||||
```
|
||||
|
||||
```python
|
||||
import numpy
|
||||
|
||||
@@ -9,6 +9,7 @@ LanceDB comes with some built-in rerankers. Some of the rerankers that are avail
|
||||
| `CrossEncoderReranker` | Uses a cross-encoder model to rerank search results | Vector, FTS, Hybrid |
|
||||
| `ColbertReranker` | Uses a colbert model to rerank search results | Vector, FTS, Hybrid |
|
||||
| `OpenaiReranker`(Experimental) | Uses OpenAI's chat model to rerank search results | Vector, FTS, Hybrid |
|
||||
| `VoyageAIReranker` | Uses voyageai Reranker API to rerank results | Vector, FTS, Hybrid |
|
||||
|
||||
|
||||
## Using a Reranker
|
||||
@@ -73,6 +74,7 @@ LanceDB comes with some built-in rerankers. Here are some of the rerankers that
|
||||
- [Jina Reranker](./jina.md)
|
||||
- [AnswerDotAI Rerankers](./answerdotai.md)
|
||||
- [Reciprocal Rank Fusion Reranker](./rrf.md)
|
||||
- [VoyageAI Reranker](./voyageai.md)
|
||||
|
||||
## Creating Custom Rerankers
|
||||
|
||||
|
||||
77
docs/src/reranking/voyageai.md
Normal file
77
docs/src/reranking/voyageai.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# Voyage AI Reranker
|
||||
|
||||
Voyage AI provides cutting-edge embedding and rerankers.
|
||||
|
||||
This re-ranker uses the [VoyageAI](https://docs.voyageai.com/docs/) API to rerank the search results. You can use this re-ranker by passing `VoyageAIReranker()` to the `rerank()` method. Note that you'll either need to set the `VOYAGE_API_KEY` environment variable or pass the `api_key` argument to use this re-ranker.
|
||||
|
||||
|
||||
!!! note
|
||||
Supported Query Types: Hybrid, Vector, FTS
|
||||
|
||||
|
||||
```python
|
||||
import numpy
|
||||
import lancedb
|
||||
from lancedb.embeddings import get_registry
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.rerankers import VoyageAIReranker
|
||||
|
||||
embedder = get_registry().get("sentence-transformers").create()
|
||||
db = lancedb.connect("~/.lancedb")
|
||||
|
||||
class Schema(LanceModel):
|
||||
text: str = embedder.SourceField()
|
||||
vector: Vector(embedder.ndims()) = embedder.VectorField()
|
||||
|
||||
data = [
|
||||
{"text": "hello world"},
|
||||
{"text": "goodbye world"}
|
||||
]
|
||||
tbl = db.create_table("test", schema=Schema, mode="overwrite")
|
||||
tbl.add(data)
|
||||
reranker = VoyageAIReranker(model_name="rerank-2")
|
||||
|
||||
# Run vector search with a reranker
|
||||
result = tbl.search("hello").rerank(reranker=reranker).to_list()
|
||||
|
||||
# Run FTS search with a reranker
|
||||
result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list()
|
||||
|
||||
# Run hybrid search with a reranker
|
||||
tbl.create_fts_index("text", replace=True)
|
||||
result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
|
||||
|
||||
```
|
||||
|
||||
Accepted Arguments
|
||||
----------------
|
||||
| Argument | Type | Default | Description |
|
||||
| --- | --- | --- | --- |
|
||||
| `model_name` | `str` | `None` | The name of the reranker model to use. Available models are: rerank-2, rerank-2-lite |
|
||||
| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. |
|
||||
| `top_n` | `str` | `None` | The number of results to return. If None, will return all results. |
|
||||
| `api_key` | `str` | `None` | The API key for the Voyage AI API. If not provided, the `VOYAGE_API_KEY` environment variable is used. |
|
||||
| `return_score` | str | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type |
|
||||
| `truncation` | `bool` | `None` | Whether to truncate the input to satisfy the "context length limit" on the query and the documents. |
|
||||
|
||||
|
||||
## Supported Scores for each query type
|
||||
You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
|
||||
|
||||
### Hybrid Search
|
||||
|`return_score`| Status | Description |
|
||||
| --- | --- | --- |
|
||||
| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
|
||||
| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
|
||||
|
||||
### Vector Search
|
||||
|`return_score`| Status | Description |
|
||||
| --- | --- | --- |
|
||||
| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
|
||||
| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) |
|
||||
|
||||
### FTS Search
|
||||
|`return_score`| Status | Description |
|
||||
| --- | --- | --- |
|
||||
| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
|
||||
| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
|
||||
@@ -58,9 +58,9 @@ db.create_table("my_vectors", data=data)
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/search.ts:import"
|
||||
--8<-- "nodejs/examples/search.test.ts:import"
|
||||
|
||||
--8<-- "nodejs/examples/search.ts:search1"
|
||||
--8<-- "nodejs/examples/search.test.ts:search1"
|
||||
```
|
||||
|
||||
|
||||
@@ -89,7 +89,7 @@ By default, `l2` will be used as metric type. You can specify the metric type as
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/search.ts:search2"
|
||||
--8<-- "nodejs/examples/search.test.ts:search2"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
@@ -49,7 +49,7 @@ const tbl = await db.createTable('myVectors', data)
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:search"
|
||||
--8<-- "nodejs/examples/filtering.test.ts:search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -91,7 +91,7 @@ For example, the following filter string is acceptable:
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:vec_search"
|
||||
--8<-- "nodejs/examples/filtering.test.ts:vec_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
@@ -169,7 +169,7 @@ You can also filter your data without search.
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:sql_search"
|
||||
--8<-- "nodejs/examples/filtering.test.ts:sql_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.11.1-beta.1</version>
|
||||
<version>0.13.1-beta.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.11.1-beta.1</version>
|
||||
<version>0.13.1-beta.0</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<name>LanceDB Parent</name>
|
||||
|
||||
88
node/package-lock.json
generated
88
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -52,11 +52,14 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.11.1-beta.1",
|
||||
"@lancedb/vectordb-darwin-x64": "0.11.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.11.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.11.1-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.11.1-beta.1"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
@@ -326,71 +329,6 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.11.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.11.1-beta.1.tgz",
|
||||
"integrity": "sha512-q9jcCbmcz45UHmjgecL6zK82WaqUJsARfniwXXPcnd8ooISVhPkgN+RVKv6edwI9T0PV+xVRYq+LQLlZu5fyxw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.11.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.11.1-beta.1.tgz",
|
||||
"integrity": "sha512-E5tCTS5TaTkssTPa+gdnFxZJ1f60jnSIJXhqufNFZk4s+IMViwR1BPqaqE++WY5c1uBI55ef1862CROKDKX4gg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.11.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.11.1-beta.1.tgz",
|
||||
"integrity": "sha512-Obohy6TH31Uq+fp6ZisHR7iAsvgVPqBExrycVcIJqrLZnIe88N9OWUwBXkmfMAw/2hNJFwD4tU7+4U2FcBWX4w==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.11.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.11.1-beta.1.tgz",
|
||||
"integrity": "sha512-3Meu0dgrzNrnBVVQhxkUSAOhQNmgtKHvOvmrRLUicV+X19hd33udihgxVpZZb9mpXenJ8lZsS+Jq6R0hWqntag==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.11.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.11.1-beta.1.tgz",
|
||||
"integrity": "sha512-BafZ9OJPQXsS7JW0weAl12wC+827AiRjfUrE5tvrYWZah2OwCF2U2g6uJ3x4pxfwEGsv5xcHFqgxlS7ttFkh+Q==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@neon-rs/cli": {
|
||||
"version": "0.0.160",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
||||
@@ -1505,9 +1443,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/cross-spawn": {
|
||||
"version": "7.0.3",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
|
||||
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
|
||||
"version": "7.0.6",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
|
||||
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"path-key": "^3.1.0",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -84,14 +84,18 @@
|
||||
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
|
||||
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
|
||||
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
|
||||
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
|
||||
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc",
|
||||
"aarch64-pc-windows-msvc": "@lancedb/vectordb-win32-arm64-msvc"
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.11.1-beta.1",
|
||||
"@lancedb/vectordb-darwin-x64": "0.11.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.11.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.11.1-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.11.1-beta.1"
|
||||
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import axios, { type AxiosResponse, type ResponseType } from 'axios'
|
||||
import axios, { type AxiosError, type AxiosResponse, type ResponseType } from 'axios'
|
||||
|
||||
import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
|
||||
|
||||
@@ -197,7 +197,7 @@ export class HttpLancedbClient {
|
||||
response = await callWithMiddlewares(req, this._middlewares)
|
||||
return response
|
||||
} catch (err: any) {
|
||||
console.error('error: ', err)
|
||||
console.error(serializeErrorAsJson(err))
|
||||
if (err.response === undefined) {
|
||||
throw new Error(`Network Error: ${err.message as string}`)
|
||||
}
|
||||
@@ -247,7 +247,8 @@ export class HttpLancedbClient {
|
||||
|
||||
// return response
|
||||
} catch (err: any) {
|
||||
console.error('error: ', err)
|
||||
console.error(serializeErrorAsJson(err))
|
||||
|
||||
if (err.response === undefined) {
|
||||
throw new Error(`Network Error: ${err.message as string}`)
|
||||
}
|
||||
@@ -287,3 +288,15 @@ export class HttpLancedbClient {
|
||||
return clone
|
||||
}
|
||||
}
|
||||
|
||||
function serializeErrorAsJson(err: AxiosError) {
|
||||
const error = JSON.parse(JSON.stringify(err, Object.getOwnPropertyNames(err)))
|
||||
error.response = err.response != null
|
||||
? JSON.parse(JSON.stringify(
|
||||
err.response,
|
||||
// config contains the request data, too noisy
|
||||
Object.getOwnPropertyNames(err.response).filter(prop => prop !== 'config')
|
||||
))
|
||||
: null
|
||||
return JSON.stringify({ error })
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.11.1-beta.1"
|
||||
version = "0.13.1-beta.0"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
@@ -18,7 +18,7 @@ futures.workspace = true
|
||||
lancedb = { path = "../rust/lancedb", features = ["remote"] }
|
||||
napi = { version = "2.16.8", default-features = false, features = [
|
||||
"napi9",
|
||||
"async",
|
||||
"async"
|
||||
] }
|
||||
napi-derive = "2.16.4"
|
||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||
|
||||
@@ -187,6 +187,81 @@ describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
||||
},
|
||||
);
|
||||
|
||||
// TODO: https://github.com/lancedb/lancedb/issues/1832
|
||||
it.skip("should be able to omit nullable fields", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
"vector",
|
||||
new arrow.FixedSizeList(
|
||||
2,
|
||||
new arrow.Field("item", new arrow.Float64()),
|
||||
),
|
||||
true,
|
||||
),
|
||||
new arrow.Field("item", new arrow.Utf8(), true),
|
||||
new arrow.Field("price", new arrow.Float64(), false),
|
||||
]);
|
||||
const table = await db.createEmptyTable("test", schema);
|
||||
|
||||
const data1 = { item: "foo", price: 10.0 };
|
||||
await table.add([data1]);
|
||||
const data2 = { vector: [3.1, 4.1], price: 2.0 };
|
||||
await table.add([data2]);
|
||||
const data3 = { vector: [5.9, 26.5], item: "bar", price: 3.0 };
|
||||
await table.add([data3]);
|
||||
|
||||
let res = await table.query().limit(10).toArray();
|
||||
const resVector = res.map((r) => r.get("vector").toArray());
|
||||
expect(resVector).toEqual([null, data2.vector, data3.vector]);
|
||||
const resItem = res.map((r) => r.get("item").toArray());
|
||||
expect(resItem).toEqual(["foo", null, "bar"]);
|
||||
const resPrice = res.map((r) => r.get("price").toArray());
|
||||
expect(resPrice).toEqual([10.0, 2.0, 3.0]);
|
||||
|
||||
const data4 = { item: "foo" };
|
||||
// We can't omit a column if it's not nullable
|
||||
await expect(table.add([data4])).rejects.toThrow("Invalid user input");
|
||||
|
||||
// But we can alter columns to make them nullable
|
||||
await table.alterColumns([{ path: "price", nullable: true }]);
|
||||
await table.add([data4]);
|
||||
|
||||
res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
|
||||
expect(res).toEqual([data1, data2, data3, data4]);
|
||||
});
|
||||
|
||||
it("should be able to insert nullable data for non-nullable fields", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field("x", new arrow.Float64(), false),
|
||||
new arrow.Field("id", new arrow.Utf8(), false),
|
||||
]);
|
||||
const table = await db.createEmptyTable("test", schema);
|
||||
|
||||
const data1 = { x: 4.1, id: "foo" };
|
||||
await table.add([data1]);
|
||||
const res = (await table.query().toArray())[0];
|
||||
expect(res.x).toEqual(data1.x);
|
||||
expect(res.id).toEqual(data1.id);
|
||||
|
||||
const data2 = { x: null, id: "bar" };
|
||||
await expect(table.add([data2])).rejects.toThrow(
|
||||
"declared as non-nullable but contains null values",
|
||||
);
|
||||
|
||||
// But we can alter columns to make them nullable
|
||||
await table.alterColumns([{ path: "x", nullable: true }]);
|
||||
await table.add([data2]);
|
||||
|
||||
const res2 = await table.query().toArray();
|
||||
expect(res2.length).toBe(2);
|
||||
expect(res2[0].x).toEqual(data1.x);
|
||||
expect(res2[0].id).toEqual(data1.id);
|
||||
expect(res2[1].x).toBeNull();
|
||||
expect(res2[1].id).toEqual(data2.id);
|
||||
});
|
||||
|
||||
it("should return the table as an instance of an arrow table", async () => {
|
||||
const arrowTbl = await table.toArrow();
|
||||
expect(arrowTbl).toBeInstanceOf(ArrowTable);
|
||||
@@ -402,6 +477,88 @@ describe("When creating an index", () => {
|
||||
expect(rst.numRows).toBe(1);
|
||||
});
|
||||
|
||||
it("should create and search IVF_HNSW indices", async () => {
|
||||
await tbl.createIndex("vec", {
|
||||
config: Index.hnswSq(),
|
||||
});
|
||||
|
||||
// check index directory
|
||||
const indexDir = path.join(tmpDir.name, "test.lance", "_indices");
|
||||
expect(fs.readdirSync(indexDir)).toHaveLength(1);
|
||||
const indices = await tbl.listIndices();
|
||||
expect(indices.length).toBe(1);
|
||||
expect(indices[0]).toEqual({
|
||||
name: "vec_idx",
|
||||
indexType: "IvfHnswSq",
|
||||
columns: ["vec"],
|
||||
});
|
||||
|
||||
// Search without specifying the column
|
||||
let rst = await tbl
|
||||
.query()
|
||||
.limit(2)
|
||||
.nearestTo(queryVec)
|
||||
.distanceType("dot")
|
||||
.toArrow();
|
||||
expect(rst.numRows).toBe(2);
|
||||
|
||||
// Search using `vectorSearch`
|
||||
rst = await tbl.vectorSearch(queryVec).limit(2).toArrow();
|
||||
expect(rst.numRows).toBe(2);
|
||||
|
||||
// Search with specifying the column
|
||||
const rst2 = await tbl
|
||||
.query()
|
||||
.limit(2)
|
||||
.nearestTo(queryVec)
|
||||
.column("vec")
|
||||
.toArrow();
|
||||
expect(rst2.numRows).toBe(2);
|
||||
expect(rst.toString()).toEqual(rst2.toString());
|
||||
|
||||
// test offset
|
||||
rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
|
||||
expect(rst.numRows).toBe(1);
|
||||
|
||||
// test ef
|
||||
rst = await tbl.query().limit(2).nearestTo(queryVec).ef(100).toArrow();
|
||||
expect(rst.numRows).toBe(2);
|
||||
});
|
||||
|
||||
it("should be able to query unindexed data", async () => {
|
||||
await tbl.createIndex("vec");
|
||||
await tbl.add([
|
||||
{
|
||||
id: 300,
|
||||
vec: Array(32)
|
||||
.fill(1)
|
||||
.map(() => Math.random()),
|
||||
tags: [],
|
||||
},
|
||||
]);
|
||||
|
||||
const plan1 = await tbl.query().nearestTo(queryVec).explainPlan(true);
|
||||
expect(plan1).toMatch("LanceScan");
|
||||
|
||||
const plan2 = await tbl
|
||||
.query()
|
||||
.nearestTo(queryVec)
|
||||
.fastSearch()
|
||||
.explainPlan(true);
|
||||
expect(plan2).not.toMatch("LanceScan");
|
||||
});
|
||||
|
||||
it("should be able to query with row id", async () => {
|
||||
const results = await tbl
|
||||
.query()
|
||||
.nearestTo(queryVec)
|
||||
.withRowId()
|
||||
.limit(1)
|
||||
.toArray();
|
||||
expect(results.length).toBe(1);
|
||||
expect(results[0]).toHaveProperty("_rowid");
|
||||
});
|
||||
|
||||
it("should allow parameters to be specified", async () => {
|
||||
await tbl.createIndex("vec", {
|
||||
config: Index.ivfPq({
|
||||
@@ -964,4 +1121,18 @@ describe("column name options", () => {
|
||||
const results = await table.query().where("`camelCase` = 1").toArray();
|
||||
expect(results[0].camelCase).toBe(1);
|
||||
});
|
||||
|
||||
test("can make multiple vector queries in one go", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.nearestTo([0.1, 0.2])
|
||||
.addQueryVector([0.1, 0.2])
|
||||
.limit(1)
|
||||
.toArray();
|
||||
console.log(results);
|
||||
expect(results.length).toBe(2);
|
||||
results.sort((a, b) => a.query_index - b.query_index);
|
||||
expect(results[0].query_index).toBe(0);
|
||||
expect(results[1].query_index).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -9,7 +9,8 @@
|
||||
"**/native.js",
|
||||
"**/native.d.ts",
|
||||
"**/npm/**/*",
|
||||
"**/.vscode/**"
|
||||
"**/.vscode/**",
|
||||
"./examples/*"
|
||||
]
|
||||
},
|
||||
"formatter": {
|
||||
|
||||
57
nodejs/examples/ann_indexes.test.ts
Normal file
57
nodejs/examples/ann_indexes.test.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import { VectorQuery } from "@lancedb/lancedb";
|
||||
// --8<-- [end:import]
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
test("ann index examples", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
// --8<-- [start:ingest]
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
|
||||
const data = Array.from({ length: 5_000 }, (_, i) => ({
|
||||
vector: Array(128).fill(i),
|
||||
id: `${i}`,
|
||||
content: "",
|
||||
longId: `${i}`,
|
||||
}));
|
||||
|
||||
const table = await db.createTable("my_vectors", data, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
await table.createIndex("vector", {
|
||||
config: lancedb.Index.ivfPq({
|
||||
numPartitions: 10,
|
||||
numSubVectors: 16,
|
||||
}),
|
||||
});
|
||||
// --8<-- [end:ingest]
|
||||
|
||||
// --8<-- [start:search1]
|
||||
const search = table.search(Array(128).fill(1.2)).limit(2) as VectorQuery;
|
||||
const results1 = await search.nprobes(20).refineFactor(10).toArray();
|
||||
// --8<-- [end:search1]
|
||||
expect(results1.length).toBe(2);
|
||||
|
||||
// --8<-- [start:search2]
|
||||
const results2 = await table
|
||||
.search(Array(128).fill(1.2))
|
||||
.where("id != '1141'")
|
||||
.limit(2)
|
||||
.toArray();
|
||||
// --8<-- [end:search2]
|
||||
expect(results2.length).toBe(2);
|
||||
|
||||
// --8<-- [start:search3]
|
||||
const results3 = await table
|
||||
.search(Array(128).fill(1.2))
|
||||
.select(["id"])
|
||||
.limit(2)
|
||||
.toArray();
|
||||
// --8<-- [end:search3]
|
||||
expect(results3.length).toBe(2);
|
||||
});
|
||||
}, 100_000);
|
||||
@@ -1,49 +0,0 @@
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
// --8<-- [end:import]
|
||||
|
||||
// --8<-- [start:ingest]
|
||||
const db = await lancedb.connect("/tmp/lancedb/");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: `${i}`,
|
||||
content: "",
|
||||
longId: `${i}`,
|
||||
}));
|
||||
|
||||
const table = await db.createTable("my_vectors", data, { mode: "overwrite" });
|
||||
await table.createIndex("vector", {
|
||||
config: lancedb.Index.ivfPq({
|
||||
numPartitions: 16,
|
||||
numSubVectors: 48,
|
||||
}),
|
||||
});
|
||||
// --8<-- [end:ingest]
|
||||
|
||||
// --8<-- [start:search1]
|
||||
const _results1 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.limit(2)
|
||||
.nprobes(20)
|
||||
.refineFactor(10)
|
||||
.toArray();
|
||||
// --8<-- [end:search1]
|
||||
|
||||
// --8<-- [start:search2]
|
||||
const _results2 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.where("id != '1141'")
|
||||
.limit(2)
|
||||
.toArray();
|
||||
// --8<-- [end:search2]
|
||||
|
||||
// --8<-- [start:search3]
|
||||
const _results3 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.select(["id"])
|
||||
.limit(2)
|
||||
.toArray();
|
||||
// --8<-- [end:search3]
|
||||
|
||||
console.log("Ann indexes: done");
|
||||
175
nodejs/examples/basic.test.ts
Normal file
175
nodejs/examples/basic.test.ts
Normal file
@@ -0,0 +1,175 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
import {
|
||||
Field,
|
||||
FixedSizeList,
|
||||
Float16,
|
||||
Int32,
|
||||
Schema,
|
||||
Utf8,
|
||||
} from "apache-arrow";
|
||||
// --8<-- [end:imports]
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
test("basic table examples", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
// --8<-- [start:connect]
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
// --8<-- [end:connect]
|
||||
{
|
||||
// --8<-- [start:create_table]
|
||||
const _tbl = await db.createTable(
|
||||
"myTable",
|
||||
[
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
],
|
||||
{ mode: "overwrite" },
|
||||
);
|
||||
// --8<-- [end:create_table]
|
||||
|
||||
const data = [
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
];
|
||||
|
||||
{
|
||||
// --8<-- [start:create_table_exists_ok]
|
||||
const tbl = await db.createTable("myTable", data, {
|
||||
existOk: true,
|
||||
});
|
||||
// --8<-- [end:create_table_exists_ok]
|
||||
expect(await tbl.countRows()).toBe(2);
|
||||
}
|
||||
{
|
||||
// --8<-- [start:create_table_overwrite]
|
||||
const tbl = await db.createTable("myTable", data, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
// --8<-- [end:create_table_overwrite]
|
||||
expect(await tbl.countRows()).toBe(2);
|
||||
}
|
||||
}
|
||||
|
||||
await db.dropTable("myTable");
|
||||
|
||||
{
|
||||
// --8<-- [start:create_table_with_schema]
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
"vector",
|
||||
new arrow.FixedSizeList(
|
||||
2,
|
||||
new arrow.Field("item", new arrow.Float32(), true),
|
||||
),
|
||||
),
|
||||
new arrow.Field("item", new arrow.Utf8(), true),
|
||||
new arrow.Field("price", new arrow.Float32(), true),
|
||||
]);
|
||||
const data = [
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
];
|
||||
const tbl = await db.createTable("myTable", data, {
|
||||
schema,
|
||||
});
|
||||
// --8<-- [end:create_table_with_schema]
|
||||
expect(await tbl.countRows()).toBe(2);
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:create_empty_table]
|
||||
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field("id", new arrow.Int32()),
|
||||
new arrow.Field("name", new arrow.Utf8()),
|
||||
]);
|
||||
|
||||
const emptyTbl = await db.createEmptyTable("empty_table", schema);
|
||||
// --8<-- [end:create_empty_table]
|
||||
expect(await emptyTbl.countRows()).toBe(0);
|
||||
}
|
||||
{
|
||||
// --8<-- [start:open_table]
|
||||
const _tbl = await db.openTable("myTable");
|
||||
// --8<-- [end:open_table]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:table_names]
|
||||
const tableNames = await db.tableNames();
|
||||
// --8<-- [end:table_names]
|
||||
expect(tableNames).toEqual(["empty_table", "myTable"]);
|
||||
}
|
||||
|
||||
const tbl = await db.openTable("myTable");
|
||||
{
|
||||
// --8<-- [start:add_data]
|
||||
const data = [
|
||||
{ vector: [1.3, 1.4], item: "fizz", price: 100.0 },
|
||||
{ vector: [9.5, 56.2], item: "buzz", price: 200.0 },
|
||||
];
|
||||
await tbl.add(data);
|
||||
// --8<-- [end:add_data]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:vector_search]
|
||||
const res = await tbl.search([100, 100]).limit(2).toArray();
|
||||
// --8<-- [end:vector_search]
|
||||
expect(res.length).toBe(2);
|
||||
}
|
||||
{
|
||||
const data = Array.from({ length: 1000 })
|
||||
.fill(null)
|
||||
.map(() => ({
|
||||
vector: [Math.random(), Math.random()],
|
||||
item: "autogen",
|
||||
price: Math.round(Math.random() * 100),
|
||||
}));
|
||||
|
||||
await tbl.add(data);
|
||||
}
|
||||
|
||||
// --8<-- [start:create_index]
|
||||
await tbl.createIndex("vector");
|
||||
// --8<-- [end:create_index]
|
||||
|
||||
// --8<-- [start:delete_rows]
|
||||
await tbl.delete('item = "fizz"');
|
||||
// --8<-- [end:delete_rows]
|
||||
|
||||
// --8<-- [start:drop_table]
|
||||
await db.dropTable("myTable");
|
||||
// --8<-- [end:drop_table]
|
||||
await db.dropTable("empty_table");
|
||||
|
||||
{
|
||||
// --8<-- [start:create_f16_table]
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
const dim = 16;
|
||||
const total = 10;
|
||||
const f16Schema = new Schema([
|
||||
new Field("id", new Int32()),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(dim, new Field("item", new Float16(), true)),
|
||||
false,
|
||||
),
|
||||
]);
|
||||
const data = lancedb.makeArrowTable(
|
||||
Array.from(Array(total), (_, i) => ({
|
||||
id: i,
|
||||
vector: Array.from(Array(dim), Math.random),
|
||||
})),
|
||||
{ schema: f16Schema },
|
||||
);
|
||||
const _table = await db.createTable("f16_tbl", data);
|
||||
// --8<-- [end:create_f16_table]
|
||||
await db.dropTable("f16_tbl");
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -1,162 +0,0 @@
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
import {
|
||||
Field,
|
||||
FixedSizeList,
|
||||
Float16,
|
||||
Int32,
|
||||
Schema,
|
||||
Utf8,
|
||||
} from "apache-arrow";
|
||||
|
||||
// --8<-- [end:imports]
|
||||
|
||||
// --8<-- [start:connect]
|
||||
const uri = "/tmp/lancedb/";
|
||||
const db = await lancedb.connect(uri);
|
||||
// --8<-- [end:connect]
|
||||
{
|
||||
// --8<-- [start:create_table]
|
||||
const tbl = await db.createTable(
|
||||
"myTable",
|
||||
[
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
],
|
||||
{ mode: "overwrite" },
|
||||
);
|
||||
// --8<-- [end:create_table]
|
||||
|
||||
const data = [
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
];
|
||||
|
||||
{
|
||||
// --8<-- [start:create_table_exists_ok]
|
||||
const tbl = await db.createTable("myTable", data, {
|
||||
existsOk: true,
|
||||
});
|
||||
// --8<-- [end:create_table_exists_ok]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:create_table_overwrite]
|
||||
const _tbl = await db.createTable("myTable", data, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
// --8<-- [end:create_table_overwrite]
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:create_table_with_schema]
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
"vector",
|
||||
new arrow.FixedSizeList(
|
||||
2,
|
||||
new arrow.Field("item", new arrow.Float32(), true),
|
||||
),
|
||||
),
|
||||
new arrow.Field("item", new arrow.Utf8(), true),
|
||||
new arrow.Field("price", new arrow.Float32(), true),
|
||||
]);
|
||||
const data = [
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
];
|
||||
const _tbl = await db.createTable("myTable", data, {
|
||||
schema,
|
||||
});
|
||||
// --8<-- [end:create_table_with_schema]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:create_empty_table]
|
||||
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field("id", new arrow.Int32()),
|
||||
new arrow.Field("name", new arrow.Utf8()),
|
||||
]);
|
||||
|
||||
const empty_tbl = await db.createEmptyTable("empty_table", schema);
|
||||
// --8<-- [end:create_empty_table]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:open_table]
|
||||
const _tbl = await db.openTable("myTable");
|
||||
// --8<-- [end:open_table]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:table_names]
|
||||
const tableNames = await db.tableNames();
|
||||
console.log(tableNames);
|
||||
// --8<-- [end:table_names]
|
||||
}
|
||||
|
||||
const tbl = await db.openTable("myTable");
|
||||
{
|
||||
// --8<-- [start:add_data]
|
||||
const data = [
|
||||
{ vector: [1.3, 1.4], item: "fizz", price: 100.0 },
|
||||
{ vector: [9.5, 56.2], item: "buzz", price: 200.0 },
|
||||
];
|
||||
await tbl.add(data);
|
||||
// --8<-- [end:add_data]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:vector_search]
|
||||
const _res = tbl.search([100, 100]).limit(2).toArray();
|
||||
// --8<-- [end:vector_search]
|
||||
}
|
||||
{
|
||||
const data = Array.from({ length: 1000 })
|
||||
.fill(null)
|
||||
.map(() => ({
|
||||
vector: [Math.random(), Math.random()],
|
||||
item: "autogen",
|
||||
price: Math.round(Math.random() * 100),
|
||||
}));
|
||||
|
||||
await tbl.add(data);
|
||||
}
|
||||
|
||||
// --8<-- [start:create_index]
|
||||
await tbl.createIndex("vector");
|
||||
// --8<-- [end:create_index]
|
||||
|
||||
// --8<-- [start:delete_rows]
|
||||
await tbl.delete('item = "fizz"');
|
||||
// --8<-- [end:delete_rows]
|
||||
|
||||
// --8<-- [start:drop_table]
|
||||
await db.dropTable("myTable");
|
||||
// --8<-- [end:drop_table]
|
||||
await db.dropTable("empty_table");
|
||||
|
||||
{
|
||||
// --8<-- [start:create_f16_table]
|
||||
const db = await lancedb.connect("/tmp/lancedb");
|
||||
const dim = 16;
|
||||
const total = 10;
|
||||
const f16Schema = new Schema([
|
||||
new Field("id", new Int32()),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(dim, new Field("item", new Float16(), true)),
|
||||
false,
|
||||
),
|
||||
]);
|
||||
const data = lancedb.makeArrowTable(
|
||||
Array.from(Array(total), (_, i) => ({
|
||||
id: i,
|
||||
vector: Array.from(Array(dim), Math.random),
|
||||
})),
|
||||
{ schema: f16Schema },
|
||||
);
|
||||
const _table = await db.createTable("f16_tbl", data);
|
||||
// --8<-- [end:create_f16_table]
|
||||
await db.dropTable("f16_tbl");
|
||||
}
|
||||
76
nodejs/examples/custom_embedding_function.test.ts
Normal file
76
nodejs/examples/custom_embedding_function.test.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import { FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import {
|
||||
LanceSchema,
|
||||
TextEmbeddingFunction,
|
||||
getRegistry,
|
||||
register,
|
||||
} from "@lancedb/lancedb/embedding";
|
||||
// --8<-- [end:imports]
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
// --8<-- [start:embedding_impl]
|
||||
@register("sentence-transformers")
|
||||
class SentenceTransformersEmbeddings extends TextEmbeddingFunction {
|
||||
name = "Xenova/all-miniLM-L6-v2";
|
||||
#ndims!: number;
|
||||
extractor!: FeatureExtractionPipeline;
|
||||
|
||||
async init() {
|
||||
this.extractor = await pipeline("feature-extraction", this.name, {
|
||||
dtype: "fp32",
|
||||
});
|
||||
this.#ndims = await this.generateEmbeddings(["hello"]).then(
|
||||
(e) => e[0].length,
|
||||
);
|
||||
}
|
||||
|
||||
ndims() {
|
||||
return this.#ndims;
|
||||
}
|
||||
|
||||
toJSON() {
|
||||
return {
|
||||
name: this.name,
|
||||
};
|
||||
}
|
||||
async generateEmbeddings(texts: string[]) {
|
||||
const output = await this.extractor(texts, {
|
||||
pooling: "mean",
|
||||
normalize: true,
|
||||
});
|
||||
return output.tolist();
|
||||
}
|
||||
}
|
||||
// -8<-- [end:embedding_impl]
|
||||
|
||||
test("Registry examples", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
// --8<-- [start:call_custom_function]
|
||||
const registry = getRegistry();
|
||||
|
||||
const sentenceTransformer = await registry
|
||||
.get<SentenceTransformersEmbeddings>("sentence-transformers")!
|
||||
.create();
|
||||
|
||||
const schema = LanceSchema({
|
||||
vector: sentenceTransformer.vectorField(),
|
||||
text: sentenceTransformer.sourceField(),
|
||||
});
|
||||
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
const table = await db.createEmptyTable("table", schema, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
|
||||
await table.add([{ text: "hello" }, { text: "world" }]);
|
||||
|
||||
const results = await table.search("greeting").limit(1).toArray();
|
||||
// -8<-- [end:call_custom_function]
|
||||
expect(results.length).toBe(1);
|
||||
});
|
||||
}, 100_000);
|
||||
@@ -1,64 +0,0 @@
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import {
|
||||
LanceSchema,
|
||||
TextEmbeddingFunction,
|
||||
getRegistry,
|
||||
register,
|
||||
} from "@lancedb/lancedb/embedding";
|
||||
import { pipeline } from "@xenova/transformers";
|
||||
// --8<-- [end:imports]
|
||||
|
||||
// --8<-- [start:embedding_impl]
|
||||
@register("sentence-transformers")
|
||||
class SentenceTransformersEmbeddings extends TextEmbeddingFunction {
|
||||
name = "Xenova/all-miniLM-L6-v2";
|
||||
#ndims!: number;
|
||||
extractor: any;
|
||||
|
||||
async init() {
|
||||
this.extractor = await pipeline("feature-extraction", this.name);
|
||||
this.#ndims = await this.generateEmbeddings(["hello"]).then(
|
||||
(e) => e[0].length,
|
||||
);
|
||||
}
|
||||
|
||||
ndims() {
|
||||
return this.#ndims;
|
||||
}
|
||||
|
||||
toJSON() {
|
||||
return {
|
||||
name: this.name,
|
||||
};
|
||||
}
|
||||
async generateEmbeddings(texts: string[]) {
|
||||
const output = await this.extractor(texts, {
|
||||
pooling: "mean",
|
||||
normalize: true,
|
||||
});
|
||||
return output.tolist();
|
||||
}
|
||||
}
|
||||
// -8<-- [end:embedding_impl]
|
||||
|
||||
// --8<-- [start:call_custom_function]
|
||||
const registry = getRegistry();
|
||||
|
||||
const sentenceTransformer = await registry
|
||||
.get<SentenceTransformersEmbeddings>("sentence-transformers")!
|
||||
.create();
|
||||
|
||||
const schema = LanceSchema({
|
||||
vector: sentenceTransformer.vectorField(),
|
||||
text: sentenceTransformer.sourceField(),
|
||||
});
|
||||
|
||||
const db = await lancedb.connect("/tmp/db");
|
||||
const table = await db.createEmptyTable("table", schema, { mode: "overwrite" });
|
||||
|
||||
await table.add([{ text: "hello" }, { text: "world" }]);
|
||||
|
||||
const results = await table.search("greeting").limit(1).toArray();
|
||||
console.log(results[0].text);
|
||||
// -8<-- [end:call_custom_function]
|
||||
96
nodejs/examples/embedding.test.ts
Normal file
96
nodejs/examples/embedding.test.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import "@lancedb/lancedb/embedding/openai";
|
||||
import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
|
||||
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
|
||||
import { type Float, Float32, Utf8 } from "apache-arrow";
|
||||
// --8<-- [end:imports]
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
const openAiTest = process.env.OPENAI_API_KEY == null ? test.skip : test;
|
||||
|
||||
openAiTest("openai embeddings", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
// --8<-- [start:openai_embeddings]
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
const func = getRegistry()
|
||||
.get("openai")
|
||||
?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
|
||||
|
||||
const wordsSchema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
const tbl = await db.createEmptyTable("words", wordsSchema, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
|
||||
|
||||
const query = "greetings";
|
||||
const actual = (await tbl.search(query).limit(1).toArray())[0];
|
||||
// --8<-- [end:openai_embeddings]
|
||||
expect(actual).toHaveProperty("text");
|
||||
});
|
||||
});
|
||||
|
||||
test("custom embedding function", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
// --8<-- [start:embedding_function]
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
|
||||
@register("my_embedding")
|
||||
class MyEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
embeddingDataType(): Float {
|
||||
return new Float32();
|
||||
}
|
||||
async computeQueryEmbeddings(_data: string) {
|
||||
// This is a placeholder for a real embedding function
|
||||
return [1, 2, 3];
|
||||
}
|
||||
async computeSourceEmbeddings(data: string[]) {
|
||||
// This is a placeholder for a real embedding function
|
||||
return Array.from({ length: data.length }).fill([
|
||||
1, 2, 3,
|
||||
]) as number[][];
|
||||
}
|
||||
}
|
||||
|
||||
const func = new MyEmbeddingFunction();
|
||||
|
||||
const data = [{ text: "pepperoni" }, { text: "pineapple" }];
|
||||
|
||||
// Option 1: manually specify the embedding function
|
||||
const table = await db.createTable("vectors", data, {
|
||||
embeddingFunction: {
|
||||
function: func,
|
||||
sourceColumn: "text",
|
||||
vectorColumn: "vector",
|
||||
},
|
||||
mode: "overwrite",
|
||||
});
|
||||
|
||||
// Option 2: provide the embedding function through a schema
|
||||
|
||||
const schema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
|
||||
const table2 = await db.createTable("vectors2", data, {
|
||||
schema,
|
||||
mode: "overwrite",
|
||||
});
|
||||
// --8<-- [end:embedding_function]
|
||||
expect(await table.countRows()).toBe(2);
|
||||
expect(await table2.countRows()).toBe(2);
|
||||
});
|
||||
});
|
||||
@@ -1,83 +0,0 @@
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
|
||||
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
|
||||
import { type Float, Float32, Utf8 } from "apache-arrow";
|
||||
// --8<-- [end:imports]
|
||||
|
||||
{
|
||||
// --8<-- [start:openai_embeddings]
|
||||
|
||||
const db = await lancedb.connect("/tmp/db");
|
||||
const func = getRegistry()
|
||||
.get("openai")
|
||||
?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
|
||||
|
||||
const wordsSchema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
const tbl = await db.createEmptyTable("words", wordsSchema, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
|
||||
|
||||
const query = "greetings";
|
||||
const actual = (await (await tbl.search(query)).limit(1).toArray())[0];
|
||||
|
||||
// --8<-- [end:openai_embeddings]
|
||||
console.log("result = ", actual.text);
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:embedding_function]
|
||||
const db = await lancedb.connect("/tmp/db");
|
||||
|
||||
@register("my_embedding")
|
||||
class MyEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
embeddingDataType(): Float {
|
||||
return new Float32();
|
||||
}
|
||||
async computeQueryEmbeddings(_data: string) {
|
||||
// This is a placeholder for a real embedding function
|
||||
return [1, 2, 3];
|
||||
}
|
||||
async computeSourceEmbeddings(data: string[]) {
|
||||
// This is a placeholder for a real embedding function
|
||||
return Array.from({ length: data.length }).fill([1, 2, 3]) as number[][];
|
||||
}
|
||||
}
|
||||
|
||||
const func = new MyEmbeddingFunction();
|
||||
|
||||
const data = [{ text: "pepperoni" }, { text: "pineapple" }];
|
||||
|
||||
// Option 1: manually specify the embedding function
|
||||
const table = await db.createTable("vectors", data, {
|
||||
embeddingFunction: {
|
||||
function: func,
|
||||
sourceColumn: "text",
|
||||
vectorColumn: "vector",
|
||||
},
|
||||
mode: "overwrite",
|
||||
});
|
||||
|
||||
// Option 2: provide the embedding function through a schema
|
||||
|
||||
const schema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
|
||||
const table2 = await db.createTable("vectors2", data, {
|
||||
schema,
|
||||
mode: "overwrite",
|
||||
});
|
||||
// --8<-- [end:embedding_function]
|
||||
}
|
||||
42
nodejs/examples/filtering.test.ts
Normal file
42
nodejs/examples/filtering.test.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
test("filtering examples", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: i,
|
||||
item: `item ${i}`,
|
||||
strId: `${i}`,
|
||||
}));
|
||||
|
||||
const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
|
||||
|
||||
// --8<-- [start:search]
|
||||
const _result = await tbl
|
||||
.search(Array(1536).fill(0.5))
|
||||
.limit(1)
|
||||
.where("id = 10")
|
||||
.toArray();
|
||||
// --8<-- [end:search]
|
||||
|
||||
// --8<-- [start:vec_search]
|
||||
const result = await (
|
||||
tbl.search(Array(1536).fill(0)) as lancedb.VectorQuery
|
||||
)
|
||||
.where("(item IN ('item 0', 'item 2')) AND (id > 10)")
|
||||
.postfilter()
|
||||
.toArray();
|
||||
// --8<-- [end:vec_search]
|
||||
expect(result.length).toBe(0);
|
||||
|
||||
// --8<-- [start:sql_search]
|
||||
await tbl.query().where("id = 10").limit(10).toArray();
|
||||
// --8<-- [end:sql_search]
|
||||
});
|
||||
});
|
||||
@@ -1,34 +0,0 @@
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: i,
|
||||
item: `item ${i}`,
|
||||
strId: `${i}`,
|
||||
}));
|
||||
|
||||
const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
|
||||
|
||||
// --8<-- [start:search]
|
||||
const _result = await tbl
|
||||
.search(Array(1536).fill(0.5))
|
||||
.limit(1)
|
||||
.where("id = 10")
|
||||
.toArray();
|
||||
// --8<-- [end:search]
|
||||
|
||||
// --8<-- [start:vec_search]
|
||||
await tbl
|
||||
.search(Array(1536).fill(0))
|
||||
.where("(item IN ('item 0', 'item 2')) AND (id > 10)")
|
||||
.postfilter()
|
||||
.toArray();
|
||||
// --8<-- [end:vec_search]
|
||||
|
||||
// --8<-- [start:sql_search]
|
||||
await tbl.query().where("id = 10").limit(10).toArray();
|
||||
// --8<-- [end:sql_search]
|
||||
|
||||
console.log("SQL search: done");
|
||||
45
nodejs/examples/full_text_search.test.ts
Normal file
45
nodejs/examples/full_text_search.test.ts
Normal file
@@ -0,0 +1,45 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
test("full text search", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
|
||||
const words = [
|
||||
"apple",
|
||||
"banana",
|
||||
"cherry",
|
||||
"date",
|
||||
"elderberry",
|
||||
"fig",
|
||||
"grape",
|
||||
];
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: i,
|
||||
item: `item ${i}`,
|
||||
strId: `${i}`,
|
||||
doc: words[i % words.length],
|
||||
}));
|
||||
|
||||
const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
|
||||
|
||||
await tbl.createIndex("doc", {
|
||||
config: lancedb.Index.fts(),
|
||||
});
|
||||
|
||||
// --8<-- [start:full_text_search]
|
||||
const result = await tbl
|
||||
.query()
|
||||
.nearestToText("apple")
|
||||
.select(["id", "doc"])
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(result.length).toBe(10);
|
||||
// --8<-- [end:full_text_search]
|
||||
});
|
||||
});
|
||||
@@ -1,52 +0,0 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
|
||||
const words = [
|
||||
"apple",
|
||||
"banana",
|
||||
"cherry",
|
||||
"date",
|
||||
"elderberry",
|
||||
"fig",
|
||||
"grape",
|
||||
];
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: i,
|
||||
item: `item ${i}`,
|
||||
strId: `${i}`,
|
||||
doc: words[i % words.length],
|
||||
}));
|
||||
|
||||
const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
|
||||
|
||||
await tbl.createIndex("doc", {
|
||||
config: lancedb.Index.fts(),
|
||||
});
|
||||
|
||||
// --8<-- [start:full_text_search]
|
||||
let result = await tbl
|
||||
.search("apple")
|
||||
.select(["id", "doc"])
|
||||
.limit(10)
|
||||
.toArray();
|
||||
console.log(result);
|
||||
// --8<-- [end:full_text_search]
|
||||
|
||||
console.log("SQL search: done");
|
||||
6
nodejs/examples/jest.config.cjs
Normal file
6
nodejs/examples/jest.config.cjs
Normal file
@@ -0,0 +1,6 @@
|
||||
/** @type {import('ts-jest').JestConfigWithTsJest} */
|
||||
module.exports = {
|
||||
preset: "ts-jest",
|
||||
testEnvironment: "node",
|
||||
testPathIgnorePatterns: ["./dist"],
|
||||
};
|
||||
@@ -1,27 +0,0 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
// Enable latest features
|
||||
"lib": ["ESNext", "DOM"],
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleDetection": "force",
|
||||
"jsx": "react-jsx",
|
||||
"allowJs": true,
|
||||
|
||||
// Bundler mode
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"noEmit": true,
|
||||
|
||||
// Best practices
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
|
||||
// Some stricter flags (disabled by default)
|
||||
"noUnusedLocals": false,
|
||||
"noUnusedParameters": false,
|
||||
"noPropertyAccessFromIndexSignature": false
|
||||
}
|
||||
}
|
||||
5009
nodejs/examples/package-lock.json
generated
5009
nodejs/examples/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -5,24 +5,29 @@
|
||||
"main": "index.js",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
"//1": "--experimental-vm-modules is needed to run jest with sentence-transformers",
|
||||
"//2": "--testEnvironment is needed to run jest with sentence-transformers",
|
||||
"//3": "See: https://github.com/huggingface/transformers.js/issues/57",
|
||||
"test": "node --experimental-vm-modules node_modules/.bin/jest --testEnvironment jest-environment-node-single-context --verbose",
|
||||
"lint": "biome check *.ts && biome format *.ts",
|
||||
"lint-ci": "biome ci .",
|
||||
"lint-fix": "biome check --write *.ts && npm run format",
|
||||
"format": "biome format --write *.ts"
|
||||
},
|
||||
"author": "Lance Devs",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@lancedb/lancedb": "file:../",
|
||||
"@xenova/transformers": "^2.17.2"
|
||||
"@huggingface/transformers": "^3.0.2",
|
||||
"@lancedb/lancedb": "file:../dist",
|
||||
"openai": "^4.29.2",
|
||||
"sharp": "^0.33.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "^1.7.3",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"jest": "^29.7.0",
|
||||
"jest-environment-node-single-context": "^29.4.0",
|
||||
"ts-jest": "^29.2.5",
|
||||
"typescript": "^5.5.4"
|
||||
},
|
||||
"compilerOptions": {
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Node",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true
|
||||
}
|
||||
}
|
||||
|
||||
42
nodejs/examples/search.test.ts
Normal file
42
nodejs/examples/search.test.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
// --8<-- [end:import]
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
test("full text search", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
{
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(128).fill(i),
|
||||
id: `${i}`,
|
||||
content: "",
|
||||
longId: `${i}`,
|
||||
}));
|
||||
|
||||
await db.createTable("my_vectors", data);
|
||||
}
|
||||
|
||||
// --8<-- [start:search1]
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
const tbl = await db.openTable("my_vectors");
|
||||
|
||||
const results1 = await tbl.search(Array(128).fill(1.2)).limit(10).toArray();
|
||||
// --8<-- [end:search1]
|
||||
expect(results1.length).toBe(10);
|
||||
|
||||
// --8<-- [start:search2]
|
||||
const results2 = await (
|
||||
tbl.search(Array(128).fill(1.2)) as lancedb.VectorQuery
|
||||
)
|
||||
.distanceType("cosine")
|
||||
.limit(10)
|
||||
.toArray();
|
||||
// --8<-- [end:search2]
|
||||
expect(results2.length).toBe(10);
|
||||
});
|
||||
});
|
||||
@@ -1,38 +0,0 @@
|
||||
// --8<-- [end:import]
|
||||
import * as fs from "node:fs";
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
async function setup() {
|
||||
fs.rmSync("data/sample-lancedb", { recursive: true, force: true });
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: `${i}`,
|
||||
content: "",
|
||||
longId: `${i}`,
|
||||
}));
|
||||
|
||||
await db.createTable("my_vectors", data);
|
||||
}
|
||||
|
||||
await setup();
|
||||
|
||||
// --8<-- [start:search1]
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
const tbl = await db.openTable("my_vectors");
|
||||
|
||||
const _results1 = await tbl.search(Array(1536).fill(1.2)).limit(10).toArray();
|
||||
// --8<-- [end:search1]
|
||||
|
||||
// --8<-- [start:search2]
|
||||
const _results2 = await tbl
|
||||
.search(Array(1536).fill(1.2))
|
||||
.distanceType("cosine")
|
||||
.limit(10)
|
||||
.toArray();
|
||||
console.log(_results2);
|
||||
// --8<-- [end:search2]
|
||||
|
||||
console.log("search: done");
|
||||
@@ -1,50 +0,0 @@
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
|
||||
import { Utf8 } from "apache-arrow";
|
||||
|
||||
const db = await lancedb.connect("/tmp/db");
|
||||
const func = await getRegistry().get("huggingface").create();
|
||||
|
||||
const facts = [
|
||||
"Albert Einstein was a theoretical physicist.",
|
||||
"The capital of France is Paris.",
|
||||
"The Great Wall of China is one of the Seven Wonders of the World.",
|
||||
"Python is a popular programming language.",
|
||||
"Mount Everest is the highest mountain in the world.",
|
||||
"Leonardo da Vinci painted the Mona Lisa.",
|
||||
"Shakespeare wrote Hamlet.",
|
||||
"The human body has 206 bones.",
|
||||
"The speed of light is approximately 299,792 kilometers per second.",
|
||||
"Water boils at 100 degrees Celsius.",
|
||||
"The Earth orbits the Sun.",
|
||||
"The Pyramids of Giza are located in Egypt.",
|
||||
"Coffee is one of the most popular beverages in the world.",
|
||||
"Tokyo is the capital city of Japan.",
|
||||
"Photosynthesis is the process by which plants make their food.",
|
||||
"The Pacific Ocean is the largest ocean on Earth.",
|
||||
"Mozart was a prolific composer of classical music.",
|
||||
"The Internet is a global network of computers.",
|
||||
"Basketball is a sport played with a ball and a hoop.",
|
||||
"The first computer virus was created in 1983.",
|
||||
"Artificial neural networks are inspired by the human brain.",
|
||||
"Deep learning is a subset of machine learning.",
|
||||
"IBM's Watson won Jeopardy! in 2011.",
|
||||
"The first computer programmer was Ada Lovelace.",
|
||||
"The first chatbot was ELIZA, created in the 1960s.",
|
||||
].map((text) => ({ text }));
|
||||
|
||||
const factsSchema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
|
||||
const tbl = await db.createTable("facts", facts, {
|
||||
mode: "overwrite",
|
||||
schema: factsSchema,
|
||||
});
|
||||
|
||||
const query = "How many bones are in the human body?";
|
||||
const actual = await tbl.search(query).limit(1).toArray();
|
||||
|
||||
console.log("Answer: ", actual[0]["text"]);
|
||||
63
nodejs/examples/sentence-transformers.test.ts
Normal file
63
nodejs/examples/sentence-transformers.test.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import "@lancedb/lancedb/embedding/transformers";
|
||||
import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
|
||||
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
|
||||
import { Utf8 } from "apache-arrow";
|
||||
|
||||
test("full text search", async () => {
|
||||
await withTempDirectory(async (databaseDir) => {
|
||||
const db = await lancedb.connect(databaseDir);
|
||||
console.log(getRegistry());
|
||||
const func = (await getRegistry()
|
||||
.get("huggingface")
|
||||
?.create()) as EmbeddingFunction;
|
||||
|
||||
const facts = [
|
||||
"Albert Einstein was a theoretical physicist.",
|
||||
"The capital of France is Paris.",
|
||||
"The Great Wall of China is one of the Seven Wonders of the World.",
|
||||
"Python is a popular programming language.",
|
||||
"Mount Everest is the highest mountain in the world.",
|
||||
"Leonardo da Vinci painted the Mona Lisa.",
|
||||
"Shakespeare wrote Hamlet.",
|
||||
"The human body has 206 bones.",
|
||||
"The speed of light is approximately 299,792 kilometers per second.",
|
||||
"Water boils at 100 degrees Celsius.",
|
||||
"The Earth orbits the Sun.",
|
||||
"The Pyramids of Giza are located in Egypt.",
|
||||
"Coffee is one of the most popular beverages in the world.",
|
||||
"Tokyo is the capital city of Japan.",
|
||||
"Photosynthesis is the process by which plants make their food.",
|
||||
"The Pacific Ocean is the largest ocean on Earth.",
|
||||
"Mozart was a prolific composer of classical music.",
|
||||
"The Internet is a global network of computers.",
|
||||
"Basketball is a sport played with a ball and a hoop.",
|
||||
"The first computer virus was created in 1983.",
|
||||
"Artificial neural networks are inspired by the human brain.",
|
||||
"Deep learning is a subset of machine learning.",
|
||||
"IBM's Watson won Jeopardy! in 2011.",
|
||||
"The first computer programmer was Ada Lovelace.",
|
||||
"The first chatbot was ELIZA, created in the 1960s.",
|
||||
].map((text) => ({ text }));
|
||||
|
||||
const factsSchema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
|
||||
const tbl = await db.createTable("facts", facts, {
|
||||
mode: "overwrite",
|
||||
schema: factsSchema,
|
||||
});
|
||||
|
||||
const query = "How many bones are in the human body?";
|
||||
const actual = await tbl.search(query).limit(1).toArray();
|
||||
|
||||
expect(actual[0]["text"]).toBe("The human body has 206 bones.");
|
||||
});
|
||||
}, 100_000);
|
||||
17
nodejs/examples/tsconfig.json
Normal file
17
nodejs/examples/tsconfig.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"include": ["*.test.ts"],
|
||||
"compilerOptions": {
|
||||
"target": "es2022",
|
||||
"module": "NodeNext",
|
||||
"declaration": true,
|
||||
"outDir": "./dist",
|
||||
"strict": true,
|
||||
"allowJs": true,
|
||||
"resolveJsonModule": true,
|
||||
"emitDecoratorMetadata": true,
|
||||
"experimentalDecorators": true,
|
||||
"moduleResolution": "NodeNext",
|
||||
"allowImportingTsExtensions": true,
|
||||
"emitDeclarationOnly": true
|
||||
}
|
||||
}
|
||||
16
nodejs/examples/util.ts
Normal file
16
nodejs/examples/util.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import * as fs from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import * as path from "path";
|
||||
|
||||
export async function withTempDirectory(
|
||||
fn: (tempDir: string) => Promise<void>,
|
||||
) {
|
||||
const tmpDirPath = fs.mkdtempSync(path.join(tmpdir(), "temp-dir-"));
|
||||
try {
|
||||
await fn(tmpDirPath);
|
||||
} finally {
|
||||
fs.rmSync(tmpDirPath, { recursive: true });
|
||||
}
|
||||
}
|
||||
@@ -4,4 +4,5 @@ module.exports = {
|
||||
testEnvironment: "node",
|
||||
moduleDirectories: ["node_modules", "./dist"],
|
||||
moduleFileExtensions: ["js", "ts"],
|
||||
modulePathIgnorePatterns: ["<rootDir>/examples/"],
|
||||
};
|
||||
|
||||
@@ -19,9 +19,6 @@ import { EmbeddingFunctionConfig, getRegistry } from "./registry";
|
||||
|
||||
export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
|
||||
|
||||
// We need to explicitly export '*' so that the `register` decorator actually registers the class.
|
||||
export * from "./openai";
|
||||
export * from "./transformers";
|
||||
export * from "./registry";
|
||||
|
||||
/**
|
||||
|
||||
@@ -17,8 +17,6 @@ import {
|
||||
type EmbeddingFunctionConstructor,
|
||||
} from "./embedding_function";
|
||||
import "reflect-metadata";
|
||||
import { OpenAIEmbeddingFunction } from "./openai";
|
||||
import { TransformersEmbeddingFunction } from "./transformers";
|
||||
|
||||
type CreateReturnType<T> = T extends { init: () => Promise<void> }
|
||||
? Promise<T>
|
||||
@@ -73,10 +71,6 @@ export class EmbeddingFunctionRegistry {
|
||||
};
|
||||
}
|
||||
|
||||
get(name: "openai"): EmbeddingFunctionCreate<OpenAIEmbeddingFunction>;
|
||||
get(
|
||||
name: "huggingface",
|
||||
): EmbeddingFunctionCreate<TransformersEmbeddingFunction>;
|
||||
get<T extends EmbeddingFunction<unknown>>(
|
||||
name: string,
|
||||
): EmbeddingFunctionCreate<T> | undefined;
|
||||
|
||||
@@ -47,8 +47,8 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
|
||||
string,
|
||||
Partial<XenovaTransformerOptions>
|
||||
> {
|
||||
#model?: import("@xenova/transformers").PreTrainedModel;
|
||||
#tokenizer?: import("@xenova/transformers").PreTrainedTokenizer;
|
||||
#model?: import("@huggingface/transformers").PreTrainedModel;
|
||||
#tokenizer?: import("@huggingface/transformers").PreTrainedTokenizer;
|
||||
#modelName: XenovaTransformerOptions["model"];
|
||||
#initialized = false;
|
||||
#tokenizerOptions: XenovaTransformerOptions["tokenizerOptions"];
|
||||
@@ -92,18 +92,19 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
|
||||
try {
|
||||
// SAFETY:
|
||||
// since typescript transpiles `import` to `require`, we need to do this in an unsafe way
|
||||
// We can't use `require` because `@xenova/transformers` is an ESM module
|
||||
// We can't use `require` because `@huggingface/transformers` is an ESM module
|
||||
// and we can't use `import` directly because typescript will transpile it to `require`.
|
||||
// and we want to remain compatible with both ESM and CJS modules
|
||||
// so we use `eval` to bypass typescript for this specific import.
|
||||
transformers = await eval('import("@xenova/transformers")');
|
||||
transformers = await eval('import("@huggingface/transformers")');
|
||||
} catch (e) {
|
||||
throw new Error(`error loading @xenova/transformers\nReason: ${e}`);
|
||||
throw new Error(`error loading @huggingface/transformers\nReason: ${e}`);
|
||||
}
|
||||
|
||||
try {
|
||||
this.#model = await transformers.AutoModel.from_pretrained(
|
||||
this.#modelName,
|
||||
{ dtype: "fp32" },
|
||||
);
|
||||
} catch (e) {
|
||||
throw new Error(
|
||||
@@ -128,7 +129,8 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
|
||||
} else {
|
||||
const config = this.#model!.config;
|
||||
|
||||
const ndims = config["hidden_size"];
|
||||
// biome-ignore lint/style/useNamingConvention: we don't control this name.
|
||||
const ndims = (config as unknown as { hidden_size: number }).hidden_size;
|
||||
if (!ndims) {
|
||||
throw new Error(
|
||||
"hidden_size not found in model config, you may need to manually specify the embedding dimensions. ",
|
||||
@@ -183,7 +185,7 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
|
||||
}
|
||||
|
||||
const tensorDiv = (
|
||||
src: import("@xenova/transformers").Tensor,
|
||||
src: import("@huggingface/transformers").Tensor,
|
||||
divBy: number,
|
||||
) => {
|
||||
for (let i = 0; i < src.data.length; ++i) {
|
||||
|
||||
@@ -239,6 +239,29 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip searching un-indexed data. This can make search faster, but will miss
|
||||
* any data that is not yet indexed.
|
||||
*
|
||||
* Use {@link lancedb.Table#optimize} to index all un-indexed data.
|
||||
*/
|
||||
fastSearch(): this {
|
||||
this.doCall((inner: NativeQueryType) => inner.fastSearch());
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether to return the row id in the results.
|
||||
*
|
||||
* This column can be used to match results between different queries. For
|
||||
* example, to match results from a full text search and a vector search in
|
||||
* order to perform hybrid search.
|
||||
*/
|
||||
withRowId(): this {
|
||||
this.doCall((inner: NativeQueryType) => inner.withRowId());
|
||||
return this;
|
||||
}
|
||||
|
||||
protected nativeExecute(
|
||||
options?: Partial<QueryExecutionOptions>,
|
||||
): Promise<NativeBatchIterator> {
|
||||
@@ -362,6 +385,20 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the number of candidates to consider during the search
|
||||
*
|
||||
* This argument is only used when the vector column has an HNSW index.
|
||||
* If there is no index then this value is ignored.
|
||||
*
|
||||
* Increasing this value will increase the recall of your query but will
|
||||
* also increase the latency of your query. The default value is 1.5*limit.
|
||||
*/
|
||||
ef(ef: number): VectorQuery {
|
||||
super.doCall((inner) => inner.ef(ef));
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the vector column to query
|
||||
*
|
||||
@@ -469,6 +506,42 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
|
||||
super.doCall((inner) => inner.bypassVectorIndex());
|
||||
return this;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a query vector to the search
|
||||
*
|
||||
* This method can be called multiple times to add multiple query vectors
|
||||
* to the search. If multiple query vectors are added, then they will be searched
|
||||
* in parallel, and the results will be concatenated. A column called `query_index`
|
||||
* will be added to indicate the index of the query vector that produced the result.
|
||||
*
|
||||
* Performance wise, this is equivalent to running multiple queries concurrently.
|
||||
*/
|
||||
addQueryVector(vector: IntoVector): VectorQuery {
|
||||
if (vector instanceof Promise) {
|
||||
const res = (async () => {
|
||||
try {
|
||||
const v = await vector;
|
||||
const arr = Float32Array.from(v);
|
||||
//
|
||||
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
||||
const value: any = this.addQueryVector(arr);
|
||||
const inner = value.inner as
|
||||
| NativeVectorQuery
|
||||
| Promise<NativeVectorQuery>;
|
||||
return inner;
|
||||
} catch (e) {
|
||||
return Promise.reject(e);
|
||||
}
|
||||
})();
|
||||
return new VectorQuery(res);
|
||||
} else {
|
||||
super.doCall((inner) => {
|
||||
inner.addQueryVector(Float32Array.from(vector));
|
||||
});
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** A builder for LanceDB queries. */
|
||||
@@ -548,4 +621,9 @@ export class Query extends QueryBase<NativeQuery> {
|
||||
return new VectorQuery(vectorQuery);
|
||||
}
|
||||
}
|
||||
|
||||
nearestToText(query: string, columns?: string[]): Query {
|
||||
this.doCall((inner) => inner.fullTextSearch(query, columns));
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,6 +87,12 @@ export interface OptimizeOptions {
|
||||
deleteUnverified: boolean;
|
||||
}
|
||||
|
||||
export interface Version {
|
||||
version: number;
|
||||
timestamp: Date;
|
||||
metadata: Record<string, string>;
|
||||
}
|
||||
|
||||
/**
|
||||
* A Table is a collection of Records in a LanceDB Database.
|
||||
*
|
||||
@@ -360,6 +366,11 @@ export abstract class Table {
|
||||
*/
|
||||
abstract checkoutLatest(): Promise<void>;
|
||||
|
||||
/**
|
||||
* List all the versions of the table
|
||||
*/
|
||||
abstract listVersions(): Promise<Version[]>;
|
||||
|
||||
/**
|
||||
* Restore the table to the currently checked out version
|
||||
*
|
||||
@@ -659,6 +670,14 @@ export class LocalTable extends Table {
|
||||
await this.inner.checkoutLatest();
|
||||
}
|
||||
|
||||
async listVersions(): Promise<Version[]> {
|
||||
return (await this.inner.listVersions()).map((version) => ({
|
||||
version: version.version,
|
||||
timestamp: new Date(version.timestamp / 1000),
|
||||
metadata: version.metadata,
|
||||
}));
|
||||
}
|
||||
|
||||
async restore(): Promise<void> {
|
||||
await this.inner.restore();
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
3
nodejs/npm/linux-arm64-musl/README.md
Normal file
3
nodejs/npm/linux-arm64-musl/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# `@lancedb/lancedb-linux-arm64-musl`
|
||||
|
||||
This is the **aarch64-unknown-linux-musl** binary for `@lancedb/lancedb`
|
||||
13
nodejs/npm/linux-arm64-musl/package.json
Normal file
13
nodejs/npm/linux-arm64-musl/package.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.13.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
"files": ["lancedb.linux-arm64-musl.node"],
|
||||
"license": "Apache 2.0",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"libc": ["musl"]
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
3
nodejs/npm/linux-x64-musl/README.md
Normal file
3
nodejs/npm/linux-x64-musl/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# `@lancedb/lancedb-linux-x64-musl`
|
||||
|
||||
This is the **x86_64-unknown-linux-musl** binary for `@lancedb/lancedb`
|
||||
13
nodejs/npm/linux-x64-musl/package.json
Normal file
13
nodejs/npm/linux-x64-musl/package.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.13.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
"files": ["lancedb.linux-x64-musl.node"],
|
||||
"license": "Apache 2.0",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"libc": ["musl"]
|
||||
}
|
||||
3
nodejs/npm/win32-arm64-msvc/README.md
Normal file
3
nodejs/npm/win32-arm64-msvc/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# `@lancedb/lancedb-win32-arm64-msvc`
|
||||
|
||||
This is the **aarch64-pc-windows-msvc** binary for `@lancedb/lancedb`
|
||||
18
nodejs/npm/win32-arm64-msvc/package.json
Normal file
18
nodejs/npm/win32-arm64-msvc/package.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.13.1-beta.0",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"main": "lancedb.win32-arm64-msvc.node",
|
||||
"files": [
|
||||
"lancedb.win32-arm64-msvc.node"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
1438
nodejs/package-lock.json
generated
1438
nodejs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -10,11 +10,13 @@
|
||||
"vector database",
|
||||
"ann"
|
||||
],
|
||||
"version": "0.11.1-beta.1",
|
||||
"version": "0.13.1-beta.0",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
"./embedding": "./dist/embedding/index.js"
|
||||
"./embedding": "./dist/embedding/index.js",
|
||||
"./embedding/openai": "./dist/embedding/openai.js",
|
||||
"./embedding/transformers": "./dist/embedding/transformers.js"
|
||||
},
|
||||
"types": "dist/index.d.ts",
|
||||
"napi": {
|
||||
@@ -22,10 +24,12 @@
|
||||
"triples": {
|
||||
"defaults": false,
|
||||
"additional": [
|
||||
"aarch64-apple-darwin",
|
||||
"aarch64-unknown-linux-gnu",
|
||||
"x86_64-apple-darwin",
|
||||
"aarch64-apple-darwin",
|
||||
"x86_64-unknown-linux-gnu",
|
||||
"aarch64-unknown-linux-gnu",
|
||||
"x86_64-unknown-linux-musl",
|
||||
"aarch64-unknown-linux-musl",
|
||||
"x86_64-pc-windows-msvc"
|
||||
]
|
||||
}
|
||||
@@ -85,7 +89,7 @@
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@xenova/transformers": ">=2.17 < 3",
|
||||
"@huggingface/transformers": "^3.0.2",
|
||||
"openai": "^4.29.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
|
||||
@@ -82,7 +82,7 @@ pub struct OpenTableOptions {
|
||||
#[napi::module_init]
|
||||
fn init() {
|
||||
let env = Env::new()
|
||||
.filter_or("LANCEDB_LOG", "trace")
|
||||
.filter_or("LANCEDB_LOG", "warn")
|
||||
.write_style("LANCEDB_LOG_STYLE");
|
||||
env_logger::init_from_env(env);
|
||||
}
|
||||
|
||||
@@ -80,6 +80,16 @@ impl Query {
|
||||
Ok(VectorQuery { inner })
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn fast_search(&mut self) {
|
||||
self.inner = self.inner.clone().fast_search();
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn with_row_id(&mut self) {
|
||||
self.inner = self.inner.clone().with_row_id();
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn execute(
|
||||
&self,
|
||||
@@ -125,6 +135,16 @@ impl VectorQuery {
|
||||
self.inner = self.inner.clone().column(&column);
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn add_query_vector(&mut self, vector: Float32Array) -> Result<()> {
|
||||
self.inner = self
|
||||
.inner
|
||||
.clone()
|
||||
.add_query_vector(vector.as_ref())
|
||||
.default_error()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn distance_type(&mut self, distance_type: String) -> napi::Result<()> {
|
||||
let distance_type = parse_distance_type(distance_type)?;
|
||||
@@ -147,6 +167,11 @@ impl VectorQuery {
|
||||
self.inner = self.inner.clone().nprobes(nprobe as usize);
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn ef(&mut self, ef: u32) {
|
||||
self.inner = self.inner.clone().ef(ef as usize);
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn bypass_vector_index(&mut self) {
|
||||
self.inner = self.inner.clone().bypass_vector_index()
|
||||
@@ -183,6 +208,16 @@ impl VectorQuery {
|
||||
self.inner = self.inner.clone().offset(offset as usize);
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn fast_search(&mut self) {
|
||||
self.inner = self.inner.clone().fast_search();
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn with_row_id(&mut self) {
|
||||
self.inner = self.inner.clone().with_row_id();
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn execute(
|
||||
&self,
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use arrow_ipc::writer::FileWriter;
|
||||
use lancedb::ipc::ipc_file_to_batches;
|
||||
use lancedb::table::{
|
||||
@@ -226,6 +228,28 @@ impl Table {
|
||||
self.inner_ref()?.checkout_latest().await.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn list_versions(&self) -> napi::Result<Vec<Version>> {
|
||||
self.inner_ref()?
|
||||
.list_versions()
|
||||
.await
|
||||
.map(|versions| {
|
||||
versions
|
||||
.iter()
|
||||
.map(|version| Version {
|
||||
version: version.version as i64,
|
||||
timestamp: version.timestamp.timestamp_micros(),
|
||||
metadata: version
|
||||
.metadata
|
||||
.iter()
|
||||
.map(|(k, v)| (k.clone(), v.clone()))
|
||||
.collect(),
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn restore(&self) -> napi::Result<()> {
|
||||
self.inner_ref()?.restore().await.default_error()
|
||||
@@ -466,3 +490,10 @@ impl From<lancedb::index::IndexStatistics> for IndexStatistics {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct Version {
|
||||
pub version: i64,
|
||||
pub timestamp: i64,
|
||||
pub metadata: HashMap<String, String>,
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"experimentalDecorators": true,
|
||||
"moduleResolution": "Node"
|
||||
},
|
||||
"exclude": ["./dist/*"],
|
||||
"exclude": ["./dist/*", "./examples/*"],
|
||||
"typedocOptions": {
|
||||
"entryPoints": ["lancedb/index.ts"],
|
||||
"out": "../docs/src/javascript/",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.15.0"
|
||||
current_version = "0.16.1-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.15.0"
|
||||
version = "0.16.1-beta.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -15,7 +15,7 @@ crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow = { version = "52.1", features = ["pyarrow"] }
|
||||
lancedb = { path = "../rust/lancedb" }
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
env_logger.workspace = true
|
||||
pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] }
|
||||
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
|
||||
@@ -33,6 +33,11 @@ pyo3-build-config = { version = "0.20.3", features = [
|
||||
] }
|
||||
|
||||
[features]
|
||||
default = ["remote"]
|
||||
default = ["default-tls", "remote"]
|
||||
fp16kernels = ["lancedb/fp16kernels"]
|
||||
remote = ["lancedb/remote"]
|
||||
|
||||
# TLS
|
||||
default-tls = ["lancedb/default-tls"]
|
||||
native-tls = ["lancedb/native-tls"]
|
||||
rustls-tls = ["lancedb/rustls-tls"]
|
||||
|
||||
@@ -3,13 +3,11 @@ name = "lancedb"
|
||||
# version in Cargo.toml
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.19.1",
|
||||
"requests>=2.31.0",
|
||||
"nest-asyncio~=1.0",
|
||||
"pylance==0.20.0b2",
|
||||
"tqdm>=4.27.0",
|
||||
"pydantic>=1.10",
|
||||
"attrs>=21.3.0",
|
||||
"packaging",
|
||||
"cachetools",
|
||||
"overrides>=0.7",
|
||||
]
|
||||
description = "lancedb"
|
||||
@@ -61,6 +59,7 @@ dev = ["ruff", "pre-commit"]
|
||||
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
|
||||
clip = ["torch", "pillow", "open-clip"]
|
||||
embeddings = [
|
||||
"requests>=2.31.0",
|
||||
"openai>=1.6.1",
|
||||
"sentence-transformers",
|
||||
"torch",
|
||||
|
||||
@@ -19,12 +19,10 @@ from typing import Dict, Optional, Union, Any
|
||||
|
||||
__version__ = importlib.metadata.version("lancedb")
|
||||
|
||||
from lancedb.remote import ClientConfig
|
||||
|
||||
from ._lancedb import connect as lancedb_connect
|
||||
from .common import URI, sanitize_uri
|
||||
from .db import AsyncConnection, DBConnection, LanceDBConnection
|
||||
from .remote.db import RemoteDBConnection
|
||||
from .remote import ClientConfig
|
||||
from .schema import vector
|
||||
from .table import AsyncTable
|
||||
|
||||
@@ -37,6 +35,7 @@ def connect(
|
||||
host_override: Optional[str] = None,
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||
**kwargs: Any,
|
||||
) -> DBConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
@@ -64,14 +63,10 @@ def connect(
|
||||
the last check, then the table will be checked for updates. Note: this
|
||||
consistency only applies to read operations. Write operations are
|
||||
always consistent.
|
||||
request_thread_pool: int or ThreadPoolExecutor, optional
|
||||
The thread pool to use for making batch requests to the LanceDB Cloud API.
|
||||
If an integer, then a ThreadPoolExecutor will be created with that
|
||||
number of threads. If None, then a ThreadPoolExecutor will be created
|
||||
with the default number of threads. If a ThreadPoolExecutor, then that
|
||||
executor will be used for making requests. This is for LanceDB Cloud
|
||||
only and is only used when making batch requests (i.e., passing in
|
||||
multiple queries to the search method at once).
|
||||
client_config: ClientConfig or dict, optional
|
||||
Configuration options for the LanceDB Cloud HTTP client. If a dict, then
|
||||
the keys are the attributes of the ClientConfig class. If None, then the
|
||||
default configuration is used.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -94,6 +89,8 @@ def connect(
|
||||
conn : DBConnection
|
||||
A connection to a LanceDB database.
|
||||
"""
|
||||
from .remote.db import RemoteDBConnection
|
||||
|
||||
if isinstance(uri, str) and uri.startswith("db://"):
|
||||
if api_key is None:
|
||||
api_key = os.environ.get("LANCEDB_API_KEY")
|
||||
@@ -106,7 +103,9 @@ def connect(
|
||||
api_key,
|
||||
region,
|
||||
host_override,
|
||||
# TODO: remove this (deprecation warning downstream)
|
||||
request_thread_pool=request_thread_pool,
|
||||
client_config=client_config,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,6 +36,8 @@ class Connection(object):
|
||||
data_storage_version: Optional[str] = None,
|
||||
enable_v2_manifest_paths: Optional[bool] = None,
|
||||
) -> Table: ...
|
||||
async def rename_table(self, old_name: str, new_name: str) -> None: ...
|
||||
async def drop_table(self, name: str) -> None: ...
|
||||
|
||||
class Table:
|
||||
def name(self) -> str: ...
|
||||
|
||||
@@ -817,6 +817,18 @@ class AsyncConnection(object):
|
||||
table = await self._inner.open_table(name, storage_options, index_cache_size)
|
||||
return AsyncTable(table)
|
||||
|
||||
async def rename_table(self, old_name: str, new_name: str):
|
||||
"""Rename a table in the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
old_name: str
|
||||
The current name of the table.
|
||||
new_name: str
|
||||
The new name of the table.
|
||||
"""
|
||||
await self._inner.rename_table(old_name, new_name)
|
||||
|
||||
async def drop_table(self, name: str):
|
||||
"""Drop a table from the database.
|
||||
|
||||
|
||||
@@ -27,3 +27,4 @@ from .imagebind import ImageBindEmbeddings
|
||||
from .utils import with_embeddings
|
||||
from .jinaai import JinaEmbeddings
|
||||
from .watsonx import WatsonxEmbeddings
|
||||
from .voyageai import VoyageAIEmbeddingFunction
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
|
||||
import os
|
||||
import io
|
||||
import requests
|
||||
import base64
|
||||
from urllib.parse import urlparse
|
||||
from pathlib import Path
|
||||
@@ -226,6 +225,8 @@ class JinaEmbeddings(EmbeddingFunction):
|
||||
return [result["embedding"] for result in sorted_embeddings]
|
||||
|
||||
def _init_client(self):
|
||||
import requests
|
||||
|
||||
if JinaEmbeddings._session is None:
|
||||
if self.api_key is None and os.environ.get("JINA_API_KEY") is None:
|
||||
api_key_not_found_help("jina")
|
||||
|
||||
@@ -1,15 +1,6 @@
|
||||
# Copyright (c) 2023. LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import json
|
||||
from typing import Dict, Optional
|
||||
|
||||
@@ -170,7 +161,7 @@ def register(name):
|
||||
return __REGISTRY__.get_instance().register(name)
|
||||
|
||||
|
||||
def get_registry():
|
||||
def get_registry() -> EmbeddingFunctionRegistry:
|
||||
"""
|
||||
Utility function to get the global instance of the registry
|
||||
|
||||
|
||||
127
python/python/lancedb/embeddings/voyageai.py
Normal file
127
python/python/lancedb/embeddings/voyageai.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# Copyright (c) 2023. LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
from typing import ClassVar, List, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ..util import attempt_import_or_raise
|
||||
from .base import TextEmbeddingFunction
|
||||
from .registry import register
|
||||
from .utils import api_key_not_found_help, TEXT
|
||||
|
||||
|
||||
@register("voyageai")
|
||||
class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
||||
"""
|
||||
An embedding function that uses the VoyageAI API
|
||||
|
||||
https://docs.voyageai.com/docs/embeddings
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the model to use. List of acceptable models:
|
||||
|
||||
* voyage-3
|
||||
* voyage-3-lite
|
||||
* voyage-finance-2
|
||||
* voyage-multilingual-2
|
||||
* voyage-law-2
|
||||
* voyage-code-2
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||
|
||||
voyageai = EmbeddingFunctionRegistry
|
||||
.get_instance()
|
||||
.get("voyageai")
|
||||
.create(name="voyage-3")
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
data = [ { "text": "hello world" },
|
||||
{ "text": "goodbye world" }]
|
||||
|
||||
db = lancedb.connect("~/.lancedb")
|
||||
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
|
||||
|
||||
tbl.add(data)
|
||||
|
||||
"""
|
||||
|
||||
name: str
|
||||
client: ClassVar = None
|
||||
|
||||
def ndims(self):
|
||||
if self.name == "voyage-3-lite":
|
||||
return 512
|
||||
elif self.name == "voyage-code-2":
|
||||
return 1536
|
||||
elif self.name in [
|
||||
"voyage-3",
|
||||
"voyage-finance-2",
|
||||
"voyage-multilingual-2",
|
||||
"voyage-law-2",
|
||||
]:
|
||||
return 1024
|
||||
else:
|
||||
raise ValueError(f"Model {self.name} not supported")
|
||||
|
||||
def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
|
||||
return self.compute_source_embeddings(query, input_type="query")
|
||||
|
||||
def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
|
||||
texts = self.sanitize_input(texts)
|
||||
input_type = (
|
||||
kwargs.get("input_type") or "document"
|
||||
) # assume source input type if not passed by `compute_query_embeddings`
|
||||
return self.generate_embeddings(texts, input_type=input_type)
|
||||
|
||||
def generate_embeddings(
|
||||
self, texts: Union[List[str], np.ndarray], *args, **kwargs
|
||||
) -> List[np.array]:
|
||||
"""
|
||||
Get the embeddings for the given texts
|
||||
|
||||
Parameters
|
||||
----------
|
||||
texts: list[str] or np.ndarray (of str)
|
||||
The texts to embed
|
||||
input_type: Optional[str]
|
||||
|
||||
truncation: Optional[bool]
|
||||
"""
|
||||
VoyageAIEmbeddingFunction._init_client()
|
||||
rs = VoyageAIEmbeddingFunction.client.embed(
|
||||
texts=texts, model=self.name, **kwargs
|
||||
)
|
||||
|
||||
return [emb for emb in rs.embeddings]
|
||||
|
||||
@staticmethod
|
||||
def _init_client():
|
||||
if VoyageAIEmbeddingFunction.client is None:
|
||||
voyageai = attempt_import_or_raise("voyageai")
|
||||
if os.environ.get("VOYAGE_API_KEY") is None:
|
||||
api_key_not_found_help("voyageai")
|
||||
VoyageAIEmbeddingFunction.client = voyageai.Client(
|
||||
os.environ["VOYAGE_API_KEY"]
|
||||
)
|
||||
@@ -467,6 +467,8 @@ class IvfPq:
|
||||
|
||||
The default value is 256.
|
||||
"""
|
||||
if distance_type is not None:
|
||||
distance_type = distance_type.lower()
|
||||
self._inner = LanceDbIndex.ivf_pq(
|
||||
distance_type=distance_type,
|
||||
num_partitions=num_partitions,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user