mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 05:49:57 +00:00
Compare commits
44 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f9789ec962 | ||
|
|
347515aa51 | ||
|
|
3324e7d525 | ||
|
|
ab5316b4fa | ||
|
|
db125013fc | ||
|
|
a43193c99b | ||
|
|
b70513ca72 | ||
|
|
78165801c6 | ||
|
|
6e5927ce6d | ||
|
|
6c1f32ac11 | ||
|
|
4fdf084777 | ||
|
|
1fad24fcd8 | ||
|
|
6ef20b85ca | ||
|
|
35bacdd57e | ||
|
|
a5ebe5a6c4 | ||
|
|
bf03ad1b4a | ||
|
|
2a9e3e2084 | ||
|
|
f298f15360 | ||
|
|
679b031b99 | ||
|
|
f50b5d532b | ||
|
|
fe655a15f0 | ||
|
|
9d0af794d0 | ||
|
|
048a2d10f8 | ||
|
|
c78a9849b4 | ||
|
|
c663085203 | ||
|
|
8b628854d5 | ||
|
|
a8d8c17b2a | ||
|
|
3c487e5fc7 | ||
|
|
d6219d687c | ||
|
|
239f725b32 | ||
|
|
5f261cf2d8 | ||
|
|
79eaa52184 | ||
|
|
bd82e1f66d | ||
|
|
ba34c3bee1 | ||
|
|
d4d0873e2b | ||
|
|
12c7bd18a5 | ||
|
|
c6bf6a25d6 | ||
|
|
c998a47e17 | ||
|
|
d8c758513c | ||
|
|
3795e02ee3 | ||
|
|
c7d424b2f3 | ||
|
|
1efb9914ee | ||
|
|
83e26a231e | ||
|
|
72a17b2de4 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.14.0-beta.0"
|
current_version = "0.14.1-beta.1"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
4
.github/workflows/docs.yml
vendored
4
.github/workflows/docs.yml
vendored
@@ -72,9 +72,9 @@ jobs:
|
|||||||
- name: Setup Pages
|
- name: Setup Pages
|
||||||
uses: actions/configure-pages@v2
|
uses: actions/configure-pages@v2
|
||||||
- name: Upload artifact
|
- name: Upload artifact
|
||||||
uses: actions/upload-pages-artifact@v1
|
uses: actions/upload-pages-artifact@v3
|
||||||
with:
|
with:
|
||||||
path: "docs/site"
|
path: "docs/site"
|
||||||
- name: Deploy to GitHub Pages
|
- name: Deploy to GitHub Pages
|
||||||
id: deployment
|
id: deployment
|
||||||
uses: actions/deploy-pages@v1
|
uses: actions/deploy-pages@v4
|
||||||
|
|||||||
304
.github/workflows/npm-publish.yml
vendored
304
.github/workflows/npm-publish.yml
vendored
@@ -143,7 +143,7 @@ jobs:
|
|||||||
|
|
||||||
node-linux-musl:
|
node-linux-musl:
|
||||||
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
|
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
|
||||||
runs-on: ${{ matrix.config.runner }}
|
runs-on: ubuntu-latest
|
||||||
container: alpine:edge
|
container: alpine:edge
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -152,10 +152,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- arch: x86_64
|
- arch: x86_64
|
||||||
runner: ubuntu-latest
|
|
||||||
- arch: aarch64
|
- arch: aarch64
|
||||||
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
|
||||||
runner: buildjet-16vcpu-ubuntu-2204-arm
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -249,7 +246,7 @@ jobs:
|
|||||||
|
|
||||||
nodejs-linux-musl:
|
nodejs-linux-musl:
|
||||||
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
|
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
|
||||||
runs-on: ${{ matrix.config.runner }}
|
runs-on: ubuntu-latest
|
||||||
container: alpine:edge
|
container: alpine:edge
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -258,10 +255,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- arch: x86_64
|
- arch: x86_64
|
||||||
runner: ubuntu-latest
|
|
||||||
- arch: aarch64
|
- arch: aarch64
|
||||||
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
|
||||||
runner: buildjet-16vcpu-ubuntu-2204-arm
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -340,109 +334,50 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
node/dist/lancedb-vectordb-win32*.tgz
|
node/dist/lancedb-vectordb-win32*.tgz
|
||||||
|
|
||||||
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
|
node-windows-arm64:
|
||||||
# node-windows-arm64:
|
name: vectordb ${{ matrix.config.arch }}-pc-windows-msvc
|
||||||
# name: vectordb win32-arm64-msvc
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
# runs-on: windows-4x-arm
|
runs-on: ubuntu-latest
|
||||||
# if: startsWith(github.ref, 'refs/tags/v')
|
container: alpine:edge
|
||||||
# steps:
|
strategy:
|
||||||
# - uses: actions/checkout@v4
|
fail-fast: false
|
||||||
# - name: Install Git
|
matrix:
|
||||||
# run: |
|
config:
|
||||||
# Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
|
# - arch: x86_64
|
||||||
# Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
|
- arch: aarch64
|
||||||
# shell: powershell
|
steps:
|
||||||
# - name: Add Git to PATH
|
- name: Checkout
|
||||||
# run: |
|
uses: actions/checkout@v4
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
|
- name: Install dependencies
|
||||||
# $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
|
run: |
|
||||||
# shell: powershell
|
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
|
||||||
# - name: Configure Git symlinks
|
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
|
||||||
# run: git config --global core.symlinks true
|
echo "source $HOME/.cargo/env" >> saved_env
|
||||||
# - uses: actions/checkout@v4
|
echo "export CC=clang" >> saved_env
|
||||||
# - uses: actions/setup-python@v5
|
echo "export AR=llvm-ar" >> saved_env
|
||||||
# with:
|
source "$HOME/.cargo/env"
|
||||||
# python-version: "3.13"
|
rustup target add ${{ matrix.config.arch }}-pc-windows-msvc --toolchain 1.80.0
|
||||||
# - name: Install Visual Studio Build Tools
|
(mkdir -p sysroot && cd sysroot && sh ../ci/sysroot-${{ matrix.config.arch }}-pc-windows-msvc.sh)
|
||||||
# run: |
|
echo "export C_INCLUDE_PATH=/usr/${{ matrix.config.arch }}-pc-windows-msvc/usr/include" >> saved_env
|
||||||
# Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
|
echo "export CARGO_BUILD_TARGET=${{ matrix.config.arch }}-pc-windows-msvc" >> saved_env
|
||||||
# Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
|
- name: Configure x86_64 build
|
||||||
# "--installPath", "C:\BuildTools", `
|
if: ${{ matrix.config.arch == 'x86_64' }}
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
|
run: |
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
|
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=+crt-static,+avx2,+fma,+f16c -Clinker=lld -Clink-arg=/LIBPATH:/usr/x86_64-pc-windows-msvc/usr/lib'" >> saved_env
|
||||||
# "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
|
- name: Configure aarch64 build
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
|
if: ${{ matrix.config.arch == 'aarch64' }}
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
|
run: |
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
|
echo "export RUSTFLAGS='-Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib'" >> saved_env
|
||||||
# shell: powershell
|
- name: Build Windows Artifacts
|
||||||
# - name: Add Visual Studio Build Tools to PATH
|
run: |
|
||||||
# run: |
|
source ./saved_env
|
||||||
# $vsPath = "C:\BuildTools\VC\Tools\MSVC"
|
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-pc-windows-msvc
|
||||||
# $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
|
- name: Upload Windows Artifacts
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
|
uses: actions/upload-artifact@v4
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
|
with:
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
|
name: node-native-windows-${{ matrix.config.arch }}
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
|
path: |
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
|
node/dist/lancedb-vectordb-win32*.tgz
|
||||||
|
|
||||||
# # Add MSVC runtime libraries to LIB
|
|
||||||
# $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
|
|
||||||
# Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
|
|
||||||
|
|
||||||
# # Add INCLUDE paths
|
|
||||||
# $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
|
|
||||||
# Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Install Rust
|
|
||||||
# run: |
|
|
||||||
# Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
|
||||||
# .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add Rust to PATH
|
|
||||||
# run: |
|
|
||||||
# Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
|
|
||||||
# shell: powershell
|
|
||||||
|
|
||||||
# - uses: Swatinem/rust-cache@v2
|
|
||||||
# with:
|
|
||||||
# workspaces: rust
|
|
||||||
# - name: Install 7-Zip ARM
|
|
||||||
# run: |
|
|
||||||
# New-Item -Path 'C:\7zip' -ItemType Directory
|
|
||||||
# Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
|
|
||||||
# Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add 7-Zip to PATH
|
|
||||||
# run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Install Protoc v21.12
|
|
||||||
# working-directory: C:\
|
|
||||||
# run: |
|
|
||||||
# if (Test-Path 'C:\protoc') {
|
|
||||||
# Write-Host "Protoc directory exists, skipping installation"
|
|
||||||
# return
|
|
||||||
# }
|
|
||||||
# New-Item -Path 'C:\protoc' -ItemType Directory
|
|
||||||
# Set-Location C:\protoc
|
|
||||||
# Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
|
||||||
# & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add Protoc to PATH
|
|
||||||
# run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Build Windows native node modules
|
|
||||||
# run: .\ci\build_windows_artifacts.ps1 aarch64-pc-windows-msvc
|
|
||||||
# - name: Upload Windows ARM64 Artifacts
|
|
||||||
# uses: actions/upload-artifact@v4
|
|
||||||
# with:
|
|
||||||
# name: node-native-windows-arm64
|
|
||||||
# path: |
|
|
||||||
# node/dist/*.node
|
|
||||||
|
|
||||||
nodejs-windows:
|
nodejs-windows:
|
||||||
name: lancedb ${{ matrix.target }}
|
name: lancedb ${{ matrix.target }}
|
||||||
@@ -478,103 +413,57 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
nodejs/dist/*.node
|
nodejs/dist/*.node
|
||||||
|
|
||||||
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
|
nodejs-windows-arm64:
|
||||||
# nodejs-windows-arm64:
|
name: lancedb ${{ matrix.config.arch }}-pc-windows-msvc
|
||||||
# name: lancedb win32-arm64-msvc
|
# Only runs on tags that matches the make-release action
|
||||||
# runs-on: windows-4x-arm
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
# if: startsWith(github.ref, 'refs/tags/v')
|
runs-on: ubuntu-latest
|
||||||
# steps:
|
container: alpine:edge
|
||||||
# - uses: actions/checkout@v4
|
strategy:
|
||||||
# - name: Install Git
|
fail-fast: false
|
||||||
# run: |
|
matrix:
|
||||||
# Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
|
config:
|
||||||
# Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
|
# - arch: x86_64
|
||||||
# shell: powershell
|
- arch: aarch64
|
||||||
# - name: Add Git to PATH
|
steps:
|
||||||
# run: |
|
- name: Checkout
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
|
uses: actions/checkout@v4
|
||||||
# $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
|
- name: Install dependencies
|
||||||
# shell: powershell
|
run: |
|
||||||
# - name: Configure Git symlinks
|
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
|
||||||
# run: git config --global core.symlinks true
|
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
|
||||||
# - uses: actions/checkout@v4
|
echo "source $HOME/.cargo/env" >> saved_env
|
||||||
# - uses: actions/setup-python@v5
|
echo "export CC=clang" >> saved_env
|
||||||
# with:
|
echo "export AR=llvm-ar" >> saved_env
|
||||||
# python-version: "3.13"
|
source "$HOME/.cargo/env"
|
||||||
# - name: Install Visual Studio Build Tools
|
rustup target add ${{ matrix.config.arch }}-pc-windows-msvc --toolchain 1.80.0
|
||||||
# run: |
|
(mkdir -p sysroot && cd sysroot && sh ../ci/sysroot-${{ matrix.config.arch }}-pc-windows-msvc.sh)
|
||||||
# Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
|
echo "export C_INCLUDE_PATH=/usr/${{ matrix.config.arch }}-pc-windows-msvc/usr/include" >> saved_env
|
||||||
# Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
|
echo "export CARGO_BUILD_TARGET=${{ matrix.config.arch }}-pc-windows-msvc" >> saved_env
|
||||||
# "--installPath", "C:\BuildTools", `
|
printf '#!/bin/sh\ncargo "$@"' > $HOME/.cargo/bin/cargo-xwin
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
|
chmod u+x $HOME/.cargo/bin/cargo-xwin
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
|
- name: Configure x86_64 build
|
||||||
# "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
|
if: ${{ matrix.config.arch == 'x86_64' }}
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
|
run: |
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
|
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=+crt-static,+avx2,+fma,+f16c -Clinker=lld -Clink-arg=/LIBPATH:/usr/x86_64-pc-windows-msvc/usr/lib'" >> saved_env
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
|
- name: Configure aarch64 build
|
||||||
# shell: powershell
|
if: ${{ matrix.config.arch == 'aarch64' }}
|
||||||
# - name: Add Visual Studio Build Tools to PATH
|
run: |
|
||||||
# run: |
|
echo "export RUSTFLAGS='-Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib'" >> saved_env
|
||||||
# $vsPath = "C:\BuildTools\VC\Tools\MSVC"
|
- name: Build Windows Artifacts
|
||||||
# $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
|
run: |
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
|
source ./saved_env
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
|
bash ci/manylinux_node/build_lancedb.sh ${{ matrix.config.arch }}
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
|
- name: Upload Windows Artifacts
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
|
uses: actions/upload-artifact@v4
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
|
with:
|
||||||
|
name: nodejs-native-windows-${{ matrix.config.arch }}
|
||||||
# $env:LIB = ""
|
path: |
|
||||||
# Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
|
nodejs/dist/*.node
|
||||||
# shell: powershell
|
|
||||||
# - name: Install Rust
|
|
||||||
# run: |
|
|
||||||
# Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
|
||||||
# .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add Rust to PATH
|
|
||||||
# run: |
|
|
||||||
# Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
|
|
||||||
# shell: powershell
|
|
||||||
|
|
||||||
# - uses: Swatinem/rust-cache@v2
|
|
||||||
# with:
|
|
||||||
# workspaces: rust
|
|
||||||
# - name: Install 7-Zip ARM
|
|
||||||
# run: |
|
|
||||||
# New-Item -Path 'C:\7zip' -ItemType Directory
|
|
||||||
# Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
|
|
||||||
# Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add 7-Zip to PATH
|
|
||||||
# run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Install Protoc v21.12
|
|
||||||
# working-directory: C:\
|
|
||||||
# run: |
|
|
||||||
# if (Test-Path 'C:\protoc') {
|
|
||||||
# Write-Host "Protoc directory exists, skipping installation"
|
|
||||||
# return
|
|
||||||
# }
|
|
||||||
# New-Item -Path 'C:\protoc' -ItemType Directory
|
|
||||||
# Set-Location C:\protoc
|
|
||||||
# Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
|
||||||
# & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add Protoc to PATH
|
|
||||||
# run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Build Windows native node modules
|
|
||||||
# run: .\ci\build_windows_artifacts_nodejs.ps1 aarch64-pc-windows-msvc
|
|
||||||
# - name: Upload Windows ARM64 Artifacts
|
|
||||||
# uses: actions/upload-artifact@v4
|
|
||||||
# with:
|
|
||||||
# name: nodejs-native-windows-arm64
|
|
||||||
# path: |
|
|
||||||
# nodejs/dist/*.node
|
|
||||||
|
|
||||||
release:
|
release:
|
||||||
name: vectordb NPM Publish
|
name: vectordb NPM Publish
|
||||||
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows]
|
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows, node-windows-arm64]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -614,7 +503,7 @@ jobs:
|
|||||||
|
|
||||||
release-nodejs:
|
release-nodejs:
|
||||||
name: lancedb NPM Publish
|
name: lancedb NPM Publish
|
||||||
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows]
|
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows, nodejs-windows-arm64]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -672,6 +561,7 @@ jobs:
|
|||||||
SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
|
SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
|
||||||
|
|
||||||
update-package-lock:
|
update-package-lock:
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
needs: [release]
|
needs: [release]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
@@ -689,6 +579,7 @@ jobs:
|
|||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
update-package-lock-nodejs:
|
update-package-lock-nodejs:
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
needs: [release-nodejs]
|
needs: [release-nodejs]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
@@ -706,6 +597,7 @@ jobs:
|
|||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
gh-release:
|
gh-release:
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: write
|
||||||
|
|||||||
2
.github/workflows/pypi-publish.yml
vendored
2
.github/workflows/pypi-publish.yml
vendored
@@ -83,7 +83,7 @@ jobs:
|
|||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v4
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: 3.8
|
python-version: 3.12
|
||||||
- uses: ./.github/workflows/build_windows_wheel
|
- uses: ./.github/workflows/build_windows_wheel
|
||||||
with:
|
with:
|
||||||
python-minor-version: 8
|
python-minor-version: 8
|
||||||
|
|||||||
1
.github/workflows/upload_wheel/action.yml
vendored
1
.github/workflows/upload_wheel/action.yml
vendored
@@ -17,6 +17,7 @@ runs:
|
|||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install twine
|
pip install twine
|
||||||
|
python3 -m pip install --upgrade pkginfo
|
||||||
- name: Choose repo
|
- name: Choose repo
|
||||||
shell: bash
|
shell: bash
|
||||||
id: choose_repo
|
id: choose_repo
|
||||||
|
|||||||
36
Cargo.toml
36
Cargo.toml
@@ -23,27 +23,27 @@ rust-version = "1.80.0" # TO
|
|||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.20.0", "features" = [
|
lance = { "version" = "=0.20.0", "features" = [
|
||||||
"dynamodb",
|
"dynamodb",
|
||||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
] }
|
||||||
lance-io = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-io = "0.20.0"
|
||||||
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-index = "0.20.0"
|
||||||
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-linalg = "0.20.0"
|
||||||
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-table = "0.20.0"
|
||||||
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-testing = "0.20.0"
|
||||||
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-datafusion = "0.20.0"
|
||||||
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-encoding = "0.20.0"
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "52.2", optional = false }
|
arrow = { version = "53.2", optional = false }
|
||||||
arrow-array = "52.2"
|
arrow-array = "53.2"
|
||||||
arrow-data = "52.2"
|
arrow-data = "53.2"
|
||||||
arrow-ipc = "52.2"
|
arrow-ipc = "53.2"
|
||||||
arrow-ord = "52.2"
|
arrow-ord = "53.2"
|
||||||
arrow-schema = "52.2"
|
arrow-schema = "53.2"
|
||||||
arrow-arith = "52.2"
|
arrow-arith = "53.2"
|
||||||
arrow-cast = "52.2"
|
arrow-cast = "53.2"
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
chrono = "0.4.35"
|
chrono = "0.4.35"
|
||||||
datafusion-common = "41.0"
|
datafusion-common = "42.0"
|
||||||
datafusion-physical-plan = "41.0"
|
datafusion-physical-plan = "42.0"
|
||||||
env_logger = "0.10"
|
env_logger = "0.10"
|
||||||
half = { "version" = "=2.4.1", default-features = false, features = [
|
half = { "version" = "=2.4.1", default-features = false, features = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
|||||||
@@ -18,4 +18,4 @@ FILE=$HOME/.bashrc && test -f $FILE && source $FILE
|
|||||||
cd node
|
cd node
|
||||||
npm ci
|
npm ci
|
||||||
npm run build-release
|
npm run build-release
|
||||||
npm run pack-build -t $TARGET_TRIPLE
|
npm run pack-build -- -t $TARGET_TRIPLE
|
||||||
|
|||||||
105
ci/sysroot-aarch64-pc-windows-msvc.sh
Normal file
105
ci/sysroot-aarch64-pc-windows-msvc.sh
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# https://github.com/mstorsjo/msvc-wine/blob/master/vsdownload.py
|
||||||
|
# https://github.com/mozilla/gecko-dev/blob/6027d1d91f2d3204a3992633b3ef730ff005fc64/build/vs/vs2022-car.yaml
|
||||||
|
|
||||||
|
# function dl() {
|
||||||
|
# curl -O https://download.visualstudio.microsoft.com/download/pr/$1
|
||||||
|
# }
|
||||||
|
|
||||||
|
# [[.h]]
|
||||||
|
|
||||||
|
# "id": "Win11SDK_10.0.26100"
|
||||||
|
# "version": "10.0.26100.7"
|
||||||
|
|
||||||
|
# libucrt.lib
|
||||||
|
|
||||||
|
# example: <assert.h>
|
||||||
|
# dir: ucrt/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ee3a5fc6e9fc832af7295b138e93839/universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b1aa09b90fe314aceb090f6ec7626624/16ab2ea2187acffa6435e334796c8c89.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/400609bb0ff5804e36dbe6dcd42a7f01/6ee7bbee8435130a869cf971694fd9e2.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ac327317abb865a0e3f56b2faefa918/78fa3c824c2c48bd4a49ab5969adaaf7.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/f034bc0b2680f67dccd4bfeea3d0f932/7afc7b670accd8e3cc94cfffd516f5cb.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7ed5e12f9d50f80825a8b27838cf4c7f/96076045170fe5db6d5dcf14b6f6688e.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/764edc185a696bda9e07df8891dddbbb/a1e2a83aa8a71c48c742eeaff6e71928.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/66854bedc6dbd5ccb5dd82c8e2412231/b2f03f34ff83ec013b9e45c7cd8e8a73.cab
|
||||||
|
|
||||||
|
# example: <windows.h>
|
||||||
|
# dir: um/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b286efac4d83a54fc49190bddef1edc9/windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/e0dc3811d92ab96fcb72bf63d6c08d71/766c0ffd568bbb31bf7fb6793383e24a.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/613503da4b5628768497822826aed39f/8125ee239710f33ea485965f76fae646.cab
|
||||||
|
|
||||||
|
# example: <winapifamily.h>
|
||||||
|
# dir: /shared
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/122979f0348d3a2a36b6aa1a111d5d0c/windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/766e04beecdfccff39e91dd9eb32834a/e89e3dcbb016928c7e426238337d69eb.cab
|
||||||
|
|
||||||
|
|
||||||
|
# "id": "Microsoft.VisualC.14.16.CRT.Headers"
|
||||||
|
# "version": "14.16.27045"
|
||||||
|
|
||||||
|
# example: <vcruntime.h>
|
||||||
|
# dir: MSVC/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/87bbe41e09a2f83711e72696f49681429327eb7a4b90618c35667a6ba2e2880e/Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||||
|
|
||||||
|
# [[.lib]]
|
||||||
|
|
||||||
|
# advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib runtimeobject.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/944c4153b849a1f7d0c0404a4f1c05ea/windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5306aed3e1a38d1e8bef5934edeb2a9b/05047a45609f311645eebcac2739fc4c.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/13c8a73a0f5a6474040b26d016a26fab/13d68b8a7b6678a368e2d13ff4027521.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/149578fb3b621cdb61ee1813b9b3e791/463ad1b0783ebda908fd6c16a4abfe93.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5c986c4f393c6b09d5aec3b539e9fb4a/5a22e5cde814b041749fb271547f4dd5.cab
|
||||||
|
|
||||||
|
# fwpuclnt.lib arm64rt.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7a332420d812f7c1d41da865ae5a7c52/windows%20sdk%20desktop%20libs%20arm64-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/19de98ed4a79938d0045d19c047936b3/3e2f7be479e3679d700ce0782e4cc318.cab
|
||||||
|
|
||||||
|
# libcmt.lib libvcruntime.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/227f40682a88dc5fa0ccb9cadc9ad30af99ad1f1a75db63407587d079f60d035/Microsoft.VisualC.14.16.CRT.ARM64.Desktop.vsix
|
||||||
|
|
||||||
|
|
||||||
|
msiextract universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20desktop%20libs%20arm64-x86_en-us.msi
|
||||||
|
unzip -o Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||||
|
unzip -o Microsoft.VisualC.14.16.CRT.ARM64.Desktop.vsix
|
||||||
|
|
||||||
|
mkdir -p /usr/aarch64-pc-windows-msvc/usr/include
|
||||||
|
mkdir -p /usr/aarch64-pc-windows-msvc/usr/lib
|
||||||
|
|
||||||
|
# lowercase folder/file names
|
||||||
|
echo "$(find . -regex ".*/[^/]*[A-Z][^/]*")" | xargs -I{} sh -c 'mv "$(echo "{}" | sed -E '"'"'s/(.*\/)/\L\1/'"'"')" "$(echo "{}" | tr [A-Z] [a-z])"'
|
||||||
|
|
||||||
|
# .h
|
||||||
|
(cd 'program files/windows kits/10/include/10.0.26100.0' && cp -r ucrt/* um/* shared/* -t /usr/aarch64-pc-windows-msvc/usr/include)
|
||||||
|
|
||||||
|
cp -r contents/vc/tools/msvc/14.16.27023/include/* /usr/aarch64-pc-windows-msvc/usr/include
|
||||||
|
|
||||||
|
# lowercase #include "" and #include <>
|
||||||
|
find /usr/aarch64-pc-windows-msvc/usr/include -type f -exec sed -i -E 's/(#include <[^<>]*?[A-Z][^<>]*?>)|(#include "[^"]*?[A-Z][^"]*?")/\L\1\2/' "{}" ';'
|
||||||
|
|
||||||
|
# ARM intrinsics
|
||||||
|
# original dir: MSVC/
|
||||||
|
|
||||||
|
# '__n128x4' redefined in arm_neon.h
|
||||||
|
# "arm64_neon.h" included from intrin.h
|
||||||
|
|
||||||
|
(cd /usr/lib/llvm19/lib/clang/19/include && cp arm_neon.h intrin.h -t /usr/aarch64-pc-windows-msvc/usr/include)
|
||||||
|
|
||||||
|
# .lib
|
||||||
|
|
||||||
|
# _Interlocked intrinsics
|
||||||
|
# must always link with arm64rt.lib
|
||||||
|
# reason: https://developercommunity.visualstudio.com/t/libucrtlibstreamobj-error-lnk2001-unresolved-exter/1544787#T-ND1599818
|
||||||
|
# I don't understand the 'correct' fix for this, arm64rt.lib is supposed to be the workaround
|
||||||
|
|
||||||
|
(cd 'program files/windows kits/10/lib/10.0.26100.0/um/arm64' && cp advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib runtimeobject.lib fwpuclnt.lib arm64rt.lib -t /usr/aarch64-pc-windows-msvc/usr/lib)
|
||||||
|
|
||||||
|
(cd 'contents/vc/tools/msvc/14.16.27023/lib/arm64' && cp libcmt.lib libvcruntime.lib -t /usr/aarch64-pc-windows-msvc/usr/lib)
|
||||||
|
|
||||||
|
cp 'program files/windows kits/10/lib/10.0.26100.0/ucrt/arm64/libucrt.lib' /usr/aarch64-pc-windows-msvc/usr/lib
|
||||||
105
ci/sysroot-x86_64-pc-windows-msvc.sh
Normal file
105
ci/sysroot-x86_64-pc-windows-msvc.sh
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# https://github.com/mstorsjo/msvc-wine/blob/master/vsdownload.py
|
||||||
|
# https://github.com/mozilla/gecko-dev/blob/6027d1d91f2d3204a3992633b3ef730ff005fc64/build/vs/vs2022-car.yaml
|
||||||
|
|
||||||
|
# function dl() {
|
||||||
|
# curl -O https://download.visualstudio.microsoft.com/download/pr/$1
|
||||||
|
# }
|
||||||
|
|
||||||
|
# [[.h]]
|
||||||
|
|
||||||
|
# "id": "Win11SDK_10.0.26100"
|
||||||
|
# "version": "10.0.26100.7"
|
||||||
|
|
||||||
|
# libucrt.lib
|
||||||
|
|
||||||
|
# example: <assert.h>
|
||||||
|
# dir: ucrt/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ee3a5fc6e9fc832af7295b138e93839/universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b1aa09b90fe314aceb090f6ec7626624/16ab2ea2187acffa6435e334796c8c89.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/400609bb0ff5804e36dbe6dcd42a7f01/6ee7bbee8435130a869cf971694fd9e2.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ac327317abb865a0e3f56b2faefa918/78fa3c824c2c48bd4a49ab5969adaaf7.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/f034bc0b2680f67dccd4bfeea3d0f932/7afc7b670accd8e3cc94cfffd516f5cb.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7ed5e12f9d50f80825a8b27838cf4c7f/96076045170fe5db6d5dcf14b6f6688e.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/764edc185a696bda9e07df8891dddbbb/a1e2a83aa8a71c48c742eeaff6e71928.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/66854bedc6dbd5ccb5dd82c8e2412231/b2f03f34ff83ec013b9e45c7cd8e8a73.cab
|
||||||
|
|
||||||
|
# example: <windows.h>
|
||||||
|
# dir: um/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b286efac4d83a54fc49190bddef1edc9/windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/e0dc3811d92ab96fcb72bf63d6c08d71/766c0ffd568bbb31bf7fb6793383e24a.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/613503da4b5628768497822826aed39f/8125ee239710f33ea485965f76fae646.cab
|
||||||
|
|
||||||
|
# example: <winapifamily.h>
|
||||||
|
# dir: shared/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/122979f0348d3a2a36b6aa1a111d5d0c/windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/766e04beecdfccff39e91dd9eb32834a/e89e3dcbb016928c7e426238337d69eb.cab
|
||||||
|
|
||||||
|
|
||||||
|
# "id": "Microsoft.VisualC.14.16.CRT.Headers"
|
||||||
|
# "version": "14.16.27045"
|
||||||
|
|
||||||
|
# example: <vcruntime.h>
|
||||||
|
# dir: MSVC/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/87bbe41e09a2f83711e72696f49681429327eb7a4b90618c35667a6ba2e2880e/Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||||
|
|
||||||
|
# [[.lib]]
|
||||||
|
|
||||||
|
# advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/944c4153b849a1f7d0c0404a4f1c05ea/windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5306aed3e1a38d1e8bef5934edeb2a9b/05047a45609f311645eebcac2739fc4c.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/13c8a73a0f5a6474040b26d016a26fab/13d68b8a7b6678a368e2d13ff4027521.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/149578fb3b621cdb61ee1813b9b3e791/463ad1b0783ebda908fd6c16a4abfe93.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5c986c4f393c6b09d5aec3b539e9fb4a/5a22e5cde814b041749fb271547f4dd5.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/bfc3904a0195453419ae4dfea7abd6fb/e10768bb6e9d0ea730280336b697da66.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/637f9f3be880c71f9e3ca07b4d67345c/f9b24c8280986c0683fbceca5326d806.cab
|
||||||
|
|
||||||
|
# dbghelp.lib fwpuclnt.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/9f51690d5aa804b1340ce12d1ec80f89/windows%20sdk%20desktop%20libs%20x64-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/d3a7df4ca3303a698640a29e558a5e5b/58314d0646d7e1a25e97c902166c3155.cab
|
||||||
|
|
||||||
|
# libcmt.lib libvcruntime.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/8728f21ae09940f1f4b4ee47b4a596be2509e2a47d2f0c83bbec0ea37d69644b/Microsoft.VisualC.14.16.CRT.x64.Desktop.vsix
|
||||||
|
|
||||||
|
|
||||||
|
msiextract universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20desktop%20libs%20x64-x86_en-us.msi
|
||||||
|
unzip -o Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||||
|
unzip -o Microsoft.VisualC.14.16.CRT.x64.Desktop.vsix
|
||||||
|
|
||||||
|
mkdir -p /usr/x86_64-pc-windows-msvc/usr/include
|
||||||
|
mkdir -p /usr/x86_64-pc-windows-msvc/usr/lib
|
||||||
|
|
||||||
|
# lowercase folder/file names
|
||||||
|
echo "$(find . -regex ".*/[^/]*[A-Z][^/]*")" | xargs -I{} sh -c 'mv "$(echo "{}" | sed -E '"'"'s/(.*\/)/\L\1/'"'"')" "$(echo "{}" | tr [A-Z] [a-z])"'
|
||||||
|
|
||||||
|
# .h
|
||||||
|
(cd 'program files/windows kits/10/include/10.0.26100.0' && cp -r ucrt/* um/* shared/* -t /usr/x86_64-pc-windows-msvc/usr/include)
|
||||||
|
|
||||||
|
cp -r contents/vc/tools/msvc/14.16.27023/include/* /usr/x86_64-pc-windows-msvc/usr/include
|
||||||
|
|
||||||
|
# lowercase #include "" and #include <>
|
||||||
|
find /usr/x86_64-pc-windows-msvc/usr/include -type f -exec sed -i -E 's/(#include <[^<>]*?[A-Z][^<>]*?>)|(#include "[^"]*?[A-Z][^"]*?")/\L\1\2/' "{}" ';'
|
||||||
|
|
||||||
|
# x86 intrinsics
|
||||||
|
# original dir: MSVC/
|
||||||
|
|
||||||
|
# '_mm_movemask_epi8' defined in emmintrin.h
|
||||||
|
# '__v4sf' defined in xmmintrin.h
|
||||||
|
# '__v2si' defined in mmintrin.h
|
||||||
|
# '__m128d' redefined in immintrin.h
|
||||||
|
# '__m128i' redefined in intrin.h
|
||||||
|
# '_mm_comlt_epu8' defined in ammintrin.h
|
||||||
|
|
||||||
|
(cd /usr/lib/llvm19/lib/clang/19/include && cp emmintrin.h xmmintrin.h mmintrin.h immintrin.h intrin.h ammintrin.h -t /usr/x86_64-pc-windows-msvc/usr/include)
|
||||||
|
|
||||||
|
# .lib
|
||||||
|
(cd 'program files/windows kits/10/lib/10.0.26100.0/um/x64' && cp advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib dbghelp.lib fwpuclnt.lib -t /usr/x86_64-pc-windows-msvc/usr/lib)
|
||||||
|
|
||||||
|
(cd 'contents/vc/tools/msvc/14.16.27023/lib/x64' && cp libcmt.lib libvcruntime.lib -t /usr/x86_64-pc-windows-msvc/usr/lib)
|
||||||
|
|
||||||
|
cp 'program files/windows kits/10/lib/10.0.26100.0/ucrt/x64/libucrt.lib' /usr/x86_64-pc-windows-msvc/usr/lib
|
||||||
@@ -83,6 +83,7 @@ The following IVF_PQ paramters can be specified:
|
|||||||
- **num_sub_vectors**: The number of sub-vectors (M) that will be created during Product Quantization (PQ).
|
- **num_sub_vectors**: The number of sub-vectors (M) that will be created during Product Quantization (PQ).
|
||||||
For D dimensional vector, it will be divided into `M` subvectors with dimension `D/M`, each of which is replaced by
|
For D dimensional vector, it will be divided into `M` subvectors with dimension `D/M`, each of which is replaced by
|
||||||
a single PQ code. The default is the dimension of the vector divided by 16.
|
a single PQ code. The default is the dimension of the vector divided by 16.
|
||||||
|
- **num_bits**: The number of bits used to encode each sub-vector. Only 4 and 8 are supported. The higher the number of bits, the higher the accuracy of the index, but also the slower the search. The default is 8.
|
||||||
|
|
||||||
!!! note
|
!!! note
|
||||||
|
|
||||||
@@ -142,11 +143,11 @@ There are a couple of parameters that can be used to fine-tune the search:
|
|||||||
- **nprobes** (default: 20): The number of probes used. A higher number makes search more accurate but also slower.<br/>
|
- **nprobes** (default: 20): The number of probes used. A higher number makes search more accurate but also slower.<br/>
|
||||||
Most of the time, setting nprobes to cover 5-15% of the dataset should achieve high recall with low latency.<br/>
|
Most of the time, setting nprobes to cover 5-15% of the dataset should achieve high recall with low latency.<br/>
|
||||||
- _For example_, For a dataset of 1 million vectors divided into 256 partitions, `nprobes` should be set to ~20-40. This value can be adjusted to achieve the optimal balance between search latency and search quality. <br/>
|
- _For example_, For a dataset of 1 million vectors divided into 256 partitions, `nprobes` should be set to ~20-40. This value can be adjusted to achieve the optimal balance between search latency and search quality. <br/>
|
||||||
|
|
||||||
- **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
|
- **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
|
||||||
A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
|
A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
|
||||||
- _For example_, For a dataset of 1 million vectors divided into 256 partitions, setting the `refine_factor` to 200 will initially retrieve the top 4,000 candidates (top k * refine_factor) from all searched partitions. These candidates are then reranked to determine the final top 20 results.<br/>
|
- _For example_, For a dataset of 1 million vectors divided into 256 partitions, setting the `refine_factor` to 200 will initially retrieve the top 4,000 candidates (top k * refine_factor) from all searched partitions. These candidates are then reranked to determine the final top 20 results.<br/>
|
||||||
!!! note
|
!!! note
|
||||||
Both `nprobes` and `refine_factor` are only applicable if an ANN index is present. If specified on a table without an ANN index, those parameters are ignored.
|
Both `nprobes` and `refine_factor` are only applicable if an ANN index is present. If specified on a table without an ANN index, those parameters are ignored.
|
||||||
|
|
||||||
|
|
||||||
@@ -288,4 +289,4 @@ less space distortion, and thus yields better accuracy. However, a higher `num_s
|
|||||||
|
|
||||||
`m` determines the number of connections a new node establishes with its closest neighbors upon entering the graph. Typically, `m` falls within the range of 5 to 48. Lower `m` values are suitable for low-dimensional data or scenarios where recall is less critical. Conversely, higher `m` values are beneficial for high-dimensional data or when high recall is required. In essence, a larger `m` results in a denser graph with increased connectivity, but at the expense of higher memory consumption.
|
`m` determines the number of connections a new node establishes with its closest neighbors upon entering the graph. Typically, `m` falls within the range of 5 to 48. Lower `m` values are suitable for low-dimensional data or scenarios where recall is less critical. Conversely, higher `m` values are beneficial for high-dimensional data or when high recall is required. In essence, a larger `m` results in a denser graph with increased connectivity, but at the expense of higher memory consumption.
|
||||||
|
|
||||||
`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase
|
`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ LanceDB registers the OpenAI embeddings function in the registry by default, as
|
|||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
| `name` | `str` | `"text-embedding-ada-002"` | The name of the model. |
|
| `name` | `str` | `"text-embedding-ada-002"` | The name of the model. |
|
||||||
| `dim` | `int` | Model default | For OpenAI's newer text-embedding-3 model, we can specify a dimensionality that is smaller than the 1536 size. This feature supports it |
|
| `dim` | `int` | Model default | For OpenAI's newer text-embedding-3 model, we can specify a dimensionality that is smaller than the 1536 size. This feature supports it |
|
||||||
|
| `use_azure` | `bool` | `False` | Set to `True` to use the Azure OpenAI SDK |
|
||||||
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|||||||
@@ -27,10 +27,13 @@ LanceDB OSS supports object stores such as AWS S3 (and compatible stores), Azure
|
|||||||
|
|
||||||
Azure Blob Storage:
|
Azure Blob Storage:
|
||||||
|
|
||||||
|
<!-- skip-test -->
|
||||||
```python
|
```python
|
||||||
import lancedb
|
import lancedb
|
||||||
db = lancedb.connect("az://bucket/path")
|
db = lancedb.connect("az://bucket/path")
|
||||||
```
|
```
|
||||||
|
Note that for Azure, storage credentials must be configured. See [below](#azure-blob-storage) for more details.
|
||||||
|
|
||||||
|
|
||||||
=== "TypeScript"
|
=== "TypeScript"
|
||||||
|
|
||||||
@@ -87,11 +90,6 @@ In most cases, when running in the respective cloud and permissions are set up c
|
|||||||
export TIMEOUT=60s
|
export TIMEOUT=60s
|
||||||
```
|
```
|
||||||
|
|
||||||
!!! note "`storage_options` availability"
|
|
||||||
|
|
||||||
The `storage_options` parameter is only available in Python *async* API and JavaScript API.
|
|
||||||
It is not yet supported in the Python synchronous API.
|
|
||||||
|
|
||||||
If you only want this to apply to one particular connection, you can pass the `storage_options` argument when opening the connection:
|
If you only want this to apply to one particular connection, you can pass the `storage_options` argument when opening the connection:
|
||||||
|
|
||||||
=== "Python"
|
=== "Python"
|
||||||
|
|||||||
@@ -790,6 +790,101 @@ Use the `drop_table()` method on the database to remove a table.
|
|||||||
This permanently removes the table and is not recoverable, unlike deleting rows.
|
This permanently removes the table and is not recoverable, unlike deleting rows.
|
||||||
If the table does not exist an exception is raised.
|
If the table does not exist an exception is raised.
|
||||||
|
|
||||||
|
## Changing schemas
|
||||||
|
|
||||||
|
While tables must have a schema specified when they are created, you can
|
||||||
|
change the schema over time. There are three methods to alter the schema of
|
||||||
|
a table:
|
||||||
|
|
||||||
|
* `add_columns`: Add new columns to the table
|
||||||
|
* `alter_columns`: Alter the name, nullability, or data type of a column
|
||||||
|
* `drop_columns`: Drop columns from the table
|
||||||
|
|
||||||
|
### Adding new columns
|
||||||
|
|
||||||
|
You can add new columns to the table with the `add_columns` method. New columns
|
||||||
|
are filled with values based on a SQL expression. For example, you can add a new
|
||||||
|
column `double_price` to the table and fill it with the value of `price * 2`.
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
```python
|
||||||
|
table.add_columns({"double_price": "price * 2"})
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.table.Table.add_columns][]
|
||||||
|
|
||||||
|
=== "Typescript"
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
--8<-- "nodejs/examples/basic.test.ts:add_columns"
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.Table.addColumns](../js/classes/Table.md/#addcolumns)
|
||||||
|
|
||||||
|
If you want to fill it with null, you can use `cast(NULL as <data_type>)` as
|
||||||
|
the SQL expression to fill the column with nulls, while controlling the data
|
||||||
|
type of the column. Available data types are based on the
|
||||||
|
[DataFusion data types](https://datafusion.apache.org/user-guide/sql/data_types.html).
|
||||||
|
You can use any of the SQL types, such as `BIGINT`:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
cast(NULL as BIGINT)
|
||||||
|
```
|
||||||
|
|
||||||
|
Using Arrow data types and the `arrow_typeof` function is not yet supported.
|
||||||
|
|
||||||
|
<!-- TODO: we could provide a better formula for filling with nulls:
|
||||||
|
https://github.com/lancedb/lance/issues/3175
|
||||||
|
-->
|
||||||
|
|
||||||
|
### Altering existing columns
|
||||||
|
|
||||||
|
You can alter the name, nullability, or data type of a column with the `alter_columns`
|
||||||
|
method.
|
||||||
|
|
||||||
|
Changing the name or nullability of a column just updates the metadata. Because
|
||||||
|
of this, it's a fast operation. Changing the data type of a column requires
|
||||||
|
rewriting the column, which can be a heavy operation.
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
```python
|
||||||
|
import pyarrow as pa
|
||||||
|
table.alter_columns({"path": "double_price", "rename": "dbl_price",
|
||||||
|
"data_type": pa.float32(), "nullable": False})
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.table.Table.alter_columns][]
|
||||||
|
|
||||||
|
=== "Typescript"
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
--8<-- "nodejs/examples/basic.test.ts:alter_columns"
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.Table.alterColumns](../js/classes/Table.md/#altercolumns)
|
||||||
|
|
||||||
|
### Dropping columns
|
||||||
|
|
||||||
|
You can drop columns from the table with the `drop_columns` method. This will
|
||||||
|
remove the column from the schema.
|
||||||
|
|
||||||
|
<!-- TODO: Provide guidance on how to reduce disk usage once optimize helps here
|
||||||
|
waiting on: https://github.com/lancedb/lance/issues/3177
|
||||||
|
-->
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
```python
|
||||||
|
table.drop_columns(["dbl_price"])
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.table.Table.drop_columns][]
|
||||||
|
|
||||||
|
=== "Typescript"
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
--8<-- "nodejs/examples/basic.test.ts:drop_columns"
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.Table.dropColumns](../js/classes/Table.md/#dropcolumns)
|
||||||
|
|
||||||
|
|
||||||
## Handling bad vectors
|
## Handling bad vectors
|
||||||
|
|
||||||
In LanceDB Python, you can use the `on_bad_vectors` parameter to choose how
|
In LanceDB Python, you can use the `on_bad_vectors` parameter to choose how
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
TypeDoc added this file to prevent GitHub Pages from using Jekyll. You can turn off this behavior by setting the `githubPages` option to false.
|
|
||||||
@@ -27,7 +27,9 @@ the underlying connection has been closed.
|
|||||||
|
|
||||||
### new Connection()
|
### new Connection()
|
||||||
|
|
||||||
> **new Connection**(): [`Connection`](Connection.md)
|
```ts
|
||||||
|
new Connection(): Connection
|
||||||
|
```
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -37,7 +39,9 @@ the underlying connection has been closed.
|
|||||||
|
|
||||||
### close()
|
### close()
|
||||||
|
|
||||||
> `abstract` **close**(): `void`
|
```ts
|
||||||
|
abstract close(): void
|
||||||
|
```
|
||||||
|
|
||||||
Close the connection, releasing any underlying resources.
|
Close the connection, releasing any underlying resources.
|
||||||
|
|
||||||
@@ -53,21 +57,24 @@ Any attempt to use the connection after it is closed will result in an error.
|
|||||||
|
|
||||||
### createEmptyTable()
|
### createEmptyTable()
|
||||||
|
|
||||||
> `abstract` **createEmptyTable**(`name`, `schema`, `options`?): `Promise`<[`Table`](Table.md)>
|
```ts
|
||||||
|
abstract createEmptyTable(
|
||||||
|
name,
|
||||||
|
schema,
|
||||||
|
options?): Promise<Table>
|
||||||
|
```
|
||||||
|
|
||||||
Creates a new empty Table
|
Creates a new empty Table
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **name**: `string`
|
* **name**: `string`
|
||||||
|
The name of the table.
|
||||||
|
|
||||||
The name of the table.
|
* **schema**: `SchemaLike`
|
||||||
|
The schema of the table
|
||||||
|
|
||||||
• **schema**: `SchemaLike`
|
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||||
|
|
||||||
The schema of the table
|
|
||||||
|
|
||||||
• **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -79,15 +86,16 @@ The schema of the table
|
|||||||
|
|
||||||
#### createTable(options)
|
#### createTable(options)
|
||||||
|
|
||||||
> `abstract` **createTable**(`options`): `Promise`<[`Table`](Table.md)>
|
```ts
|
||||||
|
abstract createTable(options): Promise<Table>
|
||||||
|
```
|
||||||
|
|
||||||
Creates a new Table and initialize it with new data.
|
Creates a new Table and initialize it with new data.
|
||||||
|
|
||||||
##### Parameters
|
##### Parameters
|
||||||
|
|
||||||
• **options**: `object` & `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
* **options**: `object` & `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||||
|
The options object.
|
||||||
The options object.
|
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -95,22 +103,25 @@ The options object.
|
|||||||
|
|
||||||
#### createTable(name, data, options)
|
#### createTable(name, data, options)
|
||||||
|
|
||||||
> `abstract` **createTable**(`name`, `data`, `options`?): `Promise`<[`Table`](Table.md)>
|
```ts
|
||||||
|
abstract createTable(
|
||||||
|
name,
|
||||||
|
data,
|
||||||
|
options?): Promise<Table>
|
||||||
|
```
|
||||||
|
|
||||||
Creates a new Table and initialize it with new data.
|
Creates a new Table and initialize it with new data.
|
||||||
|
|
||||||
##### Parameters
|
##### Parameters
|
||||||
|
|
||||||
• **name**: `string`
|
* **name**: `string`
|
||||||
|
The name of the table.
|
||||||
|
|
||||||
The name of the table.
|
* **data**: `TableLike` \| `Record`<`string`, `unknown`>[]
|
||||||
|
Non-empty Array of Records
|
||||||
|
to be inserted into the table
|
||||||
|
|
||||||
• **data**: `TableLike` \| `Record`<`string`, `unknown`>[]
|
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||||
|
|
||||||
Non-empty Array of Records
|
|
||||||
to be inserted into the table
|
|
||||||
|
|
||||||
• **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -120,7 +131,9 @@ to be inserted into the table
|
|||||||
|
|
||||||
### display()
|
### display()
|
||||||
|
|
||||||
> `abstract` **display**(): `string`
|
```ts
|
||||||
|
abstract display(): string
|
||||||
|
```
|
||||||
|
|
||||||
Return a brief description of the connection
|
Return a brief description of the connection
|
||||||
|
|
||||||
@@ -132,15 +145,16 @@ Return a brief description of the connection
|
|||||||
|
|
||||||
### dropTable()
|
### dropTable()
|
||||||
|
|
||||||
> `abstract` **dropTable**(`name`): `Promise`<`void`>
|
```ts
|
||||||
|
abstract dropTable(name): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Drop an existing table.
|
Drop an existing table.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **name**: `string`
|
* **name**: `string`
|
||||||
|
The name of the table to drop.
|
||||||
The name of the table to drop.
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -150,7 +164,9 @@ The name of the table to drop.
|
|||||||
|
|
||||||
### isOpen()
|
### isOpen()
|
||||||
|
|
||||||
> `abstract` **isOpen**(): `boolean`
|
```ts
|
||||||
|
abstract isOpen(): boolean
|
||||||
|
```
|
||||||
|
|
||||||
Return true if the connection has not been closed
|
Return true if the connection has not been closed
|
||||||
|
|
||||||
@@ -162,17 +178,18 @@ Return true if the connection has not been closed
|
|||||||
|
|
||||||
### openTable()
|
### openTable()
|
||||||
|
|
||||||
> `abstract` **openTable**(`name`, `options`?): `Promise`<[`Table`](Table.md)>
|
```ts
|
||||||
|
abstract openTable(name, options?): Promise<Table>
|
||||||
|
```
|
||||||
|
|
||||||
Open a table in the database.
|
Open a table in the database.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **name**: `string`
|
* **name**: `string`
|
||||||
|
The name of the table
|
||||||
|
|
||||||
The name of the table
|
* **options?**: `Partial`<`OpenTableOptions`>
|
||||||
|
|
||||||
• **options?**: `Partial`<`OpenTableOptions`>
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -182,7 +199,9 @@ The name of the table
|
|||||||
|
|
||||||
### tableNames()
|
### tableNames()
|
||||||
|
|
||||||
> `abstract` **tableNames**(`options`?): `Promise`<`string`[]>
|
```ts
|
||||||
|
abstract tableNames(options?): Promise<string[]>
|
||||||
|
```
|
||||||
|
|
||||||
List all the table names in this database.
|
List all the table names in this database.
|
||||||
|
|
||||||
@@ -190,10 +209,9 @@ Tables will be returned in lexicographical order.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
|
* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
|
||||||
|
options to control the
|
||||||
options to control the
|
paging / start point
|
||||||
paging / start point
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
|
|||||||
@@ -8,9 +8,30 @@
|
|||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
|
### bitmap()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
static bitmap(): Index
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a bitmap index.
|
||||||
|
|
||||||
|
A `Bitmap` index stores a bitmap for each distinct value in the column for every row.
|
||||||
|
|
||||||
|
This index works best for low-cardinality columns, where the number of unique values
|
||||||
|
is small (i.e., fewer than a few hundred).
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Index`](Index.md)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### btree()
|
### btree()
|
||||||
|
|
||||||
> `static` **btree**(): [`Index`](Index.md)
|
```ts
|
||||||
|
static btree(): Index
|
||||||
|
```
|
||||||
|
|
||||||
Create a btree index
|
Create a btree index
|
||||||
|
|
||||||
@@ -36,9 +57,82 @@ block size may be added in the future.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### fts()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
static fts(options?): Index
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a full text search index
|
||||||
|
|
||||||
|
A full text search index is an index on a string column, so that you can conduct full
|
||||||
|
text searches on the column.
|
||||||
|
|
||||||
|
The results of a full text search are ordered by relevance measured by BM25.
|
||||||
|
|
||||||
|
You can combine filters with full text search.
|
||||||
|
|
||||||
|
For now, the full text search index only supports English, and doesn't support phrase search.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **options?**: `Partial`<`FtsOptions`>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Index`](Index.md)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### hnswPq()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
static hnswPq(options?): Index
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a hnswPq index
|
||||||
|
|
||||||
|
HNSW-PQ stands for Hierarchical Navigable Small World - Product Quantization.
|
||||||
|
It is a variant of the HNSW algorithm that uses product quantization to compress
|
||||||
|
the vectors.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **options?**: `Partial`<`HnswPqOptions`>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Index`](Index.md)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### hnswSq()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
static hnswSq(options?): Index
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a hnswSq index
|
||||||
|
|
||||||
|
HNSW-SQ stands for Hierarchical Navigable Small World - Scalar Quantization.
|
||||||
|
It is a variant of the HNSW algorithm that uses scalar quantization to compress
|
||||||
|
the vectors.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **options?**: `Partial`<`HnswSqOptions`>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Index`](Index.md)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### ivfPq()
|
### ivfPq()
|
||||||
|
|
||||||
> `static` **ivfPq**(`options`?): [`Index`](Index.md)
|
```ts
|
||||||
|
static ivfPq(options?): Index
|
||||||
|
```
|
||||||
|
|
||||||
Create an IvfPq index
|
Create an IvfPq index
|
||||||
|
|
||||||
@@ -63,29 +157,25 @@ currently is also a memory intensive operation.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<[`IvfPqOptions`](../interfaces/IvfPqOptions.md)>
|
* **options?**: `Partial`<[`IvfPqOptions`](../interfaces/IvfPqOptions.md)>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
[`Index`](Index.md)
|
[`Index`](Index.md)
|
||||||
|
|
||||||
### fts()
|
***
|
||||||
|
|
||||||
> `static` **fts**(`options`?): [`Index`](Index.md)
|
### labelList()
|
||||||
|
|
||||||
Create a full text search index
|
```ts
|
||||||
|
static labelList(): Index
|
||||||
|
```
|
||||||
|
|
||||||
This index is used to search for text data. The index is created by tokenizing the text
|
Create a label list index.
|
||||||
into words and then storing occurrences of these words in a data structure called inverted index
|
|
||||||
that allows for fast search.
|
|
||||||
|
|
||||||
During a search the query is tokenized and the inverted index is used to find the rows that
|
LabelList index is a scalar index that can be used on `List<T>` columns to
|
||||||
contain the query words. The rows are then scored based on BM25 and the top scoring rows are
|
support queries with `array_contains_all` and `array_contains_any`
|
||||||
sorted and returned.
|
using an underlying bitmap index.
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
• **options?**: `Partial`<[`FtsOptions`](../interfaces/FtsOptions.md)>
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
|
|||||||
@@ -12,11 +12,13 @@ Options to control the makeArrowTable call.
|
|||||||
|
|
||||||
### new MakeArrowTableOptions()
|
### new MakeArrowTableOptions()
|
||||||
|
|
||||||
> **new MakeArrowTableOptions**(`values`?): [`MakeArrowTableOptions`](MakeArrowTableOptions.md)
|
```ts
|
||||||
|
new MakeArrowTableOptions(values?): MakeArrowTableOptions
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **values?**: `Partial`<[`MakeArrowTableOptions`](MakeArrowTableOptions.md)>
|
* **values?**: `Partial`<[`MakeArrowTableOptions`](MakeArrowTableOptions.md)>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -26,7 +28,9 @@ Options to control the makeArrowTable call.
|
|||||||
|
|
||||||
### dictionaryEncodeStrings
|
### dictionaryEncodeStrings
|
||||||
|
|
||||||
> **dictionaryEncodeStrings**: `boolean` = `false`
|
```ts
|
||||||
|
dictionaryEncodeStrings: boolean = false;
|
||||||
|
```
|
||||||
|
|
||||||
If true then string columns will be encoded with dictionary encoding
|
If true then string columns will be encoded with dictionary encoding
|
||||||
|
|
||||||
@@ -40,22 +44,30 @@ If `schema` is provided then this property is ignored.
|
|||||||
|
|
||||||
### embeddingFunction?
|
### embeddingFunction?
|
||||||
|
|
||||||
> `optional` **embeddingFunction**: [`EmbeddingFunctionConfig`](../namespaces/embedding/interfaces/EmbeddingFunctionConfig.md)
|
```ts
|
||||||
|
optional embeddingFunction: EmbeddingFunctionConfig;
|
||||||
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### embeddings?
|
### embeddings?
|
||||||
|
|
||||||
> `optional` **embeddings**: [`EmbeddingFunction`](../namespaces/embedding/classes/EmbeddingFunction.md)<`unknown`, `FunctionOptions`>
|
```ts
|
||||||
|
optional embeddings: EmbeddingFunction<unknown, FunctionOptions>;
|
||||||
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### schema?
|
### schema?
|
||||||
|
|
||||||
> `optional` **schema**: `SchemaLike`
|
```ts
|
||||||
|
optional schema: SchemaLike;
|
||||||
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### vectorColumns
|
### vectorColumns
|
||||||
|
|
||||||
> **vectorColumns**: `Record`<`string`, [`VectorColumnOptions`](VectorColumnOptions.md)>
|
```ts
|
||||||
|
vectorColumns: Record<string, VectorColumnOptions>;
|
||||||
|
```
|
||||||
|
|||||||
@@ -16,11 +16,13 @@ A builder for LanceDB queries.
|
|||||||
|
|
||||||
### new Query()
|
### new Query()
|
||||||
|
|
||||||
> **new Query**(`tbl`): [`Query`](Query.md)
|
```ts
|
||||||
|
new Query(tbl): Query
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **tbl**: `Table`
|
* **tbl**: `Table`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -34,7 +36,9 @@ A builder for LanceDB queries.
|
|||||||
|
|
||||||
### inner
|
### inner
|
||||||
|
|
||||||
> `protected` **inner**: `Query` \| `Promise`<`Query`>
|
```ts
|
||||||
|
protected inner: Query | Promise<Query>;
|
||||||
|
```
|
||||||
|
|
||||||
#### Inherited from
|
#### Inherited from
|
||||||
|
|
||||||
@@ -44,7 +48,9 @@ A builder for LanceDB queries.
|
|||||||
|
|
||||||
### \[asyncIterator\]()
|
### \[asyncIterator\]()
|
||||||
|
|
||||||
> **\[asyncIterator\]**(): `AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`>
|
```ts
|
||||||
|
[asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined>
|
||||||
|
```
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -58,11 +64,13 @@ A builder for LanceDB queries.
|
|||||||
|
|
||||||
### doCall()
|
### doCall()
|
||||||
|
|
||||||
> `protected` **doCall**(`fn`): `void`
|
```ts
|
||||||
|
protected doCall(fn): void
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **fn**
|
* **fn**
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -76,13 +84,15 @@ A builder for LanceDB queries.
|
|||||||
|
|
||||||
### execute()
|
### execute()
|
||||||
|
|
||||||
> `protected` **execute**(`options`?): [`RecordBatchIterator`](RecordBatchIterator.md)
|
```ts
|
||||||
|
protected execute(options?): RecordBatchIterator
|
||||||
|
```
|
||||||
|
|
||||||
Execute the query and return the results as an
|
Execute the query and return the results as an
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -108,15 +118,16 @@ single query)
|
|||||||
|
|
||||||
### explainPlan()
|
### explainPlan()
|
||||||
|
|
||||||
> **explainPlan**(`verbose`): `Promise`<`string`>
|
```ts
|
||||||
|
explainPlan(verbose): Promise<string>
|
||||||
|
```
|
||||||
|
|
||||||
Generates an explanation of the query execution plan.
|
Generates an explanation of the query execution plan.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **verbose**: `boolean` = `false`
|
* **verbose**: `boolean` = `false`
|
||||||
|
If true, provides a more detailed explanation. Defaults to false.
|
||||||
If true, provides a more detailed explanation. Defaults to false.
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -141,15 +152,38 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### fastSearch()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
fastSearch(): this
|
||||||
|
```
|
||||||
|
|
||||||
|
Skip searching un-indexed data. This can make search faster, but will miss
|
||||||
|
any data that is not yet indexed.
|
||||||
|
|
||||||
|
Use `Table.optimize()` to index all un-indexed data.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### ~~filter()~~
|
### ~~filter()~~
|
||||||
|
|
||||||
> **filter**(`predicate`): `this`
|
```ts
|
||||||
|
filter(predicate): this
|
||||||
|
```
|
||||||
|
|
||||||
A filter statement to be applied to this query.
|
A filter statement to be applied to this query.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **predicate**: `string`
|
* **predicate**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -169,9 +203,33 @@ Use `where` instead
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### fullTextSearch()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
fullTextSearch(query, options?): this
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **query**: `string`
|
||||||
|
|
||||||
|
* **options?**: `Partial`<`FullTextSearchOptions`>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### limit()
|
### limit()
|
||||||
|
|
||||||
> **limit**(`limit`): `this`
|
```ts
|
||||||
|
limit(limit): this
|
||||||
|
```
|
||||||
|
|
||||||
Set the maximum number of results to return.
|
Set the maximum number of results to return.
|
||||||
|
|
||||||
@@ -180,7 +238,7 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **limit**: `number`
|
* **limit**: `number`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -194,11 +252,13 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
### nativeExecute()
|
### nativeExecute()
|
||||||
|
|
||||||
> `protected` **nativeExecute**(`options`?): `Promise`<`RecordBatchIterator`>
|
```ts
|
||||||
|
protected nativeExecute(options?): Promise<RecordBatchIterator>
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -212,7 +272,9 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
### nearestTo()
|
### nearestTo()
|
||||||
|
|
||||||
> **nearestTo**(`vector`): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
nearestTo(vector): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
Find the nearest vectors to the given query vector.
|
Find the nearest vectors to the given query vector.
|
||||||
|
|
||||||
@@ -232,7 +294,7 @@ If there is more than one vector column you must use
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **vector**: `IntoVector`
|
* **vector**: `IntoVector`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -264,9 +326,49 @@ a default `limit` of 10 will be used.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### nearestToText()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
nearestToText(query, columns?): Query
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **query**: `string`
|
||||||
|
|
||||||
|
* **columns?**: `string`[]
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Query`](Query.md)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### offset()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
offset(offset): this
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **offset**: `number`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### select()
|
### select()
|
||||||
|
|
||||||
> **select**(`columns`): `this`
|
```ts
|
||||||
|
select(columns): this
|
||||||
|
```
|
||||||
|
|
||||||
Return only the specified columns.
|
Return only the specified columns.
|
||||||
|
|
||||||
@@ -290,7 +392,7 @@ input to this method would be:
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **columns**: `string` \| `string`[] \| `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
* **columns**: `string` \| `string`[] \| `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -317,13 +419,15 @@ object insertion order is easy to get wrong and `Map` is more foolproof.
|
|||||||
|
|
||||||
### toArray()
|
### toArray()
|
||||||
|
|
||||||
> **toArray**(`options`?): `Promise`<`any`[]>
|
```ts
|
||||||
|
toArray(options?): Promise<any[]>
|
||||||
|
```
|
||||||
|
|
||||||
Collect the results as an array of objects.
|
Collect the results as an array of objects.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -337,13 +441,15 @@ Collect the results as an array of objects.
|
|||||||
|
|
||||||
### toArrow()
|
### toArrow()
|
||||||
|
|
||||||
> **toArrow**(`options`?): `Promise`<`Table`<`any`>>
|
```ts
|
||||||
|
toArrow(options?): Promise<Table<any>>
|
||||||
|
```
|
||||||
|
|
||||||
Collect the results as an Arrow
|
Collect the results as an Arrow
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -361,7 +467,9 @@ ArrowTable.
|
|||||||
|
|
||||||
### where()
|
### where()
|
||||||
|
|
||||||
> **where**(`predicate`): `this`
|
```ts
|
||||||
|
where(predicate): this
|
||||||
|
```
|
||||||
|
|
||||||
A filter statement to be applied to this query.
|
A filter statement to be applied to this query.
|
||||||
|
|
||||||
@@ -369,7 +477,7 @@ The filter should be supplied as an SQL query string. For example:
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **predicate**: `string`
|
* **predicate**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -389,3 +497,25 @@ on the filter column(s).
|
|||||||
#### Inherited from
|
#### Inherited from
|
||||||
|
|
||||||
[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
|
[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### withRowId()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
withRowId(): this
|
||||||
|
```
|
||||||
|
|
||||||
|
Whether to return the row id in the results.
|
||||||
|
|
||||||
|
This column can be used to match results between different queries. For
|
||||||
|
example, to match results from a full text search and a vector search in
|
||||||
|
order to perform hybrid search.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
|
||||||
|
|||||||
@@ -25,11 +25,13 @@ Common methods supported by all query types
|
|||||||
|
|
||||||
### new QueryBase()
|
### new QueryBase()
|
||||||
|
|
||||||
> `protected` **new QueryBase**<`NativeQueryType`>(`inner`): [`QueryBase`](QueryBase.md)<`NativeQueryType`>
|
```ts
|
||||||
|
protected new QueryBase<NativeQueryType>(inner): QueryBase<NativeQueryType>
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **inner**: `NativeQueryType` \| `Promise`<`NativeQueryType`>
|
* **inner**: `NativeQueryType` \| `Promise`<`NativeQueryType`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -39,13 +41,17 @@ Common methods supported by all query types
|
|||||||
|
|
||||||
### inner
|
### inner
|
||||||
|
|
||||||
> `protected` **inner**: `NativeQueryType` \| `Promise`<`NativeQueryType`>
|
```ts
|
||||||
|
protected inner: NativeQueryType | Promise<NativeQueryType>;
|
||||||
|
```
|
||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
### \[asyncIterator\]()
|
### \[asyncIterator\]()
|
||||||
|
|
||||||
> **\[asyncIterator\]**(): `AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`>
|
```ts
|
||||||
|
[asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined>
|
||||||
|
```
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -59,11 +65,13 @@ Common methods supported by all query types
|
|||||||
|
|
||||||
### doCall()
|
### doCall()
|
||||||
|
|
||||||
> `protected` **doCall**(`fn`): `void`
|
```ts
|
||||||
|
protected doCall(fn): void
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **fn**
|
* **fn**
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -73,13 +81,15 @@ Common methods supported by all query types
|
|||||||
|
|
||||||
### execute()
|
### execute()
|
||||||
|
|
||||||
> `protected` **execute**(`options`?): [`RecordBatchIterator`](RecordBatchIterator.md)
|
```ts
|
||||||
|
protected execute(options?): RecordBatchIterator
|
||||||
|
```
|
||||||
|
|
||||||
Execute the query and return the results as an
|
Execute the query and return the results as an
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -101,15 +111,16 @@ single query)
|
|||||||
|
|
||||||
### explainPlan()
|
### explainPlan()
|
||||||
|
|
||||||
> **explainPlan**(`verbose`): `Promise`<`string`>
|
```ts
|
||||||
|
explainPlan(verbose): Promise<string>
|
||||||
|
```
|
||||||
|
|
||||||
Generates an explanation of the query execution plan.
|
Generates an explanation of the query execution plan.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **verbose**: `boolean` = `false`
|
* **verbose**: `boolean` = `false`
|
||||||
|
If true, provides a more detailed explanation. Defaults to false.
|
||||||
If true, provides a more detailed explanation. Defaults to false.
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -130,15 +141,34 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### fastSearch()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
fastSearch(): this
|
||||||
|
```
|
||||||
|
|
||||||
|
Skip searching un-indexed data. This can make search faster, but will miss
|
||||||
|
any data that is not yet indexed.
|
||||||
|
|
||||||
|
Use `Table.optimize()` to index all un-indexed data.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### ~~filter()~~
|
### ~~filter()~~
|
||||||
|
|
||||||
> **filter**(`predicate`): `this`
|
```ts
|
||||||
|
filter(predicate): this
|
||||||
|
```
|
||||||
|
|
||||||
A filter statement to be applied to this query.
|
A filter statement to be applied to this query.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **predicate**: `string`
|
* **predicate**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -154,9 +184,29 @@ Use `where` instead
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### fullTextSearch()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
fullTextSearch(query, options?): this
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **query**: `string`
|
||||||
|
|
||||||
|
* **options?**: `Partial`<`FullTextSearchOptions`>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### limit()
|
### limit()
|
||||||
|
|
||||||
> **limit**(`limit`): `this`
|
```ts
|
||||||
|
limit(limit): this
|
||||||
|
```
|
||||||
|
|
||||||
Set the maximum number of results to return.
|
Set the maximum number of results to return.
|
||||||
|
|
||||||
@@ -165,7 +215,7 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **limit**: `number`
|
* **limit**: `number`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -175,11 +225,13 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
### nativeExecute()
|
### nativeExecute()
|
||||||
|
|
||||||
> `protected` **nativeExecute**(`options`?): `Promise`<`RecordBatchIterator`>
|
```ts
|
||||||
|
protected nativeExecute(options?): Promise<RecordBatchIterator>
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -187,9 +239,27 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### offset()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
offset(offset): this
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **offset**: `number`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### select()
|
### select()
|
||||||
|
|
||||||
> **select**(`columns`): `this`
|
```ts
|
||||||
|
select(columns): this
|
||||||
|
```
|
||||||
|
|
||||||
Return only the specified columns.
|
Return only the specified columns.
|
||||||
|
|
||||||
@@ -213,7 +283,7 @@ input to this method would be:
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **columns**: `string` \| `string`[] \| `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
* **columns**: `string` \| `string`[] \| `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -236,13 +306,15 @@ object insertion order is easy to get wrong and `Map` is more foolproof.
|
|||||||
|
|
||||||
### toArray()
|
### toArray()
|
||||||
|
|
||||||
> **toArray**(`options`?): `Promise`<`any`[]>
|
```ts
|
||||||
|
toArray(options?): Promise<any[]>
|
||||||
|
```
|
||||||
|
|
||||||
Collect the results as an array of objects.
|
Collect the results as an array of objects.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -252,13 +324,15 @@ Collect the results as an array of objects.
|
|||||||
|
|
||||||
### toArrow()
|
### toArrow()
|
||||||
|
|
||||||
> **toArrow**(`options`?): `Promise`<`Table`<`any`>>
|
```ts
|
||||||
|
toArrow(options?): Promise<Table<any>>
|
||||||
|
```
|
||||||
|
|
||||||
Collect the results as an Arrow
|
Collect the results as an Arrow
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -272,7 +346,9 @@ ArrowTable.
|
|||||||
|
|
||||||
### where()
|
### where()
|
||||||
|
|
||||||
> **where**(`predicate`): `this`
|
```ts
|
||||||
|
where(predicate): this
|
||||||
|
```
|
||||||
|
|
||||||
A filter statement to be applied to this query.
|
A filter statement to be applied to this query.
|
||||||
|
|
||||||
@@ -280,7 +356,7 @@ The filter should be supplied as an SQL query string. For example:
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **predicate**: `string`
|
* **predicate**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -296,3 +372,21 @@ x > 5 OR y = 'test'
|
|||||||
Filtering performance can often be improved by creating a scalar index
|
Filtering performance can often be improved by creating a scalar index
|
||||||
on the filter column(s).
|
on the filter column(s).
|
||||||
```
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### withRowId()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
withRowId(): this
|
||||||
|
```
|
||||||
|
|
||||||
|
Whether to return the row id in the results.
|
||||||
|
|
||||||
|
This column can be used to match results between different queries. For
|
||||||
|
example, to match results from a full text search and a vector search in
|
||||||
|
order to perform hybrid search.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|||||||
@@ -14,11 +14,13 @@
|
|||||||
|
|
||||||
### new RecordBatchIterator()
|
### new RecordBatchIterator()
|
||||||
|
|
||||||
> **new RecordBatchIterator**(`promise`?): [`RecordBatchIterator`](RecordBatchIterator.md)
|
```ts
|
||||||
|
new RecordBatchIterator(promise?): RecordBatchIterator
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **promise?**: `Promise`<`RecordBatchIterator`>
|
* **promise?**: `Promise`<`RecordBatchIterator`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -28,7 +30,9 @@
|
|||||||
|
|
||||||
### next()
|
### next()
|
||||||
|
|
||||||
> **next**(): `Promise`<`IteratorResult`<`RecordBatch`<`any`>, `any`>>
|
```ts
|
||||||
|
next(): Promise<IteratorResult<RecordBatch<any>, any>>
|
||||||
|
```
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,9 @@ collected.
|
|||||||
|
|
||||||
### new Table()
|
### new Table()
|
||||||
|
|
||||||
> **new Table**(): [`Table`](Table.md)
|
```ts
|
||||||
|
new Table(): Table
|
||||||
|
```
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -31,7 +33,9 @@ collected.
|
|||||||
|
|
||||||
### name
|
### name
|
||||||
|
|
||||||
> `get` `abstract` **name**(): `string`
|
```ts
|
||||||
|
abstract get name(): string
|
||||||
|
```
|
||||||
|
|
||||||
Returns the name of the table
|
Returns the name of the table
|
||||||
|
|
||||||
@@ -43,17 +47,18 @@ Returns the name of the table
|
|||||||
|
|
||||||
### add()
|
### add()
|
||||||
|
|
||||||
> `abstract` **add**(`data`, `options`?): `Promise`<`void`>
|
```ts
|
||||||
|
abstract add(data, options?): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Insert records into this Table.
|
Insert records into this Table.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **data**: [`Data`](../type-aliases/Data.md)
|
* **data**: [`Data`](../type-aliases/Data.md)
|
||||||
|
Records to be inserted into the Table
|
||||||
|
|
||||||
Records to be inserted into the Table
|
* **options?**: `Partial`<[`AddDataOptions`](../interfaces/AddDataOptions.md)>
|
||||||
|
|
||||||
• **options?**: `Partial`<[`AddDataOptions`](../interfaces/AddDataOptions.md)>
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -63,18 +68,19 @@ Records to be inserted into the Table
|
|||||||
|
|
||||||
### addColumns()
|
### addColumns()
|
||||||
|
|
||||||
> `abstract` **addColumns**(`newColumnTransforms`): `Promise`<`void`>
|
```ts
|
||||||
|
abstract addColumns(newColumnTransforms): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Add new columns with defined values.
|
Add new columns with defined values.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
||||||
|
pairs of column names and
|
||||||
pairs of column names and
|
the SQL expression to use to calculate the value of the new column. These
|
||||||
the SQL expression to use to calculate the value of the new column. These
|
expressions will be evaluated for each row in the table, and can
|
||||||
expressions will be evaluated for each row in the table, and can
|
reference existing columns in the table.
|
||||||
reference existing columns in the table.
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -84,16 +90,17 @@ reference existing columns in the table.
|
|||||||
|
|
||||||
### alterColumns()
|
### alterColumns()
|
||||||
|
|
||||||
> `abstract` **alterColumns**(`columnAlterations`): `Promise`<`void`>
|
```ts
|
||||||
|
abstract alterColumns(columnAlterations): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Alter the name or nullability of columns.
|
Alter the name or nullability of columns.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **columnAlterations**: [`ColumnAlteration`](../interfaces/ColumnAlteration.md)[]
|
* **columnAlterations**: [`ColumnAlteration`](../interfaces/ColumnAlteration.md)[]
|
||||||
|
One or more alterations to
|
||||||
One or more alterations to
|
apply to columns.
|
||||||
apply to columns.
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -103,7 +110,9 @@ apply to columns.
|
|||||||
|
|
||||||
### checkout()
|
### checkout()
|
||||||
|
|
||||||
> `abstract` **checkout**(`version`): `Promise`<`void`>
|
```ts
|
||||||
|
abstract checkout(version): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Checks out a specific version of the table. _This is an in-place operation._
|
Checks out a specific version of the table. _This is an in-place operation._
|
||||||
|
|
||||||
@@ -116,9 +125,8 @@ wish to return to standard mode, call `checkoutLatest`.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **version**: `number`
|
* **version**: `number`
|
||||||
|
The version to checkout
|
||||||
The version to checkout
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -144,7 +152,9 @@ console.log(await table.version()); // 2
|
|||||||
|
|
||||||
### checkoutLatest()
|
### checkoutLatest()
|
||||||
|
|
||||||
> `abstract` **checkoutLatest**(): `Promise`<`void`>
|
```ts
|
||||||
|
abstract checkoutLatest(): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Checkout the latest version of the table. _This is an in-place operation._
|
Checkout the latest version of the table. _This is an in-place operation._
|
||||||
|
|
||||||
@@ -159,7 +169,9 @@ version of the table.
|
|||||||
|
|
||||||
### close()
|
### close()
|
||||||
|
|
||||||
> `abstract` **close**(): `void`
|
```ts
|
||||||
|
abstract close(): void
|
||||||
|
```
|
||||||
|
|
||||||
Close the table, releasing any underlying resources.
|
Close the table, releasing any underlying resources.
|
||||||
|
|
||||||
@@ -175,13 +187,15 @@ Any attempt to use the table after it is closed will result in an error.
|
|||||||
|
|
||||||
### countRows()
|
### countRows()
|
||||||
|
|
||||||
> `abstract` **countRows**(`filter`?): `Promise`<`number`>
|
```ts
|
||||||
|
abstract countRows(filter?): Promise<number>
|
||||||
|
```
|
||||||
|
|
||||||
Count the total number of rows in the dataset.
|
Count the total number of rows in the dataset.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **filter?**: `string`
|
* **filter?**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -191,7 +205,9 @@ Count the total number of rows in the dataset.
|
|||||||
|
|
||||||
### createIndex()
|
### createIndex()
|
||||||
|
|
||||||
> `abstract` **createIndex**(`column`, `options`?): `Promise`<`void`>
|
```ts
|
||||||
|
abstract createIndex(column, options?): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Create an index to speed up queries.
|
Create an index to speed up queries.
|
||||||
|
|
||||||
@@ -202,9 +218,9 @@ vector and non-vector searches)
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **column**: `string`
|
* **column**: `string`
|
||||||
|
|
||||||
• **options?**: `Partial`<[`IndexOptions`](../interfaces/IndexOptions.md)>
|
* **options?**: `Partial`<[`IndexOptions`](../interfaces/IndexOptions.md)>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -245,13 +261,15 @@ await table.createIndex("my_float_col");
|
|||||||
|
|
||||||
### delete()
|
### delete()
|
||||||
|
|
||||||
> `abstract` **delete**(`predicate`): `Promise`<`void`>
|
```ts
|
||||||
|
abstract delete(predicate): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Delete the rows that satisfy the predicate.
|
Delete the rows that satisfy the predicate.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **predicate**: `string`
|
* **predicate**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -261,7 +279,9 @@ Delete the rows that satisfy the predicate.
|
|||||||
|
|
||||||
### display()
|
### display()
|
||||||
|
|
||||||
> `abstract` **display**(): `string`
|
```ts
|
||||||
|
abstract display(): string
|
||||||
|
```
|
||||||
|
|
||||||
Return a brief description of the table
|
Return a brief description of the table
|
||||||
|
|
||||||
@@ -273,7 +293,9 @@ Return a brief description of the table
|
|||||||
|
|
||||||
### dropColumns()
|
### dropColumns()
|
||||||
|
|
||||||
> `abstract` **dropColumns**(`columnNames`): `Promise`<`void`>
|
```ts
|
||||||
|
abstract dropColumns(columnNames): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Drop one or more columns from the dataset
|
Drop one or more columns from the dataset
|
||||||
|
|
||||||
@@ -284,11 +306,10 @@ then call ``cleanup_files`` to remove the old files.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **columnNames**: `string`[]
|
* **columnNames**: `string`[]
|
||||||
|
The names of the columns to drop. These can
|
||||||
The names of the columns to drop. These can
|
be nested column references (e.g. "a.b.c") or top-level column names
|
||||||
be nested column references (e.g. "a.b.c") or top-level column names
|
(e.g. "a").
|
||||||
(e.g. "a").
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -298,15 +319,16 @@ be nested column references (e.g. "a.b.c") or top-level column names
|
|||||||
|
|
||||||
### indexStats()
|
### indexStats()
|
||||||
|
|
||||||
> `abstract` **indexStats**(`name`): `Promise`<`undefined` \| [`IndexStatistics`](../interfaces/IndexStatistics.md)>
|
```ts
|
||||||
|
abstract indexStats(name): Promise<undefined | IndexStatistics>
|
||||||
|
```
|
||||||
|
|
||||||
List all the stats of a specified index
|
List all the stats of a specified index
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **name**: `string`
|
* **name**: `string`
|
||||||
|
The name of the index.
|
||||||
The name of the index.
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -318,7 +340,9 @@ The stats of the index. If the index does not exist, it will return undefined
|
|||||||
|
|
||||||
### isOpen()
|
### isOpen()
|
||||||
|
|
||||||
> `abstract` **isOpen**(): `boolean`
|
```ts
|
||||||
|
abstract isOpen(): boolean
|
||||||
|
```
|
||||||
|
|
||||||
Return true if the table has not been closed
|
Return true if the table has not been closed
|
||||||
|
|
||||||
@@ -330,7 +354,9 @@ Return true if the table has not been closed
|
|||||||
|
|
||||||
### listIndices()
|
### listIndices()
|
||||||
|
|
||||||
> `abstract` **listIndices**(): `Promise`<[`IndexConfig`](../interfaces/IndexConfig.md)[]>
|
```ts
|
||||||
|
abstract listIndices(): Promise<IndexConfig[]>
|
||||||
|
```
|
||||||
|
|
||||||
List all indices that have been created with [Table.createIndex](Table.md#createindex)
|
List all indices that have been created with [Table.createIndex](Table.md#createindex)
|
||||||
|
|
||||||
@@ -340,13 +366,29 @@ List all indices that have been created with [Table.createIndex](Table.md#create
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### listVersions()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
abstract listVersions(): Promise<Version[]>
|
||||||
|
```
|
||||||
|
|
||||||
|
List all the versions of the table
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`Version`[]>
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### mergeInsert()
|
### mergeInsert()
|
||||||
|
|
||||||
> `abstract` **mergeInsert**(`on`): `MergeInsertBuilder`
|
```ts
|
||||||
|
abstract mergeInsert(on): MergeInsertBuilder
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **on**: `string` \| `string`[]
|
* **on**: `string` \| `string`[]
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -356,7 +398,9 @@ List all indices that have been created with [Table.createIndex](Table.md#create
|
|||||||
|
|
||||||
### optimize()
|
### optimize()
|
||||||
|
|
||||||
> `abstract` **optimize**(`options`?): `Promise`<`OptimizeStats`>
|
```ts
|
||||||
|
abstract optimize(options?): Promise<OptimizeStats>
|
||||||
|
```
|
||||||
|
|
||||||
Optimize the on-disk data and indices for better performance.
|
Optimize the on-disk data and indices for better performance.
|
||||||
|
|
||||||
@@ -388,7 +432,7 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`OptimizeOptions`>
|
* **options?**: `Partial`<[`OptimizeOptions`](../interfaces/OptimizeOptions.md)>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -398,7 +442,9 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
|||||||
|
|
||||||
### query()
|
### query()
|
||||||
|
|
||||||
> `abstract` **query**(): [`Query`](Query.md)
|
```ts
|
||||||
|
abstract query(): Query
|
||||||
|
```
|
||||||
|
|
||||||
Create a [Query](Query.md) Builder.
|
Create a [Query](Query.md) Builder.
|
||||||
|
|
||||||
@@ -466,7 +512,9 @@ for await (const batch of table.query()) {
|
|||||||
|
|
||||||
### restore()
|
### restore()
|
||||||
|
|
||||||
> `abstract` **restore**(): `Promise`<`void`>
|
```ts
|
||||||
|
abstract restore(): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Restore the table to the currently checked out version
|
Restore the table to the currently checked out version
|
||||||
|
|
||||||
@@ -487,7 +535,9 @@ out state and the read_consistency_interval, if any, will apply.
|
|||||||
|
|
||||||
### schema()
|
### schema()
|
||||||
|
|
||||||
> `abstract` **schema**(): `Promise`<`Schema`<`any`>>
|
```ts
|
||||||
|
abstract schema(): Promise<Schema<any>>
|
||||||
|
```
|
||||||
|
|
||||||
Get the schema of the table.
|
Get the schema of the table.
|
||||||
|
|
||||||
@@ -499,61 +549,41 @@ Get the schema of the table.
|
|||||||
|
|
||||||
### search()
|
### search()
|
||||||
|
|
||||||
#### search(query)
|
```ts
|
||||||
|
abstract search(
|
||||||
> `abstract` **search**(`query`, `queryType`, `ftsColumns`): [`VectorQuery`](VectorQuery.md)
|
query,
|
||||||
|
queryType?,
|
||||||
|
ftsColumns?): VectorQuery | Query
|
||||||
|
```
|
||||||
|
|
||||||
Create a search query to find the nearest neighbors
|
Create a search query to find the nearest neighbors
|
||||||
of the given query vector, or the documents
|
of the given query
|
||||||
with the highest relevance to the query string.
|
|
||||||
|
|
||||||
##### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **query**: `string`
|
* **query**: `string` \| `IntoVector`
|
||||||
|
the query, a vector or string
|
||||||
|
|
||||||
the query. This will be converted to a vector using the table's provided embedding function,
|
* **queryType?**: `string`
|
||||||
or the query string for full-text search if `queryType` is "fts".
|
the type of the query, "vector", "fts", or "auto"
|
||||||
|
|
||||||
• **queryType**: `string` = `"auto"` \| `"fts"`
|
* **ftsColumns?**: `string` \| `string`[]
|
||||||
|
the columns to search in for full text search
|
||||||
|
for now, only one column can be searched at a time.
|
||||||
|
When "auto" is used, if the query is a string and an embedding function is defined, it will be treated as a vector query.
|
||||||
|
If the query is a string and no embedding function is defined, it will be treated as a full text search query.
|
||||||
|
|
||||||
the type of query to run. If "auto", the query type will be determined based on the query.
|
#### Returns
|
||||||
|
|
||||||
• **ftsColumns**: `string[] | str` = undefined
|
[`VectorQuery`](VectorQuery.md) \| [`Query`](Query.md)
|
||||||
|
|
||||||
the columns to search in. If not provided, all indexed columns will be searched.
|
|
||||||
|
|
||||||
For now, this supports searching only one column.
|
|
||||||
|
|
||||||
##### Returns
|
|
||||||
|
|
||||||
[`VectorQuery`](VectorQuery.md)
|
|
||||||
|
|
||||||
##### Note
|
|
||||||
|
|
||||||
If no embedding functions are defined in the table, this will error when collecting the results.
|
|
||||||
|
|
||||||
#### search(query)
|
|
||||||
|
|
||||||
> `abstract` **search**(`query`): [`VectorQuery`](VectorQuery.md)
|
|
||||||
|
|
||||||
Create a search query to find the nearest neighbors
|
|
||||||
of the given query vector
|
|
||||||
|
|
||||||
##### Parameters
|
|
||||||
|
|
||||||
• **query**: `IntoVector`
|
|
||||||
|
|
||||||
the query vector
|
|
||||||
|
|
||||||
##### Returns
|
|
||||||
|
|
||||||
[`VectorQuery`](VectorQuery.md)
|
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### toArrow()
|
### toArrow()
|
||||||
|
|
||||||
> `abstract` **toArrow**(): `Promise`<`Table`<`any`>>
|
```ts
|
||||||
|
abstract toArrow(): Promise<Table<any>>
|
||||||
|
```
|
||||||
|
|
||||||
Return the table as an arrow table
|
Return the table as an arrow table
|
||||||
|
|
||||||
@@ -567,13 +597,15 @@ Return the table as an arrow table
|
|||||||
|
|
||||||
#### update(opts)
|
#### update(opts)
|
||||||
|
|
||||||
> `abstract` **update**(`opts`): `Promise`<`void`>
|
```ts
|
||||||
|
abstract update(opts): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Update existing records in the Table
|
Update existing records in the Table
|
||||||
|
|
||||||
##### Parameters
|
##### Parameters
|
||||||
|
|
||||||
• **opts**: `object` & `Partial`<[`UpdateOptions`](../interfaces/UpdateOptions.md)>
|
* **opts**: `object` & `Partial`<[`UpdateOptions`](../interfaces/UpdateOptions.md)>
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -587,13 +619,15 @@ table.update({where:"x = 2", values:{"vector": [10, 10]}})
|
|||||||
|
|
||||||
#### update(opts)
|
#### update(opts)
|
||||||
|
|
||||||
> `abstract` **update**(`opts`): `Promise`<`void`>
|
```ts
|
||||||
|
abstract update(opts): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Update existing records in the Table
|
Update existing records in the Table
|
||||||
|
|
||||||
##### Parameters
|
##### Parameters
|
||||||
|
|
||||||
• **opts**: `object` & `Partial`<[`UpdateOptions`](../interfaces/UpdateOptions.md)>
|
* **opts**: `object` & `Partial`<[`UpdateOptions`](../interfaces/UpdateOptions.md)>
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -607,7 +641,9 @@ table.update({where:"x = 2", valuesSql:{"x": "x + 1"}})
|
|||||||
|
|
||||||
#### update(updates, options)
|
#### update(updates, options)
|
||||||
|
|
||||||
> `abstract` **update**(`updates`, `options`?): `Promise`<`void`>
|
```ts
|
||||||
|
abstract update(updates, options?): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
Update existing records in the Table
|
Update existing records in the Table
|
||||||
|
|
||||||
@@ -626,20 +662,17 @@ repeatedly calling this method.
|
|||||||
|
|
||||||
##### Parameters
|
##### Parameters
|
||||||
|
|
||||||
• **updates**: `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
* **updates**: `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
||||||
|
the
|
||||||
|
columns to update
|
||||||
|
Keys in the map should specify the name of the column to update.
|
||||||
|
Values in the map provide the new value of the column. These can
|
||||||
|
be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
|
||||||
|
based on the row being updated (e.g. "my_col + 1")
|
||||||
|
|
||||||
the
|
* **options?**: `Partial`<[`UpdateOptions`](../interfaces/UpdateOptions.md)>
|
||||||
columns to update
|
additional options to control
|
||||||
|
the update behavior
|
||||||
Keys in the map should specify the name of the column to update.
|
|
||||||
Values in the map provide the new value of the column. These can
|
|
||||||
be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
|
|
||||||
based on the row being updated (e.g. "my_col + 1")
|
|
||||||
|
|
||||||
• **options?**: `Partial`<[`UpdateOptions`](../interfaces/UpdateOptions.md)>
|
|
||||||
|
|
||||||
additional options to control
|
|
||||||
the update behavior
|
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -649,7 +682,9 @@ the update behavior
|
|||||||
|
|
||||||
### vectorSearch()
|
### vectorSearch()
|
||||||
|
|
||||||
> `abstract` **vectorSearch**(`vector`): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
abstract vectorSearch(vector): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
Search the table with a given query vector.
|
Search the table with a given query vector.
|
||||||
|
|
||||||
@@ -659,7 +694,7 @@ by `query`.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **vector**: `IntoVector`
|
* **vector**: `IntoVector`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -673,7 +708,9 @@ by `query`.
|
|||||||
|
|
||||||
### version()
|
### version()
|
||||||
|
|
||||||
> `abstract` **version**(): `Promise`<`number`>
|
```ts
|
||||||
|
abstract version(): Promise<number>
|
||||||
|
```
|
||||||
|
|
||||||
Retrieve the version of the table
|
Retrieve the version of the table
|
||||||
|
|
||||||
@@ -685,15 +722,20 @@ Retrieve the version of the table
|
|||||||
|
|
||||||
### parseTableData()
|
### parseTableData()
|
||||||
|
|
||||||
> `static` **parseTableData**(`data`, `options`?, `streaming`?): `Promise`<`object`>
|
```ts
|
||||||
|
static parseTableData(
|
||||||
|
data,
|
||||||
|
options?,
|
||||||
|
streaming?): Promise<object>
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **data**: `TableLike` \| `Record`<`string`, `unknown`>[]
|
* **data**: `TableLike` \| `Record`<`string`, `unknown`>[]
|
||||||
|
|
||||||
• **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||||
|
|
||||||
• **streaming?**: `boolean` = `false`
|
* **streaming?**: `boolean` = `false`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -701,8 +743,12 @@ Retrieve the version of the table
|
|||||||
|
|
||||||
##### buf
|
##### buf
|
||||||
|
|
||||||
> **buf**: `Buffer`
|
```ts
|
||||||
|
buf: Buffer;
|
||||||
|
```
|
||||||
|
|
||||||
##### mode
|
##### mode
|
||||||
|
|
||||||
> **mode**: `string`
|
```ts
|
||||||
|
mode: string;
|
||||||
|
```
|
||||||
|
|||||||
@@ -10,11 +10,13 @@
|
|||||||
|
|
||||||
### new VectorColumnOptions()
|
### new VectorColumnOptions()
|
||||||
|
|
||||||
> **new VectorColumnOptions**(`values`?): [`VectorColumnOptions`](VectorColumnOptions.md)
|
```ts
|
||||||
|
new VectorColumnOptions(values?): VectorColumnOptions
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **values?**: `Partial`<[`VectorColumnOptions`](VectorColumnOptions.md)>
|
* **values?**: `Partial`<[`VectorColumnOptions`](VectorColumnOptions.md)>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -24,6 +26,8 @@
|
|||||||
|
|
||||||
### type
|
### type
|
||||||
|
|
||||||
> **type**: `Float`<`Floats`>
|
```ts
|
||||||
|
type: Float<Floats>;
|
||||||
|
```
|
||||||
|
|
||||||
Vector column type.
|
Vector column type.
|
||||||
|
|||||||
@@ -18,11 +18,13 @@ This builder can be reused to execute the query many times.
|
|||||||
|
|
||||||
### new VectorQuery()
|
### new VectorQuery()
|
||||||
|
|
||||||
> **new VectorQuery**(`inner`): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
new VectorQuery(inner): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **inner**: `VectorQuery` \| `Promise`<`VectorQuery`>
|
* **inner**: `VectorQuery` \| `Promise`<`VectorQuery`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -36,7 +38,9 @@ This builder can be reused to execute the query many times.
|
|||||||
|
|
||||||
### inner
|
### inner
|
||||||
|
|
||||||
> `protected` **inner**: `VectorQuery` \| `Promise`<`VectorQuery`>
|
```ts
|
||||||
|
protected inner: VectorQuery | Promise<VectorQuery>;
|
||||||
|
```
|
||||||
|
|
||||||
#### Inherited from
|
#### Inherited from
|
||||||
|
|
||||||
@@ -46,7 +50,9 @@ This builder can be reused to execute the query many times.
|
|||||||
|
|
||||||
### \[asyncIterator\]()
|
### \[asyncIterator\]()
|
||||||
|
|
||||||
> **\[asyncIterator\]**(): `AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`>
|
```ts
|
||||||
|
asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
|
||||||
|
```
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -58,9 +64,27 @@ This builder can be reused to execute the query many times.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### addQueryVector()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
addQueryVector(vector): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **vector**: `IntoVector`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`VectorQuery`](VectorQuery.md)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### bypassVectorIndex()
|
### bypassVectorIndex()
|
||||||
|
|
||||||
> **bypassVectorIndex**(): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
bypassVectorIndex(): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
If this is called then any vector index is skipped
|
If this is called then any vector index is skipped
|
||||||
|
|
||||||
@@ -78,7 +102,9 @@ calculate your recall to select an appropriate value for nprobes.
|
|||||||
|
|
||||||
### column()
|
### column()
|
||||||
|
|
||||||
> **column**(`column`): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
column(column): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
Set the vector column to query
|
Set the vector column to query
|
||||||
|
|
||||||
@@ -87,7 +113,7 @@ the call to
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **column**: `string`
|
* **column**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -104,7 +130,9 @@ whose data type is a fixed-size-list of floats.
|
|||||||
|
|
||||||
### distanceType()
|
### distanceType()
|
||||||
|
|
||||||
> **distanceType**(`distanceType`): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
distanceType(distanceType): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
Set the distance metric to use
|
Set the distance metric to use
|
||||||
|
|
||||||
@@ -114,7 +142,7 @@ use. See
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **distanceType**: `"l2"` \| `"cosine"` \| `"dot"`
|
* **distanceType**: `"l2"` \| `"cosine"` \| `"dot"`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -135,11 +163,13 @@ By default "l2" is used.
|
|||||||
|
|
||||||
### doCall()
|
### doCall()
|
||||||
|
|
||||||
> `protected` **doCall**(`fn`): `void`
|
```ts
|
||||||
|
protected doCall(fn): void
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **fn**
|
* **fn**
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -151,15 +181,41 @@ By default "l2" is used.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### ef()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
ef(ef): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
|
Set the number of candidates to consider during the search
|
||||||
|
|
||||||
|
This argument is only used when the vector column has an HNSW index.
|
||||||
|
If there is no index then this value is ignored.
|
||||||
|
|
||||||
|
Increasing this value will increase the recall of your query but will
|
||||||
|
also increase the latency of your query. The default value is 1.5*limit.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **ef**: `number`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`VectorQuery`](VectorQuery.md)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### execute()
|
### execute()
|
||||||
|
|
||||||
> `protected` **execute**(`options`?): [`RecordBatchIterator`](RecordBatchIterator.md)
|
```ts
|
||||||
|
protected execute(options?): RecordBatchIterator
|
||||||
|
```
|
||||||
|
|
||||||
Execute the query and return the results as an
|
Execute the query and return the results as an
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -185,15 +241,16 @@ single query)
|
|||||||
|
|
||||||
### explainPlan()
|
### explainPlan()
|
||||||
|
|
||||||
> **explainPlan**(`verbose`): `Promise`<`string`>
|
```ts
|
||||||
|
explainPlan(verbose): Promise<string>
|
||||||
|
```
|
||||||
|
|
||||||
Generates an explanation of the query execution plan.
|
Generates an explanation of the query execution plan.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **verbose**: `boolean` = `false`
|
* **verbose**: `boolean` = `false`
|
||||||
|
If true, provides a more detailed explanation. Defaults to false.
|
||||||
If true, provides a more detailed explanation. Defaults to false.
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -218,15 +275,38 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### fastSearch()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
fastSearch(): this
|
||||||
|
```
|
||||||
|
|
||||||
|
Skip searching un-indexed data. This can make search faster, but will miss
|
||||||
|
any data that is not yet indexed.
|
||||||
|
|
||||||
|
Use lancedb.Table#optimize to index all un-indexed data.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### ~~filter()~~
|
### ~~filter()~~
|
||||||
|
|
||||||
> **filter**(`predicate`): `this`
|
```ts
|
||||||
|
filter(predicate): this
|
||||||
|
```
|
||||||
|
|
||||||
A filter statement to be applied to this query.
|
A filter statement to be applied to this query.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **predicate**: `string`
|
* **predicate**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -246,9 +326,33 @@ Use `where` instead
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### fullTextSearch()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
fullTextSearch(query, options?): this
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **query**: `string`
|
||||||
|
|
||||||
|
* **options?**: `Partial`<`FullTextSearchOptions`>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### limit()
|
### limit()
|
||||||
|
|
||||||
> **limit**(`limit`): `this`
|
```ts
|
||||||
|
limit(limit): this
|
||||||
|
```
|
||||||
|
|
||||||
Set the maximum number of results to return.
|
Set the maximum number of results to return.
|
||||||
|
|
||||||
@@ -257,7 +361,7 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **limit**: `number`
|
* **limit**: `number`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -271,11 +375,13 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
### nativeExecute()
|
### nativeExecute()
|
||||||
|
|
||||||
> `protected` **nativeExecute**(`options`?): `Promise`<`RecordBatchIterator`>
|
```ts
|
||||||
|
protected nativeExecute(options?): Promise<RecordBatchIterator>
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -289,7 +395,9 @@ called then every valid row from the table will be returned.
|
|||||||
|
|
||||||
### nprobes()
|
### nprobes()
|
||||||
|
|
||||||
> **nprobes**(`nprobes`): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
nprobes(nprobes): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
Set the number of partitions to search (probe)
|
Set the number of partitions to search (probe)
|
||||||
|
|
||||||
@@ -314,7 +422,7 @@ you the desired recall.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **nprobes**: `number`
|
* **nprobes**: `number`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -322,9 +430,31 @@ you the desired recall.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### offset()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
offset(offset): this
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **offset**: `number`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### postfilter()
|
### postfilter()
|
||||||
|
|
||||||
> **postfilter**(): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
postfilter(): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
If this is called then filtering will happen after the vector search instead of
|
If this is called then filtering will happen after the vector search instead of
|
||||||
before.
|
before.
|
||||||
@@ -356,7 +486,9 @@ factor can often help restore some of the results lost by post filtering.
|
|||||||
|
|
||||||
### refineFactor()
|
### refineFactor()
|
||||||
|
|
||||||
> **refineFactor**(`refineFactor`): [`VectorQuery`](VectorQuery.md)
|
```ts
|
||||||
|
refineFactor(refineFactor): VectorQuery
|
||||||
|
```
|
||||||
|
|
||||||
A multiplier to control how many additional rows are taken during the refine step
|
A multiplier to control how many additional rows are taken during the refine step
|
||||||
|
|
||||||
@@ -388,7 +520,7 @@ distance between the query vector and the actual uncompressed vector.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **refineFactor**: `number`
|
* **refineFactor**: `number`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -398,7 +530,9 @@ distance between the query vector and the actual uncompressed vector.
|
|||||||
|
|
||||||
### select()
|
### select()
|
||||||
|
|
||||||
> **select**(`columns`): `this`
|
```ts
|
||||||
|
select(columns): this
|
||||||
|
```
|
||||||
|
|
||||||
Return only the specified columns.
|
Return only the specified columns.
|
||||||
|
|
||||||
@@ -422,7 +556,7 @@ input to this method would be:
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **columns**: `string` \| `string`[] \| `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
* **columns**: `string` \| `string`[] \| `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -449,13 +583,15 @@ object insertion order is easy to get wrong and `Map` is more foolproof.
|
|||||||
|
|
||||||
### toArray()
|
### toArray()
|
||||||
|
|
||||||
> **toArray**(`options`?): `Promise`<`any`[]>
|
```ts
|
||||||
|
toArray(options?): Promise<any[]>
|
||||||
|
```
|
||||||
|
|
||||||
Collect the results as an array of objects.
|
Collect the results as an array of objects.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -469,13 +605,15 @@ Collect the results as an array of objects.
|
|||||||
|
|
||||||
### toArrow()
|
### toArrow()
|
||||||
|
|
||||||
> **toArrow**(`options`?): `Promise`<`Table`<`any`>>
|
```ts
|
||||||
|
toArrow(options?): Promise<Table<any>>
|
||||||
|
```
|
||||||
|
|
||||||
Collect the results as an Arrow
|
Collect the results as an Arrow
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **options?**: `Partial`<`QueryExecutionOptions`>
|
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -493,7 +631,9 @@ ArrowTable.
|
|||||||
|
|
||||||
### where()
|
### where()
|
||||||
|
|
||||||
> **where**(`predicate`): `this`
|
```ts
|
||||||
|
where(predicate): this
|
||||||
|
```
|
||||||
|
|
||||||
A filter statement to be applied to this query.
|
A filter statement to be applied to this query.
|
||||||
|
|
||||||
@@ -501,7 +641,7 @@ The filter should be supplied as an SQL query string. For example:
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **predicate**: `string`
|
* **predicate**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -521,3 +661,25 @@ on the filter column(s).
|
|||||||
#### Inherited from
|
#### Inherited from
|
||||||
|
|
||||||
[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
|
[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### withRowId()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
withRowId(): this
|
||||||
|
```
|
||||||
|
|
||||||
|
Whether to return the row id in the results.
|
||||||
|
|
||||||
|
This column can be used to match results between different queries. For
|
||||||
|
example, to match results from a full text search and a vector search in
|
||||||
|
order to perform hybrid search.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
|
||||||
|
|||||||
@@ -12,16 +12,22 @@ Write mode for writing a table.
|
|||||||
|
|
||||||
### Append
|
### Append
|
||||||
|
|
||||||
> **Append**: `"Append"`
|
```ts
|
||||||
|
Append: "Append";
|
||||||
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### Create
|
### Create
|
||||||
|
|
||||||
> **Create**: `"Create"`
|
```ts
|
||||||
|
Create: "Create";
|
||||||
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### Overwrite
|
### Overwrite
|
||||||
|
|
||||||
> **Overwrite**: `"Overwrite"`
|
```ts
|
||||||
|
Overwrite: "Overwrite";
|
||||||
|
```
|
||||||
|
|||||||
@@ -8,7 +8,9 @@
|
|||||||
|
|
||||||
## connect(uri, opts)
|
## connect(uri, opts)
|
||||||
|
|
||||||
> **connect**(`uri`, `opts`?): `Promise`<[`Connection`](../classes/Connection.md)>
|
```ts
|
||||||
|
function connect(uri, opts?): Promise<Connection>
|
||||||
|
```
|
||||||
|
|
||||||
Connect to a LanceDB instance at the given URI.
|
Connect to a LanceDB instance at the given URI.
|
||||||
|
|
||||||
@@ -20,12 +22,11 @@ Accepted formats:
|
|||||||
|
|
||||||
### Parameters
|
### Parameters
|
||||||
|
|
||||||
• **uri**: `string`
|
* **uri**: `string`
|
||||||
|
The uri of the database. If the database uri starts
|
||||||
|
with `db://` then it connects to a remote database.
|
||||||
|
|
||||||
The uri of the database. If the database uri starts
|
* **opts?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)>
|
||||||
with `db://` then it connects to a remote database.
|
|
||||||
|
|
||||||
• **opts?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md) \| `RemoteConnectionOptions`>
|
|
||||||
|
|
||||||
### Returns
|
### Returns
|
||||||
|
|
||||||
@@ -50,7 +51,9 @@ const conn = await connect(
|
|||||||
|
|
||||||
## connect(opts)
|
## connect(opts)
|
||||||
|
|
||||||
> **connect**(`opts`): `Promise`<[`Connection`](../classes/Connection.md)>
|
```ts
|
||||||
|
function connect(opts): Promise<Connection>
|
||||||
|
```
|
||||||
|
|
||||||
Connect to a LanceDB instance at the given URI.
|
Connect to a LanceDB instance at the given URI.
|
||||||
|
|
||||||
@@ -62,7 +65,7 @@ Accepted formats:
|
|||||||
|
|
||||||
### Parameters
|
### Parameters
|
||||||
|
|
||||||
• **opts**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md) \| `RemoteConnectionOptions`> & `object`
|
* **opts**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)> & `object`
|
||||||
|
|
||||||
### Returns
|
### Returns
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,12 @@
|
|||||||
|
|
||||||
# Function: makeArrowTable()
|
# Function: makeArrowTable()
|
||||||
|
|
||||||
> **makeArrowTable**(`data`, `options`?, `metadata`?): `ArrowTable`
|
```ts
|
||||||
|
function makeArrowTable(
|
||||||
|
data,
|
||||||
|
options?,
|
||||||
|
metadata?): ArrowTable
|
||||||
|
```
|
||||||
|
|
||||||
An enhanced version of the makeTable function from Apache Arrow
|
An enhanced version of the makeTable function from Apache Arrow
|
||||||
that supports nested fields and embeddings columns.
|
that supports nested fields and embeddings columns.
|
||||||
@@ -40,11 +45,11 @@ rules are as follows:
|
|||||||
|
|
||||||
## Parameters
|
## Parameters
|
||||||
|
|
||||||
• **data**: `Record`<`string`, `unknown`>[]
|
* **data**: `Record`<`string`, `unknown`>[]
|
||||||
|
|
||||||
• **options?**: `Partial`<[`MakeArrowTableOptions`](../classes/MakeArrowTableOptions.md)>
|
* **options?**: `Partial`<[`MakeArrowTableOptions`](../classes/MakeArrowTableOptions.md)>
|
||||||
|
|
||||||
• **metadata?**: `Map`<`string`, `string`>
|
* **metadata?**: `Map`<`string`, `string`>
|
||||||
|
|
||||||
## Returns
|
## Returns
|
||||||
|
|
||||||
|
|||||||
@@ -28,17 +28,19 @@
|
|||||||
|
|
||||||
- [AddColumnsSql](interfaces/AddColumnsSql.md)
|
- [AddColumnsSql](interfaces/AddColumnsSql.md)
|
||||||
- [AddDataOptions](interfaces/AddDataOptions.md)
|
- [AddDataOptions](interfaces/AddDataOptions.md)
|
||||||
|
- [ClientConfig](interfaces/ClientConfig.md)
|
||||||
- [ColumnAlteration](interfaces/ColumnAlteration.md)
|
- [ColumnAlteration](interfaces/ColumnAlteration.md)
|
||||||
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||||
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
||||||
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
||||||
- [IndexConfig](interfaces/IndexConfig.md)
|
- [IndexConfig](interfaces/IndexConfig.md)
|
||||||
- [IndexMetadata](interfaces/IndexMetadata.md)
|
|
||||||
- [IndexOptions](interfaces/IndexOptions.md)
|
- [IndexOptions](interfaces/IndexOptions.md)
|
||||||
- [IndexStatistics](interfaces/IndexStatistics.md)
|
- [IndexStatistics](interfaces/IndexStatistics.md)
|
||||||
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
||||||
- [FtsOptions](interfaces/FtsOptions.md)
|
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
||||||
|
- [RetryConfig](interfaces/RetryConfig.md)
|
||||||
- [TableNamesOptions](interfaces/TableNamesOptions.md)
|
- [TableNamesOptions](interfaces/TableNamesOptions.md)
|
||||||
|
- [TimeoutConfig](interfaces/TimeoutConfig.md)
|
||||||
- [UpdateOptions](interfaces/UpdateOptions.md)
|
- [UpdateOptions](interfaces/UpdateOptions.md)
|
||||||
- [WriteOptions](interfaces/WriteOptions.md)
|
- [WriteOptions](interfaces/WriteOptions.md)
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,9 @@ A definition of a new column to add to a table.
|
|||||||
|
|
||||||
### name
|
### name
|
||||||
|
|
||||||
> **name**: `string`
|
```ts
|
||||||
|
name: string;
|
||||||
|
```
|
||||||
|
|
||||||
The name of the new column.
|
The name of the new column.
|
||||||
|
|
||||||
@@ -20,7 +22,9 @@ The name of the new column.
|
|||||||
|
|
||||||
### valueSql
|
### valueSql
|
||||||
|
|
||||||
> **valueSql**: `string`
|
```ts
|
||||||
|
valueSql: string;
|
||||||
|
```
|
||||||
|
|
||||||
The values to populate the new column with, as a SQL expression.
|
The values to populate the new column with, as a SQL expression.
|
||||||
The expression can reference other columns in the table.
|
The expression can reference other columns in the table.
|
||||||
|
|||||||
@@ -12,7 +12,9 @@ Options for adding data to a table.
|
|||||||
|
|
||||||
### mode
|
### mode
|
||||||
|
|
||||||
> **mode**: `"append"` \| `"overwrite"`
|
```ts
|
||||||
|
mode: "append" | "overwrite";
|
||||||
|
```
|
||||||
|
|
||||||
If "append" (the default) then the new data will be added to the table
|
If "append" (the default) then the new data will be added to the table
|
||||||
|
|
||||||
|
|||||||
31
docs/src/js/interfaces/ClientConfig.md
Normal file
31
docs/src/js/interfaces/ClientConfig.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / ClientConfig
|
||||||
|
|
||||||
|
# Interface: ClientConfig
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### retryConfig?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional retryConfig: RetryConfig;
|
||||||
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### timeoutConfig?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional timeoutConfig: TimeoutConfig;
|
||||||
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### userAgent?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional userAgent: string;
|
||||||
|
```
|
||||||
@@ -13,9 +13,29 @@ must be provided.
|
|||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
|
### dataType?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional dataType: string;
|
||||||
|
```
|
||||||
|
|
||||||
|
A new data type for the column. If not provided then the data type will not be changed.
|
||||||
|
Changing data types is limited to casting to the same general type. For example, these
|
||||||
|
changes are valid:
|
||||||
|
* `int32` -> `int64` (integers)
|
||||||
|
* `double` -> `float` (floats)
|
||||||
|
* `string` -> `large_string` (strings)
|
||||||
|
But these changes are not:
|
||||||
|
* `int32` -> `double` (mix integers and floats)
|
||||||
|
* `string` -> `int32` (mix strings and integers)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### nullable?
|
### nullable?
|
||||||
|
|
||||||
> `optional` **nullable**: `boolean`
|
```ts
|
||||||
|
optional nullable: boolean;
|
||||||
|
```
|
||||||
|
|
||||||
Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
||||||
|
|
||||||
@@ -23,7 +43,9 @@ Set the new nullability. Note that a nullable column cannot be made non-nullable
|
|||||||
|
|
||||||
### path
|
### path
|
||||||
|
|
||||||
> **path**: `string`
|
```ts
|
||||||
|
path: string;
|
||||||
|
```
|
||||||
|
|
||||||
The path to the column to alter. This is a dot-separated path to the column.
|
The path to the column to alter. This is a dot-separated path to the column.
|
||||||
If it is a top-level column then it is just the name of the column. If it is
|
If it is a top-level column then it is just the name of the column. If it is
|
||||||
@@ -34,7 +56,9 @@ a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
|||||||
|
|
||||||
### rename?
|
### rename?
|
||||||
|
|
||||||
> `optional` **rename**: `string`
|
```ts
|
||||||
|
optional rename: string;
|
||||||
|
```
|
||||||
|
|
||||||
The new name of the column. If not provided then the name will not be changed.
|
The new name of the column. If not provided then the name will not be changed.
|
||||||
This must be distinct from the names of all other columns in the table.
|
This must be distinct from the names of all other columns in the table.
|
||||||
|
|||||||
@@ -8,9 +8,44 @@
|
|||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
|
### apiKey?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional apiKey: string;
|
||||||
|
```
|
||||||
|
|
||||||
|
(For LanceDB cloud only): the API key to use with LanceDB Cloud.
|
||||||
|
|
||||||
|
Can also be set via the environment variable `LANCEDB_API_KEY`.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### clientConfig?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional clientConfig: ClientConfig;
|
||||||
|
```
|
||||||
|
|
||||||
|
(For LanceDB cloud only): configuration for the remote HTTP client.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### hostOverride?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional hostOverride: string;
|
||||||
|
```
|
||||||
|
|
||||||
|
(For LanceDB cloud only): the host to use for LanceDB cloud. Used
|
||||||
|
for testing purposes.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### readConsistencyInterval?
|
### readConsistencyInterval?
|
||||||
|
|
||||||
> `optional` **readConsistencyInterval**: `number`
|
```ts
|
||||||
|
optional readConsistencyInterval: number;
|
||||||
|
```
|
||||||
|
|
||||||
(For LanceDB OSS only): The interval, in seconds, at which to check for
|
(For LanceDB OSS only): The interval, in seconds, at which to check for
|
||||||
updates to the table from other processes. If None, then consistency is not
|
updates to the table from other processes. If None, then consistency is not
|
||||||
@@ -24,9 +59,22 @@ always consistent.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### region?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional region: string;
|
||||||
|
```
|
||||||
|
|
||||||
|
(For LanceDB cloud only): the region to use for LanceDB cloud.
|
||||||
|
Defaults to 'us-east-1'.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### storageOptions?
|
### storageOptions?
|
||||||
|
|
||||||
> `optional` **storageOptions**: `Record`<`string`, `string`>
|
```ts
|
||||||
|
optional storageOptions: Record<string, string>;
|
||||||
|
```
|
||||||
|
|
||||||
(For LanceDB OSS only): configuration for object storage.
|
(For LanceDB OSS only): configuration for object storage.
|
||||||
|
|
||||||
|
|||||||
@@ -8,15 +8,46 @@
|
|||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
|
### dataStorageVersion?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional dataStorageVersion: string;
|
||||||
|
```
|
||||||
|
|
||||||
|
The version of the data storage format to use.
|
||||||
|
|
||||||
|
The default is `stable`.
|
||||||
|
Set to "legacy" to use the old format.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### embeddingFunction?
|
### embeddingFunction?
|
||||||
|
|
||||||
> `optional` **embeddingFunction**: [`EmbeddingFunctionConfig`](../namespaces/embedding/interfaces/EmbeddingFunctionConfig.md)
|
```ts
|
||||||
|
optional embeddingFunction: EmbeddingFunctionConfig;
|
||||||
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### enableV2ManifestPaths?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional enableV2ManifestPaths: boolean;
|
||||||
|
```
|
||||||
|
|
||||||
|
Use the new V2 manifest paths. These paths provide more efficient
|
||||||
|
opening of datasets with many versions on object stores. WARNING:
|
||||||
|
turning this on will make the dataset unreadable for older versions
|
||||||
|
of LanceDB (prior to 0.10.0). To migrate an existing dataset, instead
|
||||||
|
use the LocalTable#migrateManifestPathsV2 method.
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### existOk
|
### existOk
|
||||||
|
|
||||||
> **existOk**: `boolean`
|
```ts
|
||||||
|
existOk: boolean;
|
||||||
|
```
|
||||||
|
|
||||||
If this is true and the table already exists and the mode is "create"
|
If this is true and the table already exists and the mode is "create"
|
||||||
then no error will be raised.
|
then no error will be raised.
|
||||||
@@ -25,7 +56,9 @@ then no error will be raised.
|
|||||||
|
|
||||||
### mode
|
### mode
|
||||||
|
|
||||||
> **mode**: `"overwrite"` \| `"create"`
|
```ts
|
||||||
|
mode: "overwrite" | "create";
|
||||||
|
```
|
||||||
|
|
||||||
The mode to use when creating the table.
|
The mode to use when creating the table.
|
||||||
|
|
||||||
@@ -39,13 +72,17 @@ If this is set to "overwrite" then any existing table will be replaced.
|
|||||||
|
|
||||||
### schema?
|
### schema?
|
||||||
|
|
||||||
> `optional` **schema**: `SchemaLike`
|
```ts
|
||||||
|
optional schema: SchemaLike;
|
||||||
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### storageOptions?
|
### storageOptions?
|
||||||
|
|
||||||
> `optional` **storageOptions**: `Record`<`string`, `string`>
|
```ts
|
||||||
|
optional storageOptions: Record<string, string>;
|
||||||
|
```
|
||||||
|
|
||||||
Configuration for object storage.
|
Configuration for object storage.
|
||||||
|
|
||||||
@@ -58,8 +95,12 @@ The available options are described at https://lancedb.github.io/lancedb/guides/
|
|||||||
|
|
||||||
### useLegacyFormat?
|
### useLegacyFormat?
|
||||||
|
|
||||||
> `optional` **useLegacyFormat**: `boolean`
|
```ts
|
||||||
|
optional useLegacyFormat: boolean;
|
||||||
|
```
|
||||||
|
|
||||||
If true then data files will be written with the legacy format
|
If true then data files will be written with the legacy format
|
||||||
|
|
||||||
The default is true while the new format is in beta
|
The default is false.
|
||||||
|
|
||||||
|
Deprecated. Use `dataStorageVersion` instead.
|
||||||
|
|||||||
@@ -1,25 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / FtsOptions
|
|
||||||
|
|
||||||
# Interface: FtsOptions
|
|
||||||
|
|
||||||
Options to create an `FTS` index
|
|
||||||
|
|
||||||
## Properties
|
|
||||||
|
|
||||||
### withPosition?
|
|
||||||
|
|
||||||
> `optional` **withPosition**: `boolean`
|
|
||||||
|
|
||||||
Whether to store the positions of the term in the document.
|
|
||||||
|
|
||||||
If this is true then the index will store the positions of the term in the document.
|
|
||||||
This allows phrase queries to be run. But it also increases the size of the index,
|
|
||||||
and the time to build the index.
|
|
||||||
|
|
||||||
The default value is true.
|
|
||||||
|
|
||||||
***
|
|
||||||
@@ -12,7 +12,9 @@ A description of an index currently configured on a column
|
|||||||
|
|
||||||
### columns
|
### columns
|
||||||
|
|
||||||
> **columns**: `string`[]
|
```ts
|
||||||
|
columns: string[];
|
||||||
|
```
|
||||||
|
|
||||||
The columns in the index
|
The columns in the index
|
||||||
|
|
||||||
@@ -23,7 +25,9 @@ be more columns to represent composite indices.
|
|||||||
|
|
||||||
### indexType
|
### indexType
|
||||||
|
|
||||||
> **indexType**: `string`
|
```ts
|
||||||
|
indexType: string;
|
||||||
|
```
|
||||||
|
|
||||||
The type of the index
|
The type of the index
|
||||||
|
|
||||||
@@ -31,6 +35,8 @@ The type of the index
|
|||||||
|
|
||||||
### name
|
### name
|
||||||
|
|
||||||
> **name**: `string`
|
```ts
|
||||||
|
name: string;
|
||||||
|
```
|
||||||
|
|
||||||
The name of the index
|
The name of the index
|
||||||
|
|||||||
@@ -1,19 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / IndexMetadata
|
|
||||||
|
|
||||||
# Interface: IndexMetadata
|
|
||||||
|
|
||||||
## Properties
|
|
||||||
|
|
||||||
### indexType?
|
|
||||||
|
|
||||||
> `optional` **indexType**: `string`
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### metricType?
|
|
||||||
|
|
||||||
> `optional` **metricType**: `string`
|
|
||||||
@@ -10,7 +10,9 @@
|
|||||||
|
|
||||||
### config?
|
### config?
|
||||||
|
|
||||||
> `optional` **config**: [`Index`](../classes/Index.md)
|
```ts
|
||||||
|
optional config: Index;
|
||||||
|
```
|
||||||
|
|
||||||
Advanced index configuration
|
Advanced index configuration
|
||||||
|
|
||||||
@@ -26,7 +28,9 @@ will be used to determine the most useful kind of index to create.
|
|||||||
|
|
||||||
### replace?
|
### replace?
|
||||||
|
|
||||||
> `optional` **replace**: `boolean`
|
```ts
|
||||||
|
optional replace: boolean;
|
||||||
|
```
|
||||||
|
|
||||||
Whether to replace the existing index
|
Whether to replace the existing index
|
||||||
|
|
||||||
|
|||||||
@@ -8,32 +8,52 @@
|
|||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
### indexType?
|
### distanceType?
|
||||||
|
|
||||||
> `optional` **indexType**: `string`
|
```ts
|
||||||
|
optional distanceType: string;
|
||||||
|
```
|
||||||
|
|
||||||
|
The type of the distance function used by the index. This is only
|
||||||
|
present for vector indices. Scalar and full text search indices do
|
||||||
|
not have a distance function.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### indexType
|
||||||
|
|
||||||
|
```ts
|
||||||
|
indexType: string;
|
||||||
|
```
|
||||||
|
|
||||||
The type of the index
|
The type of the index
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### indices
|
|
||||||
|
|
||||||
> **indices**: [`IndexMetadata`](IndexMetadata.md)[]
|
|
||||||
|
|
||||||
The metadata for each index
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### numIndexedRows
|
### numIndexedRows
|
||||||
|
|
||||||
> **numIndexedRows**: `number`
|
```ts
|
||||||
|
numIndexedRows: number;
|
||||||
|
```
|
||||||
|
|
||||||
The number of rows indexed by the index
|
The number of rows indexed by the index
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### numIndices?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional numIndices: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The number of parts this index is split into.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### numUnindexedRows
|
### numUnindexedRows
|
||||||
|
|
||||||
> **numUnindexedRows**: `number`
|
```ts
|
||||||
|
numUnindexedRows: number;
|
||||||
|
```
|
||||||
|
|
||||||
The number of rows not indexed
|
The number of rows not indexed
|
||||||
|
|||||||
@@ -12,7 +12,9 @@ Options to create an `IVF_PQ` index
|
|||||||
|
|
||||||
### distanceType?
|
### distanceType?
|
||||||
|
|
||||||
> `optional` **distanceType**: `"l2"` \| `"cosine"` \| `"dot"`
|
```ts
|
||||||
|
optional distanceType: "l2" | "cosine" | "dot";
|
||||||
|
```
|
||||||
|
|
||||||
Distance type to use to build the index.
|
Distance type to use to build the index.
|
||||||
|
|
||||||
@@ -50,7 +52,9 @@ L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
|||||||
|
|
||||||
### maxIterations?
|
### maxIterations?
|
||||||
|
|
||||||
> `optional` **maxIterations**: `number`
|
```ts
|
||||||
|
optional maxIterations: number;
|
||||||
|
```
|
||||||
|
|
||||||
Max iteration to train IVF kmeans.
|
Max iteration to train IVF kmeans.
|
||||||
|
|
||||||
@@ -66,7 +70,9 @@ The default value is 50.
|
|||||||
|
|
||||||
### numPartitions?
|
### numPartitions?
|
||||||
|
|
||||||
> `optional` **numPartitions**: `number`
|
```ts
|
||||||
|
optional numPartitions: number;
|
||||||
|
```
|
||||||
|
|
||||||
The number of IVF partitions to create.
|
The number of IVF partitions to create.
|
||||||
|
|
||||||
@@ -82,7 +88,9 @@ part of the search (searching within a partition) will be slow.
|
|||||||
|
|
||||||
### numSubVectors?
|
### numSubVectors?
|
||||||
|
|
||||||
> `optional` **numSubVectors**: `number`
|
```ts
|
||||||
|
optional numSubVectors: number;
|
||||||
|
```
|
||||||
|
|
||||||
Number of sub-vectors of PQ.
|
Number of sub-vectors of PQ.
|
||||||
|
|
||||||
@@ -101,7 +109,9 @@ will likely result in poor performance.
|
|||||||
|
|
||||||
### sampleRate?
|
### sampleRate?
|
||||||
|
|
||||||
> `optional` **sampleRate**: `number`
|
```ts
|
||||||
|
optional sampleRate: number;
|
||||||
|
```
|
||||||
|
|
||||||
The number of vectors, per partition, to sample when training IVF kmeans.
|
The number of vectors, per partition, to sample when training IVF kmeans.
|
||||||
|
|
||||||
|
|||||||
39
docs/src/js/interfaces/OptimizeOptions.md
Normal file
39
docs/src/js/interfaces/OptimizeOptions.md
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / OptimizeOptions
|
||||||
|
|
||||||
|
# Interface: OptimizeOptions
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### cleanupOlderThan
|
||||||
|
|
||||||
|
```ts
|
||||||
|
cleanupOlderThan: Date;
|
||||||
|
```
|
||||||
|
|
||||||
|
If set then all versions older than the given date
|
||||||
|
will be removed. The current version will never be removed.
|
||||||
|
The default is 7 days
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```ts
|
||||||
|
// Delete all versions older than 1 day
|
||||||
|
const olderThan = new Date();
|
||||||
|
olderThan.setDate(olderThan.getDate() - 1);
|
||||||
|
tbl.cleanupOlderVersions(olderThan);
|
||||||
|
|
||||||
|
// Delete all versions except the current version
|
||||||
|
tbl.cleanupOlderVersions(new Date());
|
||||||
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### deleteUnverified
|
||||||
|
|
||||||
|
```ts
|
||||||
|
deleteUnverified: boolean;
|
||||||
|
```
|
||||||
90
docs/src/js/interfaces/RetryConfig.md
Normal file
90
docs/src/js/interfaces/RetryConfig.md
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / RetryConfig
|
||||||
|
|
||||||
|
# Interface: RetryConfig
|
||||||
|
|
||||||
|
Retry configuration for the remote HTTP client.
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### backoffFactor?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional backoffFactor: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The backoff factor to apply between retries. Default is 0.25. Between each retry
|
||||||
|
the client will wait for the following number of seconds:
|
||||||
|
`{backoff factor} * (2 ** ({number of previous retries}))`. So for the default
|
||||||
|
of 0.25, the first retry will wait 0.25 seconds, the second retry will wait 0.5
|
||||||
|
seconds, the third retry will wait 1 second, etc.
|
||||||
|
|
||||||
|
You can also set this via the environment variable
|
||||||
|
`LANCE_CLIENT_RETRY_BACKOFF_FACTOR`.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### backoffJitter?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional backoffJitter: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The jitter to apply to the backoff factor, in seconds. Default is 0.25.
|
||||||
|
|
||||||
|
A random value between 0 and `backoff_jitter` will be added to the backoff
|
||||||
|
factor in seconds. So for the default of 0.25 seconds, between 0 and 250
|
||||||
|
milliseconds will be added to the sleep between each retry.
|
||||||
|
|
||||||
|
You can also set this via the environment variable
|
||||||
|
`LANCE_CLIENT_RETRY_BACKOFF_JITTER`.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### connectRetries?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional connectRetries: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The maximum number of retries for connection errors. Default is 3. You
|
||||||
|
can also set this via the environment variable `LANCE_CLIENT_CONNECT_RETRIES`.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### readRetries?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional readRetries: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The maximum number of retries for read errors. Default is 3. You can also
|
||||||
|
set this via the environment variable `LANCE_CLIENT_READ_RETRIES`.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### retries?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional retries: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The maximum number of retries for a request. Default is 3. You can also
|
||||||
|
set this via the environment variable `LANCE_CLIENT_MAX_RETRIES`.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### statuses?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional statuses: number[];
|
||||||
|
```
|
||||||
|
|
||||||
|
The HTTP status codes for which to retry the request. Default is
|
||||||
|
[429, 500, 502, 503].
|
||||||
|
|
||||||
|
You can also set this via the environment variable
|
||||||
|
`LANCE_CLIENT_RETRY_STATUSES`. Use a comma-separated list of integers.
|
||||||
@@ -10,7 +10,9 @@
|
|||||||
|
|
||||||
### limit?
|
### limit?
|
||||||
|
|
||||||
> `optional` **limit**: `number`
|
```ts
|
||||||
|
optional limit: number;
|
||||||
|
```
|
||||||
|
|
||||||
An optional limit to the number of results to return.
|
An optional limit to the number of results to return.
|
||||||
|
|
||||||
@@ -18,7 +20,9 @@ An optional limit to the number of results to return.
|
|||||||
|
|
||||||
### startAfter?
|
### startAfter?
|
||||||
|
|
||||||
> `optional` **startAfter**: `string`
|
```ts
|
||||||
|
optional startAfter: string;
|
||||||
|
```
|
||||||
|
|
||||||
If present, only return names that come lexicographically after the
|
If present, only return names that come lexicographically after the
|
||||||
supplied value.
|
supplied value.
|
||||||
|
|||||||
46
docs/src/js/interfaces/TimeoutConfig.md
Normal file
46
docs/src/js/interfaces/TimeoutConfig.md
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / TimeoutConfig
|
||||||
|
|
||||||
|
# Interface: TimeoutConfig
|
||||||
|
|
||||||
|
Timeout configuration for remote HTTP client.
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### connectTimeout?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional connectTimeout: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The timeout for establishing a connection in seconds. Default is 120
|
||||||
|
seconds (2 minutes). This can also be set via the environment variable
|
||||||
|
`LANCE_CLIENT_CONNECT_TIMEOUT`, as an integer number of seconds.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### poolIdleTimeout?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional poolIdleTimeout: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The timeout for keeping idle connections in the connection pool in seconds.
|
||||||
|
Default is 300 seconds (5 minutes). This can also be set via the
|
||||||
|
environment variable `LANCE_CLIENT_CONNECTION_TIMEOUT`, as an integer
|
||||||
|
number of seconds.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### readTimeout?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional readTimeout: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
The timeout for reading data from the server in seconds. Default is 300
|
||||||
|
seconds (5 minutes). This can also be set via the environment variable
|
||||||
|
`LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.
|
||||||
@@ -10,7 +10,9 @@
|
|||||||
|
|
||||||
### where
|
### where
|
||||||
|
|
||||||
> **where**: `string`
|
```ts
|
||||||
|
where: string;
|
||||||
|
```
|
||||||
|
|
||||||
A filter that limits the scope of the update.
|
A filter that limits the scope of the update.
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ Write options when creating a Table.
|
|||||||
|
|
||||||
### mode?
|
### mode?
|
||||||
|
|
||||||
> `optional` **mode**: [`WriteMode`](../enumerations/WriteMode.md)
|
```ts
|
||||||
|
optional mode: WriteMode;
|
||||||
|
```
|
||||||
|
|
||||||
Write mode for writing to a table.
|
Write mode for writing to a table.
|
||||||
|
|||||||
@@ -12,16 +12,12 @@
|
|||||||
|
|
||||||
- [EmbeddingFunction](classes/EmbeddingFunction.md)
|
- [EmbeddingFunction](classes/EmbeddingFunction.md)
|
||||||
- [EmbeddingFunctionRegistry](classes/EmbeddingFunctionRegistry.md)
|
- [EmbeddingFunctionRegistry](classes/EmbeddingFunctionRegistry.md)
|
||||||
- [OpenAIEmbeddingFunction](classes/OpenAIEmbeddingFunction.md)
|
- [TextEmbeddingFunction](classes/TextEmbeddingFunction.md)
|
||||||
|
|
||||||
### Interfaces
|
### Interfaces
|
||||||
|
|
||||||
- [EmbeddingFunctionConfig](interfaces/EmbeddingFunctionConfig.md)
|
- [EmbeddingFunctionConfig](interfaces/EmbeddingFunctionConfig.md)
|
||||||
|
|
||||||
### Type Aliases
|
|
||||||
|
|
||||||
- [OpenAIOptions](type-aliases/OpenAIOptions.md)
|
|
||||||
|
|
||||||
### Functions
|
### Functions
|
||||||
|
|
||||||
- [LanceSchema](functions/LanceSchema.md)
|
- [LanceSchema](functions/LanceSchema.md)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ An embedding function that automatically creates vector representation for a giv
|
|||||||
|
|
||||||
## Extended by
|
## Extended by
|
||||||
|
|
||||||
- [`OpenAIEmbeddingFunction`](OpenAIEmbeddingFunction.md)
|
- [`TextEmbeddingFunction`](TextEmbeddingFunction.md)
|
||||||
|
|
||||||
## Type Parameters
|
## Type Parameters
|
||||||
|
|
||||||
@@ -22,7 +22,9 @@ An embedding function that automatically creates vector representation for a giv
|
|||||||
|
|
||||||
### new EmbeddingFunction()
|
### new EmbeddingFunction()
|
||||||
|
|
||||||
> **new EmbeddingFunction**<`T`, `M`>(): [`EmbeddingFunction`](EmbeddingFunction.md)<`T`, `M`>
|
```ts
|
||||||
|
new EmbeddingFunction<T, M>(): EmbeddingFunction<T, M>
|
||||||
|
```
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -32,13 +34,15 @@ An embedding function that automatically creates vector representation for a giv
|
|||||||
|
|
||||||
### computeQueryEmbeddings()
|
### computeQueryEmbeddings()
|
||||||
|
|
||||||
> **computeQueryEmbeddings**(`data`): `Promise`<`number`[] \| `Float32Array` \| `Float64Array`>
|
```ts
|
||||||
|
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
|
||||||
|
```
|
||||||
|
|
||||||
Compute the embeddings for a single query
|
Compute the embeddings for a single query
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **data**: `T`
|
* **data**: `T`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -48,13 +52,15 @@ Compute the embeddings for a single query
|
|||||||
|
|
||||||
### computeSourceEmbeddings()
|
### computeSourceEmbeddings()
|
||||||
|
|
||||||
> `abstract` **computeSourceEmbeddings**(`data`): `Promise`<`number`[][] \| `Float32Array`[] \| `Float64Array`[]>
|
```ts
|
||||||
|
abstract computeSourceEmbeddings(data): Promise<number[][] | Float32Array[] | Float64Array[]>
|
||||||
|
```
|
||||||
|
|
||||||
Creates a vector representation for the given values.
|
Creates a vector representation for the given values.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **data**: `T`[]
|
* **data**: `T`[]
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -64,7 +70,9 @@ Creates a vector representation for the given values.
|
|||||||
|
|
||||||
### embeddingDataType()
|
### embeddingDataType()
|
||||||
|
|
||||||
> `abstract` **embeddingDataType**(): `Float`<`Floats`>
|
```ts
|
||||||
|
abstract embeddingDataType(): Float<Floats>
|
||||||
|
```
|
||||||
|
|
||||||
The datatype of the embeddings
|
The datatype of the embeddings
|
||||||
|
|
||||||
@@ -74,9 +82,23 @@ The datatype of the embeddings
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### init()?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional init(): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`void`>
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### ndims()
|
### ndims()
|
||||||
|
|
||||||
> **ndims**(): `undefined` \| `number`
|
```ts
|
||||||
|
ndims(): undefined | number
|
||||||
|
```
|
||||||
|
|
||||||
The number of dimensions of the embeddings
|
The number of dimensions of the embeddings
|
||||||
|
|
||||||
@@ -88,15 +110,16 @@ The number of dimensions of the embeddings
|
|||||||
|
|
||||||
### sourceField()
|
### sourceField()
|
||||||
|
|
||||||
> **sourceField**(`optionsOrDatatype`): [`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
```ts
|
||||||
|
sourceField(optionsOrDatatype): [DataType<Type, any>, Map<string, EmbeddingFunction<any, FunctionOptions>>]
|
||||||
|
```
|
||||||
|
|
||||||
sourceField is used in combination with `LanceSchema` to provide a declarative data model
|
sourceField is used in combination with `LanceSchema` to provide a declarative data model
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **optionsOrDatatype**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
* **optionsOrDatatype**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
||||||
|
The options for the field or the datatype
|
||||||
The options for the field or the datatype
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -110,7 +133,9 @@ lancedb.LanceSchema
|
|||||||
|
|
||||||
### toJSON()
|
### toJSON()
|
||||||
|
|
||||||
> `abstract` **toJSON**(): `Partial`<`M`>
|
```ts
|
||||||
|
abstract toJSON(): Partial<M>
|
||||||
|
```
|
||||||
|
|
||||||
Convert the embedding function to a JSON object
|
Convert the embedding function to a JSON object
|
||||||
It is used to serialize the embedding function to the schema
|
It is used to serialize the embedding function to the schema
|
||||||
@@ -145,13 +170,15 @@ class MyEmbeddingFunction extends EmbeddingFunction {
|
|||||||
|
|
||||||
### vectorField()
|
### vectorField()
|
||||||
|
|
||||||
> **vectorField**(`optionsOrDatatype`?): [`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
```ts
|
||||||
|
vectorField(optionsOrDatatype?): [DataType<Type, any>, Map<string, EmbeddingFunction<any, FunctionOptions>>]
|
||||||
|
```
|
||||||
|
|
||||||
vectorField is used in combination with `LanceSchema` to provide a declarative data model
|
vectorField is used in combination with `LanceSchema` to provide a declarative data model
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,9 @@ or TextEmbeddingFunction and registering it with the registry
|
|||||||
|
|
||||||
### new EmbeddingFunctionRegistry()
|
### new EmbeddingFunctionRegistry()
|
||||||
|
|
||||||
> **new EmbeddingFunctionRegistry**(): [`EmbeddingFunctionRegistry`](EmbeddingFunctionRegistry.md)
|
```ts
|
||||||
|
new EmbeddingFunctionRegistry(): EmbeddingFunctionRegistry
|
||||||
|
```
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -25,11 +27,13 @@ or TextEmbeddingFunction and registering it with the registry
|
|||||||
|
|
||||||
### functionToMetadata()
|
### functionToMetadata()
|
||||||
|
|
||||||
> **functionToMetadata**(`conf`): `Record`<`string`, `any`>
|
```ts
|
||||||
|
functionToMetadata(conf): Record<string, any>
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **conf**: [`EmbeddingFunctionConfig`](../interfaces/EmbeddingFunctionConfig.md)
|
* **conf**: [`EmbeddingFunctionConfig`](../interfaces/EmbeddingFunctionConfig.md)
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -39,7 +43,9 @@ or TextEmbeddingFunction and registering it with the registry
|
|||||||
|
|
||||||
### get()
|
### get()
|
||||||
|
|
||||||
> **get**<`T`, `Name`>(`name`): `Name` *extends* `"openai"` ? `EmbeddingFunctionCreate`<[`OpenAIEmbeddingFunction`](OpenAIEmbeddingFunction.md)> : `undefined` \| `EmbeddingFunctionCreate`<`T`>
|
```ts
|
||||||
|
get<T>(name): undefined | EmbeddingFunctionCreate<T>
|
||||||
|
```
|
||||||
|
|
||||||
Fetch an embedding function by name
|
Fetch an embedding function by name
|
||||||
|
|
||||||
@@ -47,27 +53,26 @@ Fetch an embedding function by name
|
|||||||
|
|
||||||
• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)<`unknown`, `FunctionOptions`>
|
• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)<`unknown`, `FunctionOptions`>
|
||||||
|
|
||||||
• **Name** *extends* `string` = `""`
|
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **name**: `Name` *extends* `"openai"` ? `"openai"` : `string`
|
* **name**: `string`
|
||||||
|
The name of the function
|
||||||
The name of the function
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Name` *extends* `"openai"` ? `EmbeddingFunctionCreate`<[`OpenAIEmbeddingFunction`](OpenAIEmbeddingFunction.md)> : `undefined` \| `EmbeddingFunctionCreate`<`T`>
|
`undefined` \| `EmbeddingFunctionCreate`<`T`>
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### getTableMetadata()
|
### getTableMetadata()
|
||||||
|
|
||||||
> **getTableMetadata**(`functions`): `Map`<`string`, `string`>
|
```ts
|
||||||
|
getTableMetadata(functions): Map<string, string>
|
||||||
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **functions**: [`EmbeddingFunctionConfig`](../interfaces/EmbeddingFunctionConfig.md)[]
|
* **functions**: [`EmbeddingFunctionConfig`](../interfaces/EmbeddingFunctionConfig.md)[]
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -75,9 +80,25 @@ The name of the function
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### length()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
length(): number
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the number of registered functions
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`number`
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### register()
|
### register()
|
||||||
|
|
||||||
> **register**<`T`>(`this`, `alias`?): (`ctor`) => `any`
|
```ts
|
||||||
|
register<T>(this, alias?): (ctor) => any
|
||||||
|
```
|
||||||
|
|
||||||
Register an embedding function
|
Register an embedding function
|
||||||
|
|
||||||
@@ -87,9 +108,9 @@ Register an embedding function
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **this**: [`EmbeddingFunctionRegistry`](EmbeddingFunctionRegistry.md)
|
* **this**: [`EmbeddingFunctionRegistry`](EmbeddingFunctionRegistry.md)
|
||||||
|
|
||||||
• **alias?**: `string`
|
* **alias?**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -97,7 +118,7 @@ Register an embedding function
|
|||||||
|
|
||||||
##### Parameters
|
##### Parameters
|
||||||
|
|
||||||
• **ctor**: `T`
|
* **ctor**: `T`
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -111,13 +132,15 @@ Error if the function is already registered
|
|||||||
|
|
||||||
### reset()
|
### reset()
|
||||||
|
|
||||||
> **reset**(`this`): `void`
|
```ts
|
||||||
|
reset(this): void
|
||||||
|
```
|
||||||
|
|
||||||
reset the registry to the initial state
|
reset the registry to the initial state
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **this**: [`EmbeddingFunctionRegistry`](EmbeddingFunctionRegistry.md)
|
* **this**: [`EmbeddingFunctionRegistry`](EmbeddingFunctionRegistry.md)
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
|
|||||||
@@ -2,31 +2,33 @@
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / OpenAIEmbeddingFunction
|
[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / TextEmbeddingFunction
|
||||||
|
|
||||||
# Class: OpenAIEmbeddingFunction
|
# Class: `abstract` TextEmbeddingFunction<M>
|
||||||
|
|
||||||
An embedding function that automatically creates vector representation for a given column.
|
an abstract class for implementing embedding functions that take text as input
|
||||||
|
|
||||||
## Extends
|
## Extends
|
||||||
|
|
||||||
- [`EmbeddingFunction`](EmbeddingFunction.md)<`string`, `Partial`<[`OpenAIOptions`](../type-aliases/OpenAIOptions.md)>>
|
- [`EmbeddingFunction`](EmbeddingFunction.md)<`string`, `M`>
|
||||||
|
|
||||||
|
## Type Parameters
|
||||||
|
|
||||||
|
• **M** *extends* `FunctionOptions` = `FunctionOptions`
|
||||||
|
|
||||||
## Constructors
|
## Constructors
|
||||||
|
|
||||||
### new OpenAIEmbeddingFunction()
|
### new TextEmbeddingFunction()
|
||||||
|
|
||||||
> **new OpenAIEmbeddingFunction**(`options`): [`OpenAIEmbeddingFunction`](OpenAIEmbeddingFunction.md)
|
```ts
|
||||||
|
new TextEmbeddingFunction<M>(): TextEmbeddingFunction<M>
|
||||||
#### Parameters
|
```
|
||||||
|
|
||||||
• **options**: `Partial`<[`OpenAIOptions`](../type-aliases/OpenAIOptions.md)> = `...`
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
[`OpenAIEmbeddingFunction`](OpenAIEmbeddingFunction.md)
|
[`TextEmbeddingFunction`](TextEmbeddingFunction.md)<`M`>
|
||||||
|
|
||||||
#### Overrides
|
#### Inherited from
|
||||||
|
|
||||||
[`EmbeddingFunction`](EmbeddingFunction.md).[`constructor`](EmbeddingFunction.md#constructors)
|
[`EmbeddingFunction`](EmbeddingFunction.md).[`constructor`](EmbeddingFunction.md#constructors)
|
||||||
|
|
||||||
@@ -34,17 +36,19 @@ An embedding function that automatically creates vector representation for a giv
|
|||||||
|
|
||||||
### computeQueryEmbeddings()
|
### computeQueryEmbeddings()
|
||||||
|
|
||||||
> **computeQueryEmbeddings**(`data`): `Promise`<`number`[]>
|
```ts
|
||||||
|
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
|
||||||
|
```
|
||||||
|
|
||||||
Compute the embeddings for a single query
|
Compute the embeddings for a single query
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **data**: `string`
|
* **data**: `string`
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Promise`<`number`[]>
|
`Promise`<`number`[] \| `Float32Array` \| `Float64Array`>
|
||||||
|
|
||||||
#### Overrides
|
#### Overrides
|
||||||
|
|
||||||
@@ -54,17 +58,19 @@ Compute the embeddings for a single query
|
|||||||
|
|
||||||
### computeSourceEmbeddings()
|
### computeSourceEmbeddings()
|
||||||
|
|
||||||
> **computeSourceEmbeddings**(`data`): `Promise`<`number`[][]>
|
```ts
|
||||||
|
computeSourceEmbeddings(data): Promise<number[][] | Float32Array[] | Float64Array[]>
|
||||||
|
```
|
||||||
|
|
||||||
Creates a vector representation for the given values.
|
Creates a vector representation for the given values.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **data**: `string`[]
|
* **data**: `string`[]
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Promise`<`number`[][]>
|
`Promise`<`number`[][] \| `Float32Array`[] \| `Float64Array`[]>
|
||||||
|
|
||||||
#### Overrides
|
#### Overrides
|
||||||
|
|
||||||
@@ -74,7 +80,9 @@ Creates a vector representation for the given values.
|
|||||||
|
|
||||||
### embeddingDataType()
|
### embeddingDataType()
|
||||||
|
|
||||||
> **embeddingDataType**(): `Float`<`Floats`>
|
```ts
|
||||||
|
embeddingDataType(): Float<Floats>
|
||||||
|
```
|
||||||
|
|
||||||
The datatype of the embeddings
|
The datatype of the embeddings
|
||||||
|
|
||||||
@@ -88,17 +96,53 @@ The datatype of the embeddings
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### generateEmbeddings()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
abstract generateEmbeddings(texts, ...args): Promise<number[][] | Float32Array[] | Float64Array[]>
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **texts**: `string`[]
|
||||||
|
|
||||||
|
* ...**args**: `any`[]
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`number`[][] \| `Float32Array`[] \| `Float64Array`[]>
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### init()?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional init(): Promise<void>
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`void`>
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`EmbeddingFunction`](EmbeddingFunction.md).[`init`](EmbeddingFunction.md#init)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### ndims()
|
### ndims()
|
||||||
|
|
||||||
> **ndims**(): `number`
|
```ts
|
||||||
|
ndims(): undefined | number
|
||||||
|
```
|
||||||
|
|
||||||
The number of dimensions of the embeddings
|
The number of dimensions of the embeddings
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`number`
|
`undefined` \| `number`
|
||||||
|
|
||||||
#### Overrides
|
#### Inherited from
|
||||||
|
|
||||||
[`EmbeddingFunction`](EmbeddingFunction.md).[`ndims`](EmbeddingFunction.md#ndims)
|
[`EmbeddingFunction`](EmbeddingFunction.md).[`ndims`](EmbeddingFunction.md#ndims)
|
||||||
|
|
||||||
@@ -106,16 +150,12 @@ The number of dimensions of the embeddings
|
|||||||
|
|
||||||
### sourceField()
|
### sourceField()
|
||||||
|
|
||||||
> **sourceField**(`optionsOrDatatype`): [`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
```ts
|
||||||
|
sourceField(): [DataType<Type, any>, Map<string, EmbeddingFunction<any, FunctionOptions>>]
|
||||||
|
```
|
||||||
|
|
||||||
sourceField is used in combination with `LanceSchema` to provide a declarative data model
|
sourceField is used in combination with `LanceSchema` to provide a declarative data model
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
• **optionsOrDatatype**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
|
||||||
|
|
||||||
The options for the field or the datatype
|
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
||||||
@@ -124,7 +164,7 @@ The options for the field or the datatype
|
|||||||
|
|
||||||
lancedb.LanceSchema
|
lancedb.LanceSchema
|
||||||
|
|
||||||
#### Inherited from
|
#### Overrides
|
||||||
|
|
||||||
[`EmbeddingFunction`](EmbeddingFunction.md).[`sourceField`](EmbeddingFunction.md#sourcefield)
|
[`EmbeddingFunction`](EmbeddingFunction.md).[`sourceField`](EmbeddingFunction.md#sourcefield)
|
||||||
|
|
||||||
@@ -132,7 +172,9 @@ lancedb.LanceSchema
|
|||||||
|
|
||||||
### toJSON()
|
### toJSON()
|
||||||
|
|
||||||
> **toJSON**(): `object`
|
```ts
|
||||||
|
abstract toJSON(): Partial<M>
|
||||||
|
```
|
||||||
|
|
||||||
Convert the embedding function to a JSON object
|
Convert the embedding function to a JSON object
|
||||||
It is used to serialize the embedding function to the schema
|
It is used to serialize the embedding function to the schema
|
||||||
@@ -144,11 +186,7 @@ If it does not, the embedding function will not be able to be recreated, or coul
|
|||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`object`
|
`Partial`<`M`>
|
||||||
|
|
||||||
##### model
|
|
||||||
|
|
||||||
> **model**: `string` & `object` \| `"text-embedding-ada-002"` \| `"text-embedding-3-small"` \| `"text-embedding-3-large"`
|
|
||||||
|
|
||||||
#### Example
|
#### Example
|
||||||
|
|
||||||
@@ -167,7 +205,7 @@ class MyEmbeddingFunction extends EmbeddingFunction {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Overrides
|
#### Inherited from
|
||||||
|
|
||||||
[`EmbeddingFunction`](EmbeddingFunction.md).[`toJSON`](EmbeddingFunction.md#tojson)
|
[`EmbeddingFunction`](EmbeddingFunction.md).[`toJSON`](EmbeddingFunction.md#tojson)
|
||||||
|
|
||||||
@@ -175,13 +213,15 @@ class MyEmbeddingFunction extends EmbeddingFunction {
|
|||||||
|
|
||||||
### vectorField()
|
### vectorField()
|
||||||
|
|
||||||
> **vectorField**(`optionsOrDatatype`?): [`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
```ts
|
||||||
|
vectorField(optionsOrDatatype?): [DataType<Type, any>, Map<string, EmbeddingFunction<any, FunctionOptions>>]
|
||||||
|
```
|
||||||
|
|
||||||
vectorField is used in combination with `LanceSchema` to provide a declarative data model
|
vectorField is used in combination with `LanceSchema` to provide a declarative data model
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
• **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -6,13 +6,15 @@
|
|||||||
|
|
||||||
# Function: LanceSchema()
|
# Function: LanceSchema()
|
||||||
|
|
||||||
> **LanceSchema**(`fields`): `Schema`
|
```ts
|
||||||
|
function LanceSchema(fields): Schema
|
||||||
|
```
|
||||||
|
|
||||||
Create a schema with embedding functions.
|
Create a schema with embedding functions.
|
||||||
|
|
||||||
## Parameters
|
## Parameters
|
||||||
|
|
||||||
• **fields**: `Record`<`string`, `object` \| [`object`, `Map`<`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>>]>
|
* **fields**: `Record`<`string`, `object` \| [`object`, `Map`<`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>>]>
|
||||||
|
|
||||||
## Returns
|
## Returns
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,9 @@
|
|||||||
|
|
||||||
# Function: getRegistry()
|
# Function: getRegistry()
|
||||||
|
|
||||||
> **getRegistry**(): [`EmbeddingFunctionRegistry`](../classes/EmbeddingFunctionRegistry.md)
|
```ts
|
||||||
|
function getRegistry(): EmbeddingFunctionRegistry
|
||||||
|
```
|
||||||
|
|
||||||
Utility function to get the global instance of the registry
|
Utility function to get the global instance of the registry
|
||||||
|
|
||||||
|
|||||||
@@ -6,11 +6,13 @@
|
|||||||
|
|
||||||
# Function: register()
|
# Function: register()
|
||||||
|
|
||||||
> **register**(`name`?): (`ctor`) => `any`
|
```ts
|
||||||
|
function register(name?): (ctor) => any
|
||||||
|
```
|
||||||
|
|
||||||
## Parameters
|
## Parameters
|
||||||
|
|
||||||
• **name?**: `string`
|
* **name?**: `string`
|
||||||
|
|
||||||
## Returns
|
## Returns
|
||||||
|
|
||||||
@@ -18,7 +20,7 @@
|
|||||||
|
|
||||||
### Parameters
|
### Parameters
|
||||||
|
|
||||||
• **ctor**: `EmbeddingFunctionConstructor`<[`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>>
|
* **ctor**: `EmbeddingFunctionConstructor`<[`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>>
|
||||||
|
|
||||||
### Returns
|
### Returns
|
||||||
|
|
||||||
|
|||||||
@@ -10,16 +10,22 @@
|
|||||||
|
|
||||||
### function
|
### function
|
||||||
|
|
||||||
> **function**: [`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>
|
```ts
|
||||||
|
function: EmbeddingFunction<any, FunctionOptions>;
|
||||||
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### sourceColumn
|
### sourceColumn
|
||||||
|
|
||||||
> **sourceColumn**: `string`
|
```ts
|
||||||
|
sourceColumn: string;
|
||||||
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### vectorColumn?
|
### vectorColumn?
|
||||||
|
|
||||||
> `optional` **vectorColumn**: `string`
|
```ts
|
||||||
|
optional vectorColumn: string;
|
||||||
|
```
|
||||||
|
|||||||
@@ -1,19 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../../../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / OpenAIOptions
|
|
||||||
|
|
||||||
# Type Alias: OpenAIOptions
|
|
||||||
|
|
||||||
> **OpenAIOptions**: `object`
|
|
||||||
|
|
||||||
## Type declaration
|
|
||||||
|
|
||||||
### apiKey
|
|
||||||
|
|
||||||
> **apiKey**: `string`
|
|
||||||
|
|
||||||
### model
|
|
||||||
|
|
||||||
> **model**: `EmbeddingCreateParams`\[`"model"`\]
|
|
||||||
@@ -6,6 +6,8 @@
|
|||||||
|
|
||||||
# Type Alias: Data
|
# Type Alias: Data
|
||||||
|
|
||||||
> **Data**: `Record`<`string`, `unknown`>[] \| `TableLike`
|
```ts
|
||||||
|
type Data: Record<string, unknown>[] | TableLike;
|
||||||
|
```
|
||||||
|
|
||||||
Data type accepted by NodeJS SDK
|
Data type accepted by NodeJS SDK
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.14.0-beta.0</version>
|
<version>0.14.1-beta.1</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.14.0-beta.0</version>
|
<version>0.14.1-beta.1</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<name>LanceDB Parent</name>
|
<name>LanceDB Parent</name>
|
||||||
|
|||||||
20
node/package-lock.json
generated
20
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,14 +52,14 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-arm64-musl": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-x64-musl": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.0",
|
"@lancedb/vectordb-win32-arm64-msvc": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.14.1-beta.1"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
|
"private": false,
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
@@ -91,13 +92,13 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-x64-musl": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-arm64-musl": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.0",
|
"@lancedb/vectordb-win32-x64-msvc": "0.14.1-beta.1",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.0"
|
"@lancedb/vectordb-win32-arm64-msvc": "0.14.1-beta.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.14.0-beta.0"
|
version = "0.14.1-beta.1"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -13,11 +13,10 @@ import { Schema } from "apache-arrow";
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
import * as arrow13 from "apache-arrow-13";
|
|
||||||
import * as arrow14 from "apache-arrow-14";
|
|
||||||
import * as arrow15 from "apache-arrow-15";
|
import * as arrow15 from "apache-arrow-15";
|
||||||
import * as arrow16 from "apache-arrow-16";
|
import * as arrow16 from "apache-arrow-16";
|
||||||
import * as arrow17 from "apache-arrow-17";
|
import * as arrow17 from "apache-arrow-17";
|
||||||
|
import * as arrow18 from "apache-arrow-18";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
convertToTable,
|
convertToTable,
|
||||||
@@ -45,22 +44,16 @@ function sampleRecords(): Array<Record<string, any>> {
|
|||||||
},
|
},
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||||
"Arrow",
|
"Arrow",
|
||||||
(
|
(
|
||||||
arrow:
|
arrow: typeof arrow15 | typeof arrow16 | typeof arrow17 | typeof arrow18,
|
||||||
| typeof arrow13
|
|
||||||
| typeof arrow14
|
|
||||||
| typeof arrow15
|
|
||||||
| typeof arrow16
|
|
||||||
| typeof arrow17,
|
|
||||||
) => {
|
) => {
|
||||||
type ApacheArrow =
|
type ApacheArrow =
|
||||||
| typeof arrow13
|
|
||||||
| typeof arrow14
|
|
||||||
| typeof arrow15
|
| typeof arrow15
|
||||||
| typeof arrow16
|
| typeof arrow16
|
||||||
| typeof arrow17;
|
| typeof arrow17
|
||||||
|
| typeof arrow18;
|
||||||
const {
|
const {
|
||||||
Schema,
|
Schema,
|
||||||
Field,
|
Field,
|
||||||
@@ -498,40 +491,40 @@ describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
|||||||
|
|
||||||
describe("when using two versions of arrow", function () {
|
describe("when using two versions of arrow", function () {
|
||||||
it("can still import data", async function () {
|
it("can still import data", async function () {
|
||||||
const schema = new arrow13.Schema([
|
const schema = new arrow15.Schema([
|
||||||
new arrow13.Field("id", new arrow13.Int32()),
|
new arrow15.Field("id", new arrow15.Int32()),
|
||||||
new arrow13.Field(
|
new arrow15.Field(
|
||||||
"vector",
|
"vector",
|
||||||
new arrow13.FixedSizeList(
|
new arrow15.FixedSizeList(
|
||||||
1024,
|
1024,
|
||||||
new arrow13.Field("item", new arrow13.Float32(), true),
|
new arrow15.Field("item", new arrow15.Float32(), true),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
new arrow13.Field(
|
new arrow15.Field(
|
||||||
"struct",
|
"struct",
|
||||||
new arrow13.Struct([
|
new arrow15.Struct([
|
||||||
new arrow13.Field(
|
new arrow15.Field(
|
||||||
"nested",
|
"nested",
|
||||||
new arrow13.Dictionary(
|
new arrow15.Dictionary(
|
||||||
new arrow13.Utf8(),
|
new arrow15.Utf8(),
|
||||||
new arrow13.Int32(),
|
new arrow15.Int32(),
|
||||||
1,
|
1,
|
||||||
true,
|
true,
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
new arrow13.Field(
|
new arrow15.Field(
|
||||||
"ts_with_tz",
|
"ts_with_tz",
|
||||||
new arrow13.TimestampNanosecond("some_tz"),
|
new arrow15.TimestampNanosecond("some_tz"),
|
||||||
),
|
),
|
||||||
new arrow13.Field(
|
new arrow15.Field(
|
||||||
"ts_no_tz",
|
"ts_no_tz",
|
||||||
new arrow13.TimestampNanosecond(null),
|
new arrow15.TimestampNanosecond(null),
|
||||||
),
|
),
|
||||||
]),
|
]),
|
||||||
),
|
),
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: skip
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
||||||
]) as any;
|
]) as any;
|
||||||
schema.metadataVersion = arrow13.MetadataVersion.V5;
|
schema.metadataVersion = arrow15.MetadataVersion.V5;
|
||||||
const table = makeArrowTable([], { schema });
|
const table = makeArrowTable([], { schema });
|
||||||
|
|
||||||
const buf = await fromTableToBuffer(table);
|
const buf = await fromTableToBuffer(table);
|
||||||
@@ -543,13 +536,13 @@ describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
|||||||
// Deep equality gets hung up on some very minor unimportant differences
|
// Deep equality gets hung up on some very minor unimportant differences
|
||||||
// between arrow version 13 and 15 which isn't really what we're testing for
|
// between arrow version 13 and 15 which isn't really what we're testing for
|
||||||
// and so we do our own comparison that just checks name/type/nullability
|
// and so we do our own comparison that just checks name/type/nullability
|
||||||
function compareFields(lhs: arrow13.Field, rhs: arrow13.Field) {
|
function compareFields(lhs: arrow15.Field, rhs: arrow15.Field) {
|
||||||
expect(lhs.name).toEqual(rhs.name);
|
expect(lhs.name).toEqual(rhs.name);
|
||||||
expect(lhs.nullable).toEqual(rhs.nullable);
|
expect(lhs.nullable).toEqual(rhs.nullable);
|
||||||
expect(lhs.typeId).toEqual(rhs.typeId);
|
expect(lhs.typeId).toEqual(rhs.typeId);
|
||||||
if ("children" in lhs.type && lhs.type.children !== null) {
|
if ("children" in lhs.type && lhs.type.children !== null) {
|
||||||
const lhsChildren = lhs.type.children as arrow13.Field[];
|
const lhsChildren = lhs.type.children as arrow15.Field[];
|
||||||
lhsChildren.forEach((child: arrow13.Field, idx) => {
|
lhsChildren.forEach((child: arrow15.Field, idx) => {
|
||||||
compareFields(child, rhs.type.children[idx]);
|
compareFields(child, rhs.type.children[idx]);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,11 +12,10 @@ import * as apiArrow from "apache-arrow";
|
|||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
import * as arrow13 from "apache-arrow-13";
|
|
||||||
import * as arrow14 from "apache-arrow-14";
|
|
||||||
import * as arrow15 from "apache-arrow-15";
|
import * as arrow15 from "apache-arrow-15";
|
||||||
import * as arrow16 from "apache-arrow-16";
|
import * as arrow16 from "apache-arrow-16";
|
||||||
import * as arrow17 from "apache-arrow-17";
|
import * as arrow17 from "apache-arrow-17";
|
||||||
|
import * as arrow18 from "apache-arrow-18";
|
||||||
|
|
||||||
import * as tmp from "tmp";
|
import * as tmp from "tmp";
|
||||||
|
|
||||||
@@ -24,154 +23,144 @@ import { connect } from "../lancedb";
|
|||||||
import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
|
import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
|
||||||
import { getRegistry, register } from "../lancedb/embedding/registry";
|
import { getRegistry, register } from "../lancedb/embedding/registry";
|
||||||
|
|
||||||
describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
describe.each([arrow15, arrow16, arrow17, arrow18])("LanceSchema", (arrow) => {
|
||||||
"LanceSchema",
|
test("should preserve input order", async () => {
|
||||||
(arrow) => {
|
const schema = LanceSchema({
|
||||||
test("should preserve input order", async () => {
|
id: new arrow.Int32(),
|
||||||
const schema = LanceSchema({
|
text: new arrow.Utf8(),
|
||||||
id: new arrow.Int32(),
|
vector: new arrow.Float32(),
|
||||||
text: new arrow.Utf8(),
|
|
||||||
vector: new arrow.Float32(),
|
|
||||||
});
|
|
||||||
expect(schema.fields.map((x) => x.name)).toEqual([
|
|
||||||
"id",
|
|
||||||
"text",
|
|
||||||
"vector",
|
|
||||||
]);
|
|
||||||
});
|
});
|
||||||
},
|
expect(schema.fields.map((x) => x.name)).toEqual(["id", "text", "vector"]);
|
||||||
);
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
|
||||||
"Registry",
|
let tmpDir: tmp.DirResult;
|
||||||
(arrow) => {
|
beforeEach(() => {
|
||||||
let tmpDir: tmp.DirResult;
|
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||||
beforeEach(() => {
|
});
|
||||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
|
||||||
});
|
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
tmpDir.removeCallback();
|
tmpDir.removeCallback();
|
||||||
getRegistry().reset();
|
getRegistry().reset();
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should register a new item to the registry", async () => {
|
it("should register a new item to the registry", async () => {
|
||||||
@register("mock-embedding")
|
@register("mock-embedding")
|
||||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||||
toJSON(): object {
|
toJSON(): object {
|
||||||
return {
|
return {
|
||||||
someText: "hello",
|
someText: "hello",
|
||||||
};
|
};
|
||||||
}
|
|
||||||
constructor() {
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
ndims() {
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
embeddingDataType() {
|
|
||||||
return new arrow.Float32() as apiArrow.Float;
|
|
||||||
}
|
|
||||||
async computeSourceEmbeddings(data: string[]) {
|
|
||||||
return data.map(() => [1, 2, 3]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
constructor() {
|
||||||
const func = getRegistry()
|
super();
|
||||||
.get<MockEmbeddingFunction>("mock-embedding")!
|
|
||||||
.create();
|
|
||||||
|
|
||||||
const schema = LanceSchema({
|
|
||||||
id: new arrow.Int32(),
|
|
||||||
text: func.sourceField(new arrow.Utf8() as apiArrow.DataType),
|
|
||||||
vector: func.vectorField(),
|
|
||||||
});
|
|
||||||
|
|
||||||
const db = await connect(tmpDir.name);
|
|
||||||
const table = await db.createTable(
|
|
||||||
"test",
|
|
||||||
[
|
|
||||||
{ id: 1, text: "hello" },
|
|
||||||
{ id: 2, text: "world" },
|
|
||||||
],
|
|
||||||
{ schema },
|
|
||||||
);
|
|
||||||
const expected = [
|
|
||||||
[1, 2, 3],
|
|
||||||
[1, 2, 3],
|
|
||||||
];
|
|
||||||
const actual = await table.query().toArrow();
|
|
||||||
const vectors = actual.getChild("vector")!.toArray();
|
|
||||||
expect(JSON.parse(JSON.stringify(vectors))).toEqual(
|
|
||||||
JSON.parse(JSON.stringify(expected)),
|
|
||||||
);
|
|
||||||
});
|
|
||||||
test("should error if registering with the same name", async () => {
|
|
||||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
|
||||||
toJSON(): object {
|
|
||||||
return {
|
|
||||||
someText: "hello",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
constructor() {
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
ndims() {
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
embeddingDataType() {
|
|
||||||
return new arrow.Float32() as apiArrow.Float;
|
|
||||||
}
|
|
||||||
async computeSourceEmbeddings(data: string[]) {
|
|
||||||
return data.map(() => [1, 2, 3]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
register("mock-embedding")(MockEmbeddingFunction);
|
ndims() {
|
||||||
expect(() => register("mock-embedding")(MockEmbeddingFunction)).toThrow(
|
return 3;
|
||||||
'Embedding function with alias "mock-embedding" already exists',
|
|
||||||
);
|
|
||||||
});
|
|
||||||
test("schema should contain correct metadata", async () => {
|
|
||||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
|
||||||
toJSON(): object {
|
|
||||||
return {
|
|
||||||
someText: "hello",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
constructor() {
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
ndims() {
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
embeddingDataType() {
|
|
||||||
return new arrow.Float32() as apiArrow.Float;
|
|
||||||
}
|
|
||||||
async computeSourceEmbeddings(data: string[]) {
|
|
||||||
return data.map(() => [1, 2, 3]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
const func = new MockEmbeddingFunction();
|
embeddingDataType() {
|
||||||
|
return new arrow.Float32() as apiArrow.Float;
|
||||||
|
}
|
||||||
|
async computeSourceEmbeddings(data: string[]) {
|
||||||
|
return data.map(() => [1, 2, 3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const schema = LanceSchema({
|
const func = getRegistry()
|
||||||
id: new arrow.Int32(),
|
.get<MockEmbeddingFunction>("mock-embedding")!
|
||||||
text: func.sourceField(new arrow.Utf8() as apiArrow.DataType),
|
.create();
|
||||||
vector: func.vectorField(),
|
|
||||||
});
|
const schema = LanceSchema({
|
||||||
const expectedMetadata = new Map<string, string>([
|
id: new arrow.Int32(),
|
||||||
[
|
text: func.sourceField(new arrow.Utf8() as apiArrow.DataType),
|
||||||
"embedding_functions",
|
vector: func.vectorField(),
|
||||||
JSON.stringify([
|
|
||||||
{
|
|
||||||
sourceColumn: "text",
|
|
||||||
vectorColumn: "vector",
|
|
||||||
name: "MockEmbeddingFunction",
|
|
||||||
model: { someText: "hello" },
|
|
||||||
},
|
|
||||||
]),
|
|
||||||
],
|
|
||||||
]);
|
|
||||||
expect(schema.metadata).toEqual(expectedMetadata);
|
|
||||||
});
|
});
|
||||||
},
|
|
||||||
);
|
const db = await connect(tmpDir.name);
|
||||||
|
const table = await db.createTable(
|
||||||
|
"test",
|
||||||
|
[
|
||||||
|
{ id: 1, text: "hello" },
|
||||||
|
{ id: 2, text: "world" },
|
||||||
|
],
|
||||||
|
{ schema },
|
||||||
|
);
|
||||||
|
const expected = [
|
||||||
|
[1, 2, 3],
|
||||||
|
[1, 2, 3],
|
||||||
|
];
|
||||||
|
const actual = await table.query().toArrow();
|
||||||
|
const vectors = actual.getChild("vector")!.toArray();
|
||||||
|
expect(JSON.parse(JSON.stringify(vectors))).toEqual(
|
||||||
|
JSON.parse(JSON.stringify(expected)),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
test("should error if registering with the same name", async () => {
|
||||||
|
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||||
|
toJSON(): object {
|
||||||
|
return {
|
||||||
|
someText: "hello",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
constructor() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
ndims() {
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
embeddingDataType() {
|
||||||
|
return new arrow.Float32() as apiArrow.Float;
|
||||||
|
}
|
||||||
|
async computeSourceEmbeddings(data: string[]) {
|
||||||
|
return data.map(() => [1, 2, 3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
register("mock-embedding")(MockEmbeddingFunction);
|
||||||
|
expect(() => register("mock-embedding")(MockEmbeddingFunction)).toThrow(
|
||||||
|
'Embedding function with alias "mock-embedding" already exists',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
test("schema should contain correct metadata", async () => {
|
||||||
|
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||||
|
toJSON(): object {
|
||||||
|
return {
|
||||||
|
someText: "hello",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
constructor() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
ndims() {
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
embeddingDataType() {
|
||||||
|
return new arrow.Float32() as apiArrow.Float;
|
||||||
|
}
|
||||||
|
async computeSourceEmbeddings(data: string[]) {
|
||||||
|
return data.map(() => [1, 2, 3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const func = new MockEmbeddingFunction();
|
||||||
|
|
||||||
|
const schema = LanceSchema({
|
||||||
|
id: new arrow.Int32(),
|
||||||
|
text: func.sourceField(new arrow.Utf8() as apiArrow.DataType),
|
||||||
|
vector: func.vectorField(),
|
||||||
|
});
|
||||||
|
const expectedMetadata = new Map<string, string>([
|
||||||
|
[
|
||||||
|
"embedding_functions",
|
||||||
|
JSON.stringify([
|
||||||
|
{
|
||||||
|
sourceColumn: "text",
|
||||||
|
vectorColumn: "vector",
|
||||||
|
name: "MockEmbeddingFunction",
|
||||||
|
model: { someText: "hello" },
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
expect(schema.metadata).toEqual(expectedMetadata);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -16,11 +16,10 @@ import * as fs from "fs";
|
|||||||
import * as path from "path";
|
import * as path from "path";
|
||||||
import * as tmp from "tmp";
|
import * as tmp from "tmp";
|
||||||
|
|
||||||
import * as arrow13 from "apache-arrow-13";
|
|
||||||
import * as arrow14 from "apache-arrow-14";
|
|
||||||
import * as arrow15 from "apache-arrow-15";
|
import * as arrow15 from "apache-arrow-15";
|
||||||
import * as arrow16 from "apache-arrow-16";
|
import * as arrow16 from "apache-arrow-16";
|
||||||
import * as arrow17 from "apache-arrow-17";
|
import * as arrow17 from "apache-arrow-17";
|
||||||
|
import * as arrow18 from "apache-arrow-18";
|
||||||
|
|
||||||
import { Table, connect } from "../lancedb";
|
import { Table, connect } from "../lancedb";
|
||||||
import {
|
import {
|
||||||
@@ -44,7 +43,7 @@ import {
|
|||||||
} from "../lancedb/embedding";
|
} from "../lancedb/embedding";
|
||||||
import { Index } from "../lancedb/indices";
|
import { Index } from "../lancedb/indices";
|
||||||
|
|
||||||
describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||||
"Given a table",
|
"Given a table",
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
(arrow: any) => {
|
(arrow: any) => {
|
||||||
@@ -52,11 +51,10 @@ describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
|||||||
let table: Table;
|
let table: Table;
|
||||||
|
|
||||||
const schema:
|
const schema:
|
||||||
| import("apache-arrow-13").Schema
|
|
||||||
| import("apache-arrow-14").Schema
|
|
||||||
| import("apache-arrow-15").Schema
|
| import("apache-arrow-15").Schema
|
||||||
| import("apache-arrow-16").Schema
|
| import("apache-arrow-16").Schema
|
||||||
| import("apache-arrow-17").Schema = new arrow.Schema([
|
| import("apache-arrow-17").Schema
|
||||||
|
| import("apache-arrow-18").Schema = new arrow.Schema([
|
||||||
new arrow.Field("id", new arrow.Float64(), true),
|
new arrow.Field("id", new arrow.Float64(), true),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@@ -569,6 +567,15 @@ describe("When creating an index", () => {
|
|||||||
// TODO: Verify parameters when we can load index config as part of list indices
|
// TODO: Verify parameters when we can load index config as part of list indices
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should be able to create 4bit IVF_PQ", async () => {
|
||||||
|
await tbl.createIndex("vec", {
|
||||||
|
config: Index.ivfPq({
|
||||||
|
numPartitions: 10,
|
||||||
|
numBits: 4,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it("should allow me to replace (or not) an existing index", async () => {
|
it("should allow me to replace (or not) an existing index", async () => {
|
||||||
await tbl.createIndex("id");
|
await tbl.createIndex("id");
|
||||||
// Default is replace=true
|
// Default is replace=true
|
||||||
@@ -825,6 +832,18 @@ describe("schema evolution", function () {
|
|||||||
new Field("price", new Float64(), true),
|
new Field("price", new Float64(), true),
|
||||||
]);
|
]);
|
||||||
expect(await table.schema()).toEqual(expectedSchema);
|
expect(await table.schema()).toEqual(expectedSchema);
|
||||||
|
|
||||||
|
await table.alterColumns([{ path: "new_id", dataType: "int32" }]);
|
||||||
|
const expectedSchema2 = new Schema([
|
||||||
|
new Field("new_id", new Int32(), true),
|
||||||
|
new Field(
|
||||||
|
"vector",
|
||||||
|
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||||
|
true,
|
||||||
|
),
|
||||||
|
new Field("price", new Float64(), true),
|
||||||
|
]);
|
||||||
|
expect(await table.schema()).toEqual(expectedSchema2);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("can drop a column from the schema", async function () {
|
it("can drop a column from the schema", async function () {
|
||||||
@@ -927,7 +946,7 @@ describe("when optimizing a dataset", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
|
describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||||
"when optimizing a dataset",
|
"when optimizing a dataset",
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
(arrow: any) => {
|
(arrow: any) => {
|
||||||
|
|||||||
@@ -116,6 +116,26 @@ test("basic table examples", async () => {
|
|||||||
await tbl.add(data);
|
await tbl.add(data);
|
||||||
// --8<-- [end:add_data]
|
// --8<-- [end:add_data]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// --8<-- [start:add_columns]
|
||||||
|
await tbl.addColumns([{ name: "double_price", valueSql: "price * 2" }]);
|
||||||
|
// --8<-- [end:add_columns]
|
||||||
|
// --8<-- [start:alter_columns]
|
||||||
|
await tbl.alterColumns([
|
||||||
|
{
|
||||||
|
path: "double_price",
|
||||||
|
rename: "dbl_price",
|
||||||
|
dataType: "float",
|
||||||
|
nullable: true,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
// --8<-- [end:alter_columns]
|
||||||
|
// --8<-- [start:drop_columns]
|
||||||
|
await tbl.dropColumns(["dbl_price"]);
|
||||||
|
// --8<-- [end:drop_columns]
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// --8<-- [start:vector_search]
|
// --8<-- [start:vector_search]
|
||||||
const res = await tbl.search([100, 100]).limit(2).toArray();
|
const res = await tbl.search([100, 100]).limit(2).toArray();
|
||||||
|
|||||||
@@ -47,6 +47,16 @@ export interface IvfPqOptions {
|
|||||||
*/
|
*/
|
||||||
numSubVectors?: number;
|
numSubVectors?: number;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of bits per sub-vector.
|
||||||
|
*
|
||||||
|
* This value controls how much each subvector is compressed. The more bits the more
|
||||||
|
* accurate the index will be but the slower search. The default is 8 bits.
|
||||||
|
*
|
||||||
|
* The number of bits must be 4 or 8.
|
||||||
|
*/
|
||||||
|
numBits?: number;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Distance type to use to build the index.
|
* Distance type to use to build the index.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.1-beta.1",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
152
nodejs/package-lock.json
generated
152
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.13.0",
|
"version": "0.14.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.13.0",
|
"version": "0.14.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -31,11 +31,10 @@
|
|||||||
"@types/jest": "^29.1.2",
|
"@types/jest": "^29.1.2",
|
||||||
"@types/node": "^22.7.4",
|
"@types/node": "^22.7.4",
|
||||||
"@types/tmp": "^0.2.6",
|
"@types/tmp": "^0.2.6",
|
||||||
"apache-arrow-13": "npm:apache-arrow@13.0.0",
|
|
||||||
"apache-arrow-14": "npm:apache-arrow@14.0.0",
|
|
||||||
"apache-arrow-15": "npm:apache-arrow@15.0.0",
|
"apache-arrow-15": "npm:apache-arrow@15.0.0",
|
||||||
"apache-arrow-16": "npm:apache-arrow@16.0.0",
|
"apache-arrow-16": "npm:apache-arrow@16.0.0",
|
||||||
"apache-arrow-17": "npm:apache-arrow@17.0.0",
|
"apache-arrow-17": "npm:apache-arrow@17.0.0",
|
||||||
|
"apache-arrow-18": "npm:apache-arrow@18.0.0",
|
||||||
"eslint": "^8.57.0",
|
"eslint": "^8.57.0",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
"shx": "^0.3.4",
|
"shx": "^0.3.4",
|
||||||
@@ -54,7 +53,7 @@
|
|||||||
"openai": "^4.29.2"
|
"openai": "^4.29.2"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"apache-arrow": ">=13.0.0 <=17.0.0"
|
"apache-arrow": ">=15.0.0 <=18.1.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@75lb/deep-merge": {
|
"node_modules/@75lb/deep-merge": {
|
||||||
@@ -5146,12 +5145,6 @@
|
|||||||
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
|
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
|
||||||
"devOptional": true
|
"devOptional": true
|
||||||
},
|
},
|
||||||
"node_modules/@types/pad-left": {
|
|
||||||
"version": "2.1.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/pad-left/-/pad-left-2.1.1.tgz",
|
|
||||||
"integrity": "sha512-Xd22WCRBydkGSApl5Bw0PhAOHKSVjNL3E3AwzKaps96IMraPqy5BvZIsBVK6JLwdybUzjHnuWVwpDd0JjTfHXA==",
|
|
||||||
"dev": true
|
|
||||||
},
|
|
||||||
"node_modules/@types/semver": {
|
"node_modules/@types/semver": {
|
||||||
"version": "7.5.6",
|
"version": "7.5.6",
|
||||||
"resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.6.tgz",
|
"resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.6.tgz",
|
||||||
@@ -5341,74 +5334,6 @@
|
|||||||
"arrow2csv": "bin/arrow2csv.cjs"
|
"arrow2csv": "bin/arrow2csv.cjs"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/apache-arrow-13": {
|
|
||||||
"name": "apache-arrow",
|
|
||||||
"version": "13.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-13.0.0.tgz",
|
|
||||||
"integrity": "sha512-3gvCX0GDawWz6KFNC28p65U+zGh/LZ6ZNKWNu74N6CQlKzxeoWHpi4CgEQsgRSEMuyrIIXi1Ea2syja7dwcHvw==",
|
|
||||||
"dev": true,
|
|
||||||
"dependencies": {
|
|
||||||
"@types/command-line-args": "5.2.0",
|
|
||||||
"@types/command-line-usage": "5.0.2",
|
|
||||||
"@types/node": "20.3.0",
|
|
||||||
"@types/pad-left": "2.1.1",
|
|
||||||
"command-line-args": "5.2.1",
|
|
||||||
"command-line-usage": "7.0.1",
|
|
||||||
"flatbuffers": "23.5.26",
|
|
||||||
"json-bignum": "^0.0.3",
|
|
||||||
"pad-left": "^2.1.0",
|
|
||||||
"tslib": "^2.5.3"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"arrow2csv": "bin/arrow2csv.js"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/apache-arrow-13/node_modules/@types/command-line-args": {
|
|
||||||
"version": "5.2.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.0.tgz",
|
|
||||||
"integrity": "sha512-UuKzKpJJ/Ief6ufIaIzr3A/0XnluX7RvFgwkV89Yzvm77wCh1kFaFmqN8XEnGcN62EuHdedQjEMb8mYxFLGPyA==",
|
|
||||||
"dev": true
|
|
||||||
},
|
|
||||||
"node_modules/apache-arrow-13/node_modules/@types/node": {
|
|
||||||
"version": "20.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
|
|
||||||
"integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ==",
|
|
||||||
"dev": true
|
|
||||||
},
|
|
||||||
"node_modules/apache-arrow-14": {
|
|
||||||
"name": "apache-arrow",
|
|
||||||
"version": "14.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-14.0.0.tgz",
|
|
||||||
"integrity": "sha512-9cKE24YxkaqAZWJddrVnjUJMLwq6CokOjK+AHpm145rMJNsBZXQkzqouemQyEX0+/iHYRnGym6X6ZgNcHHrcWA==",
|
|
||||||
"dev": true,
|
|
||||||
"dependencies": {
|
|
||||||
"@types/command-line-args": "5.2.0",
|
|
||||||
"@types/command-line-usage": "5.0.2",
|
|
||||||
"@types/node": "20.3.0",
|
|
||||||
"@types/pad-left": "2.1.1",
|
|
||||||
"command-line-args": "5.2.1",
|
|
||||||
"command-line-usage": "7.0.1",
|
|
||||||
"flatbuffers": "23.5.26",
|
|
||||||
"json-bignum": "^0.0.3",
|
|
||||||
"pad-left": "^2.1.0",
|
|
||||||
"tslib": "^2.5.3"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"arrow2csv": "bin/arrow2csv.js"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/apache-arrow-14/node_modules/@types/command-line-args": {
|
|
||||||
"version": "5.2.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.0.tgz",
|
|
||||||
"integrity": "sha512-UuKzKpJJ/Ief6ufIaIzr3A/0XnluX7RvFgwkV89Yzvm77wCh1kFaFmqN8XEnGcN62EuHdedQjEMb8mYxFLGPyA==",
|
|
||||||
"dev": true
|
|
||||||
},
|
|
||||||
"node_modules/apache-arrow-14/node_modules/@types/node": {
|
|
||||||
"version": "20.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
|
|
||||||
"integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ==",
|
|
||||||
"dev": true
|
|
||||||
},
|
|
||||||
"node_modules/apache-arrow-15": {
|
"node_modules/apache-arrow-15": {
|
||||||
"name": "apache-arrow",
|
"name": "apache-arrow",
|
||||||
"version": "15.0.0",
|
"version": "15.0.0",
|
||||||
@@ -5529,6 +5454,54 @@
|
|||||||
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
|
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/apache-arrow-18": {
|
||||||
|
"name": "apache-arrow",
|
||||||
|
"version": "18.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-18.0.0.tgz",
|
||||||
|
"integrity": "sha512-gFlPaqN9osetbB83zC29AbbZqGiCuFH1vyyPseJ+B7SIbfBtESV62mMT/CkiIt77W6ykC/nTWFzTXFs0Uldg4g==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"@swc/helpers": "^0.5.11",
|
||||||
|
"@types/command-line-args": "^5.2.3",
|
||||||
|
"@types/command-line-usage": "^5.0.4",
|
||||||
|
"@types/node": "^20.13.0",
|
||||||
|
"command-line-args": "^5.2.1",
|
||||||
|
"command-line-usage": "^7.0.1",
|
||||||
|
"flatbuffers": "^24.3.25",
|
||||||
|
"json-bignum": "^0.0.3",
|
||||||
|
"tslib": "^2.6.2"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"arrow2csv": "bin/arrow2csv.js"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/apache-arrow-18/node_modules/@types/command-line-usage": {
|
||||||
|
"version": "5.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/command-line-usage/-/command-line-usage-5.0.4.tgz",
|
||||||
|
"integrity": "sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==",
|
||||||
|
"dev": true
|
||||||
|
},
|
||||||
|
"node_modules/apache-arrow-18/node_modules/@types/node": {
|
||||||
|
"version": "20.17.9",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.9.tgz",
|
||||||
|
"integrity": "sha512-0JOXkRyLanfGPE2QRCwgxhzlBAvaRdCNMcvbd7jFfpmD4eEXll7LRwy5ymJmyeZqk7Nh7eD2LeUyQ68BbndmXw==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"undici-types": "~6.19.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/apache-arrow-18/node_modules/flatbuffers": {
|
||||||
|
"version": "24.3.25",
|
||||||
|
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-24.3.25.tgz",
|
||||||
|
"integrity": "sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ==",
|
||||||
|
"dev": true
|
||||||
|
},
|
||||||
|
"node_modules/apache-arrow-18/node_modules/undici-types": {
|
||||||
|
"version": "6.19.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
|
||||||
|
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
|
||||||
|
"dev": true
|
||||||
|
},
|
||||||
"node_modules/apache-arrow/node_modules/@types/node": {
|
"node_modules/apache-arrow/node_modules/@types/node": {
|
||||||
"version": "20.16.10",
|
"version": "20.16.10",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz",
|
||||||
@@ -8533,18 +8506,6 @@
|
|||||||
"integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==",
|
"integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"node_modules/pad-left": {
|
|
||||||
"version": "2.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/pad-left/-/pad-left-2.1.0.tgz",
|
|
||||||
"integrity": "sha512-HJxs9K9AztdIQIAIa/OIazRAUW/L6B9hbQDxO4X07roW3eo9XqZc2ur9bn1StH9CnbbI9EgvejHQX7CBpCF1QA==",
|
|
||||||
"dev": true,
|
|
||||||
"dependencies": {
|
|
||||||
"repeat-string": "^1.5.4"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=0.10.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/parent-module": {
|
"node_modules/parent-module": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz",
|
||||||
@@ -8885,15 +8846,6 @@
|
|||||||
"resolved": "https://registry.npmjs.org/reflect-metadata/-/reflect-metadata-0.2.2.tgz",
|
"resolved": "https://registry.npmjs.org/reflect-metadata/-/reflect-metadata-0.2.2.tgz",
|
||||||
"integrity": "sha512-urBwgfrvVP/eAyXx4hluJivBKzuEbSQs9rKWCrCkbSxNv8mxPcUZKeuoF3Uy4mJl3Lwprp6yy5/39VWigZ4K6Q=="
|
"integrity": "sha512-urBwgfrvVP/eAyXx4hluJivBKzuEbSQs9rKWCrCkbSxNv8mxPcUZKeuoF3Uy4mJl3Lwprp6yy5/39VWigZ4K6Q=="
|
||||||
},
|
},
|
||||||
"node_modules/repeat-string": {
|
|
||||||
"version": "1.6.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz",
|
|
||||||
"integrity": "sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w==",
|
|
||||||
"dev": true,
|
|
||||||
"engines": {
|
|
||||||
"node": ">=0.10"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/require-directory": {
|
"node_modules/require-directory": {
|
||||||
"version": "2.1.1",
|
"version": "2.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
|
||||||
|
|||||||
@@ -10,7 +10,8 @@
|
|||||||
"vector database",
|
"vector database",
|
||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"version": "0.14.0-beta.0",
|
"private": false,
|
||||||
|
"version": "0.14.1-beta.1",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
@@ -30,7 +31,8 @@
|
|||||||
"aarch64-unknown-linux-gnu",
|
"aarch64-unknown-linux-gnu",
|
||||||
"x86_64-unknown-linux-musl",
|
"x86_64-unknown-linux-musl",
|
||||||
"aarch64-unknown-linux-musl",
|
"aarch64-unknown-linux-musl",
|
||||||
"x86_64-pc-windows-msvc"
|
"x86_64-pc-windows-msvc",
|
||||||
|
"aarch64-pc-windows-msvc"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -46,11 +48,10 @@
|
|||||||
"@types/jest": "^29.1.2",
|
"@types/jest": "^29.1.2",
|
||||||
"@types/node": "^22.7.4",
|
"@types/node": "^22.7.4",
|
||||||
"@types/tmp": "^0.2.6",
|
"@types/tmp": "^0.2.6",
|
||||||
"apache-arrow-13": "npm:apache-arrow@13.0.0",
|
|
||||||
"apache-arrow-14": "npm:apache-arrow@14.0.0",
|
|
||||||
"apache-arrow-15": "npm:apache-arrow@15.0.0",
|
"apache-arrow-15": "npm:apache-arrow@15.0.0",
|
||||||
"apache-arrow-16": "npm:apache-arrow@16.0.0",
|
"apache-arrow-16": "npm:apache-arrow@16.0.0",
|
||||||
"apache-arrow-17": "npm:apache-arrow@17.0.0",
|
"apache-arrow-17": "npm:apache-arrow@17.0.0",
|
||||||
|
"apache-arrow-18": "npm:apache-arrow@18.0.0",
|
||||||
"eslint": "^8.57.0",
|
"eslint": "^8.57.0",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
"shx": "^0.3.4",
|
"shx": "^0.3.4",
|
||||||
@@ -77,6 +78,7 @@
|
|||||||
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
|
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
|
||||||
"lint-ci": "biome ci .",
|
"lint-ci": "biome ci .",
|
||||||
"docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
|
"docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
|
||||||
|
"postdocs": "node typedoc_post_process.js",
|
||||||
"lint": "biome check . && biome format .",
|
"lint": "biome check . && biome format .",
|
||||||
"lint-fix": "biome check --write . && biome format --write .",
|
"lint-fix": "biome check --write . && biome format --write .",
|
||||||
"prepublishOnly": "napi prepublish -t npm",
|
"prepublishOnly": "napi prepublish -t npm",
|
||||||
@@ -93,6 +95,6 @@
|
|||||||
"openai": "^4.29.2"
|
"openai": "^4.29.2"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"apache-arrow": ">=13.0.0 <=17.0.0"
|
"apache-arrow": ">=15.0.0 <=18.1.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ impl Index {
|
|||||||
distance_type: Option<String>,
|
distance_type: Option<String>,
|
||||||
num_partitions: Option<u32>,
|
num_partitions: Option<u32>,
|
||||||
num_sub_vectors: Option<u32>,
|
num_sub_vectors: Option<u32>,
|
||||||
|
num_bits: Option<u32>,
|
||||||
max_iterations: Option<u32>,
|
max_iterations: Option<u32>,
|
||||||
sample_rate: Option<u32>,
|
sample_rate: Option<u32>,
|
||||||
) -> napi::Result<Self> {
|
) -> napi::Result<Self> {
|
||||||
@@ -59,6 +60,9 @@ impl Index {
|
|||||||
if let Some(num_sub_vectors) = num_sub_vectors {
|
if let Some(num_sub_vectors) = num_sub_vectors {
|
||||||
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
|
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
|
||||||
}
|
}
|
||||||
|
if let Some(num_bits) = num_bits {
|
||||||
|
ivf_pq_builder = ivf_pq_builder.num_bits(num_bits);
|
||||||
|
}
|
||||||
if let Some(max_iterations) = max_iterations {
|
if let Some(max_iterations) = max_iterations {
|
||||||
ivf_pq_builder = ivf_pq_builder.max_iterations(max_iterations);
|
ivf_pq_builder = ivf_pq_builder.max_iterations(max_iterations);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -178,16 +178,20 @@ impl Table {
|
|||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn alter_columns(&self, alterations: Vec<ColumnAlteration>) -> napi::Result<()> {
|
pub async fn alter_columns(&self, alterations: Vec<ColumnAlteration>) -> napi::Result<()> {
|
||||||
for alteration in &alterations {
|
for alteration in &alterations {
|
||||||
if alteration.rename.is_none() && alteration.nullable.is_none() {
|
if alteration.rename.is_none()
|
||||||
|
&& alteration.nullable.is_none()
|
||||||
|
&& alteration.data_type.is_none()
|
||||||
|
{
|
||||||
return Err(napi::Error::from_reason(
|
return Err(napi::Error::from_reason(
|
||||||
"Alteration must have a 'rename' or 'nullable' field.",
|
"Alteration must have a 'rename', 'dataType', or 'nullable' field.",
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let alterations = alterations
|
let alterations = alterations
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(LanceColumnAlteration::from)
|
.map(LanceColumnAlteration::try_from)
|
||||||
.collect::<Vec<_>>();
|
.collect::<std::result::Result<Vec<_>, String>>()
|
||||||
|
.map_err(napi::Error::from_reason)?;
|
||||||
|
|
||||||
self.inner_ref()?
|
self.inner_ref()?
|
||||||
.alter_columns(&alterations)
|
.alter_columns(&alterations)
|
||||||
@@ -433,24 +437,43 @@ pub struct ColumnAlteration {
|
|||||||
/// The new name of the column. If not provided then the name will not be changed.
|
/// The new name of the column. If not provided then the name will not be changed.
|
||||||
/// This must be distinct from the names of all other columns in the table.
|
/// This must be distinct from the names of all other columns in the table.
|
||||||
pub rename: Option<String>,
|
pub rename: Option<String>,
|
||||||
|
/// A new data type for the column. If not provided then the data type will not be changed.
|
||||||
|
/// Changing data types is limited to casting to the same general type. For example, these
|
||||||
|
/// changes are valid:
|
||||||
|
/// * `int32` -> `int64` (integers)
|
||||||
|
/// * `double` -> `float` (floats)
|
||||||
|
/// * `string` -> `large_string` (strings)
|
||||||
|
/// But these changes are not:
|
||||||
|
/// * `int32` -> `double` (mix integers and floats)
|
||||||
|
/// * `string` -> `int32` (mix strings and integers)
|
||||||
|
pub data_type: Option<String>,
|
||||||
/// Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
/// Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
||||||
pub nullable: Option<bool>,
|
pub nullable: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<ColumnAlteration> for LanceColumnAlteration {
|
impl TryFrom<ColumnAlteration> for LanceColumnAlteration {
|
||||||
fn from(js: ColumnAlteration) -> Self {
|
type Error = String;
|
||||||
|
fn try_from(js: ColumnAlteration) -> std::result::Result<Self, Self::Error> {
|
||||||
let ColumnAlteration {
|
let ColumnAlteration {
|
||||||
path,
|
path,
|
||||||
rename,
|
rename,
|
||||||
nullable,
|
nullable,
|
||||||
|
data_type,
|
||||||
} = js;
|
} = js;
|
||||||
Self {
|
let data_type = if let Some(data_type) = data_type {
|
||||||
|
Some(
|
||||||
|
lancedb::utils::string_to_datatype(&data_type)
|
||||||
|
.ok_or_else(|| format!("Invalid data type: {}", data_type))?,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
Ok(Self {
|
||||||
path,
|
path,
|
||||||
rename,
|
rename,
|
||||||
nullable,
|
nullable,
|
||||||
// TODO: wire up this field
|
data_type,
|
||||||
data_type: None,
|
})
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,5 +8,6 @@
|
|||||||
"lancedb/native.d.ts:Table"
|
"lancedb/native.d.ts:Table"
|
||||||
],
|
],
|
||||||
"useHTMLEncodedBrackets": true,
|
"useHTMLEncodedBrackets": true,
|
||||||
|
"useCodeBlocks": true,
|
||||||
"disableSources": true
|
"disableSources": true
|
||||||
}
|
}
|
||||||
|
|||||||
63
nodejs/typedoc_post_process.js
Normal file
63
nodejs/typedoc_post_process.js
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
const fs = require("fs");
|
||||||
|
const path = require("path");
|
||||||
|
|
||||||
|
// Read all files in the directory
|
||||||
|
function processDirectory(directoryPath) {
|
||||||
|
fs.readdir(directoryPath, { withFileTypes: true }, (err, files) => {
|
||||||
|
if (err) {
|
||||||
|
return console.error("Unable to scan directory: " + err);
|
||||||
|
}
|
||||||
|
|
||||||
|
files.forEach((file) => {
|
||||||
|
const filePath = path.join(directoryPath, file.name);
|
||||||
|
|
||||||
|
if (file.isDirectory()) {
|
||||||
|
// Recursively process subdirectory
|
||||||
|
processDirectory(filePath);
|
||||||
|
} else if (file.isFile()) {
|
||||||
|
// Read each file
|
||||||
|
fs.readFile(filePath, "utf8", (err, data) => {
|
||||||
|
if (err) {
|
||||||
|
return console.error("Unable to read file: " + err);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process the file content
|
||||||
|
const processedData = processContents(data);
|
||||||
|
|
||||||
|
// Write the processed content back to the file
|
||||||
|
fs.writeFile(filePath, processedData, "utf8", (err) => {
|
||||||
|
if (err) {
|
||||||
|
return console.error("Unable to write file: " + err);
|
||||||
|
}
|
||||||
|
console.log(`Processed file: ${filePath}`);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function processContents(contents) {
|
||||||
|
// This changes the parameters section to put the parameter description on
|
||||||
|
// the same line as the bullet with the parameter name and type.
|
||||||
|
return contents.replace(/(## Parameters[\s\S]*?)(?=##|$)/g, (match) => {
|
||||||
|
let lines = match
|
||||||
|
.split("\n")
|
||||||
|
.map((line) => line.trim())
|
||||||
|
|
||||||
|
.filter((line) => line !== "")
|
||||||
|
.map((line) => {
|
||||||
|
if (line.startsWith("##")) {
|
||||||
|
return line;
|
||||||
|
} else if (line.startsWith("•")) {
|
||||||
|
return "\n*" + line.substring(1);
|
||||||
|
} else {
|
||||||
|
return " " + line;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return lines.join("\n") + "\n\n";
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start processing from the root directory
|
||||||
|
processDirectory("../docs/src/js");
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.17.0-beta.2"
|
current_version = "0.17.1-beta.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.17.0-beta.2"
|
version = "0.17.1-beta.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -14,23 +14,18 @@ name = "_lancedb"
|
|||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arrow = { version = "52.1", features = ["pyarrow"] }
|
arrow = { version = "53.2", features = ["pyarrow"] }
|
||||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
pyo3 = { version = "0.21", features = [
|
pyo3 = { version = "0.22.2", features = [
|
||||||
"extension-module",
|
"extension-module",
|
||||||
"abi3-py39",
|
"abi3-py39",
|
||||||
"gil-refs"
|
"gil-refs"
|
||||||
] }
|
] }
|
||||||
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
|
pyo3-async-runtimes = { version = "0.22", features = ["attributes", "tokio-runtime"] }
|
||||||
# pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
|
|
||||||
pyo3-asyncio-0-21 = { version = "0.21.0", features = [
|
|
||||||
"attributes",
|
|
||||||
"tokio-runtime"
|
|
||||||
] }
|
|
||||||
pin-project = "1.1.5"
|
pin-project = "1.1.5"
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
tokio = { version = "1.36.0", features = ["sync"] }
|
tokio = { version = "1.40", features = ["sync"] }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
pyo3-build-config = { version = "0.20.3", features = [
|
pyo3-build-config = { version = "0.20.3", features = [
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
|||||||
# version in Cargo.toml
|
# version in Cargo.toml
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"pylance==0.20.0b3",
|
"pylance==0.20.0",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
"pydantic>=1.10",
|
"pydantic>=1.10",
|
||||||
"packaging",
|
"packaging",
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ def connect(
|
|||||||
read_consistency_interval: Optional[timedelta] = None,
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
||||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||||
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> DBConnection:
|
) -> DBConnection:
|
||||||
"""Connect to a LanceDB database.
|
"""Connect to a LanceDB database.
|
||||||
@@ -67,6 +68,9 @@ def connect(
|
|||||||
Configuration options for the LanceDB Cloud HTTP client. If a dict, then
|
Configuration options for the LanceDB Cloud HTTP client. If a dict, then
|
||||||
the keys are the attributes of the ClientConfig class. If None, then the
|
the keys are the attributes of the ClientConfig class. If None, then the
|
||||||
default configuration is used.
|
default configuration is used.
|
||||||
|
storage_options: dict, optional
|
||||||
|
Additional options for the storage backend. See available options at
|
||||||
|
https://lancedb.github.io/lancedb/guides/storage/
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -106,12 +110,17 @@ def connect(
|
|||||||
# TODO: remove this (deprecation warning downstream)
|
# TODO: remove this (deprecation warning downstream)
|
||||||
request_thread_pool=request_thread_pool,
|
request_thread_pool=request_thread_pool,
|
||||||
client_config=client_config,
|
client_config=client_config,
|
||||||
|
storage_options=storage_options,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
if kwargs:
|
if kwargs:
|
||||||
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||||
return LanceDBConnection(uri, read_consistency_interval=read_consistency_interval)
|
return LanceDBConnection(
|
||||||
|
uri,
|
||||||
|
read_consistency_interval=read_consistency_interval,
|
||||||
|
storage_options=storage_options,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def connect_async(
|
async def connect_async(
|
||||||
|
|||||||
@@ -79,9 +79,21 @@ class Query:
|
|||||||
def limit(self, limit: int): ...
|
def limit(self, limit: int): ...
|
||||||
def offset(self, offset: int): ...
|
def offset(self, offset: int): ...
|
||||||
def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
|
def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
|
||||||
def nearest_to_text(self, query: dict) -> Query: ...
|
def nearest_to_text(self, query: dict) -> FTSQuery: ...
|
||||||
async def execute(self, max_batch_legnth: Optional[int]) -> RecordBatchStream: ...
|
async def execute(self, max_batch_legnth: Optional[int]) -> RecordBatchStream: ...
|
||||||
|
|
||||||
|
class FTSQuery:
|
||||||
|
def where(self, filter: str): ...
|
||||||
|
def select(self, columns: List[str]): ...
|
||||||
|
def limit(self, limit: int): ...
|
||||||
|
def offset(self, offset: int): ...
|
||||||
|
def fast_search(self): ...
|
||||||
|
def with_row_id(self): ...
|
||||||
|
def postfilter(self): ...
|
||||||
|
def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
|
||||||
|
async def execute(self, max_batch_length: Optional[int]) -> RecordBatchStream: ...
|
||||||
|
async def explain_plan(self) -> str: ...
|
||||||
|
|
||||||
class VectorQuery:
|
class VectorQuery:
|
||||||
async def execute(self) -> RecordBatchStream: ...
|
async def execute(self) -> RecordBatchStream: ...
|
||||||
def where(self, filter: str): ...
|
def where(self, filter: str): ...
|
||||||
@@ -95,6 +107,24 @@ class VectorQuery:
|
|||||||
def refine_factor(self, refine_factor: int): ...
|
def refine_factor(self, refine_factor: int): ...
|
||||||
def nprobes(self, nprobes: int): ...
|
def nprobes(self, nprobes: int): ...
|
||||||
def bypass_vector_index(self): ...
|
def bypass_vector_index(self): ...
|
||||||
|
def nearest_to_text(self, query: dict) -> HybridQuery: ...
|
||||||
|
|
||||||
|
class HybridQuery:
|
||||||
|
def where(self, filter: str): ...
|
||||||
|
def select(self, columns: List[str]): ...
|
||||||
|
def limit(self, limit: int): ...
|
||||||
|
def offset(self, offset: int): ...
|
||||||
|
def fast_search(self): ...
|
||||||
|
def with_row_id(self): ...
|
||||||
|
def postfilter(self): ...
|
||||||
|
def distance_type(self, distance_type: str): ...
|
||||||
|
def refine_factor(self, refine_factor: int): ...
|
||||||
|
def nprobes(self, nprobes: int): ...
|
||||||
|
def bypass_vector_index(self): ...
|
||||||
|
def to_vector_query(self) -> VectorQuery: ...
|
||||||
|
def to_fts_query(self) -> FTSQuery: ...
|
||||||
|
def get_limit(self) -> int: ...
|
||||||
|
def get_with_row_id(self) -> bool: ...
|
||||||
|
|
||||||
class CompactionStats:
|
class CompactionStats:
|
||||||
fragments_removed: int
|
fragments_removed: int
|
||||||
|
|||||||
@@ -13,34 +13,29 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
|
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
|
||||||
|
|
||||||
import pyarrow as pa
|
|
||||||
from overrides import EnforceOverrides, override
|
from overrides import EnforceOverrides, override
|
||||||
from pyarrow import fs
|
|
||||||
|
|
||||||
from lancedb.common import data_to_reader, validate_schema
|
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
|
||||||
|
from lancedb.background_loop import BackgroundEventLoop
|
||||||
|
|
||||||
from ._lancedb import connect as lancedb_connect
|
from ._lancedb import connect as lancedb_connect
|
||||||
from .table import (
|
from .table import (
|
||||||
AsyncTable,
|
AsyncTable,
|
||||||
LanceTable,
|
LanceTable,
|
||||||
Table,
|
Table,
|
||||||
_table_path,
|
|
||||||
sanitize_create_table,
|
sanitize_create_table,
|
||||||
)
|
)
|
||||||
from .util import (
|
from .util import (
|
||||||
fs_from_uri,
|
|
||||||
get_uri_location,
|
|
||||||
get_uri_scheme,
|
get_uri_scheme,
|
||||||
validate_table_name,
|
validate_table_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
import pyarrow as pa
|
||||||
from .pydantic import LanceModel
|
from .pydantic import LanceModel
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
@@ -48,6 +43,8 @@ if TYPE_CHECKING:
|
|||||||
from .common import DATA, URI
|
from .common import DATA, URI
|
||||||
from .embeddings import EmbeddingFunctionConfig
|
from .embeddings import EmbeddingFunctionConfig
|
||||||
|
|
||||||
|
LOOP = BackgroundEventLoop()
|
||||||
|
|
||||||
|
|
||||||
class DBConnection(EnforceOverrides):
|
class DBConnection(EnforceOverrides):
|
||||||
"""An active LanceDB connection interface."""
|
"""An active LanceDB connection interface."""
|
||||||
@@ -180,6 +177,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
control over how data is saved, either provide the PyArrow schema to
|
control over how data is saved, either provide the PyArrow schema to
|
||||||
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
||||||
|
|
||||||
|
>>> import pyarrow as pa
|
||||||
>>> custom_schema = pa.schema([
|
>>> custom_schema = pa.schema([
|
||||||
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
... pa.field("lat", pa.float32()),
|
... pa.field("lat", pa.float32()),
|
||||||
@@ -327,7 +325,11 @@ class LanceDBConnection(DBConnection):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, uri: URI, *, read_consistency_interval: Optional[timedelta] = None
|
self,
|
||||||
|
uri: URI,
|
||||||
|
*,
|
||||||
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
):
|
):
|
||||||
if not isinstance(uri, Path):
|
if not isinstance(uri, Path):
|
||||||
scheme = get_uri_scheme(uri)
|
scheme = get_uri_scheme(uri)
|
||||||
@@ -338,9 +340,27 @@ class LanceDBConnection(DBConnection):
|
|||||||
uri = uri.expanduser().absolute()
|
uri = uri.expanduser().absolute()
|
||||||
Path(uri).mkdir(parents=True, exist_ok=True)
|
Path(uri).mkdir(parents=True, exist_ok=True)
|
||||||
self._uri = str(uri)
|
self._uri = str(uri)
|
||||||
|
|
||||||
self._entered = False
|
self._entered = False
|
||||||
self.read_consistency_interval = read_consistency_interval
|
self.read_consistency_interval = read_consistency_interval
|
||||||
|
self.storage_options = storage_options
|
||||||
|
|
||||||
|
if read_consistency_interval is not None:
|
||||||
|
read_consistency_interval_secs = read_consistency_interval.total_seconds()
|
||||||
|
else:
|
||||||
|
read_consistency_interval_secs = None
|
||||||
|
|
||||||
|
async def do_connect():
|
||||||
|
return await lancedb_connect(
|
||||||
|
sanitize_uri(uri),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
read_consistency_interval_secs,
|
||||||
|
None,
|
||||||
|
storage_options,
|
||||||
|
)
|
||||||
|
|
||||||
|
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
val = f"{self.__class__.__name__}({self._uri}"
|
val = f"{self.__class__.__name__}({self._uri}"
|
||||||
@@ -364,32 +384,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
Iterator of str.
|
Iterator of str.
|
||||||
A list of table names.
|
A list of table names.
|
||||||
"""
|
"""
|
||||||
try:
|
return LOOP.run(self._conn.table_names(start_after=page_token, limit=limit))
|
||||||
asyncio.get_running_loop()
|
|
||||||
# User application is async. Soon we will just tell them to use the
|
|
||||||
# async version. Until then fallback to the old sync implementation.
|
|
||||||
try:
|
|
||||||
filesystem = fs_from_uri(self.uri)[0]
|
|
||||||
except pa.ArrowInvalid:
|
|
||||||
raise NotImplementedError("Unsupported scheme: " + self.uri)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loc = get_uri_location(self.uri)
|
|
||||||
paths = filesystem.get_file_info(fs.FileSelector(loc))
|
|
||||||
except FileNotFoundError:
|
|
||||||
# It is ok if the file does not exist since it will be created
|
|
||||||
paths = []
|
|
||||||
tables = [
|
|
||||||
os.path.splitext(file_info.base_name)[0]
|
|
||||||
for file_info in paths
|
|
||||||
if file_info.extension == "lance"
|
|
||||||
]
|
|
||||||
tables.sort()
|
|
||||||
return tables
|
|
||||||
except RuntimeError:
|
|
||||||
# User application is sync. It is safe to use the async implementation
|
|
||||||
# under the hood.
|
|
||||||
return asyncio.run(self._async_get_table_names(page_token, limit))
|
|
||||||
|
|
||||||
def __len__(self) -> int:
|
def __len__(self) -> int:
|
||||||
return len(self.table_names())
|
return len(self.table_names())
|
||||||
@@ -461,19 +456,16 @@ class LanceDBConnection(DBConnection):
|
|||||||
If True, ignore if the table does not exist.
|
If True, ignore if the table does not exist.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
table_uri = _table_path(self.uri, name)
|
LOOP.run(self._conn.drop_table(name))
|
||||||
filesystem, path = fs_from_uri(table_uri)
|
except ValueError as e:
|
||||||
filesystem.delete_dir(path)
|
|
||||||
except FileNotFoundError:
|
|
||||||
if not ignore_missing:
|
if not ignore_missing:
|
||||||
raise
|
raise e
|
||||||
|
if f"Table '{name}' was not found" not in str(e):
|
||||||
|
raise e
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def drop_database(self):
|
def drop_database(self):
|
||||||
dummy_table_uri = _table_path(self.uri, "dummy")
|
LOOP.run(self._conn.drop_database())
|
||||||
uri = dummy_table_uri.removesuffix("dummy.lance")
|
|
||||||
filesystem, path = fs_from_uri(uri)
|
|
||||||
filesystem.delete_dir(path)
|
|
||||||
|
|
||||||
|
|
||||||
class AsyncConnection(object):
|
class AsyncConnection(object):
|
||||||
@@ -689,6 +681,7 @@ class AsyncConnection(object):
|
|||||||
control over how data is saved, either provide the PyArrow schema to
|
control over how data is saved, either provide the PyArrow schema to
|
||||||
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
||||||
|
|
||||||
|
>>> import pyarrow as pa
|
||||||
>>> custom_schema = pa.schema([
|
>>> custom_schema = pa.schema([
|
||||||
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
... pa.field("lat", pa.float32()),
|
... pa.field("lat", pa.float32()),
|
||||||
|
|||||||
@@ -48,6 +48,9 @@ class OpenAIEmbeddings(TextEmbeddingFunction):
|
|||||||
organization: Optional[str] = None
|
organization: Optional[str] = None
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
|
|
||||||
|
# Set true to use Azure OpenAI API
|
||||||
|
use_azure: bool = False
|
||||||
|
|
||||||
def ndims(self):
|
def ndims(self):
|
||||||
return self._ndims
|
return self._ndims
|
||||||
|
|
||||||
@@ -123,4 +126,8 @@ class OpenAIEmbeddings(TextEmbeddingFunction):
|
|||||||
kwargs["organization"] = self.organization
|
kwargs["organization"] = self.organization
|
||||||
if self.api_key:
|
if self.api_key:
|
||||||
kwargs["api_key"] = self.api_key
|
kwargs["api_key"] = self.api_key
|
||||||
return openai.OpenAI(**kwargs)
|
|
||||||
|
if self.use_azure:
|
||||||
|
return openai.AzureOpenAI(**kwargs)
|
||||||
|
else:
|
||||||
|
return openai.OpenAI(**kwargs)
|
||||||
|
|||||||
@@ -12,18 +12,22 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from typing import ClassVar, List, Union
|
from typing import ClassVar, TYPE_CHECKING, List, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
from ..util import attempt_import_or_raise
|
from ..util import attempt_import_or_raise
|
||||||
from .base import TextEmbeddingFunction
|
from .base import EmbeddingFunction
|
||||||
from .registry import register
|
from .registry import register
|
||||||
from .utils import api_key_not_found_help, TEXT
|
from .utils import api_key_not_found_help, IMAGES
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import PIL
|
||||||
|
|
||||||
|
|
||||||
@register("voyageai")
|
@register("voyageai")
|
||||||
class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||||
"""
|
"""
|
||||||
An embedding function that uses the VoyageAI API
|
An embedding function that uses the VoyageAI API
|
||||||
|
|
||||||
@@ -36,6 +40,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
|
|
||||||
* voyage-3
|
* voyage-3
|
||||||
* voyage-3-lite
|
* voyage-3-lite
|
||||||
|
* voyage-multimodal-3
|
||||||
* voyage-finance-2
|
* voyage-finance-2
|
||||||
* voyage-multilingual-2
|
* voyage-multilingual-2
|
||||||
* voyage-law-2
|
* voyage-law-2
|
||||||
@@ -54,7 +59,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
.create(name="voyage-3")
|
.create(name="voyage-3")
|
||||||
|
|
||||||
class TextModel(LanceModel):
|
class TextModel(LanceModel):
|
||||||
text: str = voyageai.SourceField()
|
data: str = voyageai.SourceField()
|
||||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||||
|
|
||||||
data = [ { "text": "hello world" },
|
data = [ { "text": "hello world" },
|
||||||
@@ -77,6 +82,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
return 1536
|
return 1536
|
||||||
elif self.name in [
|
elif self.name in [
|
||||||
"voyage-3",
|
"voyage-3",
|
||||||
|
"voyage-multimodal-3",
|
||||||
"voyage-finance-2",
|
"voyage-finance-2",
|
||||||
"voyage-multilingual-2",
|
"voyage-multilingual-2",
|
||||||
"voyage-law-2",
|
"voyage-law-2",
|
||||||
@@ -85,19 +91,19 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Model {self.name} not supported")
|
raise ValueError(f"Model {self.name} not supported")
|
||||||
|
|
||||||
def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
|
def sanitize_input(self, images: IMAGES) -> Union[List[bytes], np.ndarray]:
|
||||||
return self.compute_source_embeddings(query, input_type="query")
|
"""
|
||||||
|
Sanitize the input to the embedding function.
|
||||||
|
"""
|
||||||
|
if isinstance(images, (str, bytes)):
|
||||||
|
images = [images]
|
||||||
|
elif isinstance(images, pa.Array):
|
||||||
|
images = images.to_pylist()
|
||||||
|
elif isinstance(images, pa.ChunkedArray):
|
||||||
|
images = images.combine_chunks().to_pylist()
|
||||||
|
return images
|
||||||
|
|
||||||
def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
|
def generate_text_embeddings(self, text: str, **kwargs) -> np.ndarray:
|
||||||
texts = self.sanitize_input(texts)
|
|
||||||
input_type = (
|
|
||||||
kwargs.get("input_type") or "document"
|
|
||||||
) # assume source input type if not passed by `compute_query_embeddings`
|
|
||||||
return self.generate_embeddings(texts, input_type=input_type)
|
|
||||||
|
|
||||||
def generate_embeddings(
|
|
||||||
self, texts: Union[List[str], np.ndarray], *args, **kwargs
|
|
||||||
) -> List[np.array]:
|
|
||||||
"""
|
"""
|
||||||
Get the embeddings for the given texts
|
Get the embeddings for the given texts
|
||||||
|
|
||||||
@@ -109,15 +115,55 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
|
|
||||||
truncation: Optional[bool]
|
truncation: Optional[bool]
|
||||||
"""
|
"""
|
||||||
VoyageAIEmbeddingFunction._init_client()
|
if self.name in ["voyage-multimodal-3"]:
|
||||||
rs = VoyageAIEmbeddingFunction.client.embed(
|
rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
|
||||||
texts=texts, model=self.name, **kwargs
|
inputs=[[text]], model=self.name, **kwargs
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
rs = VoyageAIEmbeddingFunction._get_client().embed(
|
||||||
|
texts=[text], model=self.name, **kwargs
|
||||||
|
)
|
||||||
|
|
||||||
return [emb for emb in rs.embeddings]
|
return rs.embeddings[0]
|
||||||
|
|
||||||
|
def generate_image_embedding(
|
||||||
|
self, image: "PIL.Image.Image", **kwargs
|
||||||
|
) -> np.ndarray:
|
||||||
|
rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
|
||||||
|
inputs=[[image]], model=self.name, **kwargs
|
||||||
|
)
|
||||||
|
return rs.embeddings[0]
|
||||||
|
|
||||||
|
def compute_query_embeddings(
|
||||||
|
self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
|
||||||
|
) -> List[np.ndarray]:
|
||||||
|
"""
|
||||||
|
Compute the embeddings for a given user query
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
query : Union[str, PIL.Image.Image]
|
||||||
|
The query to embed. A query can be either text or an image.
|
||||||
|
"""
|
||||||
|
if isinstance(query, str):
|
||||||
|
return [self.generate_text_embeddings(query, input_type="query")]
|
||||||
|
else:
|
||||||
|
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||||
|
if isinstance(query, PIL.Image.Image):
|
||||||
|
return [self.generate_image_embedding(query, input_type="query")]
|
||||||
|
else:
|
||||||
|
raise TypeError("Only text PIL images supported as query")
|
||||||
|
|
||||||
|
def compute_source_embeddings(
|
||||||
|
self, images: IMAGES, *args, **kwargs
|
||||||
|
) -> List[np.array]:
|
||||||
|
images = self.sanitize_input(images)
|
||||||
|
return [
|
||||||
|
self.generate_image_embedding(img, input_type="document") for img in images
|
||||||
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _init_client():
|
def _get_client():
|
||||||
if VoyageAIEmbeddingFunction.client is None:
|
if VoyageAIEmbeddingFunction.client is None:
|
||||||
voyageai = attempt_import_or_raise("voyageai")
|
voyageai = attempt_import_or_raise("voyageai")
|
||||||
if os.environ.get("VOYAGE_API_KEY") is None:
|
if os.environ.get("VOYAGE_API_KEY") is None:
|
||||||
@@ -125,3 +171,4 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
VoyageAIEmbeddingFunction.client = voyageai.Client(
|
VoyageAIEmbeddingFunction.client = voyageai.Client(
|
||||||
os.environ["VOYAGE_API_KEY"]
|
os.environ["VOYAGE_API_KEY"]
|
||||||
)
|
)
|
||||||
|
return VoyageAIEmbeddingFunction.client
|
||||||
|
|||||||
@@ -110,7 +110,16 @@ class FTS:
|
|||||||
remove_stop_words: bool = False,
|
remove_stop_words: bool = False,
|
||||||
ascii_folding: bool = False,
|
ascii_folding: bool = False,
|
||||||
):
|
):
|
||||||
self._inner = LanceDbIndex.fts(with_position=with_position)
|
self._inner = LanceDbIndex.fts(
|
||||||
|
with_position=with_position,
|
||||||
|
base_tokenizer=base_tokenizer,
|
||||||
|
language=language,
|
||||||
|
max_token_length=max_token_length,
|
||||||
|
lower_case=lower_case,
|
||||||
|
stem=stem,
|
||||||
|
remove_stop_words=remove_stop_words,
|
||||||
|
ascii_folding=ascii_folding,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class HnswPq:
|
class HnswPq:
|
||||||
@@ -169,6 +178,12 @@ class HnswPq:
|
|||||||
If the dimension is not divisible by 8 then we use 1 subvector. This is not
|
If the dimension is not divisible by 8 then we use 1 subvector. This is not
|
||||||
ideal and will likely result in poor performance.
|
ideal and will likely result in poor performance.
|
||||||
|
|
||||||
|
num_bits: int, default 8
|
||||||
|
Number of bits to encode each sub-vector.
|
||||||
|
|
||||||
|
This value controls how much the sub-vectors are compressed. The more bits
|
||||||
|
the more accurate the index but the slower search. Only 4 and 8 are supported.
|
||||||
|
|
||||||
max_iterations, default 50
|
max_iterations, default 50
|
||||||
|
|
||||||
Max iterations to train kmeans.
|
Max iterations to train kmeans.
|
||||||
@@ -223,6 +238,7 @@ class HnswPq:
|
|||||||
distance_type: Optional[str] = None,
|
distance_type: Optional[str] = None,
|
||||||
num_partitions: Optional[int] = None,
|
num_partitions: Optional[int] = None,
|
||||||
num_sub_vectors: Optional[int] = None,
|
num_sub_vectors: Optional[int] = None,
|
||||||
|
num_bits: Optional[int] = None,
|
||||||
max_iterations: Optional[int] = None,
|
max_iterations: Optional[int] = None,
|
||||||
sample_rate: Optional[int] = None,
|
sample_rate: Optional[int] = None,
|
||||||
m: Optional[int] = None,
|
m: Optional[int] = None,
|
||||||
@@ -232,6 +248,7 @@ class HnswPq:
|
|||||||
distance_type=distance_type,
|
distance_type=distance_type,
|
||||||
num_partitions=num_partitions,
|
num_partitions=num_partitions,
|
||||||
num_sub_vectors=num_sub_vectors,
|
num_sub_vectors=num_sub_vectors,
|
||||||
|
num_bits=num_bits,
|
||||||
max_iterations=max_iterations,
|
max_iterations=max_iterations,
|
||||||
sample_rate=sample_rate,
|
sample_rate=sample_rate,
|
||||||
m=m,
|
m=m,
|
||||||
@@ -378,6 +395,7 @@ class IvfPq:
|
|||||||
distance_type: Optional[str] = None,
|
distance_type: Optional[str] = None,
|
||||||
num_partitions: Optional[int] = None,
|
num_partitions: Optional[int] = None,
|
||||||
num_sub_vectors: Optional[int] = None,
|
num_sub_vectors: Optional[int] = None,
|
||||||
|
num_bits: Optional[int] = None,
|
||||||
max_iterations: Optional[int] = None,
|
max_iterations: Optional[int] = None,
|
||||||
sample_rate: Optional[int] = None,
|
sample_rate: Optional[int] = None,
|
||||||
):
|
):
|
||||||
@@ -440,6 +458,12 @@ class IvfPq:
|
|||||||
|
|
||||||
If the dimension is not visible by 8 then we use 1 subvector. This is not
|
If the dimension is not visible by 8 then we use 1 subvector. This is not
|
||||||
ideal and will likely result in poor performance.
|
ideal and will likely result in poor performance.
|
||||||
|
num_bits: int, default 8
|
||||||
|
Number of bits to encode each sub-vector.
|
||||||
|
|
||||||
|
This value controls how much the sub-vectors are compressed. The more bits
|
||||||
|
the more accurate the index but the slower search. The default is 8
|
||||||
|
bits. Only 4 and 8 are supported.
|
||||||
max_iterations: int, default 50
|
max_iterations: int, default 50
|
||||||
Max iteration to train kmeans.
|
Max iteration to train kmeans.
|
||||||
|
|
||||||
@@ -473,6 +497,7 @@ class IvfPq:
|
|||||||
distance_type=distance_type,
|
distance_type=distance_type,
|
||||||
num_partitions=num_partitions,
|
num_partitions=num_partitions,
|
||||||
num_sub_vectors=num_sub_vectors,
|
num_sub_vectors=num_sub_vectors,
|
||||||
|
num_bits=num_bits,
|
||||||
max_iterations=max_iterations,
|
max_iterations=max_iterations,
|
||||||
sample_rate=sample_rate,
|
sample_rate=sample_rate,
|
||||||
)
|
)
|
||||||
|
|||||||
0
python/python/lancedb/integrations/__init__.py
Normal file
0
python/python/lancedb/integrations/__init__.py
Normal file
248
python/python/lancedb/integrations/pyarrow.py
Normal file
248
python/python/lancedb/integrations/pyarrow.py
Normal file
@@ -0,0 +1,248 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Any, List, Optional, Tuple, Union, Literal
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
from ..table import Table
|
||||||
|
|
||||||
|
# A row filter: either a string or a pyarrow compute expression.
Filter = Union[str, pa.compute.Expression]
# One key column name, or a list of key column names.
Keys = Union[str, List[str]]
# The join-type names accepted by the `join` signature in this module.
JoinType = Literal[
    "left semi",
    "right semi",
    "left anti",
    "right anti",
    "inner",
    "left outer",
    "right outer",
    "full outer",
]
|
||||||
|
|
||||||
|
|
||||||
|
class PyarrowScannerAdapter(pa.dataset.Scanner):
    """Present a LanceDB table through the ``pyarrow.dataset.Scanner`` interface.

    The adapter stores the table together with the projection (``columns``),
    the ``filter`` and the ``batch_size``; every read is served by building a
    query on the table (see ``to_reader``).

    Options without a counterpart here are handled as follows:

    * ``batch_readahead`` / ``fragment_readahead`` are ignored (a debug
      message is logged).
    * ``fragment_scan_options``, ``use_threads=False`` and ``memory_pool``
      raise ``NotImplementedError``.
    """

    def __init__(
        self,
        table: Table,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        self.table = table
        self.columns = columns
        self.filter = filter
        self.batch_size = batch_size

        # Read-ahead hints are accepted but dropped; only a debug trace is left.
        ignored_hints = (
            (
                batch_readahead,
                "ignoring batch_readahead which has no lance equivalent",
            ),
            (
                fragment_readahead,
                "ignoring fragment_readahead which has no lance equivalent",
            ),
        )
        for hint_value, message in ignored_hints:
            if hint_value is not None:
                logging.debug(message)

        # Options that cannot be honoured are rejected, checked in this order.
        rejected_options = (
            (fragment_scan_options is not None, "fragment_scan_options not supported"),
            (use_threads is False, "use_threads=False not supported"),
            (memory_pool is not None, "memory_pool not supported"),
        )
        for was_requested, message in rejected_options:
            if was_requested:
                raise NotImplementedError(message)

    def count_rows(self):
        """Return the table's row count for the stored filter."""
        return self.table.count_rows(self.filter)

    def from_batches(self, **kwargs):
        """Not supported by this adapter."""
        raise NotImplementedError

    def from_dataset(self, **kwargs):
        """Not supported by this adapter."""
        raise NotImplementedError

    def from_fragment(self, **kwargs):
        """Not supported by this adapter."""
        raise NotImplementedError

    def head(self, num_rows: int):
        """Read at most ``num_rows`` rows and return them fully materialized."""
        reader = self.to_reader(limit=num_rows)
        return reader.read_all()

    @property
    def projected_schema(self):
        """Schema of the scan output, obtained by reading a single row."""
        sample = self.head(1)
        return sample.schema

    def scan_batches(self):
        """Return the same reader as ``to_reader()``."""
        return self.to_reader()

    def take(self, indices: List[int]):
        """Not supported by this adapter."""
        raise NotImplementedError

    def to_batches(self):
        """Return the same reader as ``to_reader()``."""
        return self.to_reader()

    def to_table(self):
        """Run the scan and return everything the reader yields via ``read_all``."""
        reader = self.to_reader()
        return reader.read_all()

    def to_reader(self, *, limit: Optional[int] = None):
        """Build the table query for this scan and return its batches.

        Parameters
        ----------
        limit: int, optional
            Maximum number of rows to read. When omitted, the limit is set to
            the current ``count_rows()`` value.

        Raises
        ------
        ValueError
            If ``limit`` is given and is not positive.
        """
        builder = self.table.search()

        if limit is not None and limit <= 0:
            raise ValueError("limit must be positive")

        # With no explicit limit, request as many rows as count_rows() reports;
        # this is how the query builder's built-in limit is disabled.
        row_cap = self.count_rows() if limit is None else limit
        builder.limit(row_cap)

        if self.columns is not None:
            builder = builder.select(self.columns)
        if self.filter is not None:
            builder = builder.where(self.filter, prefilter=True)

        return builder.to_batches(batch_size=self.batch_size)
|
||||||
|
|
||||||
|
|
||||||
|
class PyarrowDatasetAdapter(pa.dataset.Dataset):
    """Present a LanceDB table through the ``pyarrow.dataset.Dataset`` interface.

    Reads (``head``, ``to_batches``, ``to_table``) are delegated to a
    ``PyarrowScannerAdapter`` created by ``scanner``. Joins, sorting, ``take``,
    fragments, schema replacement and the partition expression are not
    implemented and raise ``NotImplementedError``.
    """

    def __init__(self, table: Table):
        self.table = table

    def count_rows(self, filter: Optional[Filter] = None):
        """Return the table's row count for ``filter``."""
        return self.table.count_rows(filter)

    def get_fragments(self, filter: Optional[Filter] = None):
        """Not supported by this adapter."""
        raise NotImplementedError

    def head(
        self,
        num_rows: int,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Create a scanner with the given options and return its first ``num_rows`` rows."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.head(num_rows)

    def join(
        self,
        right_dataset: Any,
        keys: Keys,
        right_keys: Optional[Keys] = None,
        join_type: Optional[JoinType] = None,
        left_suffix: Optional[str] = None,
        right_suffix: Optional[str] = None,
        coalesce_keys: bool = True,
        use_threads: bool = True,
    ):
        """Not supported by this adapter."""
        raise NotImplementedError

    def join_asof(
        self,
        right_dataset: Any,
        on: str,
        by: Keys,
        tolerance: int,
        right_on: Optional[str] = None,
        right_by: Optional[Keys] = None,
    ):
        """Not supported by this adapter."""
        raise NotImplementedError

    @property
    def partition_expression(self):
        """Not supported by this adapter."""
        raise NotImplementedError

    def replace_schema(self, schema: pa.Schema):
        """Not supported by this adapter."""
        raise NotImplementedError

    def scanner(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Return a ``PyarrowScannerAdapter`` over this table with the given options."""
        return PyarrowScannerAdapter(
            self.table,
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )

    @property
    def schema(self):
        """The schema reported by the wrapped table."""
        return self.table.schema

    def sort_by(self, sorting: Union[str, List[Tuple[str, bool]]]):
        """Not supported by this adapter."""
        raise NotImplementedError

    def take(
        self,
        indices: List[int],
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Not supported by this adapter."""
        raise NotImplementedError

    def to_batches(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Create a scanner with the given options and return its batches."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.to_batches()

    def to_table(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Create a scanner with the given options and return its ``to_table()`` result."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.to_table()
|
||||||
@@ -1,15 +1,5 @@
|
|||||||
# Copyright 2023 LanceDB Developers
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -26,6 +16,7 @@ from typing import (
|
|||||||
Union,
|
Union,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import deprecation
|
import deprecation
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
@@ -44,6 +35,8 @@ if TYPE_CHECKING:
|
|||||||
import polars as pl
|
import polars as pl
|
||||||
|
|
||||||
from ._lancedb import Query as LanceQuery
|
from ._lancedb import Query as LanceQuery
|
||||||
|
from ._lancedb import FTSQuery as LanceFTSQuery
|
||||||
|
from ._lancedb import HybridQuery as LanceHybridQuery
|
||||||
from ._lancedb import VectorQuery as LanceVectorQuery
|
from ._lancedb import VectorQuery as LanceVectorQuery
|
||||||
from .common import VEC
|
from .common import VEC
|
||||||
from .pydantic import LanceModel
|
from .pydantic import LanceModel
|
||||||
@@ -325,6 +318,14 @@ class LanceQueryBuilder(ABC):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
    """
    Execute the query and return the results as a pyarrow
    [RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html)

    Parameters
    ----------
    batch_size: int, optional
        Batch size forwarded to the concrete implementation.

    Returns
    -------
    pa.RecordBatchReader
        A reader over the query results. (The annotation previously said
        ``pa.Table``, which contradicted this contract.)
    """
    raise NotImplementedError
|
||||||
|
|
||||||
def to_list(self) -> List[dict]:
|
def to_list(self) -> List[dict]:
|
||||||
"""
|
"""
|
||||||
Execute the query and return the results as a list of dictionaries.
|
Execute the query and return the results as a list of dictionaries.
|
||||||
@@ -869,6 +870,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
check_reranker_result(results)
|
check_reranker_result(results)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def to_batches(self, /, batch_size: Optional[int] = None):
    """Batch output is not available for FTS queries; always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    reason = "to_batches on an FTS query"
    raise NotImplementedError(reason)
|
||||||
|
|
||||||
def tantivy_to_arrow(self) -> pa.Table:
|
def tantivy_to_arrow(self) -> pa.Table:
|
||||||
try:
|
try:
|
||||||
import tantivy
|
import tantivy
|
||||||
@@ -971,6 +975,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
|
|
||||||
class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
||||||
def to_arrow(self) -> pa.Table:
|
def to_arrow(self) -> pa.Table:
|
||||||
|
return self.to_batches().read_all()
|
||||||
|
|
||||||
|
def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
|
||||||
query = Query(
|
query = Query(
|
||||||
columns=self._columns,
|
columns=self._columns,
|
||||||
filter=self._where,
|
filter=self._where,
|
||||||
@@ -980,7 +987,7 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
|||||||
# not actually respected in remote query
|
# not actually respected in remote query
|
||||||
offset=self._offset or 0,
|
offset=self._offset or 0,
|
||||||
)
|
)
|
||||||
return self._table._execute_query(query).read_all()
|
return self._table._execute_query(query)
|
||||||
|
|
||||||
def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
|
def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
|
||||||
"""Rerank the results using the specified reranker.
|
"""Rerank the results using the specified reranker.
|
||||||
@@ -1110,32 +1117,55 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
|||||||
fts_results = fts_future.result()
|
fts_results = fts_future.result()
|
||||||
vector_results = vector_future.result()
|
vector_results = vector_future.result()
|
||||||
|
|
||||||
# convert to ranks first if needed
|
return self._combine_hybrid_results(
|
||||||
if self._norm == "rank":
|
fts_results=fts_results,
|
||||||
vector_results = self._rank(vector_results, "_distance")
|
vector_results=vector_results,
|
||||||
fts_results = self._rank(fts_results, "_score")
|
norm=self._norm,
|
||||||
|
fts_query=self._fts_query._query,
|
||||||
|
reranker=self._reranker,
|
||||||
|
limit=self._limit,
|
||||||
|
with_row_ids=self._with_row_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _combine_hybrid_results(
|
||||||
|
fts_results: pa.Table,
|
||||||
|
vector_results: pa.Table,
|
||||||
|
norm: str,
|
||||||
|
fts_query: str,
|
||||||
|
reranker,
|
||||||
|
limit: int,
|
||||||
|
with_row_ids: bool,
|
||||||
|
) -> pa.Table:
|
||||||
|
if norm == "rank":
|
||||||
|
vector_results = LanceHybridQueryBuilder._rank(vector_results, "_distance")
|
||||||
|
fts_results = LanceHybridQueryBuilder._rank(fts_results, "_score")
|
||||||
|
|
||||||
# normalize the scores to be between 0 and 1, 0 being most relevant
|
# normalize the scores to be between 0 and 1, 0 being most relevant
|
||||||
vector_results = self._normalize_scores(vector_results, "_distance")
|
vector_results = LanceHybridQueryBuilder._normalize_scores(
|
||||||
|
vector_results, "_distance"
|
||||||
|
)
|
||||||
|
|
||||||
# In fts higher scores represent relevance. Not inverting them here as
|
# In fts higher scores represent relevance. Not inverting them here as
|
||||||
# rerankers might need to preserve this score to support `return_score="all"`
|
# rerankers might need to preserve this score to support `return_score="all"`
|
||||||
fts_results = self._normalize_scores(fts_results, "_score")
|
fts_results = LanceHybridQueryBuilder._normalize_scores(fts_results, "_score")
|
||||||
|
|
||||||
results = self._reranker.rerank_hybrid(
|
results = reranker.rerank_hybrid(fts_query, vector_results, fts_results)
|
||||||
self._fts_query._query, vector_results, fts_results
|
|
||||||
)
|
|
||||||
|
|
||||||
check_reranker_result(results)
|
check_reranker_result(results)
|
||||||
|
|
||||||
# apply limit after reranking
|
results = results.slice(length=limit)
|
||||||
results = results.slice(length=self._limit)
|
|
||||||
|
|
||||||
if not self._with_row_id:
|
if not with_row_ids:
|
||||||
results = results.drop(["_rowid"])
|
results = results.drop(["_rowid"])
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _rank(self, results: pa.Table, column: str, ascending: bool = True):
|
def to_batches(self, /, batch_size: Optional[int] = None):
    """Batch output is not yet available for hybrid queries; always raises.

    The signature mirrors the abstract ``to_batches(self, /, batch_size=None)``
    so that callers passing ``batch_size`` get the intended
    ``NotImplementedError`` rather than a ``TypeError`` about an unexpected
    argument.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    raise NotImplementedError("to_batches not yet supported on a hybrid query")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _rank(results: pa.Table, column: str, ascending: bool = True):
|
||||||
if len(results) == 0:
|
if len(results) == 0:
|
||||||
return results
|
return results
|
||||||
# Get the _score column from results
|
# Get the _score column from results
|
||||||
@@ -1152,7 +1182,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
|||||||
)
|
)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _normalize_scores(self, results: pa.Table, column: str, invert=False):
|
@staticmethod
|
||||||
|
def _normalize_scores(results: pa.Table, column: str, invert=False):
|
||||||
if len(results) == 0:
|
if len(results) == 0:
|
||||||
return results
|
return results
|
||||||
# Get the _score column from results
|
# Get the _score column from results
|
||||||
@@ -1603,7 +1634,7 @@ class AsyncQuery(AsyncQueryBase):
|
|||||||
if (
|
if (
|
||||||
isinstance(query_vector, list)
|
isinstance(query_vector, list)
|
||||||
and len(query_vector) > 0
|
and len(query_vector) > 0
|
||||||
and not isinstance(query_vector[0], (float, int))
|
and isinstance(query_vector[0], (list, np.ndarray, pa.Array))
|
||||||
):
|
):
|
||||||
# multiple have been passed
|
# multiple have been passed
|
||||||
query_vectors = [AsyncQuery._query_vec_to_array(v) for v in query_vector]
|
query_vectors = [AsyncQuery._query_vec_to_array(v) for v in query_vector]
|
||||||
@@ -1618,7 +1649,7 @@ class AsyncQuery(AsyncQueryBase):
|
|||||||
|
|
||||||
def nearest_to_text(
|
def nearest_to_text(
|
||||||
self, query: str, columns: Union[str, List[str]] = []
|
self, query: str, columns: Union[str, List[str]] = []
|
||||||
) -> AsyncQuery:
|
) -> AsyncFTSQuery:
|
||||||
"""
|
"""
|
||||||
Find the documents that are most relevant to the given text query.
|
Find the documents that are most relevant to the given text query.
|
||||||
|
|
||||||
@@ -1641,8 +1672,90 @@ class AsyncQuery(AsyncQueryBase):
|
|||||||
"""
|
"""
|
||||||
if isinstance(columns, str):
|
if isinstance(columns, str):
|
||||||
columns = [columns]
|
columns = [columns]
|
||||||
self._inner.nearest_to_text({"query": query, "columns": columns})
|
return AsyncFTSQuery(
|
||||||
return self
|
self._inner.nearest_to_text({"query": query, "columns": columns})
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncFTSQuery(AsyncQueryBase):
    """A query for full text search for LanceDB."""

    def __init__(self, inner: LanceFTSQuery):
        super().__init__(inner)
        self._inner = inner

    def get_query(self):
        """Return whatever the wrapped native FTS query reports as its query."""
        # The value must be returned: AsyncHybridQuery.to_arrow hands it to the
        # reranker as the FTS query.
        return self._inner.get_query()

    def nearest_to(
        self,
        query_vector: Union[VEC, Tuple, List[VEC]],
    ) -> AsyncHybridQuery:
        """
        In addition to doing text search on the LanceDB Table, also
        find the nearest vectors to the given query vector.

        This converts the query from a FTS Query to a Hybrid query. Results
        from the vector search will be combined with results from the FTS query.

        This method will attempt to convert the input to the query vector
        expected by the embedding model. If the input cannot be converted
        then an error will be thrown.

        By default, there is no embedding model, and the input should be
        something that can be converted to a pyarrow array of floats. This
        includes lists, numpy arrays, and tuples.

        If there is only one vector column (a column whose data type is a
        fixed size list of floats) then the column does not need to be specified.
        If there is more than one vector column you must use
        [AsyncVectorQuery.column][lancedb.query.AsyncVectorQuery.column] to specify
        which column you would like to compare with.

        If no index has been created on the vector column then a vector query
        will perform a distance comparison between the query vector and every
        vector in the database and then sort the results. This is sometimes
        called a "flat search"

        For small databases, with tens of thousands of vectors or less, this can
        be reasonably fast. In larger databases you should create a vector index
        on the column. If there is a vector index then an "approximate" nearest
        neighbor search (frequently called an ANN search) will be performed. This
        search is much faster, but the results will be approximate.

        The query can be further parameterized using the returned builder. There
        are various ANN search parameters that will let you fine tune your recall
        accuracy vs search latency.

        Hybrid searches always have a [limit][]. If `limit` has not been called then
        a default `limit` of 10 will be used.

        Typically, a single vector is passed in as the query. However, you can also
        pass in multiple vectors. This can be useful if you want to find the nearest
        vectors to multiple query vectors. This is not expected to be faster than
        making multiple queries concurrently; it is just a convenience method.
        If multiple vectors are passed in then an additional column `query_index`
        will be added to the results. This column will contain the index of the
        query vector that the result is nearest to.

        Raises
        ------
        ValueError
            If `query_vector` is None.
        """
        if query_vector is None:
            raise ValueError("query_vector can not be None")

        # Only a list whose first element is itself vector-like means "several
        # query vectors". The earlier test (first element is not a Python
        # float/int) misread a list of numpy scalars as several vectors.
        vector_like = (list, tuple, np.ndarray, pa.Array, pa.ChunkedArray)
        if (
            isinstance(query_vector, list)
            and len(query_vector) > 0
            and isinstance(query_vector[0], vector_like)
        ):
            # multiple have been passed
            query_vectors = [AsyncQuery._query_vec_to_array(v) for v in query_vector]
            new_self = self._inner.nearest_to(query_vectors[0])
            for v in query_vectors[1:]:
                new_self.add_query_vector(v)
            return AsyncHybridQuery(new_self)
        else:
            return AsyncHybridQuery(
                self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector))
            )
|
||||||
|
|
||||||
|
|
||||||
class AsyncVectorQuery(AsyncQueryBase):
|
class AsyncVectorQuery(AsyncQueryBase):
|
||||||
@@ -1779,3 +1892,160 @@ class AsyncVectorQuery(AsyncQueryBase):
|
|||||||
"""
|
"""
|
||||||
self._inner.bypass_vector_index()
|
self._inner.bypass_vector_index()
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def nearest_to_text(
    self, query: str, columns: Union[str, List[str], None] = None
) -> AsyncHybridQuery:
    """
    Find the documents that are most relevant to the given text query,
    in addition to vector search.

    This converts the vector query into a hybrid query.

    This search will perform a full text search on the table and return
    the most relevant documents, combined with the vector query results.
    The text relevance is determined by BM25.

    The columns to search must be with native FTS index
    (Tantivy-based can't work with this method).

    By default, all indexed columns are searched,
    now only one column can be searched at a time.

    Parameters
    ----------
    query: str
        The text query to search for.
    columns: str or list of str, default None
        The columns to search in. If None, all indexed columns are searched.
        For now only one column can be searched at a time.
    """
    # None (the documented default) and a bare string are both normalized to a
    # list. A fresh list is built per call instead of sharing one mutable
    # default object across calls.
    if columns is None:
        columns = []
    elif isinstance(columns, str):
        columns = [columns]
    return AsyncHybridQuery(
        self._inner.nearest_to_text({"query": query, "columns": columns})
    )
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncHybridQuery(AsyncQueryBase):
    """
    A query builder that performs hybrid vector and full text search.
    Results are combined and reranked based on the specified reranker.
    By default, the results are reranked using the RRFReranker, which
    uses reciprocal rank fusion score for reranking.

    To make the vector and fts results comparable, the scores are normalized.
    Instead of normalizing scores, the `normalize` parameter can be set to "rank"
    in the `rerank` method to convert the scores to ranks and then normalize them.
    """

    def __init__(self, inner: LanceHybridQuery):
        super().__init__(inner)
        self._inner = inner
        self._norm = "score"
        self._reranker = RRFReranker()

    def rerank(
        self, reranker: Reranker = RRFReranker(), normalize: str = "score"
    ) -> AsyncHybridQuery:
        """
        Rerank the hybrid search results using the specified reranker. The reranker
        must be an instance of Reranker class.

        Parameters
        ----------
        reranker: Reranker, default RRFReranker()
            The reranker to use. Must be an instance of Reranker class.
            Passing None selects a fresh RRFReranker.
        normalize: str, default "score"
            The method to normalize the scores. Can be "rank" or "score". If "rank",
            the scores are converted to ranks and then normalized. If "score", the
            scores are normalized directly.

        Returns
        -------
        AsyncHybridQuery
            The AsyncHybridQuery object.

        Raises
        ------
        ValueError
            If `normalize` is not "rank" or "score", or if `reranker` is neither
            None nor a Reranker instance.
        """
        if normalize not in ["rank", "score"]:
            raise ValueError("normalize must be 'rank' or 'score'.")
        # None used to slip through validation and was stored as the reranker,
        # which made to_arrow fail later; fall back to the default instead.
        if reranker is None:
            reranker = RRFReranker()
        elif not isinstance(reranker, Reranker):
            raise ValueError("reranker must be an instance of Reranker class.")

        self._norm = normalize
        self._reranker = reranker

        return self

    async def to_batches(self):
        """Batch output is not yet available for hybrid queries; always raises."""
        raise NotImplementedError("to_batches not yet supported on a hybrid query")

    async def to_arrow(self) -> pa.Table:
        """
        Run the FTS and vector halves of the query concurrently, then combine
        and rerank the two result tables.

        Returns
        -------
        pa.Table
            The result of ``LanceHybridQueryBuilder._combine_hybrid_results``.
        """
        inner_fts = self._inner.to_fts_query()
        fts_query = AsyncFTSQuery(inner_fts)
        vec_query = AsyncVectorQuery(self._inner.to_vector_query())

        # save the row ID choice that was made on the query builder and force it
        # to actually fetch the row ids because we need this for reranking
        with_row_ids = self._inner.get_with_row_id()
        fts_query.with_row_id()
        vec_query.with_row_id()

        fts_results, vector_results = await asyncio.gather(
            fts_query.to_arrow(),
            vec_query.to_arrow(),
        )

        return LanceHybridQueryBuilder._combine_hybrid_results(
            fts_results=fts_results,
            vector_results=vector_results,
            norm=self._norm,
            # Read the query from the native FTS query itself so the reranker
            # always receives the real value.
            fts_query=inner_fts.get_query(),
            reranker=self._reranker,
            limit=self._inner.get_limit(),
            with_row_ids=with_row_ids,
        )

    async def explain_plan(self, verbose: Optional[bool] = False):
        """Return the execution plan for this query.

        The output includes both the vector and FTS search plans.

        Examples
        --------
        >>> import asyncio
        >>> from lancedb import connect_async
        >>> from lancedb.index import FTS
        >>> async def doctest_example():
        ...     conn = await connect_async("./.lancedb")
        ...     table = await conn.create_table("my_table", [{"vector": [99, 99], "text": "hello world"}])
        ...     await table.create_index("text", config=FTS(with_position=False))
        ...     query = [100, 100]
        ...     plan = await table.query().nearest_to([1, 2]).nearest_to_text("hello").explain_plan(True)
        ...     print(plan)
        >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Vector Search Plan:
        ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
            Take: columns="vector, _rowid, _distance, (text)"
                CoalesceBatchesExec: target_batch_size=1024
                GlobalLimitExec: skip=0, fetch=10
                    FilterExec: _distance@2 IS NOT NULL
                    SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
                        KNNVectorDistance: metric=l2
                        LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
        FTS Search Plan:
        LanceScan: uri=..., projection=[vector, text], row_id=false, row_addr=false, ordered=true

        Parameters
        ----------
        verbose : bool, default False
            Use a verbose output format.

        Returns
        -------
        plan
        """  # noqa: E501

        results = ["Vector Search Plan:"]
        results.append(await self._inner.to_vector_query().explain_plan(verbose))
        results.append("FTS Search Plan:")
        results.append(await self._inner.to_fts_query().explain_plan(verbose))

        return "\n".join(results)
|
||||||
|
|||||||
@@ -20,19 +20,16 @@ import warnings
|
|||||||
|
|
||||||
from lancedb import connect_async
|
from lancedb import connect_async
|
||||||
from lancedb.remote import ClientConfig
|
from lancedb.remote import ClientConfig
|
||||||
from lancedb.remote.background_loop import BackgroundEventLoop
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from overrides import override
|
from overrides import override
|
||||||
|
|
||||||
from ..common import DATA
|
from ..common import DATA
|
||||||
from ..db import DBConnection
|
from ..db import DBConnection, LOOP
|
||||||
from ..embeddings import EmbeddingFunctionConfig
|
from ..embeddings import EmbeddingFunctionConfig
|
||||||
from ..pydantic import LanceModel
|
from ..pydantic import LanceModel
|
||||||
from ..table import Table
|
from ..table import Table
|
||||||
from ..util import validate_table_name
|
from ..util import validate_table_name
|
||||||
|
|
||||||
LOOP = BackgroundEventLoop()
|
|
||||||
|
|
||||||
|
|
||||||
class RemoteDBConnection(DBConnection):
|
class RemoteDBConnection(DBConnection):
|
||||||
"""A connection to a remote LanceDB database."""
|
"""A connection to a remote LanceDB database."""
|
||||||
@@ -47,9 +44,9 @@ class RemoteDBConnection(DBConnection):
|
|||||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||||
connection_timeout: Optional[float] = None,
|
connection_timeout: Optional[float] = None,
|
||||||
read_timeout: Optional[float] = None,
|
read_timeout: Optional[float] = None,
|
||||||
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
):
|
):
|
||||||
"""Connect to a remote LanceDB database."""
|
"""Connect to a remote LanceDB database."""
|
||||||
|
|
||||||
if isinstance(client_config, dict):
|
if isinstance(client_config, dict):
|
||||||
client_config = ClientConfig(**client_config)
|
client_config = ClientConfig(**client_config)
|
||||||
elif client_config is None:
|
elif client_config is None:
|
||||||
@@ -97,6 +94,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
region=region,
|
region=region,
|
||||||
host_override=host_override,
|
host_override=host_override,
|
||||||
client_config=client_config,
|
client_config=client_config,
|
||||||
|
storage_options=storage_options,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ class RemoteTable(Table):
|
|||||||
|
|
||||||
def list_versions(self):
|
def list_versions(self):
|
||||||
"""List all versions of the table"""
|
"""List all versions of the table"""
|
||||||
return self._loop.run_until_complete(self._table.list_versions())
|
return LOOP.run(self._table.list_versions())
|
||||||
|
|
||||||
def to_arrow(self) -> pa.Table:
|
def to_arrow(self) -> pa.Table:
|
||||||
"""to_arrow() is not yet supported on LanceDB cloud."""
|
"""to_arrow() is not yet supported on LanceDB cloud."""
|
||||||
@@ -89,10 +89,10 @@ class RemoteTable(Table):
|
|||||||
return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
||||||
|
|
||||||
def checkout(self, version):
|
def checkout(self, version):
|
||||||
return self._loop.run_until_complete(self._table.checkout(version))
|
return LOOP.run(self._table.checkout(version))
|
||||||
|
|
||||||
def checkout_latest(self):
|
def checkout_latest(self):
|
||||||
return self._loop.run_until_complete(self._table.checkout_latest())
|
return LOOP.run(self._table.checkout_latest())
|
||||||
|
|
||||||
def list_indices(self):
|
def list_indices(self):
|
||||||
"""List all the indices on the table"""
|
"""List all the indices on the table"""
|
||||||
@@ -138,8 +138,25 @@ class RemoteTable(Table):
|
|||||||
*,
|
*,
|
||||||
replace: bool = False,
|
replace: bool = False,
|
||||||
with_position: bool = True,
|
with_position: bool = True,
|
||||||
|
# tokenizer configs:
|
||||||
|
base_tokenizer: str = "simple",
|
||||||
|
language: str = "English",
|
||||||
|
max_token_length: Optional[int] = 40,
|
||||||
|
lower_case: bool = True,
|
||||||
|
stem: bool = False,
|
||||||
|
remove_stop_words: bool = False,
|
||||||
|
ascii_folding: bool = False,
|
||||||
):
|
):
|
||||||
config = FTS(with_position=with_position)
|
config = FTS(
|
||||||
|
with_position=with_position,
|
||||||
|
base_tokenizer=base_tokenizer,
|
||||||
|
language=language,
|
||||||
|
max_token_length=max_token_length,
|
||||||
|
lower_case=lower_case,
|
||||||
|
stem=stem,
|
||||||
|
remove_stop_words=remove_stop_words,
|
||||||
|
ascii_folding=ascii_folding,
|
||||||
|
)
|
||||||
LOOP.run(self._table.create_index(column, config=config, replace=replace))
|
LOOP.run(self._table.create_index(column, config=config, replace=replace))
|
||||||
|
|
||||||
def create_index(
|
def create_index(
|
||||||
@@ -490,19 +507,13 @@ class RemoteTable(Table):
|
|||||||
return LOOP.run(self._table.count_rows(filter))
|
return LOOP.run(self._table.count_rows(filter))
|
||||||
|
|
||||||
def add_columns(self, transforms: Dict[str, str]):
|
def add_columns(self, transforms: Dict[str, str]):
|
||||||
raise NotImplementedError(
|
return LOOP.run(self._table.add_columns(transforms))
|
||||||
"add_columns() is not yet supported on the LanceDB cloud"
|
|
||||||
)
|
|
||||||
|
|
||||||
def alter_columns(self, alterations: Iterable[Dict[str, str]]):
|
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||||
raise NotImplementedError(
|
return LOOP.run(self._table.alter_columns(*alterations))
|
||||||
"alter_columns() is not yet supported on the LanceDB cloud"
|
|
||||||
)
|
|
||||||
|
|
||||||
def drop_columns(self, columns: Iterable[str]):
|
def drop_columns(self, columns: Iterable[str]):
|
||||||
raise NotImplementedError(
|
return LOOP.run(self._table.drop_columns(columns))
|
||||||
"drop_columns() is not yet supported on the LanceDB cloud"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def add_index(tbl: pa.Table, i: int) -> pa.Table:
|
def add_index(tbl: pa.Table, i: int) -> pa.Table:
|
||||||
|
|||||||
@@ -413,6 +413,8 @@ class Table(ABC):
|
|||||||
replace: bool = True,
|
replace: bool = True,
|
||||||
accelerator: Optional[str] = None,
|
accelerator: Optional[str] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
|
*,
|
||||||
|
num_bits: int = 8,
|
||||||
):
|
):
|
||||||
"""Create an index on the table.
|
"""Create an index on the table.
|
||||||
|
|
||||||
@@ -439,6 +441,9 @@ class Table(ABC):
|
|||||||
Only support "cuda" for now.
|
Only support "cuda" for now.
|
||||||
index_cache_size : int, optional
|
index_cache_size : int, optional
|
||||||
The size of the index cache in number of entries. Default value is 256.
|
The size of the index cache in number of entries. Default value is 256.
|
||||||
|
num_bits: int
|
||||||
|
The number of bits to encode sub-vectors. Only used with the IVF_PQ index.
|
||||||
|
Only 4 and 8 are supported.
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@@ -967,8 +972,6 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
Add new columns with defined values.
|
Add new columns with defined values.
|
||||||
|
|
||||||
This is not yet available in LanceDB Cloud.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
transforms: Dict[str, str]
|
transforms: Dict[str, str]
|
||||||
@@ -978,20 +981,21 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def alter_columns(self, alterations: Iterable[Dict[str, str]]):
|
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||||
"""
|
"""
|
||||||
Alter column names and nullability.
|
Alter column names and nullability.
|
||||||
|
|
||||||
This is not yet available in LanceDB Cloud.
|
|
||||||
|
|
||||||
alterations : Iterable[Dict[str, Any]]
|
alterations : Iterable[Dict[str, Any]]
|
||||||
A sequence of dictionaries, each with the following keys:
|
A sequence of dictionaries, each with the following keys:
|
||||||
- "path": str
|
- "path": str
|
||||||
The column path to alter. For a top-level column, this is the name.
|
The column path to alter. For a top-level column, this is the name.
|
||||||
For a nested column, this is the dot-separated path, e.g. "a.b.c".
|
For a nested column, this is the dot-separated path, e.g. "a.b.c".
|
||||||
- "name": str, optional
|
- "rename": str, optional
|
||||||
The new name of the column. If not specified, the column name is
|
The new name of the column. If not specified, the column name is
|
||||||
not changed.
|
not changed.
|
||||||
|
- "data_type": pyarrow.DataType, optional
|
||||||
|
The new data type of the column. Existing values will be cast
|
||||||
|
to this type. If not specified, the column data type is not changed.
|
||||||
- "nullable": bool, optional
|
- "nullable": bool, optional
|
||||||
Whether the column should be nullable. If not specified, the column
|
Whether the column should be nullable. If not specified, the column
|
||||||
nullability is not changed. Only non-nullable columns can be changed
|
nullability is not changed. Only non-nullable columns can be changed
|
||||||
@@ -1004,8 +1008,6 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
Drop columns from the table.
|
Drop columns from the table.
|
||||||
|
|
||||||
This is not yet available in LanceDB Cloud.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
columns : Iterable[str]
|
columns : Iterable[str]
|
||||||
@@ -1080,13 +1082,16 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
|
|||||||
index_cache_size: Optional[int] = None
|
index_cache_size: Optional[int] = None
|
||||||
read_consistency_interval: Optional[timedelta] = None
|
read_consistency_interval: Optional[timedelta] = None
|
||||||
last_consistency_check: Optional[float] = None
|
last_consistency_check: Optional[float] = None
|
||||||
|
storage_options: Optional[Dict[str, str]] = None
|
||||||
_dataset: Optional[LanceDataset] = None
|
_dataset: Optional[LanceDataset] = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dataset(self) -> LanceDataset:
|
def dataset(self) -> LanceDataset:
|
||||||
if not self._dataset:
|
if not self._dataset:
|
||||||
self._dataset = lance.dataset(
|
self._dataset = lance.dataset(
|
||||||
self.uri, index_cache_size=self.index_cache_size
|
self.uri,
|
||||||
|
index_cache_size=self.index_cache_size,
|
||||||
|
storage_options=self.storage_options,
|
||||||
)
|
)
|
||||||
self.last_consistency_check = time.monotonic()
|
self.last_consistency_check = time.monotonic()
|
||||||
elif self.read_consistency_interval is not None:
|
elif self.read_consistency_interval is not None:
|
||||||
@@ -1117,13 +1122,17 @@ class _LanceTimeTravelRef(_LanceDatasetRef):
|
|||||||
uri: str
|
uri: str
|
||||||
version: int
|
version: int
|
||||||
index_cache_size: Optional[int] = None
|
index_cache_size: Optional[int] = None
|
||||||
|
storage_options: Optional[Dict[str, str]] = None
|
||||||
_dataset: Optional[LanceDataset] = None
|
_dataset: Optional[LanceDataset] = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dataset(self) -> LanceDataset:
|
def dataset(self) -> LanceDataset:
|
||||||
if not self._dataset:
|
if not self._dataset:
|
||||||
self._dataset = lance.dataset(
|
self._dataset = lance.dataset(
|
||||||
self.uri, version=self.version, index_cache_size=self.index_cache_size
|
self.uri,
|
||||||
|
version=self.version,
|
||||||
|
index_cache_size=self.index_cache_size,
|
||||||
|
storage_options=self.storage_options,
|
||||||
)
|
)
|
||||||
return self._dataset
|
return self._dataset
|
||||||
|
|
||||||
@@ -1172,24 +1181,27 @@ class LanceTable(Table):
|
|||||||
uri=self._dataset_uri,
|
uri=self._dataset_uri,
|
||||||
version=version,
|
version=version,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
|
storage_options=connection.storage_options,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self._ref = _LanceLatestDatasetRef(
|
self._ref = _LanceLatestDatasetRef(
|
||||||
uri=self._dataset_uri,
|
uri=self._dataset_uri,
|
||||||
read_consistency_interval=connection.read_consistency_interval,
|
read_consistency_interval=connection.read_consistency_interval,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
|
storage_options=connection.storage_options,
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def open(cls, db, name, **kwargs):
|
def open(cls, db, name, **kwargs):
|
||||||
tbl = cls(db, name, **kwargs)
|
tbl = cls(db, name, **kwargs)
|
||||||
fs, path = fs_from_uri(tbl._dataset_path)
|
|
||||||
file_info = fs.get_file_info(path)
|
# check the dataset exists
|
||||||
if file_info.type != pa.fs.FileType.Directory:
|
try:
|
||||||
raise FileNotFoundError(
|
tbl.version
|
||||||
f"Table {name} does not exist."
|
except ValueError as e:
|
||||||
f"Please first call db.create_table({name}, data)"
|
if "Not found:" in str(e):
|
||||||
)
|
raise FileNotFoundError(f"Table {name} does not exist")
|
||||||
|
raise e
|
||||||
|
|
||||||
return tbl
|
return tbl
|
||||||
|
|
||||||
@@ -1423,6 +1435,8 @@ class LanceTable(Table):
|
|||||||
accelerator: Optional[str] = None,
|
accelerator: Optional[str] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
index_type="IVF_PQ",
|
index_type="IVF_PQ",
|
||||||
|
*,
|
||||||
|
num_bits: int = 8,
|
||||||
):
|
):
|
||||||
"""Create an index on the table."""
|
"""Create an index on the table."""
|
||||||
self._dataset_mut.create_index(
|
self._dataset_mut.create_index(
|
||||||
@@ -1434,6 +1448,7 @@ class LanceTable(Table):
|
|||||||
replace=replace,
|
replace=replace,
|
||||||
accelerator=accelerator,
|
accelerator=accelerator,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
|
num_bits=num_bits,
|
||||||
)
|
)
|
||||||
|
|
||||||
def create_scalar_index(
|
def create_scalar_index(
|
||||||
@@ -1617,11 +1632,7 @@ class LanceTable(Table):
|
|||||||
on_bad_vectors=on_bad_vectors,
|
on_bad_vectors=on_bad_vectors,
|
||||||
fill_value=fill_value,
|
fill_value=fill_value,
|
||||||
)
|
)
|
||||||
# Access the dataset_mut property to ensure that the dataset is mutable.
|
self._ref.dataset_mut.insert(data, mode=mode, schema=self.schema)
|
||||||
self._ref.dataset_mut
|
|
||||||
self._ref.dataset = lance.write_dataset(
|
|
||||||
data, self._dataset_uri, schema=self.schema, mode=mode
|
|
||||||
)
|
|
||||||
|
|
||||||
def merge(
|
def merge(
|
||||||
self,
|
self,
|
||||||
@@ -1905,7 +1916,13 @@ class LanceTable(Table):
|
|||||||
|
|
||||||
empty = pa.Table.from_batches([], schema=schema)
|
empty = pa.Table.from_batches([], schema=schema)
|
||||||
try:
|
try:
|
||||||
lance.write_dataset(empty, tbl._dataset_uri, schema=schema, mode=mode)
|
lance.write_dataset(
|
||||||
|
empty,
|
||||||
|
tbl._dataset_uri,
|
||||||
|
schema=schema,
|
||||||
|
mode=mode,
|
||||||
|
storage_options=db.storage_options,
|
||||||
|
)
|
||||||
except OSError as err:
|
except OSError as err:
|
||||||
if "Dataset already exists" in str(err) and exist_ok:
|
if "Dataset already exists" in str(err) and exist_ok:
|
||||||
if tbl.schema != schema:
|
if tbl.schema != schema:
|
||||||
@@ -2923,6 +2940,53 @@ class AsyncTable:
|
|||||||
|
|
||||||
return await self._inner.update(updates_sql, where)
|
return await self._inner.update(updates_sql, where)
|
||||||
|
|
||||||
|
async def add_columns(self, transforms: Dict[str, str]):
|
||||||
|
"""
|
||||||
|
Add new columns with defined values.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
transforms: Dict[str, str]
|
||||||
|
A map of column name to a SQL expression to use to calculate the
|
||||||
|
value of the new column. These expressions will be evaluated for
|
||||||
|
each row in the table, and can reference existing columns.
|
||||||
|
"""
|
||||||
|
await self._inner.add_columns(list(transforms.items()))
|
||||||
|
|
||||||
|
async def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||||
|
"""
|
||||||
|
Alter column names and nullability.
|
||||||
|
|
||||||
|
alterations : Iterable[Dict[str, Any]]
|
||||||
|
A sequence of dictionaries, each with the following keys:
|
||||||
|
- "path": str
|
||||||
|
The column path to alter. For a top-level column, this is the name.
|
||||||
|
For a nested column, this is the dot-separated path, e.g. "a.b.c".
|
||||||
|
- "rename": str, optional
|
||||||
|
The new name of the column. If not specified, the column name is
|
||||||
|
not changed.
|
||||||
|
- "data_type": pyarrow.DataType, optional
|
||||||
|
The new data type of the column. Existing values will be cast
|
||||||
|
to this type. If not specified, the column data type is not changed.
|
||||||
|
- "nullable": bool, optional
|
||||||
|
Whether the column should be nullable. If not specified, the column
|
||||||
|
nullability is not changed. Only non-nullable columns can be changed
|
||||||
|
to nullable. Currently, you cannot change a nullable column to
|
||||||
|
non-nullable.
|
||||||
|
"""
|
||||||
|
await self._inner.alter_columns(alterations)
|
||||||
|
|
||||||
|
async def drop_columns(self, columns: Iterable[str]):
|
||||||
|
"""
|
||||||
|
Drop columns from the table.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
columns : Iterable[str]
|
||||||
|
The names of the columns to drop.
|
||||||
|
"""
|
||||||
|
await self._inner.drop_columns(columns)
|
||||||
|
|
||||||
async def version(self) -> int:
|
async def version(self) -> int:
|
||||||
"""
|
"""
|
||||||
Retrieve the version of the table
|
Retrieve the version of the table
|
||||||
|
|||||||
21
python/python/tests/test_duckdb.py
Normal file
21
python/python/tests/test_duckdb.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
import duckdb
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
from lancedb.integrations.pyarrow import PyarrowDatasetAdapter
|
||||||
|
|
||||||
|
|
||||||
|
def test_basic_query(tmp_path):
|
||||||
|
data = pa.table({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
|
||||||
|
conn = lancedb.connect(tmp_path)
|
||||||
|
tbl = conn.create_table("test", data)
|
||||||
|
|
||||||
|
adapter = PyarrowDatasetAdapter(tbl) # noqa: F841
|
||||||
|
|
||||||
|
duck_conn = duckdb.connect()
|
||||||
|
|
||||||
|
results = duck_conn.sql("SELECT SUM(x) FROM adapter").fetchall()
|
||||||
|
assert results[0][0] == 10
|
||||||
|
|
||||||
|
results = duck_conn.sql("SELECT SUM(y) FROM adapter").fetchall()
|
||||||
|
assert results[0][0] == 26
|
||||||
111
python/python/tests/test_hybrid_query.py
Normal file
111
python/python/tests/test_hybrid_query.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
import pytest
|
||||||
|
import pytest_asyncio
|
||||||
|
|
||||||
|
from lancedb.index import FTS
|
||||||
|
from lancedb.table import AsyncTable
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
|
||||||
|
async def table(tmpdir_factory) -> AsyncTable:
|
||||||
|
tmp_path = str(tmpdir_factory.mktemp("data"))
|
||||||
|
db = await lancedb.connect_async(tmp_path)
|
||||||
|
data = pa.table(
|
||||||
|
{
|
||||||
|
"text": pa.array(["a", "b", "cat", "dog"]),
|
||||||
|
"vector": pa.array(
|
||||||
|
[[0.1, 0.1], [2, 2], [-0.1, -0.1], [0.5, -0.5]],
|
||||||
|
type=pa.list_(pa.float32(), list_size=2),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
table = await db.create_table("test", data)
|
||||||
|
await table.create_index("text", config=FTS(with_position=False))
|
||||||
|
return table
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_async_hybrid_query(table: AsyncTable):
|
||||||
|
result = await (
|
||||||
|
table.query().nearest_to([0.0, 0.4]).nearest_to_text("dog").limit(2).to_arrow()
|
||||||
|
)
|
||||||
|
assert len(result) == 2
|
||||||
|
# ensure we get results that would match well for text and vector
|
||||||
|
assert result["text"].to_pylist() == ["a", "dog"]
|
||||||
|
|
||||||
|
# ensure there is no rowid by default
|
||||||
|
assert "_rowid" not in result
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_async_hybrid_query_with_row_ids(table: AsyncTable):
|
||||||
|
result = await (
|
||||||
|
table.query()
|
||||||
|
.nearest_to([0.0, 0.4])
|
||||||
|
.nearest_to_text("dog")
|
||||||
|
.limit(2)
|
||||||
|
.with_row_id()
|
||||||
|
.to_arrow()
|
||||||
|
)
|
||||||
|
assert len(result) == 2
|
||||||
|
# ensure we get results that would match well for text and vector
|
||||||
|
assert result["text"].to_pylist() == ["a", "dog"]
|
||||||
|
assert result["_rowid"].to_pylist() == [0, 3]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_async_hybrid_query_filters(table: AsyncTable):
|
||||||
|
# test that query params are passed down from the regular builder to
|
||||||
|
# child vector/fts builders
|
||||||
|
result = await (
|
||||||
|
table.query()
|
||||||
|
.where("text not in ('a', 'dog')")
|
||||||
|
.nearest_to([0.3, 0.3])
|
||||||
|
.nearest_to_text("*a*")
|
||||||
|
.limit(2)
|
||||||
|
.to_arrow()
|
||||||
|
)
|
||||||
|
assert len(result) == 2
|
||||||
|
# ensure we get results that would match well for text and vector
|
||||||
|
assert result["text"].to_pylist() == ["cat", "b"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_async_hybrid_query_default_limit(table: AsyncTable):
|
||||||
|
# add 100 new rows
|
||||||
|
new_rows = []
|
||||||
|
for i in range(100):
|
||||||
|
if i < 2:
|
||||||
|
new_rows.append({"text": "close_vec", "vector": [0.1, 0.1]})
|
||||||
|
else:
|
||||||
|
new_rows.append({"text": "far_vec", "vector": [5 * i, 5 * i]})
|
||||||
|
await table.add(new_rows)
|
||||||
|
result = await (
|
||||||
|
table.query().nearest_to_text("dog").nearest_to([0.1, 0.1]).to_arrow()
|
||||||
|
)
|
||||||
|
|
||||||
|
# assert we got the default limit of 10
|
||||||
|
assert len(result) == 10
|
||||||
|
|
||||||
|
# assert we got the closest vectors and the text searched for
|
||||||
|
texts = result["text"].to_pylist()
|
||||||
|
assert texts.count("close_vec") == 2
|
||||||
|
assert texts.count("dog") == 1
|
||||||
|
assert texts.count("a") == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_explain_plan(table: AsyncTable):
|
||||||
|
plan = await (
|
||||||
|
table.query().nearest_to_text("dog").nearest_to([0.1, 0.1]).explain_plan(True)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert "Vector Search Plan" in plan
|
||||||
|
assert "KNNVectorDistance" in plan
|
||||||
|
assert "FTS Search Plan" in plan
|
||||||
|
assert "LanceScan" in plan
|
||||||
@@ -108,6 +108,29 @@ async def test_create_vector_index(some_table: AsyncTable):
|
|||||||
assert stats.num_indices == 1
|
assert stats.num_indices == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_create_4bit_ivfpq_index(some_table: AsyncTable):
|
||||||
|
# Can create
|
||||||
|
await some_table.create_index("vector", config=IvfPq(num_bits=4))
|
||||||
|
# Can recreate if replace=True
|
||||||
|
await some_table.create_index("vector", config=IvfPq(num_bits=4), replace=True)
|
||||||
|
# Can't recreate if replace=False
|
||||||
|
with pytest.raises(RuntimeError, match="already exists"):
|
||||||
|
await some_table.create_index("vector", replace=False)
|
||||||
|
indices = await some_table.list_indices()
|
||||||
|
assert len(indices) == 1
|
||||||
|
assert indices[0].index_type == "IvfPq"
|
||||||
|
assert indices[0].columns == ["vector"]
|
||||||
|
assert indices[0].name == "vector_idx"
|
||||||
|
|
||||||
|
stats = await some_table.index_stats("vector_idx")
|
||||||
|
assert stats.index_type == "IVF_PQ"
|
||||||
|
assert stats.distance_type == "l2"
|
||||||
|
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||||
|
assert stats.num_unindexed_rows == 0
|
||||||
|
assert stats.num_indices == 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_create_hnswpq_index(some_table: AsyncTable):
|
async def test_create_hnswpq_index(some_table: AsyncTable):
|
||||||
await some_table.create_index("vector", config=HnswPq(num_partitions=10))
|
await some_table.create_index("vector", config=HnswPq(num_partitions=10))
|
||||||
|
|||||||
47
python/python/tests/test_pyarrow.py
Normal file
47
python/python/tests/test_pyarrow.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
from lancedb.integrations.pyarrow import PyarrowDatasetAdapter
|
||||||
|
|
||||||
|
|
||||||
|
def test_dataset_adapter(tmp_path):
|
||||||
|
data = pa.table({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
|
||||||
|
conn = lancedb.connect(tmp_path)
|
||||||
|
tbl = conn.create_table("test", data)
|
||||||
|
|
||||||
|
adapter = PyarrowDatasetAdapter(tbl)
|
||||||
|
|
||||||
|
assert adapter.count_rows() == 4
|
||||||
|
assert adapter.count_rows("x > 2") == 2
|
||||||
|
assert adapter.schema == data.schema
|
||||||
|
assert adapter.head(2) == data.slice(0, 2)
|
||||||
|
assert adapter.to_table() == data
|
||||||
|
assert adapter.to_batches().read_all() == data
|
||||||
|
assert adapter.scanner().to_table() == data
|
||||||
|
assert adapter.scanner().to_batches().read_all() == data
|
||||||
|
|
||||||
|
assert adapter.scanner().projected_schema == data.schema
|
||||||
|
assert adapter.scanner(columns=["x"]).projected_schema == pa.schema(
|
||||||
|
[data.schema.field("x")]
|
||||||
|
)
|
||||||
|
assert adapter.scanner(columns=["x"]).to_table() == pa.table({"x": [1, 2, 3, 4]})
|
||||||
|
|
||||||
|
# Make sure we bypass the limit
|
||||||
|
data = pa.table({"x": range(100)})
|
||||||
|
tbl = conn.create_table("test2", data)
|
||||||
|
|
||||||
|
adapter = PyarrowDatasetAdapter(tbl)
|
||||||
|
|
||||||
|
assert adapter.count_rows() == 100
|
||||||
|
assert adapter.to_table().num_rows == 100
|
||||||
|
assert adapter.head(10).num_rows == 10
|
||||||
|
|
||||||
|
# Empty table
|
||||||
|
tbl = conn.create_table("test3", None, schema=pa.schema({"x": pa.int64()}))
|
||||||
|
adapter = PyarrowDatasetAdapter(tbl)
|
||||||
|
|
||||||
|
assert adapter.count_rows() == 0
|
||||||
|
assert adapter.to_table().num_rows == 0
|
||||||
|
assert adapter.head(10).num_rows == 0
|
||||||
|
|
||||||
|
assert adapter.scanner().projected_schema == pa.schema({"x": pa.int64()})
|
||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
import unittest.mock as mock
|
import unittest.mock as mock
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import lancedb
|
import lancedb
|
||||||
from lancedb.index import IvfPq
|
from lancedb.index import IvfPq
|
||||||
@@ -384,3 +385,19 @@ async def test_query_to_list_async(table_async: AsyncTable):
|
|||||||
assert len(list) == 2
|
assert len(list) == 2
|
||||||
assert list[0]["vector"] == [1, 2]
|
assert list[0]["vector"] == [1, 2]
|
||||||
assert list[1]["vector"] == [3, 4]
|
assert list[1]["vector"] == [3, 4]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_query_with_f16(tmp_path: Path):
|
||||||
|
db = await lancedb.connect_async(tmp_path)
|
||||||
|
f16_arr = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float16)
|
||||||
|
|
||||||
|
df = pa.table(
|
||||||
|
{
|
||||||
|
"vector": pa.FixedSizeListArray.from_arrays(f16_arr, 2),
|
||||||
|
"id": pa.array([1, 2]),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
tbl = await db.create_table("test", df)
|
||||||
|
results = await tbl.vector_search([np.float16(1), np.float16(2)]).to_pandas()
|
||||||
|
assert len(results) == 2
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user