mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 14:49:57 +00:00
Compare commits
24 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fe655a15f0 | ||
|
|
9d0af794d0 | ||
|
|
048a2d10f8 | ||
|
|
c78a9849b4 | ||
|
|
c663085203 | ||
|
|
8b628854d5 | ||
|
|
a8d8c17b2a | ||
|
|
3c487e5fc7 | ||
|
|
d6219d687c | ||
|
|
239f725b32 | ||
|
|
5f261cf2d8 | ||
|
|
79eaa52184 | ||
|
|
bd82e1f66d | ||
|
|
ba34c3bee1 | ||
|
|
d4d0873e2b | ||
|
|
12c7bd18a5 | ||
|
|
c6bf6a25d6 | ||
|
|
c998a47e17 | ||
|
|
d8c758513c | ||
|
|
3795e02ee3 | ||
|
|
c7d424b2f3 | ||
|
|
1efb9914ee | ||
|
|
83e26a231e | ||
|
|
72a17b2de4 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.14.0-beta.0"
|
current_version = "0.14.0-beta.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
4
.github/workflows/docs.yml
vendored
4
.github/workflows/docs.yml
vendored
@@ -72,9 +72,9 @@ jobs:
|
|||||||
- name: Setup Pages
|
- name: Setup Pages
|
||||||
uses: actions/configure-pages@v2
|
uses: actions/configure-pages@v2
|
||||||
- name: Upload artifact
|
- name: Upload artifact
|
||||||
uses: actions/upload-pages-artifact@v1
|
uses: actions/upload-pages-artifact@v3
|
||||||
with:
|
with:
|
||||||
path: "docs/site"
|
path: "docs/site"
|
||||||
- name: Deploy to GitHub Pages
|
- name: Deploy to GitHub Pages
|
||||||
id: deployment
|
id: deployment
|
||||||
uses: actions/deploy-pages@v1
|
uses: actions/deploy-pages@v4
|
||||||
|
|||||||
304
.github/workflows/npm-publish.yml
vendored
304
.github/workflows/npm-publish.yml
vendored
@@ -143,7 +143,7 @@ jobs:
|
|||||||
|
|
||||||
node-linux-musl:
|
node-linux-musl:
|
||||||
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
|
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
|
||||||
runs-on: ${{ matrix.config.runner }}
|
runs-on: ubuntu-latest
|
||||||
container: alpine:edge
|
container: alpine:edge
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -152,10 +152,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- arch: x86_64
|
- arch: x86_64
|
||||||
runner: ubuntu-latest
|
|
||||||
- arch: aarch64
|
- arch: aarch64
|
||||||
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
|
||||||
runner: buildjet-16vcpu-ubuntu-2204-arm
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -249,7 +246,7 @@ jobs:
|
|||||||
|
|
||||||
nodejs-linux-musl:
|
nodejs-linux-musl:
|
||||||
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
|
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
|
||||||
runs-on: ${{ matrix.config.runner }}
|
runs-on: ubuntu-latest
|
||||||
container: alpine:edge
|
container: alpine:edge
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -258,10 +255,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- arch: x86_64
|
- arch: x86_64
|
||||||
runner: ubuntu-latest
|
|
||||||
- arch: aarch64
|
- arch: aarch64
|
||||||
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
|
||||||
runner: buildjet-16vcpu-ubuntu-2204-arm
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -340,109 +334,50 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
node/dist/lancedb-vectordb-win32*.tgz
|
node/dist/lancedb-vectordb-win32*.tgz
|
||||||
|
|
||||||
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
|
node-windows-arm64:
|
||||||
# node-windows-arm64:
|
name: vectordb ${{ matrix.config.arch }}-pc-windows-msvc
|
||||||
# name: vectordb win32-arm64-msvc
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
# runs-on: windows-4x-arm
|
runs-on: ubuntu-latest
|
||||||
# if: startsWith(github.ref, 'refs/tags/v')
|
container: alpine:edge
|
||||||
# steps:
|
strategy:
|
||||||
# - uses: actions/checkout@v4
|
fail-fast: false
|
||||||
# - name: Install Git
|
matrix:
|
||||||
# run: |
|
config:
|
||||||
# Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
|
# - arch: x86_64
|
||||||
# Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
|
- arch: aarch64
|
||||||
# shell: powershell
|
steps:
|
||||||
# - name: Add Git to PATH
|
- name: Checkout
|
||||||
# run: |
|
uses: actions/checkout@v4
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
|
- name: Install dependencies
|
||||||
# $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
|
run: |
|
||||||
# shell: powershell
|
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
|
||||||
# - name: Configure Git symlinks
|
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
|
||||||
# run: git config --global core.symlinks true
|
echo "source $HOME/.cargo/env" >> saved_env
|
||||||
# - uses: actions/checkout@v4
|
echo "export CC=clang" >> saved_env
|
||||||
# - uses: actions/setup-python@v5
|
echo "export AR=llvm-ar" >> saved_env
|
||||||
# with:
|
source "$HOME/.cargo/env"
|
||||||
# python-version: "3.13"
|
rustup target add ${{ matrix.config.arch }}-pc-windows-msvc --toolchain 1.80.0
|
||||||
# - name: Install Visual Studio Build Tools
|
(mkdir -p sysroot && cd sysroot && sh ../ci/sysroot-${{ matrix.config.arch }}-pc-windows-msvc.sh)
|
||||||
# run: |
|
echo "export C_INCLUDE_PATH=/usr/${{ matrix.config.arch }}-pc-windows-msvc/usr/include" >> saved_env
|
||||||
# Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
|
echo "export CARGO_BUILD_TARGET=${{ matrix.config.arch }}-pc-windows-msvc" >> saved_env
|
||||||
# Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
|
- name: Configure x86_64 build
|
||||||
# "--installPath", "C:\BuildTools", `
|
if: ${{ matrix.config.arch == 'x86_64' }}
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
|
run: |
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
|
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=+crt-static,+avx2,+fma,+f16c -Clinker=lld -Clink-arg=/LIBPATH:/usr/x86_64-pc-windows-msvc/usr/lib'" >> saved_env
|
||||||
# "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
|
- name: Configure aarch64 build
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
|
if: ${{ matrix.config.arch == 'aarch64' }}
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
|
run: |
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
|
echo "export RUSTFLAGS='-Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib'" >> saved_env
|
||||||
# shell: powershell
|
- name: Build Windows Artifacts
|
||||||
# - name: Add Visual Studio Build Tools to PATH
|
run: |
|
||||||
# run: |
|
source ./saved_env
|
||||||
# $vsPath = "C:\BuildTools\VC\Tools\MSVC"
|
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-pc-windows-msvc
|
||||||
# $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
|
- name: Upload Windows Artifacts
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
|
uses: actions/upload-artifact@v4
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
|
with:
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
|
name: node-native-windows-${{ matrix.config.arch }}
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
|
path: |
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
|
node/dist/lancedb-vectordb-win32*.tgz
|
||||||
|
|
||||||
# # Add MSVC runtime libraries to LIB
|
|
||||||
# $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
|
|
||||||
# Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
|
|
||||||
|
|
||||||
# # Add INCLUDE paths
|
|
||||||
# $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
|
|
||||||
# "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
|
|
||||||
# Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Install Rust
|
|
||||||
# run: |
|
|
||||||
# Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
|
||||||
# .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add Rust to PATH
|
|
||||||
# run: |
|
|
||||||
# Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
|
|
||||||
# shell: powershell
|
|
||||||
|
|
||||||
# - uses: Swatinem/rust-cache@v2
|
|
||||||
# with:
|
|
||||||
# workspaces: rust
|
|
||||||
# - name: Install 7-Zip ARM
|
|
||||||
# run: |
|
|
||||||
# New-Item -Path 'C:\7zip' -ItemType Directory
|
|
||||||
# Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
|
|
||||||
# Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add 7-Zip to PATH
|
|
||||||
# run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Install Protoc v21.12
|
|
||||||
# working-directory: C:\
|
|
||||||
# run: |
|
|
||||||
# if (Test-Path 'C:\protoc') {
|
|
||||||
# Write-Host "Protoc directory exists, skipping installation"
|
|
||||||
# return
|
|
||||||
# }
|
|
||||||
# New-Item -Path 'C:\protoc' -ItemType Directory
|
|
||||||
# Set-Location C:\protoc
|
|
||||||
# Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
|
||||||
# & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add Protoc to PATH
|
|
||||||
# run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Build Windows native node modules
|
|
||||||
# run: .\ci\build_windows_artifacts.ps1 aarch64-pc-windows-msvc
|
|
||||||
# - name: Upload Windows ARM64 Artifacts
|
|
||||||
# uses: actions/upload-artifact@v4
|
|
||||||
# with:
|
|
||||||
# name: node-native-windows-arm64
|
|
||||||
# path: |
|
|
||||||
# node/dist/*.node
|
|
||||||
|
|
||||||
nodejs-windows:
|
nodejs-windows:
|
||||||
name: lancedb ${{ matrix.target }}
|
name: lancedb ${{ matrix.target }}
|
||||||
@@ -478,103 +413,57 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
nodejs/dist/*.node
|
nodejs/dist/*.node
|
||||||
|
|
||||||
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
|
nodejs-windows-arm64:
|
||||||
# nodejs-windows-arm64:
|
name: lancedb ${{ matrix.config.arch }}-pc-windows-msvc
|
||||||
# name: lancedb win32-arm64-msvc
|
# Only runs on tags that matches the make-release action
|
||||||
# runs-on: windows-4x-arm
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
# if: startsWith(github.ref, 'refs/tags/v')
|
runs-on: ubuntu-latest
|
||||||
# steps:
|
container: alpine:edge
|
||||||
# - uses: actions/checkout@v4
|
strategy:
|
||||||
# - name: Install Git
|
fail-fast: false
|
||||||
# run: |
|
matrix:
|
||||||
# Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
|
config:
|
||||||
# Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
|
# - arch: x86_64
|
||||||
# shell: powershell
|
- arch: aarch64
|
||||||
# - name: Add Git to PATH
|
steps:
|
||||||
# run: |
|
- name: Checkout
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
|
uses: actions/checkout@v4
|
||||||
# $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
|
- name: Install dependencies
|
||||||
# shell: powershell
|
run: |
|
||||||
# - name: Configure Git symlinks
|
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
|
||||||
# run: git config --global core.symlinks true
|
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
|
||||||
# - uses: actions/checkout@v4
|
echo "source $HOME/.cargo/env" >> saved_env
|
||||||
# - uses: actions/setup-python@v5
|
echo "export CC=clang" >> saved_env
|
||||||
# with:
|
echo "export AR=llvm-ar" >> saved_env
|
||||||
# python-version: "3.13"
|
source "$HOME/.cargo/env"
|
||||||
# - name: Install Visual Studio Build Tools
|
rustup target add ${{ matrix.config.arch }}-pc-windows-msvc --toolchain 1.80.0
|
||||||
# run: |
|
(mkdir -p sysroot && cd sysroot && sh ../ci/sysroot-${{ matrix.config.arch }}-pc-windows-msvc.sh)
|
||||||
# Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
|
echo "export C_INCLUDE_PATH=/usr/${{ matrix.config.arch }}-pc-windows-msvc/usr/include" >> saved_env
|
||||||
# Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
|
echo "export CARGO_BUILD_TARGET=${{ matrix.config.arch }}-pc-windows-msvc" >> saved_env
|
||||||
# "--installPath", "C:\BuildTools", `
|
printf '#!/bin/sh\ncargo "$@"' > $HOME/.cargo/bin/cargo-xwin
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
|
chmod u+x $HOME/.cargo/bin/cargo-xwin
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
|
- name: Configure x86_64 build
|
||||||
# "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
|
if: ${{ matrix.config.arch == 'x86_64' }}
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
|
run: |
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
|
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=+crt-static,+avx2,+fma,+f16c -Clinker=lld -Clink-arg=/LIBPATH:/usr/x86_64-pc-windows-msvc/usr/lib'" >> saved_env
|
||||||
# "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
|
- name: Configure aarch64 build
|
||||||
# shell: powershell
|
if: ${{ matrix.config.arch == 'aarch64' }}
|
||||||
# - name: Add Visual Studio Build Tools to PATH
|
run: |
|
||||||
# run: |
|
echo "export RUSTFLAGS='-Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib'" >> saved_env
|
||||||
# $vsPath = "C:\BuildTools\VC\Tools\MSVC"
|
- name: Build Windows Artifacts
|
||||||
# $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
|
run: |
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
|
source ./saved_env
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
|
bash ci/manylinux_node/build_lancedb.sh ${{ matrix.config.arch }}
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
|
- name: Upload Windows Artifacts
|
||||||
# Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
|
uses: actions/upload-artifact@v4
|
||||||
# Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
|
with:
|
||||||
|
name: nodejs-native-windows-${{ matrix.config.arch }}
|
||||||
# $env:LIB = ""
|
path: |
|
||||||
# Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
|
nodejs/dist/*.node
|
||||||
# shell: powershell
|
|
||||||
# - name: Install Rust
|
|
||||||
# run: |
|
|
||||||
# Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
|
||||||
# .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add Rust to PATH
|
|
||||||
# run: |
|
|
||||||
# Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
|
|
||||||
# shell: powershell
|
|
||||||
|
|
||||||
# - uses: Swatinem/rust-cache@v2
|
|
||||||
# with:
|
|
||||||
# workspaces: rust
|
|
||||||
# - name: Install 7-Zip ARM
|
|
||||||
# run: |
|
|
||||||
# New-Item -Path 'C:\7zip' -ItemType Directory
|
|
||||||
# Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
|
|
||||||
# Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add 7-Zip to PATH
|
|
||||||
# run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Install Protoc v21.12
|
|
||||||
# working-directory: C:\
|
|
||||||
# run: |
|
|
||||||
# if (Test-Path 'C:\protoc') {
|
|
||||||
# Write-Host "Protoc directory exists, skipping installation"
|
|
||||||
# return
|
|
||||||
# }
|
|
||||||
# New-Item -Path 'C:\protoc' -ItemType Directory
|
|
||||||
# Set-Location C:\protoc
|
|
||||||
# Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
|
||||||
# & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Add Protoc to PATH
|
|
||||||
# run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
|
||||||
# shell: powershell
|
|
||||||
# - name: Build Windows native node modules
|
|
||||||
# run: .\ci\build_windows_artifacts_nodejs.ps1 aarch64-pc-windows-msvc
|
|
||||||
# - name: Upload Windows ARM64 Artifacts
|
|
||||||
# uses: actions/upload-artifact@v4
|
|
||||||
# with:
|
|
||||||
# name: nodejs-native-windows-arm64
|
|
||||||
# path: |
|
|
||||||
# nodejs/dist/*.node
|
|
||||||
|
|
||||||
release:
|
release:
|
||||||
name: vectordb NPM Publish
|
name: vectordb NPM Publish
|
||||||
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows]
|
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows, node-windows-arm64]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -614,7 +503,7 @@ jobs:
|
|||||||
|
|
||||||
release-nodejs:
|
release-nodejs:
|
||||||
name: lancedb NPM Publish
|
name: lancedb NPM Publish
|
||||||
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows]
|
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows, nodejs-windows-arm64]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -672,6 +561,7 @@ jobs:
|
|||||||
SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
|
SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
|
||||||
|
|
||||||
update-package-lock:
|
update-package-lock:
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
needs: [release]
|
needs: [release]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
@@ -689,6 +579,7 @@ jobs:
|
|||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
update-package-lock-nodejs:
|
update-package-lock-nodejs:
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
needs: [release-nodejs]
|
needs: [release-nodejs]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
@@ -706,6 +597,7 @@ jobs:
|
|||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
gh-release:
|
gh-release:
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: write
|
||||||
|
|||||||
2
.github/workflows/pypi-publish.yml
vendored
2
.github/workflows/pypi-publish.yml
vendored
@@ -83,7 +83,7 @@ jobs:
|
|||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v4
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: 3.8
|
python-version: 3.12
|
||||||
- uses: ./.github/workflows/build_windows_wheel
|
- uses: ./.github/workflows/build_windows_wheel
|
||||||
with:
|
with:
|
||||||
python-minor-version: 8
|
python-minor-version: 8
|
||||||
|
|||||||
1
.github/workflows/upload_wheel/action.yml
vendored
1
.github/workflows/upload_wheel/action.yml
vendored
@@ -17,6 +17,7 @@ runs:
|
|||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install twine
|
pip install twine
|
||||||
|
python3 -m pip install --upgrade pkginfo
|
||||||
- name: Choose repo
|
- name: Choose repo
|
||||||
shell: bash
|
shell: bash
|
||||||
id: choose_repo
|
id: choose_repo
|
||||||
|
|||||||
36
Cargo.toml
36
Cargo.toml
@@ -23,27 +23,27 @@ rust-version = "1.80.0" # TO
|
|||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.20.0", "features" = [
|
lance = { "version" = "=0.20.0", "features" = [
|
||||||
"dynamodb",
|
"dynamodb",
|
||||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
] }
|
||||||
lance-io = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-io = "0.20.0"
|
||||||
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-index = "0.20.0"
|
||||||
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-linalg = "0.20.0"
|
||||||
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-table = "0.20.0"
|
||||||
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-testing = "0.20.0"
|
||||||
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-datafusion = "0.20.0"
|
||||||
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
lance-encoding = "0.20.0"
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "52.2", optional = false }
|
arrow = { version = "53.2", optional = false }
|
||||||
arrow-array = "52.2"
|
arrow-array = "53.2"
|
||||||
arrow-data = "52.2"
|
arrow-data = "53.2"
|
||||||
arrow-ipc = "52.2"
|
arrow-ipc = "53.2"
|
||||||
arrow-ord = "52.2"
|
arrow-ord = "53.2"
|
||||||
arrow-schema = "52.2"
|
arrow-schema = "53.2"
|
||||||
arrow-arith = "52.2"
|
arrow-arith = "53.2"
|
||||||
arrow-cast = "52.2"
|
arrow-cast = "53.2"
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
chrono = "0.4.35"
|
chrono = "0.4.35"
|
||||||
datafusion-common = "41.0"
|
datafusion-common = "42.0"
|
||||||
datafusion-physical-plan = "41.0"
|
datafusion-physical-plan = "42.0"
|
||||||
env_logger = "0.10"
|
env_logger = "0.10"
|
||||||
half = { "version" = "=2.4.1", default-features = false, features = [
|
half = { "version" = "=2.4.1", default-features = false, features = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
|||||||
@@ -18,4 +18,4 @@ FILE=$HOME/.bashrc && test -f $FILE && source $FILE
|
|||||||
cd node
|
cd node
|
||||||
npm ci
|
npm ci
|
||||||
npm run build-release
|
npm run build-release
|
||||||
npm run pack-build -t $TARGET_TRIPLE
|
npm run pack-build -- -t $TARGET_TRIPLE
|
||||||
|
|||||||
105
ci/sysroot-aarch64-pc-windows-msvc.sh
Normal file
105
ci/sysroot-aarch64-pc-windows-msvc.sh
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# https://github.com/mstorsjo/msvc-wine/blob/master/vsdownload.py
|
||||||
|
# https://github.com/mozilla/gecko-dev/blob/6027d1d91f2d3204a3992633b3ef730ff005fc64/build/vs/vs2022-car.yaml
|
||||||
|
|
||||||
|
# function dl() {
|
||||||
|
# curl -O https://download.visualstudio.microsoft.com/download/pr/$1
|
||||||
|
# }
|
||||||
|
|
||||||
|
# [[.h]]
|
||||||
|
|
||||||
|
# "id": "Win11SDK_10.0.26100"
|
||||||
|
# "version": "10.0.26100.7"
|
||||||
|
|
||||||
|
# libucrt.lib
|
||||||
|
|
||||||
|
# example: <assert.h>
|
||||||
|
# dir: ucrt/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ee3a5fc6e9fc832af7295b138e93839/universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b1aa09b90fe314aceb090f6ec7626624/16ab2ea2187acffa6435e334796c8c89.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/400609bb0ff5804e36dbe6dcd42a7f01/6ee7bbee8435130a869cf971694fd9e2.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ac327317abb865a0e3f56b2faefa918/78fa3c824c2c48bd4a49ab5969adaaf7.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/f034bc0b2680f67dccd4bfeea3d0f932/7afc7b670accd8e3cc94cfffd516f5cb.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7ed5e12f9d50f80825a8b27838cf4c7f/96076045170fe5db6d5dcf14b6f6688e.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/764edc185a696bda9e07df8891dddbbb/a1e2a83aa8a71c48c742eeaff6e71928.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/66854bedc6dbd5ccb5dd82c8e2412231/b2f03f34ff83ec013b9e45c7cd8e8a73.cab
|
||||||
|
|
||||||
|
# example: <windows.h>
|
||||||
|
# dir: um/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b286efac4d83a54fc49190bddef1edc9/windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/e0dc3811d92ab96fcb72bf63d6c08d71/766c0ffd568bbb31bf7fb6793383e24a.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/613503da4b5628768497822826aed39f/8125ee239710f33ea485965f76fae646.cab
|
||||||
|
|
||||||
|
# example: <winapifamily.h>
|
||||||
|
# dir: /shared
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/122979f0348d3a2a36b6aa1a111d5d0c/windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/766e04beecdfccff39e91dd9eb32834a/e89e3dcbb016928c7e426238337d69eb.cab
|
||||||
|
|
||||||
|
|
||||||
|
# "id": "Microsoft.VisualC.14.16.CRT.Headers"
|
||||||
|
# "version": "14.16.27045"
|
||||||
|
|
||||||
|
# example: <vcruntime.h>
|
||||||
|
# dir: MSVC/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/87bbe41e09a2f83711e72696f49681429327eb7a4b90618c35667a6ba2e2880e/Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||||
|
|
||||||
|
# [[.lib]]
|
||||||
|
|
||||||
|
# advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib runtimeobject.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/944c4153b849a1f7d0c0404a4f1c05ea/windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5306aed3e1a38d1e8bef5934edeb2a9b/05047a45609f311645eebcac2739fc4c.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/13c8a73a0f5a6474040b26d016a26fab/13d68b8a7b6678a368e2d13ff4027521.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/149578fb3b621cdb61ee1813b9b3e791/463ad1b0783ebda908fd6c16a4abfe93.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5c986c4f393c6b09d5aec3b539e9fb4a/5a22e5cde814b041749fb271547f4dd5.cab
|
||||||
|
|
||||||
|
# fwpuclnt.lib arm64rt.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7a332420d812f7c1d41da865ae5a7c52/windows%20sdk%20desktop%20libs%20arm64-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/19de98ed4a79938d0045d19c047936b3/3e2f7be479e3679d700ce0782e4cc318.cab
|
||||||
|
|
||||||
|
# libcmt.lib libvcruntime.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/227f40682a88dc5fa0ccb9cadc9ad30af99ad1f1a75db63407587d079f60d035/Microsoft.VisualC.14.16.CRT.ARM64.Desktop.vsix
|
||||||
|
|
||||||
|
|
||||||
|
msiextract universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20desktop%20libs%20arm64-x86_en-us.msi
|
||||||
|
unzip -o Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||||
|
unzip -o Microsoft.VisualC.14.16.CRT.ARM64.Desktop.vsix
|
||||||
|
|
||||||
|
mkdir -p /usr/aarch64-pc-windows-msvc/usr/include
|
||||||
|
mkdir -p /usr/aarch64-pc-windows-msvc/usr/lib
|
||||||
|
|
||||||
|
# lowercase folder/file names
|
||||||
|
echo "$(find . -regex ".*/[^/]*[A-Z][^/]*")" | xargs -I{} sh -c 'mv "$(echo "{}" | sed -E '"'"'s/(.*\/)/\L\1/'"'"')" "$(echo "{}" | tr [A-Z] [a-z])"'
|
||||||
|
|
||||||
|
# .h
|
||||||
|
(cd 'program files/windows kits/10/include/10.0.26100.0' && cp -r ucrt/* um/* shared/* -t /usr/aarch64-pc-windows-msvc/usr/include)
|
||||||
|
|
||||||
|
cp -r contents/vc/tools/msvc/14.16.27023/include/* /usr/aarch64-pc-windows-msvc/usr/include
|
||||||
|
|
||||||
|
# lowercase #include "" and #include <>
|
||||||
|
find /usr/aarch64-pc-windows-msvc/usr/include -type f -exec sed -i -E 's/(#include <[^<>]*?[A-Z][^<>]*?>)|(#include "[^"]*?[A-Z][^"]*?")/\L\1\2/' "{}" ';'
|
||||||
|
|
||||||
|
# ARM intrinsics
|
||||||
|
# original dir: MSVC/
|
||||||
|
|
||||||
|
# '__n128x4' redefined in arm_neon.h
|
||||||
|
# "arm64_neon.h" included from intrin.h
|
||||||
|
|
||||||
|
(cd /usr/lib/llvm19/lib/clang/19/include && cp arm_neon.h intrin.h -t /usr/aarch64-pc-windows-msvc/usr/include)
|
||||||
|
|
||||||
|
# .lib
|
||||||
|
|
||||||
|
# _Interlocked intrinsics
|
||||||
|
# must always link with arm64rt.lib
|
||||||
|
# reason: https://developercommunity.visualstudio.com/t/libucrtlibstreamobj-error-lnk2001-unresolved-exter/1544787#T-ND1599818
|
||||||
|
# I don't understand the 'correct' fix for this, arm64rt.lib is supposed to be the workaround
|
||||||
|
|
||||||
|
(cd 'program files/windows kits/10/lib/10.0.26100.0/um/arm64' && cp advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib runtimeobject.lib fwpuclnt.lib arm64rt.lib -t /usr/aarch64-pc-windows-msvc/usr/lib)
|
||||||
|
|
||||||
|
(cd 'contents/vc/tools/msvc/14.16.27023/lib/arm64' && cp libcmt.lib libvcruntime.lib -t /usr/aarch64-pc-windows-msvc/usr/lib)
|
||||||
|
|
||||||
|
cp 'program files/windows kits/10/lib/10.0.26100.0/ucrt/arm64/libucrt.lib' /usr/aarch64-pc-windows-msvc/usr/lib
|
||||||
105
ci/sysroot-x86_64-pc-windows-msvc.sh
Normal file
105
ci/sysroot-x86_64-pc-windows-msvc.sh
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# https://github.com/mstorsjo/msvc-wine/blob/master/vsdownload.py
|
||||||
|
# https://github.com/mozilla/gecko-dev/blob/6027d1d91f2d3204a3992633b3ef730ff005fc64/build/vs/vs2022-car.yaml
|
||||||
|
|
||||||
|
# function dl() {
|
||||||
|
# curl -O https://download.visualstudio.microsoft.com/download/pr/$1
|
||||||
|
# }
|
||||||
|
|
||||||
|
# [[.h]]
|
||||||
|
|
||||||
|
# "id": "Win11SDK_10.0.26100"
|
||||||
|
# "version": "10.0.26100.7"
|
||||||
|
|
||||||
|
# libucrt.lib
|
||||||
|
|
||||||
|
# example: <assert.h>
|
||||||
|
# dir: ucrt/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ee3a5fc6e9fc832af7295b138e93839/universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b1aa09b90fe314aceb090f6ec7626624/16ab2ea2187acffa6435e334796c8c89.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/400609bb0ff5804e36dbe6dcd42a7f01/6ee7bbee8435130a869cf971694fd9e2.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ac327317abb865a0e3f56b2faefa918/78fa3c824c2c48bd4a49ab5969adaaf7.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/f034bc0b2680f67dccd4bfeea3d0f932/7afc7b670accd8e3cc94cfffd516f5cb.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7ed5e12f9d50f80825a8b27838cf4c7f/96076045170fe5db6d5dcf14b6f6688e.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/764edc185a696bda9e07df8891dddbbb/a1e2a83aa8a71c48c742eeaff6e71928.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/66854bedc6dbd5ccb5dd82c8e2412231/b2f03f34ff83ec013b9e45c7cd8e8a73.cab
|
||||||
|
|
||||||
|
# example: <windows.h>
|
||||||
|
# dir: um/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b286efac4d83a54fc49190bddef1edc9/windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/e0dc3811d92ab96fcb72bf63d6c08d71/766c0ffd568bbb31bf7fb6793383e24a.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/613503da4b5628768497822826aed39f/8125ee239710f33ea485965f76fae646.cab
|
||||||
|
|
||||||
|
# example: <winapifamily.h>
|
||||||
|
# dir: shared/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/122979f0348d3a2a36b6aa1a111d5d0c/windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/766e04beecdfccff39e91dd9eb32834a/e89e3dcbb016928c7e426238337d69eb.cab
|
||||||
|
|
||||||
|
|
||||||
|
# "id": "Microsoft.VisualC.14.16.CRT.Headers"
|
||||||
|
# "version": "14.16.27045"
|
||||||
|
|
||||||
|
# example: <vcruntime.h>
|
||||||
|
# dir: MSVC/
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/87bbe41e09a2f83711e72696f49681429327eb7a4b90618c35667a6ba2e2880e/Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||||
|
|
||||||
|
# [[.lib]]
|
||||||
|
|
||||||
|
# advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/944c4153b849a1f7d0c0404a4f1c05ea/windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5306aed3e1a38d1e8bef5934edeb2a9b/05047a45609f311645eebcac2739fc4c.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/13c8a73a0f5a6474040b26d016a26fab/13d68b8a7b6678a368e2d13ff4027521.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/149578fb3b621cdb61ee1813b9b3e791/463ad1b0783ebda908fd6c16a4abfe93.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5c986c4f393c6b09d5aec3b539e9fb4a/5a22e5cde814b041749fb271547f4dd5.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/bfc3904a0195453419ae4dfea7abd6fb/e10768bb6e9d0ea730280336b697da66.cab
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/637f9f3be880c71f9e3ca07b4d67345c/f9b24c8280986c0683fbceca5326d806.cab
|
||||||
|
|
||||||
|
# dbghelp.lib fwpuclnt.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/9f51690d5aa804b1340ce12d1ec80f89/windows%20sdk%20desktop%20libs%20x64-x86_en-us.msi
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/d3a7df4ca3303a698640a29e558a5e5b/58314d0646d7e1a25e97c902166c3155.cab
|
||||||
|
|
||||||
|
# libcmt.lib libvcruntime.lib
|
||||||
|
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/8728f21ae09940f1f4b4ee47b4a596be2509e2a47d2f0c83bbec0ea37d69644b/Microsoft.VisualC.14.16.CRT.x64.Desktop.vsix
|
||||||
|
|
||||||
|
|
||||||
|
msiextract universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||||
|
msiextract windows%20sdk%20desktop%20libs%20x64-x86_en-us.msi
|
||||||
|
unzip -o Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||||
|
unzip -o Microsoft.VisualC.14.16.CRT.x64.Desktop.vsix
|
||||||
|
|
||||||
|
mkdir -p /usr/x86_64-pc-windows-msvc/usr/include
|
||||||
|
mkdir -p /usr/x86_64-pc-windows-msvc/usr/lib
|
||||||
|
|
||||||
|
# lowercase folder/file names
|
||||||
|
echo "$(find . -regex ".*/[^/]*[A-Z][^/]*")" | xargs -I{} sh -c 'mv "$(echo "{}" | sed -E '"'"'s/(.*\/)/\L\1/'"'"')" "$(echo "{}" | tr [A-Z] [a-z])"'
|
||||||
|
|
||||||
|
# .h
|
||||||
|
(cd 'program files/windows kits/10/include/10.0.26100.0' && cp -r ucrt/* um/* shared/* -t /usr/x86_64-pc-windows-msvc/usr/include)
|
||||||
|
|
||||||
|
cp -r contents/vc/tools/msvc/14.16.27023/include/* /usr/x86_64-pc-windows-msvc/usr/include
|
||||||
|
|
||||||
|
# lowercase #include "" and #include <>
|
||||||
|
find /usr/x86_64-pc-windows-msvc/usr/include -type f -exec sed -i -E 's/(#include <[^<>]*?[A-Z][^<>]*?>)|(#include "[^"]*?[A-Z][^"]*?")/\L\1\2/' "{}" ';'
|
||||||
|
|
||||||
|
# x86 intrinsics
|
||||||
|
# original dir: MSVC/
|
||||||
|
|
||||||
|
# '_mm_movemask_epi8' defined in emmintrin.h
|
||||||
|
# '__v4sf' defined in xmmintrin.h
|
||||||
|
# '__v2si' defined in mmintrin.h
|
||||||
|
# '__m128d' redefined in immintrin.h
|
||||||
|
# '__m128i' redefined in intrin.h
|
||||||
|
# '_mm_comlt_epu8' defined in ammintrin.h
|
||||||
|
|
||||||
|
(cd /usr/lib/llvm19/lib/clang/19/include && cp emmintrin.h xmmintrin.h mmintrin.h immintrin.h intrin.h ammintrin.h -t /usr/x86_64-pc-windows-msvc/usr/include)
|
||||||
|
|
||||||
|
# .lib
|
||||||
|
(cd 'program files/windows kits/10/lib/10.0.26100.0/um/x64' && cp advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib dbghelp.lib fwpuclnt.lib -t /usr/x86_64-pc-windows-msvc/usr/lib)
|
||||||
|
|
||||||
|
(cd 'contents/vc/tools/msvc/14.16.27023/lib/x64' && cp libcmt.lib libvcruntime.lib -t /usr/x86_64-pc-windows-msvc/usr/lib)
|
||||||
|
|
||||||
|
cp 'program files/windows kits/10/lib/10.0.26100.0/ucrt/x64/libucrt.lib' /usr/x86_64-pc-windows-msvc/usr/lib
|
||||||
@@ -6,6 +6,7 @@ LanceDB registers the OpenAI embeddings function in the registry by default, as
|
|||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
| `name` | `str` | `"text-embedding-ada-002"` | The name of the model. |
|
| `name` | `str` | `"text-embedding-ada-002"` | The name of the model. |
|
||||||
| `dim` | `int` | Model default | For OpenAI's newer text-embedding-3 model, we can specify a dimensionality that is smaller than the 1536 size. This feature supports it |
|
| `dim` | `int` | Model default | For OpenAI's newer text-embedding-3 model, we can specify a dimensionality that is smaller than the 1536 size. This feature supports it |
|
||||||
|
| `use_azure` | `bool` | `False` | Set true to use Azure OpenAI SDK |
|
||||||
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|||||||
@@ -27,10 +27,13 @@ LanceDB OSS supports object stores such as AWS S3 (and compatible stores), Azure
|
|||||||
|
|
||||||
Azure Blob Storage:
|
Azure Blob Storage:
|
||||||
|
|
||||||
|
<!-- skip-test -->
|
||||||
```python
|
```python
|
||||||
import lancedb
|
import lancedb
|
||||||
db = lancedb.connect("az://bucket/path")
|
db = lancedb.connect("az://bucket/path")
|
||||||
```
|
```
|
||||||
|
Note that for Azure, storage credentials must be configured. See [below](#azure-blob-storage) for more details.
|
||||||
|
|
||||||
|
|
||||||
=== "TypeScript"
|
=== "TypeScript"
|
||||||
|
|
||||||
@@ -87,11 +90,6 @@ In most cases, when running in the respective cloud and permissions are set up c
|
|||||||
export TIMEOUT=60s
|
export TIMEOUT=60s
|
||||||
```
|
```
|
||||||
|
|
||||||
!!! note "`storage_options` availability"
|
|
||||||
|
|
||||||
The `storage_options` parameter is only available in Python *async* API and JavaScript API.
|
|
||||||
It is not yet supported in the Python synchronous API.
|
|
||||||
|
|
||||||
If you only want this to apply to one particular connection, you can pass the `storage_options` argument when opening the connection:
|
If you only want this to apply to one particular connection, you can pass the `storage_options` argument when opening the connection:
|
||||||
|
|
||||||
=== "Python"
|
=== "Python"
|
||||||
|
|||||||
@@ -790,6 +790,101 @@ Use the `drop_table()` method on the database to remove a table.
|
|||||||
This permanently removes the table and is not recoverable, unlike deleting rows.
|
This permanently removes the table and is not recoverable, unlike deleting rows.
|
||||||
If the table does not exist an exception is raised.
|
If the table does not exist an exception is raised.
|
||||||
|
|
||||||
|
## Changing schemas
|
||||||
|
|
||||||
|
While tables must have a schema specified when they are created, you can
|
||||||
|
change the schema over time. There are three methods to alter the schema of
|
||||||
|
a table:
|
||||||
|
|
||||||
|
* `add_columns`: Add new columns to the table
|
||||||
|
* `alter_columns`: Alter the name, nullability, or data type of a column
|
||||||
|
* `drop_columns`: Drop columns from the table
|
||||||
|
|
||||||
|
### Adding new columns
|
||||||
|
|
||||||
|
You can add new columns to the table with the `add_columns` method. New columns
|
||||||
|
are filled with values based on a SQL expression. For example, you can add a new
|
||||||
|
column `double_price` to the table and fill it with the value of `price * 2`.
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
```python
|
||||||
|
table.add_columns({"double_price": "price * 2"})
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.table.Table.add_columns][]
|
||||||
|
|
||||||
|
=== "Typescript"
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
--8<-- "nodejs/examples/basic.test.ts:add_columns"
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.Table.addColumns](../js/classes/Table.md/#addcolumns)
|
||||||
|
|
||||||
|
If you want to fill it with null, you can use `cast(NULL as <data_type>)` as
|
||||||
|
the SQL expression to fill the column with nulls, while controlling the data
|
||||||
|
type of the column. Available data types are based on the
|
||||||
|
[DataFusion data types](https://datafusion.apache.org/user-guide/sql/data_types.html).
|
||||||
|
You can use any of the SQL types, such as `BIGINT`:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
cast(NULL as BIGINT)
|
||||||
|
```
|
||||||
|
|
||||||
|
Using Arrow data types and the `arrow_typeof` function is not yet supported.
|
||||||
|
|
||||||
|
<!-- TODO: we could provide a better formula for filling with nulls:
|
||||||
|
https://github.com/lancedb/lance/issues/3175
|
||||||
|
-->
|
||||||
|
|
||||||
|
### Altering existing columns
|
||||||
|
|
||||||
|
You can alter the name, nullability, or data type of a column with the `alter_columns`
|
||||||
|
method.
|
||||||
|
|
||||||
|
Changing the name or nullability of a column just updates the metadata. Because
|
||||||
|
of this, it's a fast operation. Changing the data type of a column requires
|
||||||
|
rewriting the column, which can be a heavy operation.
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
```python
|
||||||
|
import pyarrow as pa
|
||||||
|
table.alter_columns({"path": "double_price", "rename": "dbl_price",
|
||||||
|
"data_type": pa.float32(), "nullable": False})
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.table.Table.alter_columns][]
|
||||||
|
|
||||||
|
=== "Typescript"
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
--8<-- "nodejs/examples/basic.test.ts:alter_columns"
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.Table.alterColumns](../js/classes/Table.md/#altercolumns)
|
||||||
|
|
||||||
|
### Dropping columns
|
||||||
|
|
||||||
|
You can drop columns from the table with the `drop_columns` method. This will
|
||||||
|
remove the column from the schema.
|
||||||
|
|
||||||
|
<!-- TODO: Provide guidance on how to reduce disk usage once optimize helps here
|
||||||
|
waiting on: https://github.com/lancedb/lance/issues/3177
|
||||||
|
-->
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
```python
|
||||||
|
table.drop_columns(["dbl_price"])
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.table.Table.drop_columns][]
|
||||||
|
|
||||||
|
=== "Typescript"
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
--8<-- "nodejs/examples/basic.test.ts:drop_columns"
|
||||||
|
```
|
||||||
|
**API Reference:** [lancedb.Table.dropColumns](../js/classes/Table.md/#dropcolumns)
|
||||||
|
|
||||||
|
|
||||||
## Handling bad vectors
|
## Handling bad vectors
|
||||||
|
|
||||||
In LanceDB Python, you can use the `on_bad_vectors` parameter to choose how
|
In LanceDB Python, you can use the `on_bad_vectors` parameter to choose how
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.14.0-beta.0</version>
|
<version>0.14.0-beta.2</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.14.0-beta.0</version>
|
<version>0.14.0-beta.2</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<name>LanceDB Parent</name>
|
<name>LanceDB Parent</name>
|
||||||
|
|||||||
20
node/package-lock.json
generated
20
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,14 +52,14 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.0",
|
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.2"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@@ -91,13 +91,13 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.0",
|
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.0",
|
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.2",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.0"
|
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.2"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.14.0-beta.0"
|
version = "0.14.0-beta.2"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -825,6 +825,18 @@ describe("schema evolution", function () {
|
|||||||
new Field("price", new Float64(), true),
|
new Field("price", new Float64(), true),
|
||||||
]);
|
]);
|
||||||
expect(await table.schema()).toEqual(expectedSchema);
|
expect(await table.schema()).toEqual(expectedSchema);
|
||||||
|
|
||||||
|
await table.alterColumns([{ path: "new_id", dataType: "int32" }]);
|
||||||
|
const expectedSchema2 = new Schema([
|
||||||
|
new Field("new_id", new Int32(), true),
|
||||||
|
new Field(
|
||||||
|
"vector",
|
||||||
|
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||||
|
true,
|
||||||
|
),
|
||||||
|
new Field("price", new Float64(), true),
|
||||||
|
]);
|
||||||
|
expect(await table.schema()).toEqual(expectedSchema2);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("can drop a column from the schema", async function () {
|
it("can drop a column from the schema", async function () {
|
||||||
|
|||||||
@@ -116,6 +116,26 @@ test("basic table examples", async () => {
|
|||||||
await tbl.add(data);
|
await tbl.add(data);
|
||||||
// --8<-- [end:add_data]
|
// --8<-- [end:add_data]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// --8<-- [start:add_columns]
|
||||||
|
await tbl.addColumns([{ name: "double_price", valueSql: "price * 2" }]);
|
||||||
|
// --8<-- [end:add_columns]
|
||||||
|
// --8<-- [start:alter_columns]
|
||||||
|
await tbl.alterColumns([
|
||||||
|
{
|
||||||
|
path: "double_price",
|
||||||
|
rename: "dbl_price",
|
||||||
|
dataType: "float",
|
||||||
|
nullable: true,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
// --8<-- [end:alter_columns]
|
||||||
|
// --8<-- [start:drop_columns]
|
||||||
|
await tbl.dropColumns(["dbl_price"]);
|
||||||
|
// --8<-- [end:drop_columns]
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// --8<-- [start:vector_search]
|
// --8<-- [start:vector_search]
|
||||||
const res = await tbl.search([100, 100]).limit(2).toArray();
|
const res = await tbl.search([100, 100]).limit(2).toArray();
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"vector database",
|
"vector database",
|
||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"version": "0.14.0-beta.0",
|
"version": "0.14.0-beta.2",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -178,16 +178,20 @@ impl Table {
|
|||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn alter_columns(&self, alterations: Vec<ColumnAlteration>) -> napi::Result<()> {
|
pub async fn alter_columns(&self, alterations: Vec<ColumnAlteration>) -> napi::Result<()> {
|
||||||
for alteration in &alterations {
|
for alteration in &alterations {
|
||||||
if alteration.rename.is_none() && alteration.nullable.is_none() {
|
if alteration.rename.is_none()
|
||||||
|
&& alteration.nullable.is_none()
|
||||||
|
&& alteration.data_type.is_none()
|
||||||
|
{
|
||||||
return Err(napi::Error::from_reason(
|
return Err(napi::Error::from_reason(
|
||||||
"Alteration must have a 'rename' or 'nullable' field.",
|
"Alteration must have a 'rename', 'dataType', or 'nullable' field.",
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let alterations = alterations
|
let alterations = alterations
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(LanceColumnAlteration::from)
|
.map(LanceColumnAlteration::try_from)
|
||||||
.collect::<Vec<_>>();
|
.collect::<std::result::Result<Vec<_>, String>>()
|
||||||
|
.map_err(napi::Error::from_reason)?;
|
||||||
|
|
||||||
self.inner_ref()?
|
self.inner_ref()?
|
||||||
.alter_columns(&alterations)
|
.alter_columns(&alterations)
|
||||||
@@ -433,24 +437,43 @@ pub struct ColumnAlteration {
|
|||||||
/// The new name of the column. If not provided then the name will not be changed.
|
/// The new name of the column. If not provided then the name will not be changed.
|
||||||
/// This must be distinct from the names of all other columns in the table.
|
/// This must be distinct from the names of all other columns in the table.
|
||||||
pub rename: Option<String>,
|
pub rename: Option<String>,
|
||||||
|
/// A new data type for the column. If not provided then the data type will not be changed.
|
||||||
|
/// Changing data types is limited to casting to the same general type. For example, these
|
||||||
|
/// changes are valid:
|
||||||
|
/// * `int32` -> `int64` (integers)
|
||||||
|
/// * `double` -> `float` (floats)
|
||||||
|
/// * `string` -> `large_string` (strings)
|
||||||
|
/// But these changes are not:
|
||||||
|
/// * `int32` -> `double` (mix integers and floats)
|
||||||
|
/// * `string` -> `int32` (mix strings and integers)
|
||||||
|
pub data_type: Option<String>,
|
||||||
/// Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
/// Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
||||||
pub nullable: Option<bool>,
|
pub nullable: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<ColumnAlteration> for LanceColumnAlteration {
|
impl TryFrom<ColumnAlteration> for LanceColumnAlteration {
|
||||||
fn from(js: ColumnAlteration) -> Self {
|
type Error = String;
|
||||||
|
fn try_from(js: ColumnAlteration) -> std::result::Result<Self, Self::Error> {
|
||||||
let ColumnAlteration {
|
let ColumnAlteration {
|
||||||
path,
|
path,
|
||||||
rename,
|
rename,
|
||||||
nullable,
|
nullable,
|
||||||
|
data_type,
|
||||||
} = js;
|
} = js;
|
||||||
Self {
|
let data_type = if let Some(data_type) = data_type {
|
||||||
|
Some(
|
||||||
|
lancedb::utils::string_to_datatype(&data_type)
|
||||||
|
.ok_or_else(|| format!("Invalid data type: {}", data_type))?,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
Ok(Self {
|
||||||
path,
|
path,
|
||||||
rename,
|
rename,
|
||||||
nullable,
|
nullable,
|
||||||
// TODO: wire up this field
|
data_type,
|
||||||
data_type: None,
|
})
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.17.0-beta.2"
|
current_version = "0.17.0"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.17.0-beta.2"
|
version = "0.17.0"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -14,23 +14,18 @@ name = "_lancedb"
|
|||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arrow = { version = "52.1", features = ["pyarrow"] }
|
arrow = { version = "53.2", features = ["pyarrow"] }
|
||||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
pyo3 = { version = "0.21", features = [
|
pyo3 = { version = "0.22.2", features = [
|
||||||
"extension-module",
|
"extension-module",
|
||||||
"abi3-py39",
|
"abi3-py39",
|
||||||
"gil-refs"
|
"gil-refs"
|
||||||
] }
|
] }
|
||||||
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
|
pyo3-async-runtimes = { version = "0.22", features = ["attributes", "tokio-runtime"] }
|
||||||
# pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
|
|
||||||
pyo3-asyncio-0-21 = { version = "0.21.0", features = [
|
|
||||||
"attributes",
|
|
||||||
"tokio-runtime"
|
|
||||||
] }
|
|
||||||
pin-project = "1.1.5"
|
pin-project = "1.1.5"
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
tokio = { version = "1.36.0", features = ["sync"] }
|
tokio = { version = "1.40", features = ["sync"] }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
pyo3-build-config = { version = "0.20.3", features = [
|
pyo3-build-config = { version = "0.20.3", features = [
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
|||||||
# version in Cargo.toml
|
# version in Cargo.toml
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"pylance==0.20.0b3",
|
"pylance==0.20.0",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
"pydantic>=1.10",
|
"pydantic>=1.10",
|
||||||
"packaging",
|
"packaging",
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ def connect(
|
|||||||
read_consistency_interval: Optional[timedelta] = None,
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
||||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||||
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> DBConnection:
|
) -> DBConnection:
|
||||||
"""Connect to a LanceDB database.
|
"""Connect to a LanceDB database.
|
||||||
@@ -67,6 +68,9 @@ def connect(
|
|||||||
Configuration options for the LanceDB Cloud HTTP client. If a dict, then
|
Configuration options for the LanceDB Cloud HTTP client. If a dict, then
|
||||||
the keys are the attributes of the ClientConfig class. If None, then the
|
the keys are the attributes of the ClientConfig class. If None, then the
|
||||||
default configuration is used.
|
default configuration is used.
|
||||||
|
storage_options: dict, optional
|
||||||
|
Additional options for the storage backend. See available options at
|
||||||
|
https://lancedb.github.io/lancedb/guides/storage/
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -111,7 +115,11 @@ def connect(
|
|||||||
|
|
||||||
if kwargs:
|
if kwargs:
|
||||||
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||||
return LanceDBConnection(uri, read_consistency_interval=read_consistency_interval)
|
return LanceDBConnection(
|
||||||
|
uri,
|
||||||
|
read_consistency_interval=read_consistency_interval,
|
||||||
|
storage_options=storage_options,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def connect_async(
|
async def connect_async(
|
||||||
|
|||||||
@@ -13,34 +13,29 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
|
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
|
||||||
|
|
||||||
import pyarrow as pa
|
|
||||||
from overrides import EnforceOverrides, override
|
from overrides import EnforceOverrides, override
|
||||||
from pyarrow import fs
|
|
||||||
|
|
||||||
from lancedb.common import data_to_reader, validate_schema
|
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
|
||||||
|
from lancedb.background_loop import BackgroundEventLoop
|
||||||
|
|
||||||
from ._lancedb import connect as lancedb_connect
|
from ._lancedb import connect as lancedb_connect
|
||||||
from .table import (
|
from .table import (
|
||||||
AsyncTable,
|
AsyncTable,
|
||||||
LanceTable,
|
LanceTable,
|
||||||
Table,
|
Table,
|
||||||
_table_path,
|
|
||||||
sanitize_create_table,
|
sanitize_create_table,
|
||||||
)
|
)
|
||||||
from .util import (
|
from .util import (
|
||||||
fs_from_uri,
|
|
||||||
get_uri_location,
|
|
||||||
get_uri_scheme,
|
get_uri_scheme,
|
||||||
validate_table_name,
|
validate_table_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
import pyarrow as pa
|
||||||
from .pydantic import LanceModel
|
from .pydantic import LanceModel
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
@@ -48,6 +43,8 @@ if TYPE_CHECKING:
|
|||||||
from .common import DATA, URI
|
from .common import DATA, URI
|
||||||
from .embeddings import EmbeddingFunctionConfig
|
from .embeddings import EmbeddingFunctionConfig
|
||||||
|
|
||||||
|
LOOP = BackgroundEventLoop()
|
||||||
|
|
||||||
|
|
||||||
class DBConnection(EnforceOverrides):
|
class DBConnection(EnforceOverrides):
|
||||||
"""An active LanceDB connection interface."""
|
"""An active LanceDB connection interface."""
|
||||||
@@ -180,6 +177,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
control over how data is saved, either provide the PyArrow schema to
|
control over how data is saved, either provide the PyArrow schema to
|
||||||
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
||||||
|
|
||||||
|
>>> import pyarrow as pa
|
||||||
>>> custom_schema = pa.schema([
|
>>> custom_schema = pa.schema([
|
||||||
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
... pa.field("lat", pa.float32()),
|
... pa.field("lat", pa.float32()),
|
||||||
@@ -327,7 +325,11 @@ class LanceDBConnection(DBConnection):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, uri: URI, *, read_consistency_interval: Optional[timedelta] = None
|
self,
|
||||||
|
uri: URI,
|
||||||
|
*,
|
||||||
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
):
|
):
|
||||||
if not isinstance(uri, Path):
|
if not isinstance(uri, Path):
|
||||||
scheme = get_uri_scheme(uri)
|
scheme = get_uri_scheme(uri)
|
||||||
@@ -338,9 +340,27 @@ class LanceDBConnection(DBConnection):
|
|||||||
uri = uri.expanduser().absolute()
|
uri = uri.expanduser().absolute()
|
||||||
Path(uri).mkdir(parents=True, exist_ok=True)
|
Path(uri).mkdir(parents=True, exist_ok=True)
|
||||||
self._uri = str(uri)
|
self._uri = str(uri)
|
||||||
|
|
||||||
self._entered = False
|
self._entered = False
|
||||||
self.read_consistency_interval = read_consistency_interval
|
self.read_consistency_interval = read_consistency_interval
|
||||||
|
self.storage_options = storage_options
|
||||||
|
|
||||||
|
if read_consistency_interval is not None:
|
||||||
|
read_consistency_interval_secs = read_consistency_interval.total_seconds()
|
||||||
|
else:
|
||||||
|
read_consistency_interval_secs = None
|
||||||
|
|
||||||
|
async def do_connect():
|
||||||
|
return await lancedb_connect(
|
||||||
|
sanitize_uri(uri),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
read_consistency_interval_secs,
|
||||||
|
None,
|
||||||
|
storage_options,
|
||||||
|
)
|
||||||
|
|
||||||
|
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
val = f"{self.__class__.__name__}({self._uri}"
|
val = f"{self.__class__.__name__}({self._uri}"
|
||||||
@@ -364,32 +384,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
Iterator of str.
|
Iterator of str.
|
||||||
A list of table names.
|
A list of table names.
|
||||||
"""
|
"""
|
||||||
try:
|
return LOOP.run(self._conn.table_names(start_after=page_token, limit=limit))
|
||||||
asyncio.get_running_loop()
|
|
||||||
# User application is async. Soon we will just tell them to use the
|
|
||||||
# async version. Until then fallback to the old sync implementation.
|
|
||||||
try:
|
|
||||||
filesystem = fs_from_uri(self.uri)[0]
|
|
||||||
except pa.ArrowInvalid:
|
|
||||||
raise NotImplementedError("Unsupported scheme: " + self.uri)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loc = get_uri_location(self.uri)
|
|
||||||
paths = filesystem.get_file_info(fs.FileSelector(loc))
|
|
||||||
except FileNotFoundError:
|
|
||||||
# It is ok if the file does not exist since it will be created
|
|
||||||
paths = []
|
|
||||||
tables = [
|
|
||||||
os.path.splitext(file_info.base_name)[0]
|
|
||||||
for file_info in paths
|
|
||||||
if file_info.extension == "lance"
|
|
||||||
]
|
|
||||||
tables.sort()
|
|
||||||
return tables
|
|
||||||
except RuntimeError:
|
|
||||||
# User application is sync. It is safe to use the async implementation
|
|
||||||
# under the hood.
|
|
||||||
return asyncio.run(self._async_get_table_names(page_token, limit))
|
|
||||||
|
|
||||||
def __len__(self) -> int:
|
def __len__(self) -> int:
|
||||||
return len(self.table_names())
|
return len(self.table_names())
|
||||||
@@ -461,19 +456,16 @@ class LanceDBConnection(DBConnection):
|
|||||||
If True, ignore if the table does not exist.
|
If True, ignore if the table does not exist.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
table_uri = _table_path(self.uri, name)
|
LOOP.run(self._conn.drop_table(name))
|
||||||
filesystem, path = fs_from_uri(table_uri)
|
except ValueError as e:
|
||||||
filesystem.delete_dir(path)
|
|
||||||
except FileNotFoundError:
|
|
||||||
if not ignore_missing:
|
if not ignore_missing:
|
||||||
raise
|
raise e
|
||||||
|
if f"Table '{name}' was not found" not in str(e):
|
||||||
|
raise e
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def drop_database(self):
|
def drop_database(self):
|
||||||
dummy_table_uri = _table_path(self.uri, "dummy")
|
LOOP.run(self._conn.drop_database())
|
||||||
uri = dummy_table_uri.removesuffix("dummy.lance")
|
|
||||||
filesystem, path = fs_from_uri(uri)
|
|
||||||
filesystem.delete_dir(path)
|
|
||||||
|
|
||||||
|
|
||||||
class AsyncConnection(object):
|
class AsyncConnection(object):
|
||||||
@@ -689,6 +681,7 @@ class AsyncConnection(object):
|
|||||||
control over how data is saved, either provide the PyArrow schema to
|
control over how data is saved, either provide the PyArrow schema to
|
||||||
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
||||||
|
|
||||||
|
>>> import pyarrow as pa
|
||||||
>>> custom_schema = pa.schema([
|
>>> custom_schema = pa.schema([
|
||||||
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
... pa.field("lat", pa.float32()),
|
... pa.field("lat", pa.float32()),
|
||||||
|
|||||||
@@ -48,6 +48,9 @@ class OpenAIEmbeddings(TextEmbeddingFunction):
|
|||||||
organization: Optional[str] = None
|
organization: Optional[str] = None
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
|
|
||||||
|
# Set true to use Azure OpenAI API
|
||||||
|
use_azure: bool = False
|
||||||
|
|
||||||
def ndims(self):
|
def ndims(self):
|
||||||
return self._ndims
|
return self._ndims
|
||||||
|
|
||||||
@@ -123,4 +126,8 @@ class OpenAIEmbeddings(TextEmbeddingFunction):
|
|||||||
kwargs["organization"] = self.organization
|
kwargs["organization"] = self.organization
|
||||||
if self.api_key:
|
if self.api_key:
|
||||||
kwargs["api_key"] = self.api_key
|
kwargs["api_key"] = self.api_key
|
||||||
return openai.OpenAI(**kwargs)
|
|
||||||
|
if self.use_azure:
|
||||||
|
return openai.AzureOpenAI(**kwargs)
|
||||||
|
else:
|
||||||
|
return openai.OpenAI(**kwargs)
|
||||||
|
|||||||
@@ -12,18 +12,22 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from typing import ClassVar, List, Union
|
from typing import ClassVar, TYPE_CHECKING, List, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
from ..util import attempt_import_or_raise
|
from ..util import attempt_import_or_raise
|
||||||
from .base import TextEmbeddingFunction
|
from .base import EmbeddingFunction
|
||||||
from .registry import register
|
from .registry import register
|
||||||
from .utils import api_key_not_found_help, TEXT
|
from .utils import api_key_not_found_help, IMAGES
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import PIL
|
||||||
|
|
||||||
|
|
||||||
@register("voyageai")
|
@register("voyageai")
|
||||||
class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||||
"""
|
"""
|
||||||
An embedding function that uses the VoyageAI API
|
An embedding function that uses the VoyageAI API
|
||||||
|
|
||||||
@@ -36,6 +40,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
|
|
||||||
* voyage-3
|
* voyage-3
|
||||||
* voyage-3-lite
|
* voyage-3-lite
|
||||||
|
* voyage-multimodal-3
|
||||||
* voyage-finance-2
|
* voyage-finance-2
|
||||||
* voyage-multilingual-2
|
* voyage-multilingual-2
|
||||||
* voyage-law-2
|
* voyage-law-2
|
||||||
@@ -54,7 +59,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
.create(name="voyage-3")
|
.create(name="voyage-3")
|
||||||
|
|
||||||
class TextModel(LanceModel):
|
class TextModel(LanceModel):
|
||||||
text: str = voyageai.SourceField()
|
data: str = voyageai.SourceField()
|
||||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||||
|
|
||||||
data = [ { "text": "hello world" },
|
data = [ { "text": "hello world" },
|
||||||
@@ -77,6 +82,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
return 1536
|
return 1536
|
||||||
elif self.name in [
|
elif self.name in [
|
||||||
"voyage-3",
|
"voyage-3",
|
||||||
|
"voyage-multimodal-3",
|
||||||
"voyage-finance-2",
|
"voyage-finance-2",
|
||||||
"voyage-multilingual-2",
|
"voyage-multilingual-2",
|
||||||
"voyage-law-2",
|
"voyage-law-2",
|
||||||
@@ -85,19 +91,19 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Model {self.name} not supported")
|
raise ValueError(f"Model {self.name} not supported")
|
||||||
|
|
||||||
def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
|
def sanitize_input(self, images: IMAGES) -> Union[List[bytes], np.ndarray]:
|
||||||
return self.compute_source_embeddings(query, input_type="query")
|
"""
|
||||||
|
Sanitize the input to the embedding function.
|
||||||
|
"""
|
||||||
|
if isinstance(images, (str, bytes)):
|
||||||
|
images = [images]
|
||||||
|
elif isinstance(images, pa.Array):
|
||||||
|
images = images.to_pylist()
|
||||||
|
elif isinstance(images, pa.ChunkedArray):
|
||||||
|
images = images.combine_chunks().to_pylist()
|
||||||
|
return images
|
||||||
|
|
||||||
def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
|
def generate_text_embeddings(self, text: str, **kwargs) -> np.ndarray:
|
||||||
texts = self.sanitize_input(texts)
|
|
||||||
input_type = (
|
|
||||||
kwargs.get("input_type") or "document"
|
|
||||||
) # assume source input type if not passed by `compute_query_embeddings`
|
|
||||||
return self.generate_embeddings(texts, input_type=input_type)
|
|
||||||
|
|
||||||
def generate_embeddings(
|
|
||||||
self, texts: Union[List[str], np.ndarray], *args, **kwargs
|
|
||||||
) -> List[np.array]:
|
|
||||||
"""
|
"""
|
||||||
Get the embeddings for the given texts
|
Get the embeddings for the given texts
|
||||||
|
|
||||||
@@ -109,15 +115,55 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
|
|
||||||
truncation: Optional[bool]
|
truncation: Optional[bool]
|
||||||
"""
|
"""
|
||||||
VoyageAIEmbeddingFunction._init_client()
|
if self.name in ["voyage-multimodal-3"]:
|
||||||
rs = VoyageAIEmbeddingFunction.client.embed(
|
rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
|
||||||
texts=texts, model=self.name, **kwargs
|
inputs=[[text]], model=self.name, **kwargs
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
rs = VoyageAIEmbeddingFunction._get_client().embed(
|
||||||
|
texts=[text], model=self.name, **kwargs
|
||||||
|
)
|
||||||
|
|
||||||
return [emb for emb in rs.embeddings]
|
return rs.embeddings[0]
|
||||||
|
|
||||||
|
def generate_image_embedding(
    self, image: "PIL.Image.Image", **kwargs
) -> np.ndarray:
    """
    Embed a single image with the VoyageAI multimodal endpoint.

    Parameters
    ----------
    image : PIL.Image.Image
        The image to embed.
    **kwargs
        Forwarded unchanged to the client's ``multimodal_embed`` call
        (callers in this class pass ``input_type``).

    Returns
    -------
    The first (and only) embedding of the response.
    NOTE(review): annotated as np.ndarray; the concrete type is whatever
    the client returns in ``embeddings`` -- confirm.
    """
    client = VoyageAIEmbeddingFunction._get_client()
    # The image is wrapped as one input that holds a single content item.
    response = client.multimodal_embed(
        inputs=[[image]], model=self.name, **kwargs
    )
    return response.embeddings[0]
|
||||||
|
|
||||||
|
def compute_query_embeddings(
    self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
) -> List[np.ndarray]:
    """
    Compute the embeddings for a given user query

    Parameters
    ----------
    query : Union[str, PIL.Image.Image]
        The query to embed. A query can be either text or an image.

    Returns
    -------
    A one-element list holding the embedding of the query.

    Raises
    ------
    TypeError
        If the query is neither a string nor a PIL image.
    """
    # Text queries never need pillow, so handle them before importing it.
    if isinstance(query, str):
        return [self.generate_text_embeddings(query, input_type="query")]

    pil = attempt_import_or_raise("PIL", "pillow")
    if isinstance(query, pil.Image.Image):
        return [self.generate_image_embedding(query, input_type="query")]

    raise TypeError("Only text or PIL images supported as query")
|
||||||
|
|
||||||
|
def compute_source_embeddings(
    self, images: IMAGES, *args, **kwargs
) -> List[np.array]:
    """
    Compute one embedding per source item.

    The input is first normalised with ``sanitize_input``; every resulting
    item is then embedded individually through ``generate_image_embedding``
    with ``input_type="document"``. Extra positional and keyword arguments
    are accepted but not used.
    """
    embeddings = []
    for item in self.sanitize_input(images):
        embeddings.append(
            self.generate_image_embedding(item, input_type="document")
        )
    return embeddings
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _init_client():
|
def _get_client():
|
||||||
if VoyageAIEmbeddingFunction.client is None:
|
if VoyageAIEmbeddingFunction.client is None:
|
||||||
voyageai = attempt_import_or_raise("voyageai")
|
voyageai = attempt_import_or_raise("voyageai")
|
||||||
if os.environ.get("VOYAGE_API_KEY") is None:
|
if os.environ.get("VOYAGE_API_KEY") is None:
|
||||||
@@ -125,3 +171,4 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
|||||||
VoyageAIEmbeddingFunction.client = voyageai.Client(
|
VoyageAIEmbeddingFunction.client = voyageai.Client(
|
||||||
os.environ["VOYAGE_API_KEY"]
|
os.environ["VOYAGE_API_KEY"]
|
||||||
)
|
)
|
||||||
|
return VoyageAIEmbeddingFunction.client
|
||||||
|
|||||||
@@ -110,7 +110,16 @@ class FTS:
|
|||||||
remove_stop_words: bool = False,
|
remove_stop_words: bool = False,
|
||||||
ascii_folding: bool = False,
|
ascii_folding: bool = False,
|
||||||
):
|
):
|
||||||
self._inner = LanceDbIndex.fts(with_position=with_position)
|
self._inner = LanceDbIndex.fts(
|
||||||
|
with_position=with_position,
|
||||||
|
base_tokenizer=base_tokenizer,
|
||||||
|
language=language,
|
||||||
|
max_token_length=max_token_length,
|
||||||
|
lower_case=lower_case,
|
||||||
|
stem=stem,
|
||||||
|
remove_stop_words=remove_stop_words,
|
||||||
|
ascii_folding=ascii_folding,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class HnswPq:
|
class HnswPq:
|
||||||
|
|||||||
0
python/python/lancedb/integrations/__init__.py
Normal file
0
python/python/lancedb/integrations/__init__.py
Normal file
248
python/python/lancedb/integrations/pyarrow.py
Normal file
248
python/python/lancedb/integrations/pyarrow.py
Normal file
@@ -0,0 +1,248 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Any, List, Optional, Tuple, Union, Literal
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
from ..table import Table
|
||||||
|
|
||||||
|
Filter = Union[str, pa.compute.Expression]
|
||||||
|
Keys = Union[str, List[str]]
|
||||||
|
JoinType = Literal[
|
||||||
|
"left semi",
|
||||||
|
"right semi",
|
||||||
|
"left anti",
|
||||||
|
"right anti",
|
||||||
|
"inner",
|
||||||
|
"left outer",
|
||||||
|
"right outer",
|
||||||
|
"full outer",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class PyarrowScannerAdapter(pa.dataset.Scanner):
    """
    Scanner over a LanceDB table that presents the pyarrow Scanner interface.

    Reads are translated into a query built from ``table.search()``; the
    parts of the pyarrow interface with no counterpart here raise
    ``NotImplementedError``.
    """

    def __init__(
        self,
        table: Table,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        # Read-ahead hints are accepted for signature compatibility and ignored.
        if batch_readahead is not None:
            logging.debug("ignoring batch_readahead which has no lance equivalent")
        if fragment_readahead is not None:
            logging.debug("ignoring fragment_readahead which has no lance equivalent")

        # Options that cannot be honoured are rejected outright.
        if fragment_scan_options is not None:
            raise NotImplementedError("fragment_scan_options not supported")
        if use_threads is False:
            raise NotImplementedError("use_threads=False not supported")
        if memory_pool is not None:
            raise NotImplementedError("memory_pool not supported")

        self.table = table
        self.columns = columns
        self.filter = filter
        self.batch_size = batch_size

    def count_rows(self):
        """Count the rows of the table that match this scanner's filter."""
        return self.table.count_rows(self.filter)

    def from_batches(self, **kwargs):
        """Not supported."""
        raise NotImplementedError

    def from_dataset(self, **kwargs):
        """Not supported."""
        raise NotImplementedError

    def from_fragment(self, **kwargs):
        """Not supported."""
        raise NotImplementedError

    def head(self, num_rows: int):
        """Read at most ``num_rows`` rows; a non-positive value raises ValueError."""
        reader = self.to_reader(limit=num_rows)
        return reader.read_all()

    @property
    def projected_schema(self):
        """Schema of the scan output, obtained by reading a single row."""
        first_row = self.head(1)
        return first_row.schema

    def scan_batches(self):
        """Same as ``to_reader()`` with no limit."""
        return self.to_reader()

    def take(self, indices: List[int]):
        """Not supported."""
        raise NotImplementedError

    def to_batches(self):
        """Same as ``to_reader()`` with no limit."""
        return self.to_reader()

    def to_table(self):
        """Read every matching row and return what the reader's ``read_all`` gives."""
        reader = self.to_reader()
        return reader.read_all()

    def to_reader(self, *, limit: Optional[int] = None):
        """
        Build and run the query, returning ``query.to_batches(...)``.

        Parameters
        ----------
        limit: int, optional
            Maximum number of rows to read. When omitted, the number of rows
            matching the filter is used so the query's built-in limit does
            not truncate the result.

        Raises
        ------
        ValueError
            If ``limit`` is given and is not positive.
        """
        query = self.table.search()
        if limit is not None and limit <= 0:
            raise ValueError("limit must be positive")
        # Disable the builtin limit when the caller did not ask for one.
        row_cap = self.count_rows() if limit is None else limit
        query.limit(row_cap)

        if self.columns is not None:
            query = query.select(self.columns)
        if self.filter is not None:
            query = query.where(self.filter, prefilter=True)
        return query.to_batches(batch_size=self.batch_size)
|
||||||
|
|
||||||
|
|
||||||
|
class PyarrowDatasetAdapter(pa.dataset.Dataset):
    """
    Dataset over a LanceDB table that presents the pyarrow Dataset interface.

    Reading methods delegate to a ``PyarrowScannerAdapter`` created by
    ``scanner``; joins, sorting, fragment access, schema replacement and
    ``take`` raise ``NotImplementedError``.
    """

    def __init__(self, table: Table):
        self.table = table

    def count_rows(self, filter: Optional[Filter] = None):
        """Count the rows of the table, optionally restricted by ``filter``."""
        return self.table.count_rows(filter)

    def get_fragments(self, filter: Optional[Filter] = None):
        """Not supported."""
        raise NotImplementedError

    def head(
        self,
        num_rows: int,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Read the first ``num_rows`` rows through a scanner built from the options."""
        scan = self.scanner(
            columns,
            filter,
            batch_size,
            batch_readahead,
            fragment_readahead,
            fragment_scan_options,
            use_threads,
            memory_pool,
        )
        return scan.head(num_rows)

    def join(
        self,
        right_dataset: Any,
        keys: Keys,
        right_keys: Optional[Keys] = None,
        join_type: Optional[JoinType] = None,
        left_suffix: Optional[str] = None,
        right_suffix: Optional[str] = None,
        coalesce_keys: bool = True,
        use_threads: bool = True,
    ):
        """Not supported."""
        raise NotImplementedError

    def join_asof(
        self,
        right_dataset: Any,
        on: str,
        by: Keys,
        tolerance: int,
        right_on: Optional[str] = None,
        right_by: Optional[Keys] = None,
    ):
        """Not supported."""
        raise NotImplementedError

    @property
    def partition_expression(self):
        """Not supported."""
        raise NotImplementedError

    def replace_schema(self, schema: pa.Schema):
        """Not supported."""
        raise NotImplementedError

    def scanner(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Create a ``PyarrowScannerAdapter`` over this table with the given options."""
        return PyarrowScannerAdapter(
            self.table,
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )

    @property
    def schema(self):
        """The schema reported by the wrapped table."""
        return self.table.schema

    def sort_by(self, sorting: Union[str, List[Tuple[str, bool]]]):
        """Not supported."""
        raise NotImplementedError

    def take(
        self,
        indices: List[int],
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Not supported."""
        raise NotImplementedError

    def to_batches(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Return the batches of a scanner built from the options."""
        scan = self.scanner(
            columns,
            filter,
            batch_size,
            batch_readahead,
            fragment_readahead,
            fragment_scan_options,
            use_threads,
            memory_pool,
        )
        return scan.to_batches()

    def to_table(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Return the full result of a scanner built from the options."""
        scan = self.scanner(
            columns,
            filter,
            batch_size,
            batch_readahead,
            fragment_readahead,
            fragment_scan_options,
            use_threads,
            memory_pool,
        )
        return scan.to_table()
|
||||||
@@ -325,6 +325,14 @@ class LanceQueryBuilder(ABC):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
    """
    Execute the query and return the results as a pyarrow
    [RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html)

    Parameters
    ----------
    batch_size: int, optional
        Batch size hint passed by callers.
        NOTE(review): presumably the maximum number of rows per batch;
        the exact meaning is defined by each implementation -- confirm.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
def to_list(self) -> List[dict]:
|
def to_list(self) -> List[dict]:
|
||||||
"""
|
"""
|
||||||
Execute the query and return the results as a list of dictionaries.
|
Execute the query and return the results as a list of dictionaries.
|
||||||
@@ -869,6 +877,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
check_reranker_result(results)
|
check_reranker_result(results)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def to_batches(self, /, batch_size: Optional[int] = None):
    """Streaming results as batches is not implemented for FTS queries."""
    reason = "to_batches on an FTS query"
    raise NotImplementedError(reason)
|
||||||
|
|
||||||
def tantivy_to_arrow(self) -> pa.Table:
|
def tantivy_to_arrow(self) -> pa.Table:
|
||||||
try:
|
try:
|
||||||
import tantivy
|
import tantivy
|
||||||
@@ -971,6 +982,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
|
|
||||||
class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
||||||
def to_arrow(self) -> pa.Table:
|
def to_arrow(self) -> pa.Table:
|
||||||
|
return self.to_batches().read_all()
|
||||||
|
|
||||||
|
def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
|
||||||
query = Query(
|
query = Query(
|
||||||
columns=self._columns,
|
columns=self._columns,
|
||||||
filter=self._where,
|
filter=self._where,
|
||||||
@@ -980,7 +994,7 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
|||||||
# not actually respected in remote query
|
# not actually respected in remote query
|
||||||
offset=self._offset or 0,
|
offset=self._offset or 0,
|
||||||
)
|
)
|
||||||
return self._table._execute_query(query).read_all()
|
return self._table._execute_query(query)
|
||||||
|
|
||||||
def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
|
def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
|
||||||
"""Rerank the results using the specified reranker.
|
"""Rerank the results using the specified reranker.
|
||||||
@@ -1135,6 +1149,9 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
|||||||
results = results.drop(["_rowid"])
|
results = results.drop(["_rowid"])
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def to_batches(self, /, batch_size: Optional[int] = None):
    """
    Streaming results as batches is not yet supported for hybrid queries.

    The ``batch_size`` parameter is accepted so the signature matches the
    abstract ``to_batches`` declared on the base query builder; without it
    a call such as ``to_batches(batch_size=10)`` would fail with a
    TypeError instead of the intended NotImplementedError.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("to_batches not yet supported on a hybrid query")
|
||||||
|
|
||||||
def _rank(self, results: pa.Table, column: str, ascending: bool = True):
|
def _rank(self, results: pa.Table, column: str, ascending: bool = True):
|
||||||
if len(results) == 0:
|
if len(results) == 0:
|
||||||
return results
|
return results
|
||||||
|
|||||||
@@ -20,19 +20,16 @@ import warnings
|
|||||||
|
|
||||||
from lancedb import connect_async
|
from lancedb import connect_async
|
||||||
from lancedb.remote import ClientConfig
|
from lancedb.remote import ClientConfig
|
||||||
from lancedb.remote.background_loop import BackgroundEventLoop
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from overrides import override
|
from overrides import override
|
||||||
|
|
||||||
from ..common import DATA
|
from ..common import DATA
|
||||||
from ..db import DBConnection
|
from ..db import DBConnection, LOOP
|
||||||
from ..embeddings import EmbeddingFunctionConfig
|
from ..embeddings import EmbeddingFunctionConfig
|
||||||
from ..pydantic import LanceModel
|
from ..pydantic import LanceModel
|
||||||
from ..table import Table
|
from ..table import Table
|
||||||
from ..util import validate_table_name
|
from ..util import validate_table_name
|
||||||
|
|
||||||
LOOP = BackgroundEventLoop()
|
|
||||||
|
|
||||||
|
|
||||||
class RemoteDBConnection(DBConnection):
|
class RemoteDBConnection(DBConnection):
|
||||||
"""A connection to a remote LanceDB database."""
|
"""A connection to a remote LanceDB database."""
|
||||||
|
|||||||
@@ -138,9 +138,28 @@ class RemoteTable(Table):
|
|||||||
*,
|
*,
|
||||||
replace: bool = False,
|
replace: bool = False,
|
||||||
with_position: bool = True,
|
with_position: bool = True,
|
||||||
|
# tokenizer configs:
|
||||||
|
base_tokenizer: str = "simple",
|
||||||
|
language: str = "English",
|
||||||
|
max_token_length: Optional[int] = 40,
|
||||||
|
lower_case: bool = True,
|
||||||
|
stem: bool = False,
|
||||||
|
remove_stop_words: bool = False,
|
||||||
|
ascii_folding: bool = False,
|
||||||
):
|
):
|
||||||
config = FTS(with_position=with_position)
|
config = FTS(
|
||||||
LOOP.run(self._table.create_index(column, config=config, replace=replace))
|
with_position=with_position,
|
||||||
|
base_tokenizer=base_tokenizer,
|
||||||
|
language=language,
|
||||||
|
max_token_length=max_token_length,
|
||||||
|
lower_case=lower_case,
|
||||||
|
stem=stem,
|
||||||
|
remove_stop_words=remove_stop_words,
|
||||||
|
ascii_folding=ascii_folding,
|
||||||
|
)
|
||||||
|
self._loop.run_until_complete(
|
||||||
|
self._table.create_index(column, config=config, replace=replace)
|
||||||
|
)
|
||||||
|
|
||||||
def create_index(
|
def create_index(
|
||||||
self,
|
self,
|
||||||
@@ -490,19 +509,13 @@ class RemoteTable(Table):
|
|||||||
return LOOP.run(self._table.count_rows(filter))
|
return LOOP.run(self._table.count_rows(filter))
|
||||||
|
|
||||||
def add_columns(self, transforms: Dict[str, str]):
|
def add_columns(self, transforms: Dict[str, str]):
|
||||||
raise NotImplementedError(
|
return LOOP.run(self._table.add_columns(transforms))
|
||||||
"add_columns() is not yet supported on the LanceDB cloud"
|
|
||||||
)
|
|
||||||
|
|
||||||
def alter_columns(self, alterations: Iterable[Dict[str, str]]):
|
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||||
raise NotImplementedError(
|
return LOOP.run(self._table.alter_columns(*alterations))
|
||||||
"alter_columns() is not yet supported on the LanceDB cloud"
|
|
||||||
)
|
|
||||||
|
|
||||||
def drop_columns(self, columns: Iterable[str]):
|
def drop_columns(self, columns: Iterable[str]):
|
||||||
raise NotImplementedError(
|
return LOOP.run(self._table.drop_columns(columns))
|
||||||
"drop_columns() is not yet supported on the LanceDB cloud"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def add_index(tbl: pa.Table, i: int) -> pa.Table:
|
def add_index(tbl: pa.Table, i: int) -> pa.Table:
|
||||||
|
|||||||
@@ -967,8 +967,6 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
Add new columns with defined values.
|
Add new columns with defined values.
|
||||||
|
|
||||||
This is not yet available in LanceDB Cloud.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
transforms: Dict[str, str]
|
transforms: Dict[str, str]
|
||||||
@@ -978,20 +976,21 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def alter_columns(self, alterations: Iterable[Dict[str, str]]):
|
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||||
"""
|
"""
|
||||||
Alter column names and nullability.
|
Alter column names and nullability.
|
||||||
|
|
||||||
This is not yet available in LanceDB Cloud.
|
|
||||||
|
|
||||||
alterations : Iterable[Dict[str, Any]]
|
alterations : Iterable[Dict[str, Any]]
|
||||||
A sequence of dictionaries, each with the following keys:
|
A sequence of dictionaries, each with the following keys:
|
||||||
- "path": str
|
- "path": str
|
||||||
The column path to alter. For a top-level column, this is the name.
|
The column path to alter. For a top-level column, this is the name.
|
||||||
For a nested column, this is the dot-separated path, e.g. "a.b.c".
|
For a nested column, this is the dot-separated path, e.g. "a.b.c".
|
||||||
- "name": str, optional
|
- "rename": str, optional
|
||||||
The new name of the column. If not specified, the column name is
|
The new name of the column. If not specified, the column name is
|
||||||
not changed.
|
not changed.
|
||||||
|
- "data_type": pyarrow.DataType, optional
|
||||||
|
The new data type of the column. Existing values will be casted
|
||||||
|
to this type. If not specified, the column data type is not changed.
|
||||||
- "nullable": bool, optional
|
- "nullable": bool, optional
|
||||||
Whether the column should be nullable. If not specified, the column
|
Whether the column should be nullable. If not specified, the column
|
||||||
nullability is not changed. Only non-nullable columns can be changed
|
nullability is not changed. Only non-nullable columns can be changed
|
||||||
@@ -1004,8 +1003,6 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
Drop columns from the table.
|
Drop columns from the table.
|
||||||
|
|
||||||
This is not yet available in LanceDB Cloud.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
columns : Iterable[str]
|
columns : Iterable[str]
|
||||||
@@ -1080,13 +1077,16 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
|
|||||||
index_cache_size: Optional[int] = None
|
index_cache_size: Optional[int] = None
|
||||||
read_consistency_interval: Optional[timedelta] = None
|
read_consistency_interval: Optional[timedelta] = None
|
||||||
last_consistency_check: Optional[float] = None
|
last_consistency_check: Optional[float] = None
|
||||||
|
storage_options: Optional[Dict[str, str]] = None
|
||||||
_dataset: Optional[LanceDataset] = None
|
_dataset: Optional[LanceDataset] = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dataset(self) -> LanceDataset:
|
def dataset(self) -> LanceDataset:
|
||||||
if not self._dataset:
|
if not self._dataset:
|
||||||
self._dataset = lance.dataset(
|
self._dataset = lance.dataset(
|
||||||
self.uri, index_cache_size=self.index_cache_size
|
self.uri,
|
||||||
|
index_cache_size=self.index_cache_size,
|
||||||
|
storage_options=self.storage_options,
|
||||||
)
|
)
|
||||||
self.last_consistency_check = time.monotonic()
|
self.last_consistency_check = time.monotonic()
|
||||||
elif self.read_consistency_interval is not None:
|
elif self.read_consistency_interval is not None:
|
||||||
@@ -1117,13 +1117,17 @@ class _LanceTimeTravelRef(_LanceDatasetRef):
|
|||||||
uri: str
|
uri: str
|
||||||
version: int
|
version: int
|
||||||
index_cache_size: Optional[int] = None
|
index_cache_size: Optional[int] = None
|
||||||
|
storage_options: Optional[Dict[str, str]] = None
|
||||||
_dataset: Optional[LanceDataset] = None
|
_dataset: Optional[LanceDataset] = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dataset(self) -> LanceDataset:
|
def dataset(self) -> LanceDataset:
|
||||||
if not self._dataset:
|
if not self._dataset:
|
||||||
self._dataset = lance.dataset(
|
self._dataset = lance.dataset(
|
||||||
self.uri, version=self.version, index_cache_size=self.index_cache_size
|
self.uri,
|
||||||
|
version=self.version,
|
||||||
|
index_cache_size=self.index_cache_size,
|
||||||
|
storage_options=self.storage_options,
|
||||||
)
|
)
|
||||||
return self._dataset
|
return self._dataset
|
||||||
|
|
||||||
@@ -1172,24 +1176,27 @@ class LanceTable(Table):
|
|||||||
uri=self._dataset_uri,
|
uri=self._dataset_uri,
|
||||||
version=version,
|
version=version,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
|
storage_options=connection.storage_options,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self._ref = _LanceLatestDatasetRef(
|
self._ref = _LanceLatestDatasetRef(
|
||||||
uri=self._dataset_uri,
|
uri=self._dataset_uri,
|
||||||
read_consistency_interval=connection.read_consistency_interval,
|
read_consistency_interval=connection.read_consistency_interval,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
|
storage_options=connection.storage_options,
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def open(cls, db, name, **kwargs):
|
def open(cls, db, name, **kwargs):
|
||||||
tbl = cls(db, name, **kwargs)
|
tbl = cls(db, name, **kwargs)
|
||||||
fs, path = fs_from_uri(tbl._dataset_path)
|
|
||||||
file_info = fs.get_file_info(path)
|
# check the dataset exists
|
||||||
if file_info.type != pa.fs.FileType.Directory:
|
try:
|
||||||
raise FileNotFoundError(
|
tbl.version
|
||||||
f"Table {name} does not exist."
|
except ValueError as e:
|
||||||
f"Please first call db.create_table({name}, data)"
|
if "Not found:" in str(e):
|
||||||
)
|
raise FileNotFoundError(f"Table {name} does not exist")
|
||||||
|
raise e
|
||||||
|
|
||||||
return tbl
|
return tbl
|
||||||
|
|
||||||
@@ -1617,11 +1624,7 @@ class LanceTable(Table):
|
|||||||
on_bad_vectors=on_bad_vectors,
|
on_bad_vectors=on_bad_vectors,
|
||||||
fill_value=fill_value,
|
fill_value=fill_value,
|
||||||
)
|
)
|
||||||
# Access the dataset_mut property to ensure that the dataset is mutable.
|
self._ref.dataset_mut.insert(data, mode=mode, schema=self.schema)
|
||||||
self._ref.dataset_mut
|
|
||||||
self._ref.dataset = lance.write_dataset(
|
|
||||||
data, self._dataset_uri, schema=self.schema, mode=mode
|
|
||||||
)
|
|
||||||
|
|
||||||
def merge(
|
def merge(
|
||||||
self,
|
self,
|
||||||
@@ -1905,7 +1908,13 @@ class LanceTable(Table):
|
|||||||
|
|
||||||
empty = pa.Table.from_batches([], schema=schema)
|
empty = pa.Table.from_batches([], schema=schema)
|
||||||
try:
|
try:
|
||||||
lance.write_dataset(empty, tbl._dataset_uri, schema=schema, mode=mode)
|
lance.write_dataset(
|
||||||
|
empty,
|
||||||
|
tbl._dataset_uri,
|
||||||
|
schema=schema,
|
||||||
|
mode=mode,
|
||||||
|
storage_options=db.storage_options,
|
||||||
|
)
|
||||||
except OSError as err:
|
except OSError as err:
|
||||||
if "Dataset already exists" in str(err) and exist_ok:
|
if "Dataset already exists" in str(err) and exist_ok:
|
||||||
if tbl.schema != schema:
|
if tbl.schema != schema:
|
||||||
@@ -2923,6 +2932,53 @@ class AsyncTable:
|
|||||||
|
|
||||||
return await self._inner.update(updates_sql, where)
|
return await self._inner.update(updates_sql, where)
|
||||||
|
|
||||||
|
async def add_columns(self, transforms: Dict[str, str]):
|
||||||
|
"""
|
||||||
|
Add new columns with defined values.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
transforms: Dict[str, str]
|
||||||
|
A map of column name to a SQL expression to use to calculate the
|
||||||
|
value of the new column. These expressions will be evaluated for
|
||||||
|
each row in the table, and can reference existing columns.
|
||||||
|
"""
|
||||||
|
await self._inner.add_columns(list(transforms.items()))
|
||||||
|
|
||||||
|
async def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||||
|
"""
|
||||||
|
Alter column names and nullability.
|
||||||
|
|
||||||
|
alterations : Iterable[Dict[str, Any]]
|
||||||
|
A sequence of dictionaries, each with the following keys:
|
||||||
|
- "path": str
|
||||||
|
The column path to alter. For a top-level column, this is the name.
|
||||||
|
For a nested column, this is the dot-separated path, e.g. "a.b.c".
|
||||||
|
- "rename": str, optional
|
||||||
|
The new name of the column. If not specified, the column name is
|
||||||
|
not changed.
|
||||||
|
- "data_type": pyarrow.DataType, optional
|
||||||
|
The new data type of the column. Existing values will be casted
|
||||||
|
to this type. If not specified, the column data type is not changed.
|
||||||
|
- "nullable": bool, optional
|
||||||
|
Whether the column should be nullable. If not specified, the column
|
||||||
|
nullability is not changed. Only non-nullable columns can be changed
|
||||||
|
to nullable. Currently, you cannot change a nullable column to
|
||||||
|
non-nullable.
|
||||||
|
"""
|
||||||
|
await self._inner.alter_columns(alterations)
|
||||||
|
|
||||||
|
async def drop_columns(self, columns: Iterable[str]):
|
||||||
|
"""
|
||||||
|
Drop columns from the table.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
columns : Iterable[str]
|
||||||
|
The names of the columns to drop.
|
||||||
|
"""
|
||||||
|
await self._inner.drop_columns(columns)
|
||||||
|
|
||||||
async def version(self) -> int:
|
async def version(self) -> int:
|
||||||
"""
|
"""
|
||||||
Retrieve the version of the table
|
Retrieve the version of the table
|
||||||
|
|||||||
21
python/python/tests/test_duckdb.py
Normal file
21
python/python/tests/test_duckdb.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
import duckdb
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
from lancedb.integrations.pyarrow import PyarrowDatasetAdapter
|
||||||
|
|
||||||
|
|
||||||
|
def test_basic_query(tmp_path):
|
||||||
|
data = pa.table({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
|
||||||
|
conn = lancedb.connect(tmp_path)
|
||||||
|
tbl = conn.create_table("test", data)
|
||||||
|
|
||||||
|
adapter = PyarrowDatasetAdapter(tbl) # noqa: F841
|
||||||
|
|
||||||
|
duck_conn = duckdb.connect()
|
||||||
|
|
||||||
|
results = duck_conn.sql("SELECT SUM(x) FROM adapter").fetchall()
|
||||||
|
assert results[0][0] == 10
|
||||||
|
|
||||||
|
results = duck_conn.sql("SELECT SUM(y) FROM adapter").fetchall()
|
||||||
|
assert results[0][0] == 26
|
||||||
47
python/python/tests/test_pyarrow.py
Normal file
47
python/python/tests/test_pyarrow.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
from lancedb.integrations.pyarrow import PyarrowDatasetAdapter
|
||||||
|
|
||||||
|
|
||||||
|
def test_dataset_adapter(tmp_path):
|
||||||
|
data = pa.table({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
|
||||||
|
conn = lancedb.connect(tmp_path)
|
||||||
|
tbl = conn.create_table("test", data)
|
||||||
|
|
||||||
|
adapter = PyarrowDatasetAdapter(tbl)
|
||||||
|
|
||||||
|
assert adapter.count_rows() == 4
|
||||||
|
assert adapter.count_rows("x > 2") == 2
|
||||||
|
assert adapter.schema == data.schema
|
||||||
|
assert adapter.head(2) == data.slice(0, 2)
|
||||||
|
assert adapter.to_table() == data
|
||||||
|
assert adapter.to_batches().read_all() == data
|
||||||
|
assert adapter.scanner().to_table() == data
|
||||||
|
assert adapter.scanner().to_batches().read_all() == data
|
||||||
|
|
||||||
|
assert adapter.scanner().projected_schema == data.schema
|
||||||
|
assert adapter.scanner(columns=["x"]).projected_schema == pa.schema(
|
||||||
|
[data.schema.field("x")]
|
||||||
|
)
|
||||||
|
assert adapter.scanner(columns=["x"]).to_table() == pa.table({"x": [1, 2, 3, 4]})
|
||||||
|
|
||||||
|
# Make sure we bypass the limit
|
||||||
|
data = pa.table({"x": range(100)})
|
||||||
|
tbl = conn.create_table("test2", data)
|
||||||
|
|
||||||
|
adapter = PyarrowDatasetAdapter(tbl)
|
||||||
|
|
||||||
|
assert adapter.count_rows() == 100
|
||||||
|
assert adapter.to_table().num_rows == 100
|
||||||
|
assert adapter.head(10).num_rows == 10
|
||||||
|
|
||||||
|
# Empty table
|
||||||
|
tbl = conn.create_table("test3", None, schema=pa.schema({"x": pa.int64()}))
|
||||||
|
adapter = PyarrowDatasetAdapter(tbl)
|
||||||
|
|
||||||
|
assert adapter.count_rows() == 0
|
||||||
|
assert adapter.to_table().num_rows == 0
|
||||||
|
assert adapter.head(10).num_rows == 0
|
||||||
|
|
||||||
|
assert adapter.scanner().projected_schema == pa.schema({"x": pa.int64()})
|
||||||
@@ -193,7 +193,7 @@ def test_table_add_in_threadpool():
|
|||||||
if request.path == "/v1/table/test/insert/":
|
if request.path == "/v1/table/test/insert/":
|
||||||
request.send_response(200)
|
request.send_response(200)
|
||||||
request.end_headers()
|
request.end_headers()
|
||||||
elif request.path == "/v1/table/test/create/":
|
elif request.path == "/v1/table/test/create/?mode=create":
|
||||||
request.send_response(200)
|
request.send_response(200)
|
||||||
request.send_header("Content-Type", "application/json")
|
request.send_header("Content-Type", "application/json")
|
||||||
request.end_headers()
|
request.end_headers()
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ class MockDB:
|
|||||||
def __init__(self, uri: Path):
|
def __init__(self, uri: Path):
|
||||||
self.uri = str(uri)
|
self.uri = str(uri)
|
||||||
self.read_consistency_interval = None
|
self.read_consistency_interval = None
|
||||||
|
self.storage_options = None
|
||||||
|
|
||||||
@functools.cached_property
|
@functools.cached_property
|
||||||
def is_managed_remote(self) -> bool:
|
def is_managed_remote(self) -> bool:
|
||||||
@@ -1292,6 +1293,19 @@ def test_add_columns(tmp_path):
|
|||||||
assert table.to_arrow().column_names == ["id", "new_col"]
|
assert table.to_arrow().column_names == ["id", "new_col"]
|
||||||
assert table.to_arrow()["new_col"].to_pylist() == [2, 3]
|
assert table.to_arrow()["new_col"].to_pylist() == [2, 3]
|
||||||
|
|
||||||
|
table.add_columns({"null_int": "cast(null as bigint)"})
|
||||||
|
assert table.schema.field("null_int").type == pa.int64()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_add_columns_async(db_async: AsyncConnection):
|
||||||
|
data = pa.table({"id": [0, 1]})
|
||||||
|
table = await db_async.create_table("my_table", data=data)
|
||||||
|
await table.add_columns({"new_col": "id + 2"})
|
||||||
|
data = await table.to_arrow()
|
||||||
|
assert data.column_names == ["id", "new_col"]
|
||||||
|
assert data["new_col"].to_pylist() == [2, 3]
|
||||||
|
|
||||||
|
|
||||||
def test_alter_columns(tmp_path):
|
def test_alter_columns(tmp_path):
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
@@ -1301,6 +1315,18 @@ def test_alter_columns(tmp_path):
|
|||||||
assert table.to_arrow().column_names == ["new_id"]
|
assert table.to_arrow().column_names == ["new_id"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_alter_columns_async(db_async: AsyncConnection):
|
||||||
|
data = pa.table({"id": [0, 1]})
|
||||||
|
table = await db_async.create_table("my_table", data=data)
|
||||||
|
await table.alter_columns({"path": "id", "rename": "new_id"})
|
||||||
|
assert (await table.to_arrow()).column_names == ["new_id"]
|
||||||
|
await table.alter_columns(dict(path="new_id", data_type=pa.int16(), nullable=True))
|
||||||
|
data = await table.to_arrow()
|
||||||
|
assert data.column(0).type == pa.int16()
|
||||||
|
assert data.schema.field(0).nullable
|
||||||
|
|
||||||
|
|
||||||
def test_drop_columns(tmp_path):
|
def test_drop_columns(tmp_path):
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
|
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
|
||||||
@@ -1309,6 +1335,14 @@ def test_drop_columns(tmp_path):
|
|||||||
assert table.to_arrow().column_names == ["id"]
|
assert table.to_arrow().column_names == ["id"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_drop_columns_async(db_async: AsyncConnection):
|
||||||
|
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
|
||||||
|
table = await db_async.create_table("my_table", data=data)
|
||||||
|
await table.drop_columns(["category"])
|
||||||
|
assert (await table.to_arrow()).column_names == ["id"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_time_travel(db_async: AsyncConnection):
|
async def test_time_travel(db_async: AsyncConnection):
|
||||||
# Setup
|
# Setup
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ use arrow::{
|
|||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use lancedb::arrow::SendableRecordBatchStream;
|
use lancedb::arrow::SendableRecordBatchStream;
|
||||||
use pyo3::{pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult, Python};
|
use pyo3::{pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult, Python};
|
||||||
use pyo3_asyncio_0_21::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::error::PythonErrorExt;
|
use crate::error::PythonErrorExt;
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ use pyo3::{
|
|||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyclass, pyfunction, pymethods, Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
pyclass, pyfunction, pymethods, Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
||||||
};
|
};
|
||||||
use pyo3_asyncio_0_21::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::{error::PythonErrorExt, table::Table};
|
use crate::{error::PythonErrorExt, table::Table};
|
||||||
|
|
||||||
@@ -58,6 +58,7 @@ impl Connection {
|
|||||||
self.inner.take();
|
self.inner.take();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (start_after=None, limit=None))]
|
||||||
pub fn table_names(
|
pub fn table_names(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
start_after: Option<String>,
|
start_after: Option<String>,
|
||||||
@@ -74,6 +75,7 @@ impl Connection {
|
|||||||
future_into_py(self_.py(), async move { op.execute().await.infer_error() })
|
future_into_py(self_.py(), async move { op.execute().await.infer_error() })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (name, mode, data, storage_options=None, data_storage_version=None, enable_v2_manifest_paths=None))]
|
||||||
pub fn create_table<'a>(
|
pub fn create_table<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
@@ -111,6 +113,7 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (name, mode, schema, storage_options=None, data_storage_version=None, enable_v2_manifest_paths=None))]
|
||||||
pub fn create_empty_table<'a>(
|
pub fn create_empty_table<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
@@ -198,6 +201,7 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
|
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None))]
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn connect(
|
pub fn connect(
|
||||||
py: Python,
|
py: Python,
|
||||||
|
|||||||
@@ -138,7 +138,9 @@ fn http_from_rust_error(
|
|||||||
status_code: Option<u16>,
|
status_code: Option<u16>,
|
||||||
) -> PyResult<PyErr> {
|
) -> PyResult<PyErr> {
|
||||||
let message = err.to_string();
|
let message = err.to_string();
|
||||||
let http_err_cls = py.import("lancedb.remote.errors")?.getattr("HttpError")?;
|
let http_err_cls = py
|
||||||
|
.import_bound("lancedb.remote.errors")?
|
||||||
|
.getattr("HttpError")?;
|
||||||
let py_err = http_err_cls.call1((message, request_id, status_code))?;
|
let py_err = http_err_cls.call1((message, request_id, status_code))?;
|
||||||
|
|
||||||
// Reset the traceback since it doesn't provide additional information.
|
// Reset the traceback since it doesn't provide additional information.
|
||||||
@@ -149,5 +151,5 @@ fn http_from_rust_error(
|
|||||||
py_err.setattr(intern!(py, "__cause__"), cause_err)?;
|
py_err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(PyErr::from_value(py_err))
|
Ok(PyErr::from_value_bound(py_err))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ impl Index {
|
|||||||
|
|
||||||
#[pymethods]
|
#[pymethods]
|
||||||
impl Index {
|
impl Index {
|
||||||
|
#[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None, max_iterations=None, sample_rate=None))]
|
||||||
#[staticmethod]
|
#[staticmethod]
|
||||||
pub fn ivf_pq(
|
pub fn ivf_pq(
|
||||||
distance_type: Option<String>,
|
distance_type: Option<String>,
|
||||||
@@ -106,6 +107,7 @@ impl Index {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (with_position=None, base_tokenizer=None, language=None, max_token_length=None, lower_case=None, stem=None, remove_stop_words=None, ascii_folding=None))]
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
#[staticmethod]
|
#[staticmethod]
|
||||||
pub fn fts(
|
pub fn fts(
|
||||||
@@ -146,6 +148,7 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None, max_iterations=None, sample_rate=None, m=None, ef_construction=None))]
|
||||||
#[staticmethod]
|
#[staticmethod]
|
||||||
pub fn hnsw_pq(
|
pub fn hnsw_pq(
|
||||||
distance_type: Option<String>,
|
distance_type: Option<String>,
|
||||||
@@ -184,6 +187,7 @@ impl Index {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (distance_type=None, num_partitions=None, max_iterations=None, sample_rate=None, m=None, ef_construction=None))]
|
||||||
#[staticmethod]
|
#[staticmethod]
|
||||||
pub fn hnsw_sq(
|
pub fn hnsw_sq(
|
||||||
distance_type: Option<String>,
|
distance_type: Option<String>,
|
||||||
|
|||||||
@@ -16,7 +16,11 @@ use arrow::RecordBatchStream;
|
|||||||
use connection::{connect, Connection};
|
use connection::{connect, Connection};
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
use index::{Index, IndexConfig};
|
use index::{Index, IndexConfig};
|
||||||
use pyo3::{pymodule, types::PyModule, wrap_pyfunction, PyResult, Python};
|
use pyo3::{
|
||||||
|
pymodule,
|
||||||
|
types::{PyModule, PyModuleMethods},
|
||||||
|
wrap_pyfunction, Bound, PyResult, Python,
|
||||||
|
};
|
||||||
use query::{Query, VectorQuery};
|
use query::{Query, VectorQuery};
|
||||||
use table::Table;
|
use table::Table;
|
||||||
|
|
||||||
@@ -29,7 +33,7 @@ pub mod table;
|
|||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
||||||
#[pymodule]
|
#[pymodule]
|
||||||
pub fn _lancedb(_py: Python, m: &PyModule) -> PyResult<()> {
|
pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||||
let env = Env::new()
|
let env = Env::new()
|
||||||
.filter_or("LANCEDB_LOG", "warn")
|
.filter_or("LANCEDB_LOG", "warn")
|
||||||
.write_style("LANCEDB_LOG_STYLE");
|
.write_style("LANCEDB_LOG_STYLE");
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ use pyo3::PyAny;
|
|||||||
use pyo3::PyRef;
|
use pyo3::PyRef;
|
||||||
use pyo3::PyResult;
|
use pyo3::PyResult;
|
||||||
use pyo3::{pyclass, PyErr};
|
use pyo3::{pyclass, PyErr};
|
||||||
use pyo3_asyncio_0_21::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::arrow::RecordBatchStream;
|
use crate::arrow::RecordBatchStream;
|
||||||
use crate::error::PythonErrorExt;
|
use crate::error::PythonErrorExt;
|
||||||
@@ -105,6 +105,7 @@ impl Query {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (max_batch_length=None))]
|
||||||
pub fn execute(
|
pub fn execute(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
@@ -203,6 +204,7 @@ impl VectorQuery {
|
|||||||
self.inner = self.inner.clone().bypass_vector_index()
|
self.inner = self.inner.clone().bypass_vector_index()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (max_batch_length=None))]
|
||||||
pub fn execute(
|
pub fn execute(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
|
|||||||
@@ -1,17 +1,21 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
use arrow::{
|
use arrow::{
|
||||||
|
datatypes::DataType,
|
||||||
ffi_stream::ArrowArrayStreamReader,
|
ffi_stream::ArrowArrayStreamReader,
|
||||||
pyarrow::{FromPyArrow, ToPyArrow},
|
pyarrow::{FromPyArrow, ToPyArrow},
|
||||||
};
|
};
|
||||||
use lancedb::table::{
|
use lancedb::table::{
|
||||||
AddDataMode, Duration, OptimizeAction, OptimizeOptions, Table as LanceDbTable,
|
AddDataMode, ColumnAlteration, Duration, NewColumnTransform, OptimizeAction, OptimizeOptions,
|
||||||
|
Table as LanceDbTable,
|
||||||
};
|
};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyclass, pymethods,
|
pyclass, pymethods,
|
||||||
types::{IntoPyDict, PyDict, PyDictMethods, PyString},
|
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
||||||
Bound, FromPyObject, PyAny, PyRef, PyResult, Python, ToPyObject,
|
Bound, FromPyObject, PyAny, PyRef, PyResult, Python, ToPyObject,
|
||||||
};
|
};
|
||||||
use pyo3_asyncio_0_21::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
error::PythonErrorExt,
|
error::PythonErrorExt,
|
||||||
@@ -137,9 +141,10 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (updates, r#where=None))]
|
||||||
pub fn update<'a>(
|
pub fn update<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
updates: &PyDict,
|
updates: &Bound<'_, PyDict>,
|
||||||
r#where: Option<String>,
|
r#where: Option<String>,
|
||||||
) -> PyResult<Bound<'a, PyAny>> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let mut op = self_.inner_ref()?.update();
|
let mut op = self_.inner_ref()?.update();
|
||||||
@@ -147,10 +152,8 @@ impl Table {
|
|||||||
op = op.only_if(only_if);
|
op = op.only_if(only_if);
|
||||||
}
|
}
|
||||||
for (column_name, value) in updates.into_iter() {
|
for (column_name, value) in updates.into_iter() {
|
||||||
let column_name: &PyString = column_name.downcast()?;
|
let column_name: String = column_name.extract()?;
|
||||||
let column_name = column_name.to_str()?.to_string();
|
let value: String = value.extract()?;
|
||||||
let value: &PyString = value.downcast()?;
|
|
||||||
let value = value.to_str()?.to_string();
|
|
||||||
op = op.column(column_name, value);
|
op = op.column(column_name, value);
|
||||||
}
|
}
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
@@ -159,6 +162,7 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (filter=None))]
|
||||||
pub fn count_rows(
|
pub fn count_rows(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
filter: Option<String>,
|
filter: Option<String>,
|
||||||
@@ -169,6 +173,7 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (column, index=None, replace=None))]
|
||||||
pub fn create_index<'a>(
|
pub fn create_index<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
column: String,
|
column: String,
|
||||||
@@ -263,7 +268,8 @@ impl Table {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let tup: Vec<(&String, &String)> = v.metadata.iter().collect();
|
let tup: Vec<(&String, &String)> = v.metadata.iter().collect();
|
||||||
dict.set_item("metadata", tup.into_py_dict(py)).unwrap();
|
dict.set_item("metadata", tup.into_py_dict_bound(py))
|
||||||
|
.unwrap();
|
||||||
dict.to_object(py)
|
dict.to_object(py)
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
@@ -299,6 +305,7 @@ impl Table {
|
|||||||
Query::new(self.inner_ref().unwrap().query())
|
Query::new(self.inner_ref().unwrap().query())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None))]
|
||||||
pub fn optimize(
|
pub fn optimize(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
cleanup_since_ms: Option<u64>,
|
cleanup_since_ms: Option<u64>,
|
||||||
@@ -406,6 +413,72 @@ impl Table {
|
|||||||
.infer_error()
|
.infer_error()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn add_columns(
|
||||||
|
self_: PyRef<'_, Self>,
|
||||||
|
definitions: Vec<(String, String)>,
|
||||||
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
|
let definitions = NewColumnTransform::SqlExpressions(definitions);
|
||||||
|
|
||||||
|
let inner = self_.inner_ref()?.clone();
|
||||||
|
future_into_py(self_.py(), async move {
|
||||||
|
inner.add_columns(definitions, None).await.infer_error()?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn alter_columns<'a>(
|
||||||
|
self_: PyRef<'a, Self>,
|
||||||
|
alterations: Vec<Bound<PyDict>>,
|
||||||
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
|
let alterations = alterations
|
||||||
|
.iter()
|
||||||
|
.map(|alteration| {
|
||||||
|
let path = alteration
|
||||||
|
.get_item("path")?
|
||||||
|
.ok_or_else(|| PyValueError::new_err("Missing path"))?
|
||||||
|
.extract()?;
|
||||||
|
let rename = {
|
||||||
|
// We prefer rename, but support name for backwards compatibility
|
||||||
|
let rename = if let Ok(Some(rename)) = alteration.get_item("rename") {
|
||||||
|
Some(rename)
|
||||||
|
} else {
|
||||||
|
alteration.get_item("name")?
|
||||||
|
};
|
||||||
|
rename.map(|name| name.extract()).transpose()?
|
||||||
|
};
|
||||||
|
let nullable = alteration
|
||||||
|
.get_item("nullable")?
|
||||||
|
.map(|val| val.extract())
|
||||||
|
.transpose()?;
|
||||||
|
let data_type = alteration
|
||||||
|
.get_item("data_type")?
|
||||||
|
.map(|val| DataType::from_pyarrow_bound(&val))
|
||||||
|
.transpose()?;
|
||||||
|
Ok(ColumnAlteration {
|
||||||
|
path,
|
||||||
|
rename,
|
||||||
|
nullable,
|
||||||
|
data_type,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<PyResult<Vec<_>>>()?;
|
||||||
|
|
||||||
|
let inner = self_.inner_ref()?.clone();
|
||||||
|
future_into_py(self_.py(), async move {
|
||||||
|
inner.alter_columns(&alterations).await.infer_error()?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn drop_columns(self_: PyRef<Self>, columns: Vec<String>) -> PyResult<Bound<PyAny>> {
|
||||||
|
let inner = self_.inner_ref()?.clone();
|
||||||
|
future_into_py(self_.py(), async move {
|
||||||
|
let column_refs = columns.iter().map(String::as_str).collect::<Vec<&str>>();
|
||||||
|
inner.drop_columns(&column_refs).await.infer_error()?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(FromPyObject)]
|
#[derive(FromPyObject)]
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.14.0-beta.0"
|
version = "0.14.0-beta.2"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.14.0-beta.0"
|
version = "0.14.0-beta.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -625,7 +625,7 @@ impl ConnectBuilder {
|
|||||||
|
|
||||||
/// Set the LanceDB Cloud client configuration.
|
/// Set the LanceDB Cloud client configuration.
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```no_run
|
||||||
/// # use lancedb::connect;
|
/// # use lancedb::connect;
|
||||||
/// # use lancedb::remote::*;
|
/// # use lancedb::remote::*;
|
||||||
/// connect("db://my_database")
|
/// connect("db://my_database")
|
||||||
|
|||||||
@@ -30,7 +30,7 @@
|
|||||||
//!
|
//!
|
||||||
//! LanceDB runs in process, to use it in your Rust project, put the following in your `Cargo.toml`:
|
//! LanceDB runs in process, to use it in your Rust project, put the following in your `Cargo.toml`:
|
||||||
//!
|
//!
|
||||||
//! ```ignore
|
//! ```shell
|
||||||
//! cargo install lancedb
|
//! cargo install lancedb
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
|
|||||||
@@ -348,7 +348,7 @@ pub trait QueryBase {
|
|||||||
///
|
///
|
||||||
/// The filter should be supplied as an SQL query string. For example:
|
/// The filter should be supplied as an SQL query string. For example:
|
||||||
///
|
///
|
||||||
/// ```ignore
|
/// ```sql
|
||||||
/// x > 10
|
/// x > 10
|
||||||
/// y > 0 AND y < 100
|
/// y > 0 AND y < 100
|
||||||
/// x > 5 OR y = 'test'
|
/// x > 5 OR y = 'test'
|
||||||
@@ -364,8 +364,18 @@ pub trait QueryBase {
|
|||||||
///
|
///
|
||||||
/// This method is only valid on tables that have a full text search index.
|
/// This method is only valid on tables that have a full text search index.
|
||||||
///
|
///
|
||||||
/// ```ignore
|
/// ```
|
||||||
/// query.full_text_search(FullTextSearchQuery::new("hello world"))
|
/// use lance_index::scalar::FullTextSearchQuery;
|
||||||
|
/// use lancedb::query::{QueryBase, ExecutableQuery};
|
||||||
|
///
|
||||||
|
/// # use lancedb::Table;
|
||||||
|
/// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
/// let results = table.query()
|
||||||
|
/// .full_text_search(FullTextSearchQuery::new("hello world".into()))
|
||||||
|
/// .execute()
|
||||||
|
/// .await?;
|
||||||
|
/// # Ok(())
|
||||||
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
fn full_text_search(self, query: FullTextSearchQuery) -> Self;
|
fn full_text_search(self, query: FullTextSearchQuery) -> Self;
|
||||||
|
|
||||||
|
|||||||
@@ -228,6 +228,14 @@ impl RestfulLanceDbClient<Sender> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
let db_name = parsed_url.host_str().unwrap();
|
let db_name = parsed_url.host_str().unwrap();
|
||||||
|
let db_prefix = {
|
||||||
|
let prefix = parsed_url.path().trim_start_matches('/');
|
||||||
|
if prefix.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(prefix)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Get the timeouts
|
// Get the timeouts
|
||||||
let connect_timeout = Self::get_timeout(
|
let connect_timeout = Self::get_timeout(
|
||||||
@@ -258,6 +266,7 @@ impl RestfulLanceDbClient<Sender> {
|
|||||||
db_name,
|
db_name,
|
||||||
host_override.is_some(),
|
host_override.is_some(),
|
||||||
options,
|
options,
|
||||||
|
db_prefix,
|
||||||
)?)
|
)?)
|
||||||
.user_agent(client_config.user_agent)
|
.user_agent(client_config.user_agent)
|
||||||
.build()
|
.build()
|
||||||
@@ -292,6 +301,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
|||||||
db_name: &str,
|
db_name: &str,
|
||||||
has_host_override: bool,
|
has_host_override: bool,
|
||||||
options: &RemoteOptions,
|
options: &RemoteOptions,
|
||||||
|
db_prefix: Option<&str>,
|
||||||
) -> Result<HeaderMap> {
|
) -> Result<HeaderMap> {
|
||||||
let mut headers = HeaderMap::new();
|
let mut headers = HeaderMap::new();
|
||||||
headers.insert(
|
headers.insert(
|
||||||
@@ -317,6 +327,17 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
|||||||
})?,
|
})?,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
if db_prefix.is_some() {
|
||||||
|
headers.insert(
|
||||||
|
"x-lancedb-database-prefix",
|
||||||
|
HeaderValue::from_str(db_prefix.unwrap()).map_err(|_| Error::InvalidInput {
|
||||||
|
message: format!(
|
||||||
|
"non-ascii database prefix '{}' provided",
|
||||||
|
db_prefix.unwrap()
|
||||||
|
),
|
||||||
|
})?,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(v) = options.0.get("account_name") {
|
if let Some(v) = options.0.get("account_name") {
|
||||||
headers.insert(
|
headers.insert(
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ use datafusion_physical_plan::{ExecutionPlan, SendableRecordBatchStream};
|
|||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use http::header::CONTENT_TYPE;
|
use http::header::CONTENT_TYPE;
|
||||||
use http::StatusCode;
|
use http::StatusCode;
|
||||||
use lance::arrow::json::JsonSchema;
|
use lance::arrow::json::{JsonDataType, JsonSchema};
|
||||||
use lance::dataset::scanner::DatasetRecordBatchStream;
|
use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||||
use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
|
use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
|
||||||
use lance_datafusion::exec::OneShotExec;
|
use lance_datafusion::exec::OneShotExec;
|
||||||
@@ -643,25 +643,80 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
}
|
}
|
||||||
async fn add_columns(
|
async fn add_columns(
|
||||||
&self,
|
&self,
|
||||||
_transforms: NewColumnTransform,
|
transforms: NewColumnTransform,
|
||||||
_read_columns: Option<Vec<String>>,
|
_read_columns: Option<Vec<String>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
self.check_mutable().await?;
|
self.check_mutable().await?;
|
||||||
Err(Error::NotSupported {
|
match transforms {
|
||||||
message: "add_columns is not yet supported.".into(),
|
NewColumnTransform::SqlExpressions(expressions) => {
|
||||||
})
|
let body = expressions
|
||||||
|
.into_iter()
|
||||||
|
.map(|(name, expression)| {
|
||||||
|
serde_json::json!({
|
||||||
|
"name": name,
|
||||||
|
"expression": expression,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let body = serde_json::json!({ "new_columns": body });
|
||||||
|
let request = self
|
||||||
|
.client
|
||||||
|
.post(&format!("/v1/table/{}/add_columns/", self.name))
|
||||||
|
.json(&body);
|
||||||
|
let (request_id, response) = self.client.send(request, false).await?;
|
||||||
|
self.check_table_response(&request_id, response).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return Err(Error::NotSupported {
|
||||||
|
message: "Only SQL expressions are supported for adding columns".into(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
async fn alter_columns(&self, _alterations: &[ColumnAlteration]) -> Result<()> {
|
|
||||||
|
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()> {
|
||||||
self.check_mutable().await?;
|
self.check_mutable().await?;
|
||||||
Err(Error::NotSupported {
|
let body = alterations
|
||||||
message: "alter_columns is not yet supported.".into(),
|
.iter()
|
||||||
})
|
.map(|alteration| {
|
||||||
|
let mut value = serde_json::json!({
|
||||||
|
"path": alteration.path,
|
||||||
|
});
|
||||||
|
if let Some(rename) = &alteration.rename {
|
||||||
|
value["rename"] = serde_json::Value::String(rename.clone());
|
||||||
|
}
|
||||||
|
if let Some(data_type) = &alteration.data_type {
|
||||||
|
let json_data_type = JsonDataType::try_from(data_type).unwrap();
|
||||||
|
let json_data_type = serde_json::to_value(&json_data_type).unwrap();
|
||||||
|
value["data_type"] = json_data_type;
|
||||||
|
}
|
||||||
|
if let Some(nullable) = &alteration.nullable {
|
||||||
|
value["nullable"] = serde_json::Value::Bool(*nullable);
|
||||||
|
}
|
||||||
|
value
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let body = serde_json::json!({ "alterations": body });
|
||||||
|
let request = self
|
||||||
|
.client
|
||||||
|
.post(&format!("/v1/table/{}/alter_columns/", self.name))
|
||||||
|
.json(&body);
|
||||||
|
let (request_id, response) = self.client.send(request, false).await?;
|
||||||
|
self.check_table_response(&request_id, response).await?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
async fn drop_columns(&self, _columns: &[&str]) -> Result<()> {
|
|
||||||
|
async fn drop_columns(&self, columns: &[&str]) -> Result<()> {
|
||||||
self.check_mutable().await?;
|
self.check_mutable().await?;
|
||||||
Err(Error::NotSupported {
|
let body = serde_json::json!({ "columns": columns });
|
||||||
message: "drop_columns is not yet supported.".into(),
|
let request = self
|
||||||
})
|
.client
|
||||||
|
.post(&format!("/v1/table/{}/drop_columns/", self.name))
|
||||||
|
.json(&body);
|
||||||
|
let (request_id, response) = self.client.send(request, false).await?;
|
||||||
|
self.check_table_response(&request_id, response).await?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
|
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
|
||||||
@@ -844,7 +899,17 @@ mod tests {
|
|||||||
Box::pin(table.update().column("a", "a + 1").execute().map_ok(|_| ())),
|
Box::pin(table.update().column("a", "a + 1").execute().map_ok(|_| ())),
|
||||||
Box::pin(table.add(example_data()).execute().map_ok(|_| ())),
|
Box::pin(table.add(example_data()).execute().map_ok(|_| ())),
|
||||||
Box::pin(table.merge_insert(&["test"]).execute(example_data())),
|
Box::pin(table.merge_insert(&["test"]).execute(example_data())),
|
||||||
Box::pin(table.delete("false")), // TODO: other endpoints.
|
Box::pin(table.delete("false")),
|
||||||
|
Box::pin(table.add_columns(
|
||||||
|
NewColumnTransform::SqlExpressions(vec![("x".into(), "y".into())]),
|
||||||
|
None,
|
||||||
|
)),
|
||||||
|
Box::pin(async {
|
||||||
|
let alterations = vec![ColumnAlteration::new("x".into()).rename("y".into())];
|
||||||
|
table.alter_columns(&alterations).await
|
||||||
|
}),
|
||||||
|
Box::pin(table.drop_columns(&["a"])),
|
||||||
|
// TODO: other endpoints.
|
||||||
];
|
];
|
||||||
|
|
||||||
for result in results {
|
for result in results {
|
||||||
@@ -1799,4 +1864,114 @@ mod tests {
|
|||||||
.await;
|
.await;
|
||||||
assert!(matches!(res, Err(Error::NotSupported { .. })));
|
assert!(matches!(res, Err(Error::NotSupported { .. })));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_add_columns() {
|
||||||
|
let table = Table::new_with_handler("my_table", |request| {
|
||||||
|
assert_eq!(request.method(), "POST");
|
||||||
|
assert_eq!(request.url().path(), "/v1/table/my_table/add_columns/");
|
||||||
|
assert_eq!(
|
||||||
|
request.headers().get("Content-Type").unwrap(),
|
||||||
|
JSON_CONTENT_TYPE
|
||||||
|
);
|
||||||
|
|
||||||
|
let body = request.body().unwrap().as_bytes().unwrap();
|
||||||
|
let body = std::str::from_utf8(body).unwrap();
|
||||||
|
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||||
|
let new_columns = value.get("new_columns").unwrap().as_array().unwrap();
|
||||||
|
assert!(new_columns.len() == 2);
|
||||||
|
|
||||||
|
let col_name = new_columns[0]["name"].as_str().unwrap();
|
||||||
|
let expression = new_columns[0]["expression"].as_str().unwrap();
|
||||||
|
assert_eq!(col_name, "b");
|
||||||
|
assert_eq!(expression, "a + 1");
|
||||||
|
|
||||||
|
let col_name = new_columns[1]["name"].as_str().unwrap();
|
||||||
|
let expression = new_columns[1]["expression"].as_str().unwrap();
|
||||||
|
assert_eq!(col_name, "x");
|
||||||
|
assert_eq!(expression, "cast(NULL as int32)");
|
||||||
|
|
||||||
|
http::Response::builder().status(200).body("{}").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
table
|
||||||
|
.add_columns(
|
||||||
|
NewColumnTransform::SqlExpressions(vec![
|
||||||
|
("b".into(), "a + 1".into()),
|
||||||
|
("x".into(), "cast(NULL as int32)".into()),
|
||||||
|
]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_alter_columns() {
|
||||||
|
let table = Table::new_with_handler("my_table", |request| {
|
||||||
|
assert_eq!(request.method(), "POST");
|
||||||
|
assert_eq!(request.url().path(), "/v1/table/my_table/alter_columns/");
|
||||||
|
assert_eq!(
|
||||||
|
request.headers().get("Content-Type").unwrap(),
|
||||||
|
JSON_CONTENT_TYPE
|
||||||
|
);
|
||||||
|
|
||||||
|
let body = request.body().unwrap().as_bytes().unwrap();
|
||||||
|
let body = std::str::from_utf8(body).unwrap();
|
||||||
|
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||||
|
let alterations = value.get("alterations").unwrap().as_array().unwrap();
|
||||||
|
assert!(alterations.len() == 2);
|
||||||
|
|
||||||
|
let path = alterations[0]["path"].as_str().unwrap();
|
||||||
|
let data_type = alterations[0]["data_type"]["type"].as_str().unwrap();
|
||||||
|
assert_eq!(path, "b.c");
|
||||||
|
assert_eq!(data_type, "int32");
|
||||||
|
|
||||||
|
let path = alterations[1]["path"].as_str().unwrap();
|
||||||
|
let nullable = alterations[1]["nullable"].as_bool().unwrap();
|
||||||
|
let rename = alterations[1]["rename"].as_str().unwrap();
|
||||||
|
assert_eq!(path, "x");
|
||||||
|
assert!(nullable);
|
||||||
|
assert_eq!(rename, "y");
|
||||||
|
|
||||||
|
http::Response::builder().status(200).body("{}").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
table
|
||||||
|
.alter_columns(&[
|
||||||
|
ColumnAlteration::new("b.c".into()).cast_to(DataType::Int32),
|
||||||
|
ColumnAlteration::new("x".into())
|
||||||
|
.rename("y".into())
|
||||||
|
.set_nullable(true),
|
||||||
|
])
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_drop_columns() {
|
||||||
|
let table = Table::new_with_handler("my_table", |request| {
|
||||||
|
assert_eq!(request.method(), "POST");
|
||||||
|
assert_eq!(request.url().path(), "/v1/table/my_table/drop_columns/");
|
||||||
|
assert_eq!(
|
||||||
|
request.headers().get("Content-Type").unwrap(),
|
||||||
|
JSON_CONTENT_TYPE
|
||||||
|
);
|
||||||
|
|
||||||
|
let body = request.body().unwrap().as_bytes().unwrap();
|
||||||
|
let body = std::str::from_utf8(body).unwrap();
|
||||||
|
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
||||||
|
let columns = value.get("columns").unwrap().as_array().unwrap();
|
||||||
|
assert!(columns.len() == 2);
|
||||||
|
|
||||||
|
let col1 = columns[0].as_str().unwrap();
|
||||||
|
let col2 = columns[1].as_str().unwrap();
|
||||||
|
assert_eq!(col1, "a");
|
||||||
|
assert_eq!(col2, "b");
|
||||||
|
|
||||||
|
http::Response::builder().status(200).body("{}").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
table.drop_columns(&["a", "b"]).await.unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,6 @@
|
|||||||
|
|
||||||
//! LanceDB Table APIs
|
//! LanceDB Table APIs
|
||||||
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
@@ -37,7 +36,8 @@ pub use lance::dataset::ColumnAlteration;
|
|||||||
pub use lance::dataset::NewColumnTransform;
|
pub use lance::dataset::NewColumnTransform;
|
||||||
pub use lance::dataset::ReadParams;
|
pub use lance::dataset::ReadParams;
|
||||||
use lance::dataset::{
|
use lance::dataset::{
|
||||||
Dataset, UpdateBuilder as LanceUpdateBuilder, Version, WhenMatched, WriteMode, WriteParams,
|
Dataset, InsertBuilder, UpdateBuilder as LanceUpdateBuilder, Version, WhenMatched, WriteMode,
|
||||||
|
WriteParams,
|
||||||
};
|
};
|
||||||
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
|
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
|
||||||
use lance::io::WrappingObjectStore;
|
use lance::io::WrappingObjectStore;
|
||||||
@@ -1046,12 +1046,6 @@ pub struct NativeTable {
|
|||||||
name: String,
|
name: String,
|
||||||
uri: String,
|
uri: String,
|
||||||
pub(crate) dataset: dataset::DatasetConsistencyWrapper,
|
pub(crate) dataset: dataset::DatasetConsistencyWrapper,
|
||||||
|
|
||||||
// the object store wrapper to use on write path
|
|
||||||
store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
|
||||||
|
|
||||||
storage_options: HashMap<String, String>,
|
|
||||||
|
|
||||||
// This comes from the connection options. We store here so we can pass down
|
// This comes from the connection options. We store here so we can pass down
|
||||||
// to the dataset when we recreate it (for example, in checkout_latest).
|
// to the dataset when we recreate it (for example, in checkout_latest).
|
||||||
read_consistency_interval: Option<std::time::Duration>,
|
read_consistency_interval: Option<std::time::Duration>,
|
||||||
@@ -1117,13 +1111,6 @@ impl NativeTable {
|
|||||||
None => params,
|
None => params,
|
||||||
};
|
};
|
||||||
|
|
||||||
let storage_options = params
|
|
||||||
.store_options
|
|
||||||
.clone()
|
|
||||||
.unwrap_or_default()
|
|
||||||
.storage_options
|
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
let dataset = DatasetBuilder::from_uri(uri)
|
let dataset = DatasetBuilder::from_uri(uri)
|
||||||
.with_read_params(params)
|
.with_read_params(params)
|
||||||
.load()
|
.load()
|
||||||
@@ -1141,8 +1128,6 @@ impl NativeTable {
|
|||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
uri: uri.to_string(),
|
uri: uri.to_string(),
|
||||||
dataset,
|
dataset,
|
||||||
store_wrapper: write_store_wrapper,
|
|
||||||
storage_options,
|
|
||||||
read_consistency_interval,
|
read_consistency_interval,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -1191,12 +1176,6 @@ impl NativeTable {
|
|||||||
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
|
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
|
||||||
None => params,
|
None => params,
|
||||||
};
|
};
|
||||||
let storage_options = params
|
|
||||||
.store_params
|
|
||||||
.clone()
|
|
||||||
.unwrap_or_default()
|
|
||||||
.storage_options
|
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
let dataset = Dataset::write(batches, uri, Some(params))
|
let dataset = Dataset::write(batches, uri, Some(params))
|
||||||
.await
|
.await
|
||||||
@@ -1210,8 +1189,6 @@ impl NativeTable {
|
|||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
uri: uri.to_string(),
|
uri: uri.to_string(),
|
||||||
dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
|
dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
|
||||||
store_wrapper: write_store_wrapper,
|
|
||||||
storage_options,
|
|
||||||
read_consistency_interval,
|
read_consistency_interval,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -1758,10 +1735,13 @@ impl TableInternal for NativeTable {
|
|||||||
add: AddDataBuilder<NoData>,
|
add: AddDataBuilder<NoData>,
|
||||||
data: Box<dyn RecordBatchReader + Send>,
|
data: Box<dyn RecordBatchReader + Send>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let data =
|
let data = Box::new(MaybeEmbedded::try_new(
|
||||||
MaybeEmbedded::try_new(data, self.table_definition().await?, add.embedding_registry)?;
|
data,
|
||||||
|
self.table_definition().await?,
|
||||||
|
add.embedding_registry,
|
||||||
|
)?) as Box<dyn RecordBatchReader + Send>;
|
||||||
|
|
||||||
let mut lance_params = add.write_options.lance_write_params.unwrap_or(WriteParams {
|
let lance_params = add.write_options.lance_write_params.unwrap_or(WriteParams {
|
||||||
mode: match add.mode {
|
mode: match add.mode {
|
||||||
AddDataMode::Append => WriteMode::Append,
|
AddDataMode::Append => WriteMode::Append,
|
||||||
AddDataMode::Overwrite => WriteMode::Overwrite,
|
AddDataMode::Overwrite => WriteMode::Overwrite,
|
||||||
@@ -1769,27 +1749,15 @@ impl TableInternal for NativeTable {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
});
|
});
|
||||||
|
|
||||||
// Bring storage options from table
|
let dataset = {
|
||||||
let storage_options = lance_params
|
// Limited scope for the mutable borrow of self.dataset avoids deadlock.
|
||||||
.store_params
|
let ds = self.dataset.get_mut().await?;
|
||||||
.get_or_insert(Default::default())
|
InsertBuilder::new(Arc::new(ds.clone()))
|
||||||
.storage_options
|
.with_params(&lance_params)
|
||||||
.get_or_insert(Default::default());
|
.execute_stream(data)
|
||||||
for (key, value) in self.storage_options.iter() {
|
.await?
|
||||||
if !storage_options.contains_key(key) {
|
|
||||||
storage_options.insert(key.clone(), value.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// patch the params if we have a write store wrapper
|
|
||||||
let lance_params = match self.store_wrapper.clone() {
|
|
||||||
Some(wrapper) => lance_params.patch_with_store_wrapper(wrapper)?,
|
|
||||||
None => lance_params,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
self.dataset.ensure_mutable().await?;
|
|
||||||
let dataset = Dataset::write(data, &self.uri, Some(lance_params)).await?;
|
|
||||||
|
|
||||||
self.dataset.set_latest(dataset).await;
|
self.dataset.set_latest(dataset).await;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_schema::{DataType, Schema};
|
use arrow_schema::{DataType, Schema};
|
||||||
|
use lance::arrow::json::JsonDataType;
|
||||||
use lance::dataset::{ReadParams, WriteParams};
|
use lance::dataset::{ReadParams, WriteParams};
|
||||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
@@ -175,6 +176,15 @@ pub fn supported_vector_data_type(dtype: &DataType) -> bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Note: this is temporary until we get a proper datatype conversion in Lance.
|
||||||
|
pub fn string_to_datatype(s: &str) -> Option<DataType> {
|
||||||
|
let data_type = serde_json::Value::String(s.to_string());
|
||||||
|
let json_type =
|
||||||
|
serde_json::Value::Object([("type".to_string(), data_type)].iter().cloned().collect());
|
||||||
|
let json_type: JsonDataType = serde_json::from_value(json_type).ok()?;
|
||||||
|
(&json_type).try_into().ok()
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -239,4 +249,11 @@ mod tests {
|
|||||||
assert!(validate_table_name("my@table").is_err());
|
assert!(validate_table_name("my@table").is_err());
|
||||||
assert!(validate_table_name("name with space").is_err());
|
assert!(validate_table_name("name with space").is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_string_to_datatype() {
|
||||||
|
let string = "int32";
|
||||||
|
let expected = DataType::Int32;
|
||||||
|
assert_eq!(string_to_datatype(string), Some(expected));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user