mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 21:39:57 +00:00
Compare commits
10 Commits
v0.4.11
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
245786fed7 | ||
|
|
edd9a043f8 | ||
|
|
38c09fc294 | ||
|
|
ebaa2dede5 | ||
|
|
ba7618a026 | ||
|
|
a6bcbd007b | ||
|
|
5af74b5aca | ||
|
|
8a52619bc0 | ||
|
|
314d4c93e5 | ||
|
|
c5471ee694 |
58
.github/workflows/build_linux_wheel/action.yml
vendored
Normal file
58
.github/workflows/build_linux_wheel/action.yml
vendored
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# We create a composite action to be re-used both for testing and for releasing
|
||||||
|
name: build-linux-wheel
|
||||||
|
description: "Build a manylinux wheel for lance"
|
||||||
|
inputs:
|
||||||
|
python-minor-version:
|
||||||
|
description: "8, 9, 10, 11, 12"
|
||||||
|
required: true
|
||||||
|
args:
|
||||||
|
description: "--release"
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
arm-build:
|
||||||
|
description: "Build for arm64 instead of x86_64"
|
||||||
|
# Note: this does *not* mean the host is arm64, since we might be cross-compiling.
|
||||||
|
required: false
|
||||||
|
default: "false"
|
||||||
|
runs:
|
||||||
|
using: "composite"
|
||||||
|
steps:
|
||||||
|
- name: CONFIRM ARM BUILD
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "ARM BUILD: ${{ inputs.arm-build }}"
|
||||||
|
- name: Build x86_64 Manylinux wheel
|
||||||
|
if: ${{ inputs.arm-build == 'false' }}
|
||||||
|
uses: PyO3/maturin-action@v1
|
||||||
|
with:
|
||||||
|
command: build
|
||||||
|
working-directory: python
|
||||||
|
target: x86_64-unknown-linux-gnu
|
||||||
|
manylinux: "2_17"
|
||||||
|
args: ${{ inputs.args }}
|
||||||
|
before-script-linux: |
|
||||||
|
set -e
|
||||||
|
yum install -y openssl-devel \
|
||||||
|
&& curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-$(uname -m).zip > /tmp/protoc.zip \
|
||||||
|
&& unzip /tmp/protoc.zip -d /usr/local \
|
||||||
|
&& rm /tmp/protoc.zip
|
||||||
|
- name: Build Arm Manylinux Wheel
|
||||||
|
if: ${{ inputs.arm-build == 'true' }}
|
||||||
|
uses: PyO3/maturin-action@v1
|
||||||
|
with:
|
||||||
|
command: build
|
||||||
|
working-directory: python
|
||||||
|
target: aarch64-unknown-linux-gnu
|
||||||
|
manylinux: "2_24"
|
||||||
|
args: ${{ inputs.args }}
|
||||||
|
before-script-linux: |
|
||||||
|
set -e
|
||||||
|
apt install -y unzip
|
||||||
|
if [ $(uname -m) = "x86_64" ]; then
|
||||||
|
PROTOC_ARCH="x86_64"
|
||||||
|
else
|
||||||
|
PROTOC_ARCH="aarch_64"
|
||||||
|
fi
|
||||||
|
curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-$PROTOC_ARCH.zip > /tmp/protoc.zip \
|
||||||
|
&& unzip /tmp/protoc.zip -d /usr/local \
|
||||||
|
&& rm /tmp/protoc.zip
|
||||||
25
.github/workflows/build_mac_wheel/action.yml
vendored
Normal file
25
.github/workflows/build_mac_wheel/action.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# We create a composite action to be re-used both for testing and for releasing
|
||||||
|
name: build_wheel
|
||||||
|
description: "Build a lance wheel"
|
||||||
|
inputs:
|
||||||
|
python-minor-version:
|
||||||
|
description: "8, 9, 10, 11"
|
||||||
|
required: true
|
||||||
|
args:
|
||||||
|
description: "--release"
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
runs:
|
||||||
|
using: "composite"
|
||||||
|
steps:
|
||||||
|
- name: Install macos dependency
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
brew install protobuf
|
||||||
|
- name: Build wheel
|
||||||
|
uses: PyO3/maturin-action@v1
|
||||||
|
with:
|
||||||
|
command: build
|
||||||
|
args: ${{ inputs.args }}
|
||||||
|
working-directory: python
|
||||||
|
interpreter: 3.${{ inputs.python-minor-version }}
|
||||||
33
.github/workflows/build_windows_wheel/action.yml
vendored
Normal file
33
.github/workflows/build_windows_wheel/action.yml
vendored
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# We create a composite action to be re-used both for testing and for releasing
|
||||||
|
name: build_wheel
|
||||||
|
description: "Build a lance wheel"
|
||||||
|
inputs:
|
||||||
|
python-minor-version:
|
||||||
|
description: "8, 9, 10, 11"
|
||||||
|
required: true
|
||||||
|
args:
|
||||||
|
description: "--release"
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
runs:
|
||||||
|
using: "composite"
|
||||||
|
steps:
|
||||||
|
- name: Install Protoc v21.12
|
||||||
|
working-directory: C:\
|
||||||
|
run: |
|
||||||
|
New-Item -Path 'C:\protoc' -ItemType Directory
|
||||||
|
Set-Location C:\protoc
|
||||||
|
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
||||||
|
7z x protoc.zip
|
||||||
|
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
||||||
|
shell: powershell
|
||||||
|
- name: Build wheel
|
||||||
|
uses: PyO3/maturin-action@v1
|
||||||
|
with:
|
||||||
|
command: build
|
||||||
|
args: ${{ inputs.args }}
|
||||||
|
working-directory: python
|
||||||
|
- uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: windows-wheels
|
||||||
|
path: python\target\wheels
|
||||||
101
.github/workflows/pypi-publish.yml
vendored
101
.github/workflows/pypi-publish.yml
vendored
@@ -2,30 +2,91 @@ name: PyPI Publish
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
release:
|
release:
|
||||||
types: [ published ]
|
types: [published]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
publish:
|
linux:
|
||||||
runs-on: ubuntu-latest
|
timeout-minutes: 60
|
||||||
# Only runs on tags that match the python-make-release action
|
strategy:
|
||||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
matrix:
|
||||||
defaults:
|
python-minor-version: ["8"]
|
||||||
run:
|
platform:
|
||||||
shell: bash
|
- x86_64
|
||||||
working-directory: python
|
- aarch64
|
||||||
|
runs-on: "ubuntu-22.04"
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
lfs: true
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: "3.8"
|
python-version: 3.${{ matrix.python-minor-version }}
|
||||||
- name: Build distribution
|
- uses: ./.github/workflows/build_linux_wheel
|
||||||
run: |
|
|
||||||
ls -la
|
|
||||||
pip install wheel setuptools --upgrade
|
|
||||||
python setup.py sdist bdist_wheel
|
|
||||||
- name: Publish
|
|
||||||
uses: pypa/gh-action-pypi-publish@v1.8.5
|
|
||||||
with:
|
with:
|
||||||
password: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
python-minor-version: ${{ matrix.python-minor-version }}
|
||||||
packages-dir: python/dist
|
args: "--release --strip"
|
||||||
|
arm-build: ${{ matrix.platform == 'aarch64' }}
|
||||||
|
- uses: ./.github/workflows/upload_wheel
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||||
|
repo: "pypi"
|
||||||
|
mac:
|
||||||
|
timeout-minutes: 60
|
||||||
|
runs-on: ${{ matrix.config.runner }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-minor-version: ["8"]
|
||||||
|
config:
|
||||||
|
- target: x86_64-apple-darwin
|
||||||
|
runner: macos-13
|
||||||
|
- target: aarch64-apple-darwin
|
||||||
|
runner: macos-14
|
||||||
|
env:
|
||||||
|
MACOSX_DEPLOYMENT_TARGET: 10.15
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ inputs.ref }}
|
||||||
|
fetch-depth: 0
|
||||||
|
lfs: true
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: 3.12
|
||||||
|
- uses: ./.github/workflows/build_mac_wheel
|
||||||
|
with:
|
||||||
|
python-minor-version: ${{ matrix.python-minor-version }}
|
||||||
|
args: "--release --strip --target ${{ matrix.config.target }}"
|
||||||
|
- uses: ./.github/workflows/upload_wheel
|
||||||
|
with:
|
||||||
|
python-minor-version: ${{ matrix.python-minor-version }}
|
||||||
|
token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||||
|
repo: "pypi"
|
||||||
|
windows:
|
||||||
|
timeout-minutes: 60
|
||||||
|
runs-on: windows-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-minor-version: ["8"]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ inputs.ref }}
|
||||||
|
fetch-depth: 0
|
||||||
|
lfs: true
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: 3.${{ matrix.python-minor-version }}
|
||||||
|
- uses: ./.github/workflows/build_windows_wheel
|
||||||
|
with:
|
||||||
|
python-minor-version: ${{ matrix.python-minor-version }}
|
||||||
|
args: "--release --strip"
|
||||||
|
vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
|
||||||
|
- uses: ./.github/workflows/upload_wheel
|
||||||
|
with:
|
||||||
|
python-minor-version: ${{ matrix.python-minor-version }}
|
||||||
|
token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||||
|
repo: "pypi"
|
||||||
|
|||||||
210
.github/workflows/python.yml
vendored
210
.github/workflows/python.yml
vendored
@@ -14,49 +14,133 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
linux:
|
lint:
|
||||||
|
name: "Lint"
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
python-minor-version: [ "8", "11" ]
|
|
||||||
runs-on: "ubuntu-22.04"
|
runs-on: "ubuntu-22.04"
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
shell: bash
|
shell: bash
|
||||||
working-directory: python
|
working-directory: python
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
lfs: true
|
lfs: true
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: 3.${{ matrix.python-minor-version }}
|
python-version: "3.11"
|
||||||
- name: Install lancedb
|
- name: Install ruff
|
||||||
run: |
|
run: |
|
||||||
pip install -e .[tests]
|
pip install ruff
|
||||||
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
- name: Format check
|
||||||
pip install pytest pytest-mock ruff
|
run: ruff format --check .
|
||||||
- name: Format check
|
- name: Lint
|
||||||
run: ruff format --check .
|
run: ruff .
|
||||||
- name: Lint
|
doctest:
|
||||||
run: ruff .
|
name: "Doctest"
|
||||||
- name: Run tests
|
timeout-minutes: 30
|
||||||
run: pytest -m "not slow" -x -v --durations=30 tests
|
runs-on: "ubuntu-22.04"
|
||||||
- name: doctest
|
defaults:
|
||||||
run: pytest --doctest-modules lancedb
|
run:
|
||||||
|
shell: bash
|
||||||
|
working-directory: python
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
lfs: true
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.11"
|
||||||
|
cache: "pip"
|
||||||
|
- name: Install protobuf
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y protobuf-compiler
|
||||||
|
- uses: Swatinem/rust-cache@v2
|
||||||
|
with:
|
||||||
|
workspaces: python
|
||||||
|
- name: Install
|
||||||
|
run: |
|
||||||
|
pip install -e .[tests,dev,embeddings]
|
||||||
|
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
||||||
|
pip install mlx
|
||||||
|
- name: Doctest
|
||||||
|
run: pytest --doctest-modules python/lancedb
|
||||||
|
linux:
|
||||||
|
name: "Linux: python-3.${{ matrix.python-minor-version }}"
|
||||||
|
timeout-minutes: 30
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-minor-version: ["8", "11"]
|
||||||
|
runs-on: "ubuntu-22.04"
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash
|
||||||
|
working-directory: python
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
lfs: true
|
||||||
|
- name: Install protobuf
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y protobuf-compiler
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: 3.${{ matrix.python-minor-version }}
|
||||||
|
- uses: Swatinem/rust-cache@v2
|
||||||
|
with:
|
||||||
|
workspaces: python
|
||||||
|
- uses: ./.github/workflows/build_linux_wheel
|
||||||
|
- uses: ./.github/workflows/run_tests
|
||||||
|
# Make sure wheels are not included in the Rust cache
|
||||||
|
- name: Delete wheels
|
||||||
|
run: rm -rf target/wheels
|
||||||
platform:
|
platform:
|
||||||
name: "Platform: ${{ matrix.config.name }}"
|
name: "Mac: ${{ matrix.config.name }}"
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- name: x86 Mac
|
- name: x86
|
||||||
runner: macos-13
|
runner: macos-13
|
||||||
- name: Arm Mac
|
- name: Arm
|
||||||
runner: macos-14
|
runner: macos-14
|
||||||
- name: x86 Windows
|
runs-on: "${{ matrix.config.runner }}"
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash
|
||||||
|
working-directory: python
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
lfs: true
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.11"
|
||||||
|
- uses: Swatinem/rust-cache@v2
|
||||||
|
with:
|
||||||
|
workspaces: python
|
||||||
|
- uses: ./.github/workflows/build_mac_wheel
|
||||||
|
- uses: ./.github/workflows/run_tests
|
||||||
|
# Make sure wheels are not included in the Rust cache
|
||||||
|
- name: Delete wheels
|
||||||
|
run: rm -rf target/wheels
|
||||||
|
windows:
|
||||||
|
name: "Windows: ${{ matrix.config.name }}"
|
||||||
|
timeout-minutes: 30
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
config:
|
||||||
|
- name: x86
|
||||||
runner: windows-latest
|
runner: windows-latest
|
||||||
runs-on: "${{ matrix.config.runner }}"
|
runs-on: "${{ matrix.config.runner }}"
|
||||||
defaults:
|
defaults:
|
||||||
@@ -64,21 +148,22 @@ jobs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
working-directory: python
|
working-directory: python
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
lfs: true
|
lfs: true
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
- name: Install lancedb
|
- uses: Swatinem/rust-cache@v2
|
||||||
run: |
|
with:
|
||||||
pip install -e .[tests]
|
workspaces: python
|
||||||
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
- uses: ./.github/workflows/build_windows_wheel
|
||||||
pip install pytest pytest-mock
|
- uses: ./.github/workflows/run_tests
|
||||||
- name: Run tests
|
# Make sure wheels are not included in the Rust cache
|
||||||
run: pytest -m "not slow" -x -v --durations=30 tests
|
- name: Delete wheels
|
||||||
|
run: rm -rf target/wheels
|
||||||
pydantic1x:
|
pydantic1x:
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
runs-on: "ubuntu-22.04"
|
runs-on: "ubuntu-22.04"
|
||||||
@@ -87,21 +172,22 @@ jobs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
working-directory: python
|
working-directory: python
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
lfs: true
|
lfs: true
|
||||||
- name: Set up Python
|
- name: Install dependencies
|
||||||
uses: actions/setup-python@v5
|
run: |
|
||||||
with:
|
sudo apt update
|
||||||
python-version: 3.9
|
sudo apt install -y protobuf-compiler
|
||||||
- name: Install lancedb
|
- name: Set up Python
|
||||||
run: |
|
uses: actions/setup-python@v5
|
||||||
pip install "pydantic<2"
|
with:
|
||||||
pip install -e .[tests]
|
python-version: 3.9
|
||||||
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
- name: Install lancedb
|
||||||
pip install pytest pytest-mock
|
run: |
|
||||||
- name: Run tests
|
pip install "pydantic<2"
|
||||||
run: pytest -m "not slow" -x -v --durations=30 tests
|
pip install -e .[tests]
|
||||||
- name: doctest
|
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
||||||
run: pytest --doctest-modules lancedb
|
- name: Run tests
|
||||||
|
run: pytest -m "not slow" -x -v --durations=30 python/tests
|
||||||
|
|||||||
17
.github/workflows/run_tests/action.yml
vendored
Normal file
17
.github/workflows/run_tests/action.yml
vendored
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
name: run-tests
|
||||||
|
|
||||||
|
description: "Install lance wheel and run unit tests"
|
||||||
|
inputs:
|
||||||
|
python-minor-version:
|
||||||
|
required: true
|
||||||
|
description: "8 9 10 11 12"
|
||||||
|
runs:
|
||||||
|
using: "composite"
|
||||||
|
steps:
|
||||||
|
- name: Install lancedb
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
pip3 install $(ls target/wheels/lancedb-*.whl)[tests,dev,embeddings]
|
||||||
|
- name: pytest
|
||||||
|
shell: bash
|
||||||
|
run: pytest -m "not slow" -x -v --durations=30 python/python/tests
|
||||||
29
.github/workflows/upload_wheel/action.yml
vendored
Normal file
29
.github/workflows/upload_wheel/action.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
name: upload-wheel
|
||||||
|
|
||||||
|
description: "Upload wheels to Pypi"
|
||||||
|
inputs:
|
||||||
|
os:
|
||||||
|
required: true
|
||||||
|
description: "ubuntu-22.04 or macos-13"
|
||||||
|
repo:
|
||||||
|
required: false
|
||||||
|
description: "pypi or testpypi"
|
||||||
|
default: "pypi"
|
||||||
|
token:
|
||||||
|
required: true
|
||||||
|
description: "release token for the repo"
|
||||||
|
|
||||||
|
runs:
|
||||||
|
using: "composite"
|
||||||
|
steps:
|
||||||
|
- name: Install dependencies
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install twine
|
||||||
|
- name: Publish wheel
|
||||||
|
env:
|
||||||
|
TWINE_USERNAME: __token__
|
||||||
|
TWINE_PASSWORD: ${{ inputs.token }}
|
||||||
|
shell: bash
|
||||||
|
run: twine upload --repository ${{ inputs.repo }} target/wheels/lancedb-*.whl
|
||||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -22,6 +22,11 @@ python/dist
|
|||||||
|
|
||||||
**/.hypothesis
|
**/.hypothesis
|
||||||
|
|
||||||
|
# Compiled Dynamic libraries
|
||||||
|
*.so
|
||||||
|
*.dylib
|
||||||
|
*.dll
|
||||||
|
|
||||||
## Javascript
|
## Javascript
|
||||||
*.node
|
*.node
|
||||||
**/node_modules
|
**/node_modules
|
||||||
|
|||||||
10
Cargo.toml
10
Cargo.toml
@@ -1,5 +1,5 @@
|
|||||||
[workspace]
|
[workspace]
|
||||||
members = ["rust/ffi/node", "rust/lancedb", "nodejs"]
|
members = ["rust/ffi/node", "rust/lancedb", "nodejs", "python"]
|
||||||
# Python package needs to be built by maturin.
|
# Python package needs to be built by maturin.
|
||||||
exclude = ["python"]
|
exclude = ["python"]
|
||||||
resolver = "2"
|
resolver = "2"
|
||||||
@@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
|
|||||||
categories = ["database-implementations"]
|
categories = ["database-implementations"]
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.9.18", "features" = ["dynamodb"] }
|
lance = { "version" = "=0.10.1", "features" = ["dynamodb"] }
|
||||||
lance-index = { "version" = "=0.9.18" }
|
lance-index = { "version" = "=0.10.1" }
|
||||||
lance-linalg = { "version" = "=0.9.18" }
|
lance-linalg = { "version" = "=0.10.1" }
|
||||||
lance-testing = { "version" = "=0.9.18" }
|
lance-testing = { "version" = "=0.10.1" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "50.0", optional = false }
|
arrow = { version = "50.0", optional = false }
|
||||||
arrow-array = "50.0"
|
arrow-array = "50.0"
|
||||||
|
|||||||
27
dockerfiles/Dockerfile
Normal file
27
dockerfiles/Dockerfile
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Simple base Dockerfile that provides the basic dependencies required to run lance with FTS and Hybrid Search
|
||||||
|
# Usage: docker build -t lancedb:latest -f Dockerfile .
|
||||||
|
FROM python:3.10-slim-buster
|
||||||
|
|
||||||
|
# Install Rust
|
||||||
|
RUN apt-get update && apt-get install -y curl build-essential && \
|
||||||
|
curl https://sh.rustup.rs -sSf | sh -s -- -y
|
||||||
|
|
||||||
|
# Set the environment variable for Rust
|
||||||
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
|
|
||||||
|
# Install protobuf compiler
|
||||||
|
RUN apt-get install -y protobuf-compiler && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
RUN apt-get -y update &&\
|
||||||
|
apt-get -y upgrade && \
|
||||||
|
apt-get -y install git
|
||||||
|
|
||||||
|
|
||||||
|
# Verify installations
|
||||||
|
RUN python --version && \
|
||||||
|
rustc --version && \
|
||||||
|
protoc --version
|
||||||
|
|
||||||
|
RUN pip install tantivy lancedb
|
||||||
44
node/package-lock.json
generated
44
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.4.10",
|
"version": "0.4.11",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.4.10",
|
"version": "0.4.11",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -53,11 +53,11 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.4.10",
|
"@lancedb/vectordb-darwin-arm64": "0.4.11",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.4.10",
|
"@lancedb/vectordb-darwin-x64": "0.4.11",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.10",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.4.11",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.4.10",
|
"@lancedb/vectordb-linux-x64-gnu": "0.4.11",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.4.10"
|
"@lancedb/vectordb-win32-x64-msvc": "0.4.11"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@75lb/deep-merge": {
|
"node_modules/@75lb/deep-merge": {
|
||||||
@@ -329,9 +329,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.4.10",
|
"version": "0.4.11",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.10.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.11.tgz",
|
||||||
"integrity": "sha512-y/uHOGb0g15pvqv5tdTyZ6oN+0QVpBmZDzKFWW6pPbuSZjB2uPqcs+ti0RB+AUdmS21kavVQqaNsw/HLKEGrHA==",
|
"integrity": "sha512-JDOKmFnuJPFkA7ZmrzBJolROwSjWr7yMvAbi40uLBc25YbbVezodd30u2EFtIwWwtk1GqNYRZ49FZOElKYeC/Q==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -341,9 +341,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.4.10",
|
"version": "0.4.11",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.10.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.11.tgz",
|
||||||
"integrity": "sha512-XbfR58OkQpAe0xMSTrwJh9ZjGSzG9EZ7zwO6HfYem8PxcLYAcC6eWRWoSG/T0uObyrPTcYYyvHsp0eNQWYBFAQ==",
|
"integrity": "sha512-iy6r+8tp2v1EFgJV52jusXtxgO6NY6SkpOdX41xPqN2mQWMkfUAR9Xtks1mgknjPOIKH4MRc8ZS0jcW/UWmilQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -353,9 +353,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.4.10",
|
"version": "0.4.11",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.10.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.11.tgz",
|
||||||
"integrity": "sha512-x40WKH9b+KxorRmKr9G7fv8p5mMj8QJQvRMA0v6v+nbZHr2FLlAZV+9mvhHOnm4AGIkPP5335cUgv6Qz6hgwkQ==",
|
"integrity": "sha512-5K6IVcTMuH0SZBjlqB5Gg39WC889FpTwIWKufxzQMMXrzxo5J3lKUHVoR28RRlNhDF2d9kZXBEyCpIfDFsV9iQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -365,9 +365,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.4.10",
|
"version": "0.4.11",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.10.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.11.tgz",
|
||||||
"integrity": "sha512-CTGPpuzlqq2nVjUxI9gAJOT1oBANIovtIaFsOmBSnEAHgX7oeAxKy2b6L/kJzsgqSzvR5vfLwYcWFrr6ZmBxSA==",
|
"integrity": "sha512-hF9ZChsdqKqqnivOzd9mE7lC3PmhZadXtwThi2RrsPiOLoEaGDfmr6Ni3amVQnB3bR8YEJtTxdQxe0NC4uW/8g==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -377,9 +377,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.4.10",
|
"version": "0.4.11",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.10.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.11.tgz",
|
||||||
"integrity": "sha512-Fd7r74coZyrKzkfXg4WthqOL+uKyJyPTia6imcrMNqKOlTGdKmHf02Qi2QxWZrFaabkRYo4Tpn5FeRJ3yYX8CA==",
|
"integrity": "sha512-0+9ut1ccKoqIyGxsVixwx3771Z+DXpl5WfSmOeA8kf3v3jlOg2H+0YUahiXLDid2ju+yeLPrAUYm7A1gKHVhew==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -61,11 +61,13 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
|
||||||
"@neon-rs/load": "^0.0.74",
|
"@neon-rs/load": "^0.0.74",
|
||||||
"apache-arrow": "^14.0.2",
|
|
||||||
"axios": "^1.4.0"
|
"axios": "^1.4.0"
|
||||||
},
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
|
"apache-arrow": "^14.0.2"
|
||||||
|
},
|
||||||
"os": [
|
"os": [
|
||||||
"darwin",
|
"darwin",
|
||||||
"linux",
|
"linux",
|
||||||
|
|||||||
@@ -42,7 +42,10 @@ const {
|
|||||||
tableCompactFiles,
|
tableCompactFiles,
|
||||||
tableListIndices,
|
tableListIndices,
|
||||||
tableIndexStats,
|
tableIndexStats,
|
||||||
tableSchema
|
tableSchema,
|
||||||
|
tableAddColumns,
|
||||||
|
tableAlterColumns,
|
||||||
|
tableDropColumns
|
||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||||
} = require('../native.js')
|
} = require('../native.js')
|
||||||
|
|
||||||
@@ -500,6 +503,59 @@ export interface Table<T = number[]> {
|
|||||||
filter(value: string): Query<T>
|
filter(value: string): Query<T>
|
||||||
|
|
||||||
schema: Promise<Schema>
|
schema: Promise<Schema>
|
||||||
|
|
||||||
|
// TODO: Support BatchUDF
|
||||||
|
/**
|
||||||
|
* Add new columns with defined values.
|
||||||
|
*
|
||||||
|
* @param newColumnTransforms pairs of column names and the SQL expression to use
|
||||||
|
* to calculate the value of the new column. These
|
||||||
|
* expressions will be evaluated for each row in the
|
||||||
|
* table, and can reference existing columns in the table.
|
||||||
|
*/
|
||||||
|
addColumns(newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Alter the name or nullability of columns.
|
||||||
|
*
|
||||||
|
* @param columnAlterations One or more alterations to apply to columns.
|
||||||
|
*/
|
||||||
|
alterColumns(columnAlterations: ColumnAlteration[]): Promise<void>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Drop one or more columns from the dataset
|
||||||
|
*
|
||||||
|
* This is a metadata-only operation and does not remove the data from the
|
||||||
|
* underlying storage. In order to remove the data, you must subsequently
|
||||||
|
* call ``compact_files`` to rewrite the data without the removed columns and
|
||||||
|
* then call ``cleanup_files`` to remove the old files.
|
||||||
|
*
|
||||||
|
* @param columnNames The names of the columns to drop. These can be nested
|
||||||
|
* column references (e.g. "a.b.c") or top-level column
|
||||||
|
* names (e.g. "a").
|
||||||
|
*/
|
||||||
|
dropColumns(columnNames: string[]): Promise<void>
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A definition of a column alteration. The alteration changes the column at
|
||||||
|
* `path` to have the new name `rename` (when given) and to be nullable if
|
||||||
|
* `nullable` is true. At least one of `rename` or `nullable`
|
||||||
|
* must be provided.
|
||||||
|
*/
|
||||||
|
export interface ColumnAlteration {
|
||||||
|
/**
|
||||||
|
* The path to the column to alter. This is a dot-separated path to the column.
|
||||||
|
* If it is a top-level column then it is just the name of the column. If it is
|
||||||
|
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
||||||
|
* `c` nested inside a column `b` nested inside a column `a`.
|
||||||
|
*/
|
||||||
|
path: string
|
||||||
|
rename?: string
|
||||||
|
/**
|
||||||
|
* Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
||||||
|
*/
|
||||||
|
nullable?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface UpdateArgs {
|
export interface UpdateArgs {
|
||||||
@@ -1028,6 +1084,18 @@ export class LocalTable<T = number[]> implements Table<T> {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
|
||||||
|
return tableAddColumns.call(this._tbl, newColumnTransforms)
|
||||||
|
}
|
||||||
|
|
||||||
|
async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
|
||||||
|
return tableAlterColumns.call(this._tbl, columnAlterations)
|
||||||
|
}
|
||||||
|
|
||||||
|
async dropColumns (columnNames: string[]): Promise<void> {
|
||||||
|
return tableDropColumns.call(this._tbl, columnNames)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface CleanupStats {
|
export interface CleanupStats {
|
||||||
|
|||||||
@@ -25,7 +25,8 @@ import {
|
|||||||
type UpdateArgs,
|
type UpdateArgs,
|
||||||
type UpdateSqlArgs,
|
type UpdateSqlArgs,
|
||||||
makeArrowTable,
|
makeArrowTable,
|
||||||
type MergeInsertArgs
|
type MergeInsertArgs,
|
||||||
|
type ColumnAlteration
|
||||||
} from '../index'
|
} from '../index'
|
||||||
import { Query } from '../query'
|
import { Query } from '../query'
|
||||||
|
|
||||||
@@ -474,4 +475,16 @@ export class RemoteTable<T = number[]> implements Table<T> {
|
|||||||
numUnindexedRows: results.data.num_unindexed_rows
|
numUnindexedRows: results.data.num_unindexed_rows
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
|
||||||
|
throw new Error('Add columns is not yet supported in LanceDB Cloud.')
|
||||||
|
}
|
||||||
|
|
||||||
|
async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
|
||||||
|
throw new Error('Alter columns is not yet supported in LanceDB Cloud.')
|
||||||
|
}
|
||||||
|
|
||||||
|
async dropColumns (columnNames: string[]): Promise<void> {
|
||||||
|
throw new Error('Drop columns is not yet supported in LanceDB Cloud.')
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,8 +37,10 @@ import {
|
|||||||
Utf8,
|
Utf8,
|
||||||
Table as ArrowTable,
|
Table as ArrowTable,
|
||||||
vectorFromArray,
|
vectorFromArray,
|
||||||
|
Float64,
|
||||||
Float32,
|
Float32,
|
||||||
Float16
|
Float16,
|
||||||
|
Int64
|
||||||
} from 'apache-arrow'
|
} from 'apache-arrow'
|
||||||
|
|
||||||
const expect = chai.expect
|
const expect = chai.expect
|
||||||
@@ -196,7 +198,7 @@ describe('LanceDB client', function () {
|
|||||||
const table = await con.openTable('vectors')
|
const table = await con.openTable('vectors')
|
||||||
const results = await table
|
const results = await table
|
||||||
.search([0.1, 0.1])
|
.search([0.1, 0.1])
|
||||||
.select(['is_active'])
|
.select(['is_active', 'vector'])
|
||||||
.execute()
|
.execute()
|
||||||
assert.equal(results.length, 2)
|
assert.equal(results.length, 2)
|
||||||
// vector and _distance are always returned
|
// vector and _distance are always returned
|
||||||
@@ -1057,3 +1059,63 @@ describe('Compact and cleanup', function () {
|
|||||||
assert.equal(await table.countRows(), 3)
|
assert.equal(await table.countRows(), 3)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('schema evolution', function () {
|
||||||
|
// Create a new sample table
|
||||||
|
it('can add a new column to the schema', async function () {
|
||||||
|
const dir = await track().mkdir('lancejs')
|
||||||
|
const con = await lancedb.connect(dir)
|
||||||
|
const table = await con.createTable('vectors', [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2] }
|
||||||
|
])
|
||||||
|
|
||||||
|
await table.addColumns([{ name: 'price', valueSql: 'cast(10.0 as float)' }])
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field('id', new Int64()),
|
||||||
|
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true))),
|
||||||
|
new Field('price', new Float32())
|
||||||
|
])
|
||||||
|
expect(await table.schema).to.deep.equal(expectedSchema)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('can alter the columns in the schema', async function () {
|
||||||
|
const dir = await track().mkdir('lancejs')
|
||||||
|
const con = await lancedb.connect(dir)
|
||||||
|
const schema = new Schema([
|
||||||
|
new Field('id', new Int64(), false),
|
||||||
|
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true))),
|
||||||
|
new Field('price', new Float64(), false)
|
||||||
|
])
|
||||||
|
const table = await con.createTable('vectors', [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2], price: 10.0 }
|
||||||
|
])
|
||||||
|
expect(await table.schema).to.deep.equal(schema)
|
||||||
|
|
||||||
|
await table.alterColumns([
|
||||||
|
{ path: 'id', rename: 'new_id' },
|
||||||
|
{ path: 'price', nullable: true }
|
||||||
|
])
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field('new_id', new Int64(), false),
|
||||||
|
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true))),
|
||||||
|
new Field('price', new Float64(), true)
|
||||||
|
])
|
||||||
|
expect(await table.schema).to.deep.equal(expectedSchema)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('can drop a column from the schema', async function () {
|
||||||
|
const dir = await track().mkdir('lancejs')
|
||||||
|
const con = await lancedb.connect(dir)
|
||||||
|
const table = await con.createTable('vectors', [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2] }
|
||||||
|
])
|
||||||
|
await table.dropColumns(['vector'])
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field('id', new Int64(), false)
|
||||||
|
])
|
||||||
|
expect(await table.schema).to.deep.equal(expectedSchema)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import * as path from "path";
|
|||||||
import * as fs from "fs";
|
import * as fs from "fs";
|
||||||
|
|
||||||
import { connect } from "../dist";
|
import { connect } from "../dist";
|
||||||
import { Schema, Field, Float32, Int32, FixedSizeList } from "apache-arrow";
|
import { Schema, Field, Float32, Int32, FixedSizeList, Int64, Float64 } from "apache-arrow";
|
||||||
import { makeArrowTable } from "../dist/arrow";
|
import { makeArrowTable } from "../dist/arrow";
|
||||||
|
|
||||||
describe("Test creating index", () => {
|
describe("Test creating index", () => {
|
||||||
@@ -214,4 +214,69 @@ describe("Read consistency interval", () => {
|
|||||||
expect(await table2.countRows()).toEqual(2n);
|
expect(await table2.countRows()).toEqual(2n);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
describe('schema evolution', function () {
|
||||||
|
let tmpDir: string;
|
||||||
|
beforeEach(() => {
|
||||||
|
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "schema-evolution-"));
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create a new sample table
|
||||||
|
it('can add a new column to the schema', async function () {
|
||||||
|
const con = await connect(tmpDir)
|
||||||
|
const table = await con.createTable('vectors', [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2] }
|
||||||
|
])
|
||||||
|
|
||||||
|
await table.addColumns([{ name: 'price', valueSql: 'cast(10.0 as float)' }])
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field('id', new Int64(), true),
|
||||||
|
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), true),
|
||||||
|
new Field('price', new Float32(), false)
|
||||||
|
])
|
||||||
|
expect(await table.schema()).toEqual(expectedSchema)
|
||||||
|
});
|
||||||
|
|
||||||
|
it('can alter the columns in the schema', async function () {
|
||||||
|
const con = await connect(tmpDir)
|
||||||
|
const schema = new Schema([
|
||||||
|
new Field('id', new Int64(), true),
|
||||||
|
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), true),
|
||||||
|
new Field('price', new Float64(), false)
|
||||||
|
])
|
||||||
|
const table = await con.createTable('vectors', [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2] }
|
||||||
|
])
|
||||||
|
// Can create a non-nullable column only through addColumns at the moment.
|
||||||
|
await table.addColumns([{ name: 'price', valueSql: 'cast(10.0 as double)' }])
|
||||||
|
expect(await table.schema()).toEqual(schema)
|
||||||
|
|
||||||
|
await table.alterColumns([
|
||||||
|
{ path: 'id', rename: 'new_id' },
|
||||||
|
{ path: 'price', nullable: true }
|
||||||
|
])
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field('new_id', new Int64(), true),
|
||||||
|
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), true),
|
||||||
|
new Field('price', new Float64(), true)
|
||||||
|
])
|
||||||
|
expect(await table.schema()).toEqual(expectedSchema)
|
||||||
|
});
|
||||||
|
|
||||||
|
it('can drop a column from the schema', async function () {
|
||||||
|
const con = await connect(tmpDir)
|
||||||
|
const table = await con.createTable('vectors', [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2] }
|
||||||
|
])
|
||||||
|
await table.dropColumns(['vector'])
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field('id', new Int64(), true)
|
||||||
|
])
|
||||||
|
expect(await table.schema()).toEqual(expectedSchema)
|
||||||
|
});
|
||||||
});
|
});
|
||||||
35
nodejs/lancedb/native.d.ts
vendored
35
nodejs/lancedb/native.d.ts
vendored
@@ -12,6 +12,38 @@ export const enum MetricType {
|
|||||||
Cosine = 1,
|
Cosine = 1,
|
||||||
Dot = 2
|
Dot = 2
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* A definition of a column alteration. The alteration changes the column at
|
||||||
|
* `path` to have the new name `rename`, to be nullable if `nullable` is true,
|
||||||
|
* and to have the data type `data_type`. At least one of `rename` or `nullable`
|
||||||
|
* must be provided.
|
||||||
|
*/
|
||||||
|
export interface ColumnAlteration {
|
||||||
|
/**
|
||||||
|
* The path to the column to alter. This is a dot-separated path to the column.
|
||||||
|
* If it is a top-level column then it is just the name of the column. If it is
|
||||||
|
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
||||||
|
* `c` nested inside a column `b` nested inside a column `a`.
|
||||||
|
*/
|
||||||
|
path: string
|
||||||
|
/**
|
||||||
|
* The new name of the column. If not provided then the name will not be changed.
|
||||||
|
* This must be distinct from the names of all other columns in the table.
|
||||||
|
*/
|
||||||
|
rename?: string
|
||||||
|
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
|
||||||
|
nullable?: boolean
|
||||||
|
}
|
||||||
|
/** A definition of a new column to add to a table. */
|
||||||
|
export interface AddColumnsSql {
|
||||||
|
/** The name of the new column. */
|
||||||
|
name: string
|
||||||
|
/**
|
||||||
|
* The values to populate the new column with, as a SQL expression.
|
||||||
|
* The expression can reference other columns in the table.
|
||||||
|
*/
|
||||||
|
valueSql: string
|
||||||
|
}
|
||||||
export interface ConnectionOptions {
|
export interface ConnectionOptions {
|
||||||
uri: string
|
uri: string
|
||||||
apiKey?: string
|
apiKey?: string
|
||||||
@@ -89,4 +121,7 @@ export class Table {
|
|||||||
delete(predicate: string): Promise<void>
|
delete(predicate: string): Promise<void>
|
||||||
createIndex(): IndexBuilder
|
createIndex(): IndexBuilder
|
||||||
query(): Query
|
query(): Query
|
||||||
|
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
|
||||||
|
alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
|
||||||
|
dropColumns(columns: Array<string>): Promise<void>
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
import { Schema, tableFromIPC } from "apache-arrow";
|
import { Schema, tableFromIPC } from "apache-arrow";
|
||||||
import { Table as _NativeTable } from "./native";
|
import { AddColumnsSql, ColumnAlteration, Table as _NativeTable } from "./native";
|
||||||
import { toBuffer, Data } from "./arrow";
|
import { toBuffer, Data } from "./arrow";
|
||||||
import { Query } from "./query";
|
import { Query } from "./query";
|
||||||
import { IndexBuilder } from "./indexer";
|
import { IndexBuilder } from "./indexer";
|
||||||
@@ -150,4 +150,42 @@ export class Table {
|
|||||||
}
|
}
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Support BatchUDF
|
||||||
|
/**
|
||||||
|
* Add new columns with defined values.
|
||||||
|
*
|
||||||
|
* @param newColumnTransforms pairs of column names and the SQL expression to use
|
||||||
|
* to calculate the value of the new column. These
|
||||||
|
* expressions will be evaluated for each row in the
|
||||||
|
* table, and can reference existing columns in the table.
|
||||||
|
*/
|
||||||
|
async addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void> {
|
||||||
|
await this.inner.addColumns(newColumnTransforms);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Alter the name or nullability of columns.
|
||||||
|
*
|
||||||
|
* @param columnAlterations One or more alterations to apply to columns.
|
||||||
|
*/
|
||||||
|
async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
|
||||||
|
await this.inner.alterColumns(columnAlterations);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Drop one or more columns from the dataset
|
||||||
|
*
|
||||||
|
* This is a metadata-only operation and does not remove the data from the
|
||||||
|
* underlying storage. In order to remove the data, you must subsequently
|
||||||
|
* call ``compact_files`` to rewrite the data without the removed columns and
|
||||||
|
* then call ``cleanup_files`` to remove the old files.
|
||||||
|
*
|
||||||
|
* @param columnNames The names of the columns to drop. These can be nested
|
||||||
|
* column references (e.g. "a.b.c") or top-level column
|
||||||
|
* names (e.g. "a").
|
||||||
|
*/
|
||||||
|
async dropColumns(columnNames: string[]): Promise<void> {
|
||||||
|
await this.inner.dropColumns(columnNames);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -62,7 +62,7 @@
|
|||||||
"lancedb-linux-arm64-gnu": "0.4.3",
|
"lancedb-linux-arm64-gnu": "0.4.3",
|
||||||
"lancedb-linux-x64-gnu": "0.4.3"
|
"lancedb-linux-x64-gnu": "0.4.3"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"peerDependencies": {
|
||||||
"apache-arrow": "^15.0.0"
|
"apache-arrow": "^15.0.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,8 +13,11 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use arrow_ipc::writer::FileWriter;
|
use arrow_ipc::writer::FileWriter;
|
||||||
use lancedb::table::AddDataOptions;
|
use lance::dataset::ColumnAlteration as LanceColumnAlteration;
|
||||||
use lancedb::{ipc::ipc_file_to_batches, table::TableRef};
|
use lancedb::{
|
||||||
|
ipc::ipc_file_to_batches,
|
||||||
|
table::{AddDataOptions, TableRef},
|
||||||
|
};
|
||||||
use napi::bindgen_prelude::*;
|
use napi::bindgen_prelude::*;
|
||||||
use napi_derive::napi;
|
use napi_derive::napi;
|
||||||
|
|
||||||
@@ -93,4 +96,106 @@ impl Table {
|
|||||||
pub fn query(&self) -> Query {
|
pub fn query(&self) -> Query {
|
||||||
Query::new(self)
|
Query::new(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
pub async fn add_columns(&self, transforms: Vec<AddColumnsSql>) -> napi::Result<()> {
|
||||||
|
let transforms = transforms
|
||||||
|
.into_iter()
|
||||||
|
.map(|sql| (sql.name, sql.value_sql))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let transforms = lance::dataset::NewColumnTransform::SqlExpressions(transforms);
|
||||||
|
self.table
|
||||||
|
.add_columns(transforms, None)
|
||||||
|
.await
|
||||||
|
.map_err(|err| {
|
||||||
|
napi::Error::from_reason(format!(
|
||||||
|
"Failed to add columns to table {}: {}",
|
||||||
|
self.table, err
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
pub async fn alter_columns(&self, alterations: Vec<ColumnAlteration>) -> napi::Result<()> {
|
||||||
|
for alteration in &alterations {
|
||||||
|
if alteration.rename.is_none() && alteration.nullable.is_none() {
|
||||||
|
return Err(napi::Error::from_reason(
|
||||||
|
"Alteration must have a 'rename' or 'nullable' field.",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let alterations = alterations
|
||||||
|
.into_iter()
|
||||||
|
.map(LanceColumnAlteration::from)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
self.table
|
||||||
|
.alter_columns(&alterations)
|
||||||
|
.await
|
||||||
|
.map_err(|err| {
|
||||||
|
napi::Error::from_reason(format!(
|
||||||
|
"Failed to alter columns in table {}: {}",
|
||||||
|
self.table, err
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
pub async fn drop_columns(&self, columns: Vec<String>) -> napi::Result<()> {
|
||||||
|
let col_refs = columns.iter().map(String::as_str).collect::<Vec<_>>();
|
||||||
|
self.table.drop_columns(&col_refs).await.map_err(|err| {
|
||||||
|
napi::Error::from_reason(format!(
|
||||||
|
"Failed to drop columns from table {}: {}",
|
||||||
|
self.table, err
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A definition of a column alteration. The alteration changes the column at
|
||||||
|
/// `path` to have the new name `rename`, to be nullable if `nullable` is true,
|
||||||
|
/// and to have the data type `data_type`. At least one of `rename` or `nullable`
|
||||||
|
/// must be provided.
|
||||||
|
#[napi(object)]
|
||||||
|
pub struct ColumnAlteration {
|
||||||
|
/// The path to the column to alter. This is a dot-separated path to the column.
|
||||||
|
/// If it is a top-level column then it is just the name of the column. If it is
|
||||||
|
/// a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
||||||
|
/// `c` nested inside a column `b` nested inside a column `a`.
|
||||||
|
pub path: String,
|
||||||
|
/// The new name of the column. If not provided then the name will not be changed.
|
||||||
|
/// This must be distinct from the names of all other columns in the table.
|
||||||
|
pub rename: Option<String>,
|
||||||
|
/// Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
||||||
|
pub nullable: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ColumnAlteration> for LanceColumnAlteration {
|
||||||
|
fn from(js: ColumnAlteration) -> Self {
|
||||||
|
let ColumnAlteration {
|
||||||
|
path,
|
||||||
|
rename,
|
||||||
|
nullable,
|
||||||
|
} = js;
|
||||||
|
Self {
|
||||||
|
path,
|
||||||
|
rename,
|
||||||
|
nullable,
|
||||||
|
// TODO: wire up this field
|
||||||
|
data_type: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A definition of a new column to add to a table.
|
||||||
|
#[napi(object)]
|
||||||
|
pub struct AddColumnsSql {
|
||||||
|
/// The name of the new column.
|
||||||
|
pub name: String,
|
||||||
|
/// The values to populate the new column with, as a SQL expression.
|
||||||
|
/// The expression can reference other columns in the table.
|
||||||
|
pub value_sql: String,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.5.7
|
current_version = 0.6.0
|
||||||
commit = True
|
commit = True
|
||||||
message = [python] Bump version: {current_version} → {new_version}
|
message = [python] Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
26
python/Cargo.toml
Normal file
26
python/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
[package]
|
||||||
|
name = "lancedb-python"
|
||||||
|
version = "0.4.10"
|
||||||
|
edition.workspace = true
|
||||||
|
description = "Python bindings for LanceDB"
|
||||||
|
license.workspace = true
|
||||||
|
repository.workspace = true
|
||||||
|
keywords.workspace = true
|
||||||
|
categories.workspace = true
|
||||||
|
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "_lancedb"
|
||||||
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
lancedb = { path = "../rust/lancedb" }
|
||||||
|
env_logger = "0.10"
|
||||||
|
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }
|
||||||
|
pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
|
||||||
|
|
||||||
|
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||||
|
lzma-sys = { version = "*", features = ["static"] }
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
pyo3-build-config = { version = "0.20.3", features = ["extension-module", "abi3-py38"] }
|
||||||
@@ -20,10 +20,10 @@ results = table.search([0.1, 0.3]).limit(20).to_list()
|
|||||||
print(results)
|
print(results)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
Create a virtual environment and activate it:
|
LanceDB is based on the Rust crate `lancedb` and is built with maturin. In order to build with maturin
|
||||||
|
you will either need a conda environment or a virtual environment (venv).
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m venv venv
|
python -m venv venv
|
||||||
@@ -33,7 +33,15 @@ python -m venv venv
|
|||||||
Install the necessary packages:
|
Install the necessary packages:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m pip install .
|
python -m pip install .[tests,dev]
|
||||||
|
```
|
||||||
|
|
||||||
|
To build the python package you can use maturin:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# This will build the rust bindings and place them in the appropriate place
|
||||||
|
# in your venv or conda environment
|
||||||
|
maturin develop
|
||||||
```
|
```
|
||||||
|
|
||||||
To run the unit tests:
|
To run the unit tests:
|
||||||
@@ -45,7 +53,7 @@ pytest
|
|||||||
To run the doc tests:
|
To run the doc tests:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pytest --doctest-modules lancedb
|
pytest --doctest-modules python/lancedb
|
||||||
```
|
```
|
||||||
|
|
||||||
To run linter and automatically fix all errors:
|
To run linter and automatically fix all errors:
|
||||||
@@ -61,31 +69,27 @@ If any packages are missing, install them with:
|
|||||||
pip install <PACKAGE_NAME>
|
pip install <PACKAGE_NAME>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
___
|
___
|
||||||
For **Windows** users, there may be errors when installing packages, so these commands may be helpful:
|
For **Windows** users, there may be errors when installing packages, so these commands may be helpful:
|
||||||
|
|
||||||
Activate the virtual environment:
|
Activate the virtual environment:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
. .\venv\Scripts\activate
|
. .\venv\Scripts\activate
|
||||||
```
|
```
|
||||||
|
|
||||||
You may need to run the installs separately:
|
You may need to run the installs separately:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install -e .[tests]
|
pip install -e .[tests]
|
||||||
pip install -e .[dev]
|
pip install -e .[dev]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
`tantivy` requires `rust` to be installed, so install it with `conda`, as it doesn't support windows installation:
|
`tantivy` requires `rust` to be installed, so install it with `conda`, as it doesn't support windows installation:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install wheel
|
pip install wheel
|
||||||
pip install cargo
|
pip install cargo
|
||||||
conda install rust
|
conda install rust
|
||||||
pip install tantivy
|
pip install tantivy
|
||||||
```
|
```
|
||||||
|
|
||||||
To run the unit tests:
|
|
||||||
```bash
|
|
||||||
pytest
|
|
||||||
```
|
|
||||||
|
|||||||
3
python/build.rs
Normal file
3
python/build.rs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
fn main() {
|
||||||
|
pyo3_build_config::add_extension_module_link_args();
|
||||||
|
}
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.5.7"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"pylance==0.9.18",
|
"pylance==0.10.1",
|
||||||
"ratelimiter~=1.0",
|
"ratelimiter~=1.0",
|
||||||
"retry>=0.9.2",
|
"retry>=0.9.2",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
@@ -14,7 +14,7 @@ dependencies = [
|
|||||||
"pyyaml>=6.0",
|
"pyyaml>=6.0",
|
||||||
"click>=8.1.7",
|
"click>=8.1.7",
|
||||||
"requests>=2.31.0",
|
"requests>=2.31.0",
|
||||||
"overrides>=0.7"
|
"overrides>=0.7",
|
||||||
]
|
]
|
||||||
description = "lancedb"
|
description = "lancedb"
|
||||||
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
||||||
@@ -26,7 +26,7 @@ keywords = [
|
|||||||
"data-science",
|
"data-science",
|
||||||
"machine-learning",
|
"machine-learning",
|
||||||
"arrow",
|
"arrow",
|
||||||
"data-analytics"
|
"data-analytics",
|
||||||
]
|
]
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"Development Status :: 3 - Alpha",
|
"Development Status :: 3 - Alpha",
|
||||||
@@ -48,21 +48,53 @@ classifiers = [
|
|||||||
repository = "https://github.com/lancedb/lancedb"
|
repository = "https://github.com/lancedb/lancedb"
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
tests = ["aiohttp", "pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "duckdb", "pytz", "polars>=0.19"]
|
tests = [
|
||||||
|
"aiohttp",
|
||||||
|
"pandas>=1.4",
|
||||||
|
"pytest",
|
||||||
|
"pytest-mock",
|
||||||
|
"pytest-asyncio",
|
||||||
|
"duckdb",
|
||||||
|
"pytz",
|
||||||
|
"polars>=0.19",
|
||||||
|
]
|
||||||
dev = ["ruff", "pre-commit"]
|
dev = ["ruff", "pre-commit"]
|
||||||
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]", "mkdocs-ultralytics-plugin==0.0.44"]
|
docs = [
|
||||||
|
"mkdocs",
|
||||||
|
"mkdocs-jupyter",
|
||||||
|
"mkdocs-material",
|
||||||
|
"mkdocstrings[python]",
|
||||||
|
"mkdocs-ultralytics-plugin==0.0.44",
|
||||||
|
]
|
||||||
clip = ["torch", "pillow", "open-clip"]
|
clip = ["torch", "pillow", "open-clip"]
|
||||||
embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "huggingface_hub",
|
embeddings = [
|
||||||
"InstructorEmbedding", "google.generativeai", "boto3>=1.28.57", "awscli>=1.29.57", "botocore>=1.31.57"]
|
"openai>=1.6.1",
|
||||||
|
"sentence-transformers",
|
||||||
|
"torch",
|
||||||
|
"pillow",
|
||||||
|
"open-clip-torch",
|
||||||
|
"cohere",
|
||||||
|
"huggingface_hub",
|
||||||
|
"InstructorEmbedding",
|
||||||
|
"google.generativeai",
|
||||||
|
"boto3>=1.28.57",
|
||||||
|
"awscli>=1.29.57",
|
||||||
|
"botocore>=1.31.57",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.maturin]
|
||||||
|
python-source = "python"
|
||||||
|
module-name = "lancedb._lancedb"
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
lancedb = "lancedb.cli.cli:cli"
|
lancedb = "lancedb.cli.cli:cli"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["setuptools", "wheel"]
|
requires = ["maturin>=1.4"]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "maturin"
|
||||||
|
|
||||||
[tool.ruff]
|
|
||||||
|
[tool.ruff.lint]
|
||||||
select = ["F", "E", "W", "I", "G", "TCH", "PERF"]
|
select = ["F", "E", "W", "I", "G", "TCH", "PERF"]
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
@@ -70,5 +102,5 @@ addopts = "--strict-markers --ignore-glob=lancedb/embeddings/*.py"
|
|||||||
|
|
||||||
markers = [
|
markers = [
|
||||||
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
||||||
"asyncio"
|
"asyncio",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -19,8 +19,9 @@ from typing import Optional, Union
|
|||||||
|
|
||||||
__version__ = importlib.metadata.version("lancedb")
|
__version__ = importlib.metadata.version("lancedb")
|
||||||
|
|
||||||
from .common import URI
|
from ._lancedb import connect as lancedb_connect
|
||||||
from .db import DBConnection, LanceDBConnection
|
from .common import URI, sanitize_uri
|
||||||
|
from .db import AsyncConnection, AsyncLanceDBConnection, DBConnection, LanceDBConnection
|
||||||
from .remote.db import RemoteDBConnection
|
from .remote.db import RemoteDBConnection
|
||||||
from .schema import vector # noqa: F401
|
from .schema import vector # noqa: F401
|
||||||
from .utils import sentry_log # noqa: F401
|
from .utils import sentry_log # noqa: F401
|
||||||
@@ -101,3 +102,74 @@ def connect(
|
|||||||
uri, api_key, region, host_override, request_thread_pool=request_thread_pool
|
uri, api_key, region, host_override, request_thread_pool=request_thread_pool
|
||||||
)
|
)
|
||||||
return LanceDBConnection(uri, read_consistency_interval=read_consistency_interval)
|
return LanceDBConnection(uri, read_consistency_interval=read_consistency_interval)
|
||||||
|
|
||||||
|
|
||||||
|
async def connect_async(
|
||||||
|
uri: URI,
|
||||||
|
*,
|
||||||
|
api_key: Optional[str] = None,
|
||||||
|
region: str = "us-east-1",
|
||||||
|
host_override: Optional[str] = None,
|
||||||
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
|
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
||||||
|
) -> AsyncConnection:
|
||||||
|
"""Connect to a LanceDB database.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
uri: str or Path
|
||||||
|
The uri of the database.
|
||||||
|
api_key: str, optional
|
||||||
|
If present, connect to LanceDB cloud.
|
||||||
|
Otherwise, connect to a database on file system or cloud storage.
|
||||||
|
Can be set via environment variable `LANCEDB_API_KEY`.
|
||||||
|
region: str, default "us-east-1"
|
||||||
|
The region to use for LanceDB Cloud.
|
||||||
|
host_override: str, optional
|
||||||
|
The override url for LanceDB Cloud.
|
||||||
|
read_consistency_interval: timedelta, default None
|
||||||
|
(For LanceDB OSS only)
|
||||||
|
The interval at which to check for updates to the table from other
|
||||||
|
processes. If None, then consistency is not checked. For performance
|
||||||
|
reasons, this is the default. For strong consistency, set this to
|
||||||
|
zero seconds. Then every read will check for updates from other
|
||||||
|
processes. As a compromise, you can set this to a non-zero timedelta
|
||||||
|
for eventual consistency. If more than that interval has passed since
|
||||||
|
the last check, then the table will be checked for updates. Note: this
|
||||||
|
consistency only applies to read operations. Write operations are
|
||||||
|
always consistent.
|
||||||
|
request_thread_pool: int or ThreadPoolExecutor, optional
|
||||||
|
The thread pool to use for making batch requests to the LanceDB Cloud API.
|
||||||
|
If an integer, then a ThreadPoolExecutor will be created with that
|
||||||
|
number of threads. If None, then a ThreadPoolExecutor will be created
|
||||||
|
with the default number of threads. If a ThreadPoolExecutor, then that
|
||||||
|
executor will be used for making requests. This is for LanceDB Cloud
|
||||||
|
only and is only used when making batch requests (i.e., passing in
|
||||||
|
multiple queries to the search method at once).
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
|
||||||
|
For a local directory, provide a path for the database:
|
||||||
|
|
||||||
|
>>> import lancedb
|
||||||
|
>>> db = lancedb.connect("~/.lancedb")
|
||||||
|
|
||||||
|
For object storage, use a URI prefix:
|
||||||
|
|
||||||
|
>>> db = lancedb.connect("s3://my-bucket/lancedb")
|
||||||
|
|
||||||
|
Connect to LanceDB cloud:
|
||||||
|
|
||||||
|
>>> db = lancedb.connect("db://my_database", api_key="ldb_...")
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
conn : AsyncConnection
|
||||||
|
A connection to a LanceDB database.
|
||||||
|
"""
|
||||||
|
return AsyncLanceDBConnection(
|
||||||
|
await lancedb_connect(
|
||||||
|
sanitize_uri(uri), api_key, region, host_override, read_consistency_interval
|
||||||
|
)
|
||||||
|
)
|
||||||
12
python/python/lancedb/_lancedb.pyi
Normal file
12
python/python/lancedb/_lancedb.pyi
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
class Connection(object):
|
||||||
|
async def table_names(self) -> list[str]: ...
|
||||||
|
|
||||||
|
async def connect(
|
||||||
|
uri: str,
|
||||||
|
api_key: Optional[str],
|
||||||
|
region: Optional[str],
|
||||||
|
host_override: Optional[str],
|
||||||
|
read_consistency_interval: Optional[float],
|
||||||
|
) -> Connection: ...
|
||||||
@@ -34,3 +34,7 @@ class Credential(str):
|
|||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return "********"
|
return "********"
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_uri(uri: URI) -> str:
|
||||||
|
return str(uri)
|
||||||
@@ -28,6 +28,7 @@ from .util import fs_from_uri, get_uri_location, get_uri_scheme, join_uri
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
|
from ._lancedb import Connection as LanceDbConnection
|
||||||
from .common import DATA, URI
|
from .common import DATA, URI
|
||||||
from .embeddings import EmbeddingFunctionConfig
|
from .embeddings import EmbeddingFunctionConfig
|
||||||
from .pydantic import LanceModel
|
from .pydantic import LanceModel
|
||||||
@@ -40,14 +41,21 @@ class DBConnection(EnforceOverrides):
|
|||||||
def table_names(
|
def table_names(
|
||||||
self, page_token: Optional[str] = None, limit: int = 10
|
self, page_token: Optional[str] = None, limit: int = 10
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
"""List all table in this database
|
"""List all tables in this database, in sorted order
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
The token to use for pagination. If not present, start from the beginning.
|
The token to use for pagination. If not present, start from the beginning.
|
||||||
|
Typically, this token is the last table name from the previous page.
|
||||||
|
Only supported by LanceDB Cloud.
|
||||||
limit: int, default 10
|
limit: int, default 10
|
||||||
The size of the page to return.
|
The size of the page to return.
|
||||||
|
Only supported by LanceDB Cloud.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Iterable of str
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -412,3 +420,254 @@ class LanceDBConnection(DBConnection):
|
|||||||
def drop_database(self):
|
def drop_database(self):
|
||||||
filesystem, path = fs_from_uri(self.uri)
|
filesystem, path = fs_from_uri(self.uri)
|
||||||
filesystem.delete_dir(path)
|
filesystem.delete_dir(path)
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncConnection(EnforceOverrides):
|
||||||
|
"""An active LanceDB connection interface."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def table_names(
|
||||||
|
self, *, page_token: Optional[str] = None, limit: int = 10
|
||||||
|
) -> Iterable[str]:
|
||||||
|
"""List all tables in this database, in sorted order
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
page_token: str, optional
|
||||||
|
The token to use for pagination. If not present, start from the beginning.
|
||||||
|
Typically, this token is the last table name from the previous page.
|
||||||
|
Only supported by LanceDB Cloud.
|
||||||
|
limit: int, default 10
|
||||||
|
The size of the page to return.
|
||||||
|
Only supported by LanceDB Cloud.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Iterable of str
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def create_table(
|
||||||
|
self,
|
||||||
|
name: str,
|
||||||
|
data: Optional[DATA] = None,
|
||||||
|
schema: Optional[Union[pa.Schema, LanceModel]] = None,
|
||||||
|
mode: str = "create",
|
||||||
|
exist_ok: bool = False,
|
||||||
|
on_bad_vectors: str = "error",
|
||||||
|
fill_value: float = 0.0,
|
||||||
|
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||||
|
) -> Table:
|
||||||
|
"""Create a [Table][lancedb.table.Table] in the database.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
name: str
|
||||||
|
The name of the table.
|
||||||
|
data: The data to initialize the table, *optional*
|
||||||
|
User must provide at least one of `data` or `schema`.
|
||||||
|
Acceptable types are:
|
||||||
|
|
||||||
|
- dict or list-of-dict
|
||||||
|
|
||||||
|
- pandas.DataFrame
|
||||||
|
|
||||||
|
- pyarrow.Table or pyarrow.RecordBatch
|
||||||
|
schema: The schema of the table, *optional*
|
||||||
|
Acceptable types are:
|
||||||
|
|
||||||
|
- pyarrow.Schema
|
||||||
|
|
||||||
|
- [LanceModel][lancedb.pydantic.LanceModel]
|
||||||
|
mode: str; default "create"
|
||||||
|
The mode to use when creating the table.
|
||||||
|
Can be either "create" or "overwrite".
|
||||||
|
By default, if the table already exists, an exception is raised.
|
||||||
|
If you want to overwrite the table, use mode="overwrite".
|
||||||
|
exist_ok: bool, default False
|
||||||
|
If a table by the same name already exists, then raise an exception
|
||||||
|
if exist_ok=False. If exist_ok=True, then open the existing table;
|
||||||
|
it will not add the provided data but will validate against any
|
||||||
|
schema that's specified.
|
||||||
|
on_bad_vectors: str, default "error"
|
||||||
|
What to do if any of the vectors are not the same size or contain NaNs.
|
||||||
|
One of "error", "drop", "fill".
|
||||||
|
fill_value: float
|
||||||
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
LanceTable
|
||||||
|
A reference to the newly created table.
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
|
The vector index won't be created by default.
|
||||||
|
To create the index, call the `create_index` method on the table.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
|
||||||
|
Can create with list of tuples or dictionaries:
|
||||||
|
|
||||||
|
>>> import lancedb
|
||||||
|
>>> db = lancedb.connect("./.lancedb")
|
||||||
|
>>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
|
||||||
|
... {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}]
|
||||||
|
>>> db.create_table("my_table", data)
|
||||||
|
LanceTable(connection=..., name="my_table")
|
||||||
|
>>> db["my_table"].head()
|
||||||
|
pyarrow.Table
|
||||||
|
vector: fixed_size_list<item: float>[2]
|
||||||
|
child 0, item: float
|
||||||
|
lat: double
|
||||||
|
long: double
|
||||||
|
----
|
||||||
|
vector: [[[1.1,1.2],[0.2,1.8]]]
|
||||||
|
lat: [[45.5,40.1]]
|
||||||
|
long: [[-122.7,-74.1]]
|
||||||
|
|
||||||
|
You can also pass a pandas DataFrame:
|
||||||
|
|
||||||
|
>>> import pandas as pd
|
||||||
|
>>> data = pd.DataFrame({
|
||||||
|
... "vector": [[1.1, 1.2], [0.2, 1.8]],
|
||||||
|
... "lat": [45.5, 40.1],
|
||||||
|
... "long": [-122.7, -74.1]
|
||||||
|
... })
|
||||||
|
>>> db.create_table("table2", data)
|
||||||
|
LanceTable(connection=..., name="table2")
|
||||||
|
>>> db["table2"].head()
|
||||||
|
pyarrow.Table
|
||||||
|
vector: fixed_size_list<item: float>[2]
|
||||||
|
child 0, item: float
|
||||||
|
lat: double
|
||||||
|
long: double
|
||||||
|
----
|
||||||
|
vector: [[[1.1,1.2],[0.2,1.8]]]
|
||||||
|
lat: [[45.5,40.1]]
|
||||||
|
long: [[-122.7,-74.1]]
|
||||||
|
|
||||||
|
Data is converted to Arrow before being written to disk. For maximum
|
||||||
|
control over how data is saved, either provide the PyArrow schema to
|
||||||
|
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
||||||
|
|
||||||
|
>>> custom_schema = pa.schema([
|
||||||
|
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
|
... pa.field("lat", pa.float32()),
|
||||||
|
... pa.field("long", pa.float32())
|
||||||
|
... ])
|
||||||
|
>>> db.create_table("table3", data, schema = custom_schema)
|
||||||
|
LanceTable(connection=..., name="table3")
|
||||||
|
>>> db["table3"].head()
|
||||||
|
pyarrow.Table
|
||||||
|
vector: fixed_size_list<item: float>[2]
|
||||||
|
child 0, item: float
|
||||||
|
lat: float
|
||||||
|
long: float
|
||||||
|
----
|
||||||
|
vector: [[[1.1,1.2],[0.2,1.8]]]
|
||||||
|
lat: [[45.5,40.1]]
|
||||||
|
long: [[-122.7,-74.1]]
|
||||||
|
|
||||||
|
|
||||||
|
It is also possible to create a table from `[Iterable[pa.RecordBatch]]`:
|
||||||
|
|
||||||
|
|
||||||
|
>>> import pyarrow as pa
|
||||||
|
>>> def make_batches():
|
||||||
|
... for i in range(5):
|
||||||
|
... yield pa.RecordBatch.from_arrays(
|
||||||
|
... [
|
||||||
|
... pa.array([[3.1, 4.1], [5.9, 26.5]],
|
||||||
|
... pa.list_(pa.float32(), 2)),
|
||||||
|
... pa.array(["foo", "bar"]),
|
||||||
|
... pa.array([10.0, 20.0]),
|
||||||
|
... ],
|
||||||
|
... ["vector", "item", "price"],
|
||||||
|
... )
|
||||||
|
>>> schema=pa.schema([
|
||||||
|
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
|
... pa.field("item", pa.utf8()),
|
||||||
|
... pa.field("price", pa.float32()),
|
||||||
|
... ])
|
||||||
|
>>> db.create_table("table4", make_batches(), schema=schema)
|
||||||
|
LanceTable(connection=..., name="table4")
|
||||||
|
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
async def open_table(self, name: str) -> Table:
|
||||||
|
"""Open a Lance Table in the database.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
name: str
|
||||||
|
The name of the table.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
A LanceTable object representing the table.
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
async def drop_table(self, name: str):
|
||||||
|
"""Drop a table from the database.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
name: str
|
||||||
|
The name of the table.
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
async def drop_database(self):
|
||||||
|
"""
|
||||||
|
Drop database
|
||||||
|
This is the same thing as dropping all the tables
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncLanceDBConnection(AsyncConnection):
|
||||||
|
def __init__(self, connection: LanceDbConnection):
|
||||||
|
self._inner = connection
|
||||||
|
|
||||||
|
async def __repr__(self) -> str:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@override
|
||||||
|
async def table_names(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
page_token=None,
|
||||||
|
limit=None,
|
||||||
|
) -> Iterable[str]:
|
||||||
|
return await self._inner.table_names()
|
||||||
|
|
||||||
|
@override
|
||||||
|
async def create_table(
|
||||||
|
self,
|
||||||
|
name: str,
|
||||||
|
data: Optional[DATA] = None,
|
||||||
|
schema: Optional[Union[pa.Schema, LanceModel]] = None,
|
||||||
|
mode: str = "create",
|
||||||
|
exist_ok: bool = False,
|
||||||
|
on_bad_vectors: str = "error",
|
||||||
|
fill_value: float = 0.0,
|
||||||
|
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||||
|
) -> LanceTable:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@override
|
||||||
|
async def open_table(self, name: str) -> LanceTable:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@override
|
||||||
|
async def drop_table(self, name: str, ignore_missing: bool = False):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@override
|
||||||
|
async def drop_database(self):
|
||||||
|
raise NotImplementedError
|
||||||
@@ -16,7 +16,7 @@ from __future__ import annotations
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, Type, Union
|
from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Type, Union
|
||||||
|
|
||||||
import deprecation
|
import deprecation
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -93,7 +93,7 @@ class Query(pydantic.BaseModel):
|
|||||||
metric: str = "L2"
|
metric: str = "L2"
|
||||||
|
|
||||||
# which columns to return in the results
|
# which columns to return in the results
|
||||||
columns: Optional[List[str]] = None
|
columns: Optional[Union[List[str], Dict[str, str]]] = None
|
||||||
|
|
||||||
# optional query parameters for tuning the results,
|
# optional query parameters for tuning the results,
|
||||||
# e.g. `{"nprobes": "10", "refine_factor": "10"}`
|
# e.g. `{"nprobes": "10", "refine_factor": "10"}`
|
||||||
@@ -321,20 +321,27 @@ class LanceQueryBuilder(ABC):
|
|||||||
self._limit = limit
|
self._limit = limit
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def select(self, columns: list) -> LanceQueryBuilder:
|
def select(self, columns: Union[list[str], dict[str, str]]) -> LanceQueryBuilder:
|
||||||
"""Set the columns to return.
|
"""Set the columns to return.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
columns: list
|
columns: list of str, or dict of str to str default None
|
||||||
The columns to return.
|
List of column names to be fetched.
|
||||||
|
Or a dictionary of column names to SQL expressions.
|
||||||
|
All columns are fetched if None or unspecified.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
LanceQueryBuilder
|
LanceQueryBuilder
|
||||||
The LanceQueryBuilder object.
|
The LanceQueryBuilder object.
|
||||||
"""
|
"""
|
||||||
self._columns = columns
|
if isinstance(columns, list):
|
||||||
|
self._columns = columns
|
||||||
|
elif isinstance(columns, dict):
|
||||||
|
self._columns = list(columns.items())
|
||||||
|
else:
|
||||||
|
raise ValueError("columns must be a list or a dictionary")
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder:
|
def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder:
|
||||||
@@ -392,7 +399,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
>>> (table.search([0.4, 0.4])
|
>>> (table.search([0.4, 0.4])
|
||||||
... .metric("cosine")
|
... .metric("cosine")
|
||||||
... .where("b < 10")
|
... .where("b < 10")
|
||||||
... .select(["b"])
|
... .select(["b", "vector"])
|
||||||
... .limit(2)
|
... .limit(2)
|
||||||
... .to_pandas())
|
... .to_pandas())
|
||||||
b vector _distance
|
b vector _distance
|
||||||
@@ -15,7 +15,7 @@ import logging
|
|||||||
import uuid
|
import uuid
|
||||||
from concurrent.futures import Future
|
from concurrent.futures import Future
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from typing import Dict, Optional, Union
|
from typing import Dict, Iterable, Optional, Union
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from lance import json_to_schema
|
from lance import json_to_schema
|
||||||
@@ -473,6 +473,21 @@ class RemoteTable(Table):
|
|||||||
"count_rows() is not yet supported on the LanceDB cloud"
|
"count_rows() is not yet supported on the LanceDB cloud"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def add_columns(self, transforms: Dict[str, str]):
|
||||||
|
raise NotImplementedError(
|
||||||
|
"add_columns() is not yet supported on the LanceDB cloud"
|
||||||
|
)
|
||||||
|
|
||||||
|
def alter_columns(self, alterations: Iterable[Dict[str, str]]):
|
||||||
|
raise NotImplementedError(
|
||||||
|
"alter_columns() is not yet supported on the LanceDB cloud"
|
||||||
|
)
|
||||||
|
|
||||||
|
def drop_columns(self, columns: Iterable[str]):
|
||||||
|
raise NotImplementedError(
|
||||||
|
"drop_columns() is not yet supported on the LanceDB cloud"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def add_index(tbl: pa.Table, i: int) -> pa.Table:
|
def add_index(tbl: pa.Table, i: int) -> pa.Table:
|
||||||
return tbl.add_column(
|
return tbl.add_column(
|
||||||
@@ -160,7 +160,7 @@ class Table(ABC):
|
|||||||
|
|
||||||
Can query the table with [Table.search][lancedb.table.Table.search].
|
Can query the table with [Table.search][lancedb.table.Table.search].
|
||||||
|
|
||||||
>>> table.search([0.4, 0.4]).select(["b"]).to_pandas()
|
>>> table.search([0.4, 0.4]).select(["b", "vector"]).to_pandas()
|
||||||
b vector _distance
|
b vector _distance
|
||||||
0 4 [0.5, 1.3] 0.82
|
0 4 [0.5, 1.3] 0.82
|
||||||
1 2 [1.1, 1.2] 1.13
|
1 2 [1.1, 1.2] 1.13
|
||||||
@@ -436,7 +436,7 @@ class Table(ABC):
|
|||||||
>>> query = [0.4, 1.4, 2.4]
|
>>> query = [0.4, 1.4, 2.4]
|
||||||
>>> (table.search(query)
|
>>> (table.search(query)
|
||||||
... .where("original_width > 1000", prefilter=True)
|
... .where("original_width > 1000", prefilter=True)
|
||||||
... .select(["caption", "original_width"])
|
... .select(["caption", "original_width", "vector"])
|
||||||
... .limit(2)
|
... .limit(2)
|
||||||
... .to_pandas())
|
... .to_pandas())
|
||||||
caption original_width vector _distance
|
caption original_width vector _distance
|
||||||
@@ -660,6 +660,56 @@ class Table(ABC):
|
|||||||
For most cases, the default should be fine.
|
For most cases, the default should be fine.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def add_columns(self, transforms: Dict[str, str]):
|
||||||
|
"""
|
||||||
|
Add new columns with defined values.
|
||||||
|
|
||||||
|
This is not yet available in LanceDB Cloud.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
transforms: Dict[str, str]
|
||||||
|
A map of column name to a SQL expression to use to calculate the
|
||||||
|
value of the new column. These expressions will be evaluated for
|
||||||
|
each row in the table, and can reference existing columns.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def alter_columns(self, alterations: Iterable[Dict[str, str]]):
|
||||||
|
"""
|
||||||
|
Alter column names and nullability.
|
||||||
|
|
||||||
|
This is not yet available in LanceDB Cloud.
|
||||||
|
|
||||||
|
alterations : Iterable[Dict[str, Any]]
|
||||||
|
A sequence of dictionaries, each with the following keys:
|
||||||
|
- "path": str
|
||||||
|
The column path to alter. For a top-level column, this is the name.
|
||||||
|
For a nested column, this is the dot-separated path, e.g. "a.b.c".
|
||||||
|
- "name": str, optional
|
||||||
|
The new name of the column. If not specified, the column name is
|
||||||
|
not changed.
|
||||||
|
- "nullable": bool, optional
|
||||||
|
Whether the column should be nullable. If not specified, the column
|
||||||
|
nullability is not changed. Only non-nullable columns can be changed
|
||||||
|
to nullable. Currently, you cannot change a nullable column to
|
||||||
|
non-nullable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def drop_columns(self, columns: Iterable[str]):
|
||||||
|
"""
|
||||||
|
Drop columns from the table.
|
||||||
|
|
||||||
|
This is not yet available in LanceDB Cloud.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
columns : Iterable[str]
|
||||||
|
The names of the columns to drop.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class _LanceDatasetRef(ABC):
|
class _LanceDatasetRef(ABC):
|
||||||
@property
|
@property
|
||||||
@@ -1219,7 +1269,7 @@ class LanceTable(Table):
|
|||||||
>>> query = [0.4, 1.4, 2.4]
|
>>> query = [0.4, 1.4, 2.4]
|
||||||
>>> (table.search(query)
|
>>> (table.search(query)
|
||||||
... .where("original_width > 1000", prefilter=True)
|
... .where("original_width > 1000", prefilter=True)
|
||||||
... .select(["caption", "original_width"])
|
... .select(["caption", "original_width", "vector"])
|
||||||
... .limit(2)
|
... .limit(2)
|
||||||
... .to_pandas())
|
... .to_pandas())
|
||||||
caption original_width vector _distance
|
caption original_width vector _distance
|
||||||
@@ -1536,6 +1586,22 @@ class LanceTable(Table):
|
|||||||
"""
|
"""
|
||||||
return self.to_lance().optimize.compact_files(*args, **kwargs)
|
return self.to_lance().optimize.compact_files(*args, **kwargs)
|
||||||
|
|
||||||
|
def add_columns(self, transforms: Dict[str, str]):
|
||||||
|
self._dataset_mut.add_columns(transforms)
|
||||||
|
|
||||||
|
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||||
|
modified = []
|
||||||
|
# I called this name in pylance, but I think I regret that now. So we
|
||||||
|
# allow both name and rename.
|
||||||
|
for alter in alterations:
|
||||||
|
if "rename" in alter:
|
||||||
|
alter["name"] = alter.pop("rename")
|
||||||
|
modified.append(alter)
|
||||||
|
self._dataset_mut.alter_columns(*modified)
|
||||||
|
|
||||||
|
def drop_columns(self, columns: Iterable[str]):
|
||||||
|
self._dataset_mut.drop_columns(columns)
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_schema(
|
def _sanitize_schema(
|
||||||
data: pa.Table,
|
data: pa.Table,
|
||||||
@@ -1,5 +1,4 @@
|
|||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
|
|
||||||
from lancedb.cli.cli import cli
|
from lancedb.cli.cli import cli
|
||||||
from lancedb.utils import CONFIG
|
from lancedb.utils import CONFIG
|
||||||
|
|
||||||
@@ -13,7 +13,6 @@
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from lancedb.context import contextualize
|
from lancedb.context import contextualize
|
||||||
|
|
||||||
|
|
||||||
@@ -11,12 +11,11 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import lancedb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import lancedb
|
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
|
|
||||||
|
|
||||||
@@ -166,6 +165,24 @@ def test_table_names(tmp_path):
|
|||||||
assert db.table_names() == ["test1", "test2", "test3"]
|
assert db.table_names() == ["test1", "test2", "test3"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_table_names_async(tmp_path):
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
data = pd.DataFrame(
|
||||||
|
{
|
||||||
|
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||||
|
"item": ["foo", "bar"],
|
||||||
|
"price": [10.0, 20.0],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
db.create_table("test2", data=data)
|
||||||
|
db.create_table("test1", data=data)
|
||||||
|
db.create_table("test3", data=data)
|
||||||
|
|
||||||
|
db = await lancedb.connect_async(tmp_path)
|
||||||
|
assert await db.table_names() == ["test1", "test2", "test3"]
|
||||||
|
|
||||||
|
|
||||||
def test_create_mode(tmp_path):
|
def test_create_mode(tmp_path):
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
data = pd.DataFrame(
|
data = pd.DataFrame(
|
||||||
@@ -13,7 +13,6 @@
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from lancedb import LanceDBConnection
|
from lancedb import LanceDBConnection
|
||||||
|
|
||||||
# TODO: setup integ test mark and script
|
# TODO: setup integ test mark and script
|
||||||
@@ -13,11 +13,10 @@
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
import lance
|
import lance
|
||||||
|
import lancedb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import lancedb
|
|
||||||
from lancedb.conftest import MockTextEmbeddingFunction
|
from lancedb.conftest import MockTextEmbeddingFunction
|
||||||
from lancedb.embeddings import (
|
from lancedb.embeddings import (
|
||||||
EmbeddingFunctionConfig,
|
EmbeddingFunctionConfig,
|
||||||
@@ -14,12 +14,11 @@ import importlib
|
|||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
import lancedb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
import lancedb
|
|
||||||
from lancedb.embeddings import get_registry
|
from lancedb.embeddings import get_registry
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
|
|
||||||
@@ -185,10 +184,9 @@ def test_imagebind(tmp_path):
|
|||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
import lancedb.embeddings.imagebind
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
import lancedb.embeddings.imagebind
|
|
||||||
from lancedb.embeddings import get_registry
|
from lancedb.embeddings import get_registry
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
|
|
||||||
@@ -14,13 +14,13 @@ import os
|
|||||||
import random
|
import random
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
|
import lancedb as ldb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
import tantivy
|
|
||||||
|
|
||||||
import lancedb as ldb
|
pytest.importorskip("lancedb.fts")
|
||||||
import lancedb.fts
|
tantivy = pytest.importorskip("tantivy")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@@ -13,9 +13,8 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
import lancedb
|
import lancedb
|
||||||
|
import pytest
|
||||||
|
|
||||||
# You need to setup AWS credentials an a base path to run this test. Example
|
# You need to setup AWS credentials an a base path to run this test. Example
|
||||||
# AWS_PROFILE=default TEST_S3_BASE_URL=s3://my_bucket/dataset pytest tests/test_io.py
|
# AWS_PROFILE=default TEST_S3_BASE_URL=s3://my_bucket/dataset pytest tests/test_io.py
|
||||||
@@ -20,9 +20,8 @@ from typing import List, Optional, Tuple
|
|||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pydantic
|
import pydantic
|
||||||
import pytest
|
import pytest
|
||||||
from pydantic import Field
|
|
||||||
|
|
||||||
from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema
|
from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema
|
||||||
|
from pydantic import Field
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
@@ -18,7 +18,6 @@ import numpy as np
|
|||||||
import pandas.testing as tm
|
import pandas.testing as tm
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from lancedb.db import LanceDBConnection
|
from lancedb.db import LanceDBConnection
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
from lancedb.query import LanceVectorQueryBuilder, Query
|
from lancedb.query import LanceVectorQueryBuilder, Query
|
||||||
@@ -88,7 +87,7 @@ def test_query_builder(table):
|
|||||||
rs = (
|
rs = (
|
||||||
LanceVectorQueryBuilder(table, [0, 0], "vector")
|
LanceVectorQueryBuilder(table, [0, 0], "vector")
|
||||||
.limit(1)
|
.limit(1)
|
||||||
.select(["id"])
|
.select(["id", "vector"])
|
||||||
.to_list()
|
.to_list()
|
||||||
)
|
)
|
||||||
assert rs[0]["id"] == 1
|
assert rs[0]["id"] == 1
|
||||||
@@ -17,7 +17,6 @@ import pandas as pd
|
|||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
|
|
||||||
from lancedb.remote.client import RestfulLanceDBClient, VectorQuery
|
from lancedb.remote.client import RestfulLanceDBClient, VectorQuery
|
||||||
|
|
||||||
|
|
||||||
@@ -11,9 +11,8 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import pyarrow as pa
|
|
||||||
|
|
||||||
import lancedb
|
import lancedb
|
||||||
|
import pyarrow as pa
|
||||||
from lancedb.remote.client import VectorQuery, VectorQueryResult
|
from lancedb.remote.client import VectorQuery, VectorQueryResult
|
||||||
|
|
||||||
|
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
|
import lancedb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import lancedb
|
|
||||||
from lancedb.conftest import MockTextEmbeddingFunction # noqa
|
from lancedb.conftest import MockTextEmbeddingFunction # noqa
|
||||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
@@ -15,6 +14,9 @@ from lancedb.rerankers import (
|
|||||||
)
|
)
|
||||||
from lancedb.table import LanceTable
|
from lancedb.table import LanceTable
|
||||||
|
|
||||||
|
# Tests rely on FTS index
|
||||||
|
pytest.importorskip("lancedb.fts")
|
||||||
|
|
||||||
|
|
||||||
def get_test_table(tmp_path):
|
def get_test_table(tmp_path):
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
@@ -20,19 +20,18 @@ from typing import List
|
|||||||
from unittest.mock import PropertyMock, patch
|
from unittest.mock import PropertyMock, patch
|
||||||
|
|
||||||
import lance
|
import lance
|
||||||
|
import lancedb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import polars as pl
|
import polars as pl
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
import lancedb
|
|
||||||
from lancedb.conftest import MockTextEmbeddingFunction
|
from lancedb.conftest import MockTextEmbeddingFunction
|
||||||
from lancedb.db import LanceDBConnection
|
from lancedb.db import LanceDBConnection
|
||||||
from lancedb.embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
|
from lancedb.embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
from lancedb.table import LanceTable
|
from lancedb.table import LanceTable
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
class MockDB:
|
class MockDB:
|
||||||
@@ -804,6 +803,9 @@ def test_count_rows(db):
|
|||||||
|
|
||||||
|
|
||||||
def test_hybrid_search(db, tmp_path):
|
def test_hybrid_search(db, tmp_path):
|
||||||
|
# This test uses an FTS index
|
||||||
|
pytest.importorskip("lancedb.fts")
|
||||||
|
|
||||||
db = MockDB(str(tmp_path))
|
db = MockDB(str(tmp_path))
|
||||||
# Create a LanceDB table schema with a vector and a text column
|
# Create a LanceDB table schema with a vector and a text column
|
||||||
emb = EmbeddingFunctionRegistry.get_instance().get("test")()
|
emb = EmbeddingFunctionRegistry.get_instance().get("test")()
|
||||||
@@ -898,3 +900,29 @@ def test_restore_consistency(tmp_path):
|
|||||||
table.add([{"id": 2}])
|
table.add([{"id": 2}])
|
||||||
assert table_fixed.version == table.version - 1
|
assert table_fixed.version == table.version - 1
|
||||||
assert table_ref_latest.version == table.version
|
assert table_ref_latest.version == table.version
|
||||||
|
|
||||||
|
|
||||||
|
# Schema evolution
|
||||||
|
def test_add_columns(tmp_path):
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
data = pa.table({"id": [0, 1]})
|
||||||
|
table = LanceTable.create(db, "my_table", data=data)
|
||||||
|
table.add_columns({"new_col": "id + 2"})
|
||||||
|
assert table.to_arrow().column_names == ["id", "new_col"]
|
||||||
|
assert table.to_arrow()["new_col"].to_pylist() == [2, 3]
|
||||||
|
|
||||||
|
|
||||||
|
def test_alter_columns(tmp_path):
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
data = pa.table({"id": [0, 1]})
|
||||||
|
table = LanceTable.create(db, "my_table", data=data)
|
||||||
|
table.alter_columns({"path": "id", "rename": "new_id"})
|
||||||
|
assert table.to_arrow().column_names == ["new_id"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_drop_columns(tmp_path):
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
|
||||||
|
table = LanceTable.create(db, "my_table", data=data)
|
||||||
|
table.drop_columns(["category"])
|
||||||
|
assert table.to_arrow().column_names == ["id"]
|
||||||
@@ -1,8 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
import lancedb
|
import lancedb
|
||||||
|
import pytest
|
||||||
from lancedb.utils.events import _Events
|
from lancedb.utils.events import _Events
|
||||||
|
|
||||||
|
|
||||||
@@ -15,7 +15,6 @@ import os
|
|||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from lancedb.util import get_uri_scheme, join_uri
|
from lancedb.util import get_uri_scheme, join_uri
|
||||||
|
|
||||||
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
# Copyright 2023 LanceDB Developers
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import setuptools
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
setuptools.setup()
|
|
||||||
66
python/src/connection.rs
Normal file
66
python/src/connection.rs
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
// Copyright 2024 Lance Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use lancedb::connection::Connection as LanceConnection;
|
||||||
|
use pyo3::{pyclass, pyfunction, pymethods, PyAny, PyRef, PyResult, Python};
|
||||||
|
use pyo3_asyncio::tokio::future_into_py;
|
||||||
|
|
||||||
|
use crate::error::PythonErrorExt;
|
||||||
|
|
||||||
|
#[pyclass]
|
||||||
|
pub struct Connection {
|
||||||
|
inner: LanceConnection,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Connection {
|
||||||
|
pub fn table_names(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
|
||||||
|
let inner = self_.inner.clone();
|
||||||
|
future_into_py(self_.py(), async move {
|
||||||
|
inner.table_names().await.infer_error()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn connect(
|
||||||
|
py: Python,
|
||||||
|
uri: String,
|
||||||
|
api_key: Option<String>,
|
||||||
|
region: Option<String>,
|
||||||
|
host_override: Option<String>,
|
||||||
|
read_consistency_interval: Option<f64>,
|
||||||
|
) -> PyResult<&PyAny> {
|
||||||
|
future_into_py(py, async move {
|
||||||
|
let mut builder = lancedb::connect(&uri);
|
||||||
|
if let Some(api_key) = api_key {
|
||||||
|
builder = builder.api_key(&api_key);
|
||||||
|
}
|
||||||
|
if let Some(region) = region {
|
||||||
|
builder = builder.region(®ion);
|
||||||
|
}
|
||||||
|
if let Some(host_override) = host_override {
|
||||||
|
builder = builder.host_override(&host_override);
|
||||||
|
}
|
||||||
|
if let Some(read_consistency_interval) = read_consistency_interval {
|
||||||
|
let read_consistency_interval = Duration::from_secs_f64(read_consistency_interval);
|
||||||
|
builder = builder.read_consistency_interval(read_consistency_interval);
|
||||||
|
}
|
||||||
|
Ok(Connection {
|
||||||
|
inner: builder.execute().await.infer_error()?,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
61
python/src/error.rs
Normal file
61
python/src/error.rs
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
// Copyright 2024 Lance Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
use pyo3::{
|
||||||
|
exceptions::{PyOSError, PyRuntimeError, PyValueError},
|
||||||
|
PyResult,
|
||||||
|
};
|
||||||
|
|
||||||
|
use lancedb::error::Error as LanceError;
|
||||||
|
|
||||||
|
pub trait PythonErrorExt<T> {
|
||||||
|
/// Convert to a python error based on the Lance error type
|
||||||
|
fn infer_error(self) -> PyResult<T>;
|
||||||
|
/// Convert to OSError
|
||||||
|
fn os_error(self) -> PyResult<T>;
|
||||||
|
/// Convert to RuntimeError
|
||||||
|
fn runtime_error(self) -> PyResult<T>;
|
||||||
|
/// Convert to ValueError
|
||||||
|
fn value_error(self) -> PyResult<T>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||||
|
fn infer_error(self) -> PyResult<T> {
|
||||||
|
match &self {
|
||||||
|
Ok(_) => Ok(self.unwrap()),
|
||||||
|
Err(err) => match err {
|
||||||
|
LanceError::InvalidTableName { .. } => self.value_error(),
|
||||||
|
LanceError::TableNotFound { .. } => self.value_error(),
|
||||||
|
LanceError::TableAlreadyExists { .. } => self.runtime_error(),
|
||||||
|
LanceError::CreateDir { .. } => self.os_error(),
|
||||||
|
LanceError::Store { .. } => self.runtime_error(),
|
||||||
|
LanceError::Lance { .. } => self.runtime_error(),
|
||||||
|
LanceError::Schema { .. } => self.value_error(),
|
||||||
|
LanceError::Runtime { .. } => self.runtime_error(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn os_error(self) -> PyResult<T> {
|
||||||
|
self.map_err(|err| PyOSError::new_err(err.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn runtime_error(self) -> PyResult<T> {
|
||||||
|
self.map_err(|err| PyRuntimeError::new_err(err.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn value_error(self) -> PyResult<T> {
|
||||||
|
self.map_err(|err| PyValueError::new_err(err.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
32
python/src/lib.rs
Normal file
32
python/src/lib.rs
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
// Copyright 2024 Lance Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
use connection::{connect, Connection};
|
||||||
|
use env_logger::Env;
|
||||||
|
use pyo3::{pymodule, types::PyModule, wrap_pyfunction, PyResult, Python};
|
||||||
|
|
||||||
|
pub mod connection;
|
||||||
|
pub(crate) mod error;
|
||||||
|
|
||||||
|
#[pymodule]
|
||||||
|
pub fn _lancedb(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||||
|
let env = Env::new()
|
||||||
|
.filter_or("LANCEDB_LOG", "warn")
|
||||||
|
.write_style("LANCEDB_LOG_STYLE");
|
||||||
|
env_logger::init_from_env(env);
|
||||||
|
m.add_class::<Connection>()?;
|
||||||
|
m.add_function(wrap_pyfunction!(connect, m)?)?;
|
||||||
|
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
@@ -286,5 +286,8 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> {
|
|||||||
index::vector::table_create_vector_index,
|
index::vector::table_create_vector_index,
|
||||||
)?;
|
)?;
|
||||||
cx.export_function("tableSchema", JsTable::js_schema)?;
|
cx.export_function("tableSchema", JsTable::js_schema)?;
|
||||||
|
cx.export_function("tableAddColumns", JsTable::js_add_columns)?;
|
||||||
|
cx.export_function("tableAlterColumns", JsTable::js_alter_columns)?;
|
||||||
|
cx.export_function("tableDropColumns", JsTable::js_drop_columns)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ use std::ops::Deref;
|
|||||||
|
|
||||||
use arrow_array::{RecordBatch, RecordBatchIterator};
|
use arrow_array::{RecordBatch, RecordBatchIterator};
|
||||||
use lance::dataset::optimize::CompactionOptions;
|
use lance::dataset::optimize::CompactionOptions;
|
||||||
use lance::dataset::{WriteMode, WriteParams};
|
use lance::dataset::{ColumnAlteration, NewColumnTransform, WriteMode, WriteParams};
|
||||||
use lance::io::ObjectStoreParams;
|
use lance::io::ObjectStoreParams;
|
||||||
use lancedb::table::{AddDataOptions, OptimizeAction, WriteOptions};
|
use lancedb::table::{AddDataOptions, OptimizeAction, WriteOptions};
|
||||||
|
|
||||||
@@ -544,4 +544,118 @@ impl JsTable {
|
|||||||
});
|
});
|
||||||
Ok(promise)
|
Ok(promise)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn js_add_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
|
let expressions = cx
|
||||||
|
.argument::<JsArray>(0)?
|
||||||
|
.to_vec(&mut cx)?
|
||||||
|
.into_iter()
|
||||||
|
.map(|val| {
|
||||||
|
let obj = val.downcast_or_throw::<JsObject, _>(&mut cx)?;
|
||||||
|
let name = obj.get::<JsString, _, _>(&mut cx, "name")?.value(&mut cx);
|
||||||
|
let sql = obj
|
||||||
|
.get::<JsString, _, _>(&mut cx, "valueSql")?
|
||||||
|
.value(&mut cx);
|
||||||
|
Ok((name, sql))
|
||||||
|
})
|
||||||
|
.collect::<NeonResult<Vec<(String, String)>>>()?;
|
||||||
|
|
||||||
|
let transforms = NewColumnTransform::SqlExpressions(expressions);
|
||||||
|
|
||||||
|
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
||||||
|
let rt = runtime(&mut cx)?;
|
||||||
|
|
||||||
|
let (deferred, promise) = cx.promise();
|
||||||
|
let channel = cx.channel();
|
||||||
|
let table = js_table.table.clone();
|
||||||
|
|
||||||
|
rt.spawn(async move {
|
||||||
|
let result = table.add_columns(transforms, None).await;
|
||||||
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
|
result.or_throw(&mut cx)?;
|
||||||
|
Ok(cx.undefined())
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(promise)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn js_alter_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
|
let alterations = cx
|
||||||
|
.argument::<JsArray>(0)?
|
||||||
|
.to_vec(&mut cx)?
|
||||||
|
.into_iter()
|
||||||
|
.map(|val| {
|
||||||
|
let obj = val.downcast_or_throw::<JsObject, _>(&mut cx)?;
|
||||||
|
let path = obj.get::<JsString, _, _>(&mut cx, "path")?.value(&mut cx);
|
||||||
|
let rename = obj
|
||||||
|
.get_opt::<JsString, _, _>(&mut cx, "rename")?
|
||||||
|
.map(|val| val.value(&mut cx));
|
||||||
|
let nullable = obj
|
||||||
|
.get_opt::<JsBoolean, _, _>(&mut cx, "nullable")?
|
||||||
|
.map(|val| val.value(&mut cx));
|
||||||
|
// TODO: support data type here. Will need to do some serialization/deserialization
|
||||||
|
|
||||||
|
if rename.is_none() && nullable.is_none() {
|
||||||
|
return cx.throw_error("At least one of 'name' or 'nullable' must be provided");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ColumnAlteration {
|
||||||
|
path,
|
||||||
|
rename,
|
||||||
|
nullable,
|
||||||
|
// TODO: wire up this field
|
||||||
|
data_type: None,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<NeonResult<Vec<ColumnAlteration>>>()?;
|
||||||
|
|
||||||
|
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
||||||
|
let rt = runtime(&mut cx)?;
|
||||||
|
|
||||||
|
let (deferred, promise) = cx.promise();
|
||||||
|
let channel = cx.channel();
|
||||||
|
let table = js_table.table.clone();
|
||||||
|
|
||||||
|
rt.spawn(async move {
|
||||||
|
let result = table.alter_columns(&alterations).await;
|
||||||
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
|
result.or_throw(&mut cx)?;
|
||||||
|
Ok(cx.undefined())
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(promise)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn js_drop_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
|
let columns = cx
|
||||||
|
.argument::<JsArray>(0)?
|
||||||
|
.to_vec(&mut cx)?
|
||||||
|
.into_iter()
|
||||||
|
.map(|val| {
|
||||||
|
Ok(val
|
||||||
|
.downcast_or_throw::<JsString, _>(&mut cx)?
|
||||||
|
.value(&mut cx))
|
||||||
|
})
|
||||||
|
.collect::<NeonResult<Vec<String>>>()?;
|
||||||
|
|
||||||
|
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
||||||
|
let rt = runtime(&mut cx)?;
|
||||||
|
|
||||||
|
let (deferred, promise) = cx.promise();
|
||||||
|
let channel = cx.channel();
|
||||||
|
let table = js_table.table.clone();
|
||||||
|
|
||||||
|
rt.spawn(async move {
|
||||||
|
let col_refs = columns.iter().map(|s| s.as_str()).collect::<Vec<_>>();
|
||||||
|
let result = table.drop_columns(&col_refs).await;
|
||||||
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
|
result.or_throw(&mut cx)?;
|
||||||
|
Ok(cx.undefined())
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(promise)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -623,7 +623,17 @@ impl ConnectionInternal for Database {
|
|||||||
async fn drop_table(&self, name: &str) -> Result<()> {
|
async fn drop_table(&self, name: &str) -> Result<()> {
|
||||||
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
|
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
|
||||||
let full_path = self.base_path.child(dir_name.clone());
|
let full_path = self.base_path.child(dir_name.clone());
|
||||||
self.object_store.remove_dir_all(full_path).await?;
|
self.object_store
|
||||||
|
.remove_dir_all(full_path)
|
||||||
|
.await
|
||||||
|
.map_err(|err| match err {
|
||||||
|
// this error is not lance::Error::DatasetNotFound,
|
||||||
|
// because `remove_dir_all` may be used to remove something that is not a dataset
|
||||||
|
lance::Error::NotFound { .. } => Error::TableNotFound {
|
||||||
|
name: name.to_owned(),
|
||||||
|
},
|
||||||
|
_ => Error::from(err),
|
||||||
|
})?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -634,8 +644,6 @@ impl ConnectionInternal for Database {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::fs::create_dir_all;
|
|
||||||
|
|
||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
@@ -691,13 +699,46 @@ mod tests {
|
|||||||
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
|
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
#[ignore = "this can't pass due to https://github.com/lancedb/lancedb/issues/1019, enable it after the bug fixed"]
|
||||||
|
async fn test_open_table() {
|
||||||
|
let tmp_dir = tempdir().unwrap();
|
||||||
|
let uri = tmp_dir.path().to_str().unwrap();
|
||||||
|
let db = connect(uri).execute().await.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(db.table_names().await.unwrap().len(), 0);
|
||||||
|
// open non-exist table
|
||||||
|
assert!(matches!(
|
||||||
|
db.open_table("invalid_table").execute().await,
|
||||||
|
Err(crate::Error::TableNotFound { .. })
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(db.table_names().await.unwrap().len(), 0);
|
||||||
|
|
||||||
|
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||||
|
db.create_empty_table("table1", schema)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
db.open_table("table1").execute().await.unwrap();
|
||||||
|
let tables = db.table_names().await.unwrap();
|
||||||
|
assert_eq!(tables, vec!["table1".to_owned()]);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn drop_table() {
|
async fn drop_table() {
|
||||||
let tmp_dir = tempdir().unwrap();
|
let tmp_dir = tempdir().unwrap();
|
||||||
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
|
|
||||||
|
|
||||||
let uri = tmp_dir.path().to_str().unwrap();
|
let uri = tmp_dir.path().to_str().unwrap();
|
||||||
let db = connect(uri).execute().await.unwrap();
|
let db = connect(uri).execute().await.unwrap();
|
||||||
|
|
||||||
|
// drop a non-existent table
|
||||||
|
assert!(matches!(
|
||||||
|
db.drop_table("invalid_table").await,
|
||||||
|
Err(crate::Error::TableNotFound { .. }),
|
||||||
|
));
|
||||||
|
|
||||||
|
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
|
||||||
db.drop_table("table1").await.unwrap();
|
db.drop_table("table1").await.unwrap();
|
||||||
|
|
||||||
let tables = db.table_names().await.unwrap();
|
let tables = db.table_names().await.unwrap();
|
||||||
|
|||||||
@@ -24,6 +24,13 @@ use crate::Error;
|
|||||||
|
|
||||||
const DEFAULT_TOP_K: usize = 10;
|
const DEFAULT_TOP_K: usize = 10;
|
||||||
|
|
||||||
|
/// Column selection applied to a query's result.
#[derive(Debug, Clone)]
pub enum Select {
    /// Apply no projection to the scanner.
    All,
    /// Project onto the listed column names.
    Simple(Vec<String>),
    /// Project using `(output column name, expression)` pairs, e.g.
    /// `("id2", "id * 2")`.
    Projection(Vec<(String, String)>),
}
|
||||||
|
|
||||||
/// A builder for nearest neighbor queries for LanceDB.
|
/// A builder for nearest neighbor queries for LanceDB.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Query {
|
pub struct Query {
|
||||||
@@ -44,7 +51,7 @@ pub struct Query {
|
|||||||
/// Apply filter to the returned rows.
|
/// Apply filter to the returned rows.
|
||||||
filter: Option<String>,
|
filter: Option<String>,
|
||||||
/// Select column projection.
|
/// Select column projection.
|
||||||
select: Option<Vec<String>>,
|
select: Select,
|
||||||
|
|
||||||
/// Default is true. Set to false to enforce a brute force search.
|
/// Default is true. Set to false to enforce a brute force search.
|
||||||
use_index: bool,
|
use_index: bool,
|
||||||
@@ -70,7 +77,7 @@ impl Query {
|
|||||||
metric_type: None,
|
metric_type: None,
|
||||||
use_index: true,
|
use_index: true,
|
||||||
filter: None,
|
filter: None,
|
||||||
select: None,
|
select: Select::All,
|
||||||
prefilter: false,
|
prefilter: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -115,7 +122,16 @@ impl Query {
|
|||||||
scanner.use_index(self.use_index);
|
scanner.use_index(self.use_index);
|
||||||
scanner.prefilter(self.prefilter);
|
scanner.prefilter(self.prefilter);
|
||||||
|
|
||||||
self.select.as_ref().map(|p| scanner.project(p.as_slice()));
|
match &self.select {
|
||||||
|
Select::Simple(select) => {
|
||||||
|
scanner.project(select.as_slice())?;
|
||||||
|
}
|
||||||
|
Select::Projection(select_with_transform) => {
|
||||||
|
scanner.project_with_transform(select_with_transform.as_slice())?;
|
||||||
|
}
|
||||||
|
Select::All => { /* Do nothing */ }
|
||||||
|
}
|
||||||
|
|
||||||
self.filter.as_ref().map(|f| scanner.filter(f));
|
self.filter.as_ref().map(|f| scanner.filter(f));
|
||||||
self.refine_factor.map(|rf| scanner.refine(rf));
|
self.refine_factor.map(|rf| scanner.refine(rf));
|
||||||
self.metric_type.map(|mt| scanner.distance_metric(mt));
|
self.metric_type.map(|mt| scanner.distance_metric(mt));
|
||||||
@@ -206,7 +222,23 @@ impl Query {
|
|||||||
///
|
///
|
||||||
/// Only select the specified columns. If not specified, all columns will be returned.
|
/// Only select the specified columns. If not specified, all columns will be returned.
|
||||||
pub fn select(mut self, columns: &[impl AsRef<str>]) -> Self {
|
pub fn select(mut self, columns: &[impl AsRef<str>]) -> Self {
|
||||||
self.select = Some(columns.iter().map(|c| c.as_ref().to_string()).collect());
|
self.select = Select::Simple(columns.iter().map(|c| c.as_ref().to_string()).collect());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return columns computed from expressions.
|
||||||
|
///
|
||||||
|
/// Each entry is an `(output column name, expression)` pair, e.g. `("id2", "id * 2")`; only these columns are returned.
|
||||||
|
pub fn select_with_projection(
|
||||||
|
mut self,
|
||||||
|
columns: &[(impl AsRef<str>, impl AsRef<str>)],
|
||||||
|
) -> Self {
|
||||||
|
self.select = Select::Projection(
|
||||||
|
columns
|
||||||
|
.iter()
|
||||||
|
.map(|(c, t)| (c.as_ref().to_string(), t.as_ref().to_string()))
|
||||||
|
.collect(),
|
||||||
|
);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -226,7 +258,7 @@ mod tests {
|
|||||||
RecordBatchReader,
|
RecordBatchReader,
|
||||||
};
|
};
|
||||||
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
||||||
use futures::StreamExt;
|
use futures::{StreamExt, TryStreamExt};
|
||||||
use lance::dataset::Dataset;
|
use lance::dataset::Dataset;
|
||||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32, RandomVector};
|
use lance_testing::datagen::{BatchGenerator, IncrementingInt32, RandomVector};
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
@@ -294,6 +326,38 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_select_with_transform() {
|
||||||
|
let batches = make_non_empty_batches();
|
||||||
|
let ds = Dataset::write(batches, "memory://foo", None).await.unwrap();
|
||||||
|
|
||||||
|
let ds = DatasetConsistencyWrapper::new_latest(ds, None);
|
||||||
|
|
||||||
|
let query = Query::new(ds)
|
||||||
|
.limit(10)
|
||||||
|
.select_with_projection(&[("id2", "id * 2"), ("id", "id")]);
|
||||||
|
let result = query.execute_stream().await;
|
||||||
|
let mut batches = result
|
||||||
|
.expect("should have result")
|
||||||
|
.try_collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(batches.len(), 1);
|
||||||
|
let batch = batches.pop().unwrap();
|
||||||
|
|
||||||
|
// id, and id2
|
||||||
|
assert_eq!(batch.num_columns(), 2);
|
||||||
|
|
||||||
|
let id: &Int32Array = batch.column_by_name("id").unwrap().as_primitive();
|
||||||
|
let id2: &Int32Array = batch.column_by_name("id2").unwrap().as_primitive();
|
||||||
|
|
||||||
|
id.iter().zip(id2.iter()).for_each(|(id, id2)| {
|
||||||
|
let id = id.unwrap();
|
||||||
|
let id2 = id2.unwrap();
|
||||||
|
assert_eq!(id * 2, id2);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_execute_no_vector() {
|
async fn test_execute_no_vector() {
|
||||||
// test that it's ok to not specify a query vector (just filter / limit)
|
// test that it's ok to not specify a query vector (just filter / limit)
|
||||||
|
|||||||
@@ -27,7 +27,10 @@ use lance::dataset::optimize::{
|
|||||||
compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
|
compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
|
||||||
};
|
};
|
||||||
pub use lance::dataset::ReadParams;
|
pub use lance::dataset::ReadParams;
|
||||||
use lance::dataset::{Dataset, UpdateBuilder, WhenMatched, WriteMode, WriteParams};
|
use lance::dataset::{
|
||||||
|
ColumnAlteration, Dataset, NewColumnTransform, UpdateBuilder, WhenMatched, WriteMode,
|
||||||
|
WriteParams,
|
||||||
|
};
|
||||||
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
|
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
|
||||||
use lance::io::WrappingObjectStore;
|
use lance::io::WrappingObjectStore;
|
||||||
use lance_index::{optimize::OptimizeOptions, DatasetIndexExt};
|
use lance_index::{optimize::OptimizeOptions, DatasetIndexExt};
|
||||||
@@ -376,6 +379,19 @@ pub trait Table: std::fmt::Display + Send + Sync {
|
|||||||
/// Modeled after ``VACUUM`` in PostgreSQL.
|
/// Modeled after ``VACUUM`` in PostgreSQL.
|
||||||
/// Not all implementations support explicit optimization.
|
/// Not all implementations support explicit optimization.
|
||||||
async fn optimize(&self, action: OptimizeAction) -> Result<OptimizeStats>;
|
async fn optimize(&self, action: OptimizeAction) -> Result<OptimizeStats>;
|
||||||
|
|
||||||
|
/// Add new columns to the table, providing values to fill in.
|
||||||
|
async fn add_columns(
|
||||||
|
&self,
|
||||||
|
transforms: NewColumnTransform,
|
||||||
|
read_columns: Option<Vec<String>>,
|
||||||
|
) -> Result<()>;
|
||||||
|
|
||||||
|
/// Change a column's name or nullability.
|
||||||
|
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()>;
|
||||||
|
|
||||||
|
/// Remove columns from the table.
|
||||||
|
async fn drop_columns(&self, columns: &[&str]) -> Result<()>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reference to a Table pointer.
|
/// Reference to a Table pointer.
|
||||||
@@ -902,6 +918,33 @@ impl Table for NativeTable {
|
|||||||
}
|
}
|
||||||
Ok(stats)
|
Ok(stats)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn add_columns(
|
||||||
|
&self,
|
||||||
|
transforms: NewColumnTransform,
|
||||||
|
read_columns: Option<Vec<String>>,
|
||||||
|
) -> Result<()> {
|
||||||
|
self.dataset
|
||||||
|
.get_mut()
|
||||||
|
.await?
|
||||||
|
.add_columns(transforms, read_columns)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply the given column alterations by delegating to the underlying dataset.
///
/// Mutable access to the dataset is obtained through `get_mut` before the
/// alterations are forwarded unchanged.
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()> {
    let mut dataset = self.dataset.get_mut().await?;
    dataset.alter_columns(alterations).await?;
    Ok(())
}
|
||||||
|
|
||||||
|
async fn drop_columns(&self, columns: &[&str]) -> Result<()> {
|
||||||
|
self.dataset.get_mut().await?.drop_columns(columns).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
Reference in New Issue
Block a user