Mirror of https://github.com/lancedb/lancedb.git

Compare commits: v0.1.4-pyt...v0.1.2-dev (12 commits)

be910485e7
0028b95fd8
102f1d7404
500aa7b002
8aa0f6b4ba
140aa32e08
a067c3dc85
e762a4db4b
5e0ff01879
84356220dd
6c03662c68
5e098f4fe5
.github/workflows/make_release_commit.yml (vendored, new file, 70 lines)

@@ -0,0 +1,70 @@
name: Create release commit

on:
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Just create the local commit/tags but do not push it'
        required: true
        default: "false"
        type: choice
        options:
          - "true"
          - "false"
      part:
        description: 'What kind of release is this?'
        required: true
        default: 'patch'
        type: choice
        options:
          - patch
          - minor
          - major

jobs:
  bump-version:
    runs-on: ubuntu-latest
    steps:
      - name: Check out main
        uses: actions/checkout@v3
        with:
          ref: main
          persist-credentials: false
          fetch-depth: 0
          lfs: true
      - name: Install cargo utils
        run: cargo install cargo-bump cargo-get
      - name: Bump vectordb
        working-directory: rust/vectordb
        run: |
          cargo bump ${{ inputs.part }}
          echo "CRATE_VERSION=$(cargo get version)" >> $GITHUB_ENV
      - name: Bump rust/ffi/node
        working-directory: rust/ffi/node
        run: |
          cargo bump ${{ inputs.part }}
          echo "FFI_CRATE_VERSION=$(cargo get version)" >> $GITHUB_ENV
      - name: Bump node
        working-directory: node
        run: |
          npm version ${{ inputs.part }}
          echo "NPM_PACKAGE_VERSION=$(cat package.json | jq -r '.version')" >> $GITHUB_ENV
      - name: Create tag
        run: |
          if [ "$CRATE_VERSION" != "$FFI_CRATE_VERSION" ]; then
            echo "Version mismatch between rust/vectordb and rust/ffi/node"
            exit 1
          fi
          if [ "$CRATE_VERSION" != "$NPM_PACKAGE_VERSION" ]; then
            echo "Version mismatch between rust/vectordb and node"
            exit 1
          fi
          export TAG="v$CRATE_VERSION"
          git tag $TAG
      - name: Push new version and tag
        if: ${{ inputs.dry_run == 'false' }}
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.RELEASE_TOKEN }}
          branch: main
          tags: true
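The tag step's consistency gate is worth seeing outside YAML. A hedged TypeScript sketch of the same check (the file paths come from the workflow; the script itself is not part of the repo):

```ts
// Hypothetical pre-flight check mirroring the workflow's version gate:
// refuse to tag unless rust/vectordb, rust/ffi/node, and node all agree.
import { readFileSync } from "fs";

function cargoVersion(path: string): string {
  const m = readFileSync(path, "utf8").match(/^version\s*=\s*"([^"]+)"/m);
  if (m === null) throw new Error(`no version field in ${path}`);
  return m[1];
}

const crate = cargoVersion("rust/vectordb/Cargo.toml");
const ffi = cargoVersion("rust/ffi/node/Cargo.toml");
const pkg = JSON.parse(readFileSync("node/package.json", "utf8")).version as string;

if (crate !== ffi || crate !== pkg) {
  throw new Error(`version mismatch: vectordb=${crate}, ffi=${ffi}, node=${pkg}`);
}
console.log(`all versions agree; the tag would be v${crate}`);
```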
.github/workflows/node.yml (vendored, 8 changes)

@@ -67,8 +67,10 @@ jobs:
       - name: Build
         run: |
           npm ci
-          npm run build
           npm run tsc
+          npm run build
+          npm run pack-build
+          npm install --no-save ./dist/vectordb-*.tgz
       - name: Test
         run: npm run test
   macos:
@@ -94,8 +96,10 @@ jobs:
       - name: Build
         run: |
           npm ci
-          npm run build
           npm run tsc
+          npm run build
+          npm run pack-build
+          npm install --no-save ./dist/vectordb-*.tgz
       - name: Test
         run: |
           npm run test
.github/workflows/python.yml (vendored, 6 changes)

@@ -30,8 +30,7 @@ jobs:
           python-version: 3.${{ matrix.python-minor-version }}
       - name: Install lancedb
         run: |
-          pip install -e .
-          pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
+          pip install -e ".[fts]"
           pip install pytest
       - name: Run tests
         run: pytest -x -v --durations=30 tests
@@ -53,8 +52,7 @@ jobs:
           python-version: "3.11"
       - name: Install lancedb
         run: |
-          pip install -e .
-          pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
+          pip install -e ".[fts]"
           pip install pytest
       - name: Run tests
         run: pytest -x -v --durations=30 tests
.github/workflows/release.yml (vendored, new file, 174 lines)

@@ -0,0 +1,174 @@
name: Prepare Release

# TODO: bump versions in CI
# NOTE: Python is a separate release for now.

on:
  push:
    tags:
      - v*

jobs:
  draft-release:
    runs-on: ubuntu-latest
    steps:
      - uses: softprops/action-gh-release@v1
        with:
          draft: true
          prerelease: true # hardcoded on for now
          generate_release_notes: true

  rust:
    runs-on: ubuntu-latest
    needs: draft-release
    defaults:
      run:
        shell: bash
        working-directory: rust/vectordb
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Package Rust
        run: cargo package --all-features
      - uses: softprops/action-gh-release@v1
        with:
          draft: true
          files: target/package/vectordb-*.crate
          fail_on_unmatched_files: true

  node:
    runs-on: ubuntu-latest
    needs: draft-release
    defaults:
      run:
        shell: bash
        working-directory: node
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - uses: actions/setup-node@v3
        with:
          node-version: 20
          cache: 'npm'
          cache-dependency-path: node/package-lock.json
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Build
        run: |
          npm ci
          npm run tsc
          npm pack
      - uses: softprops/action-gh-release@v1
        with:
          draft: true
          files: node/vectordb-*.tgz
          fail_on_unmatched_files: true

  node-macos:
    runs-on: macos-12
    needs: draft-release
    strategy:
      fail-fast: false
      matrix:
        target: [x86_64-apple-darwin, aarch64-apple-darwin]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Install system dependencies
        run: brew install protobuf
      - name: Install npm dependencies
        run: |
          cd node
          npm ci
      - name: Install rustup target
        if: ${{ matrix.target == 'aarch64-apple-darwin' }}
        run: rustup target add aarch64-apple-darwin
      - name: Build MacOS native node modules
        run: bash ci/build_macos_artifacts.sh ${{ matrix.target }}
      - uses: softprops/action-gh-release@v1
        with:
          draft: true
          files: node/dist/vectordb-darwin*.tgz
          fail_on_unmatched_files: true

  node-linux:
    name: node-linux (${{ matrix.arch }}-unknown-linux-${{ matrix.libc }})
    runs-on: ubuntu-latest
    needs: draft-release
    strategy:
      fail-fast: false
      matrix:
        libc:
          - gnu
          # TODO: re-enable musl once we have refactored to pre-built containers.
          # Right now we have to build node from source, which is too expensive.
          # - musl
        arch:
          - x86_64
          - aarch64
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Change owner to root (for npm)
        # The docker container is run as root, so we need the files to be owned by root.
        # Otherwise npm is a nightmare: https://github.com/npm/cli/issues/3773
        run: sudo chown -R root:root .
      - name: Set up QEMU
        if: ${{ matrix.arch == 'aarch64' }}
        uses: docker/setup-qemu-action@v2
        with:
          platforms: arm64
      - name: Build Linux GNU native node modules
        if: ${{ matrix.libc == 'gnu' }}
        run: |
          docker run \
            -v $(pwd):/io -w /io \
            quay.io/pypa/manylinux2014_${{ matrix.arch }} \
            bash ci/build_linux_artifacts.sh ${{ matrix.arch }}-unknown-linux-gnu
      - name: Build musl Linux native node modules
        if: ${{ matrix.libc == 'musl' }}
        run: |
          docker run --platform linux/arm64/v8 \
            -v $(pwd):/io -w /io \
            quay.io/pypa/musllinux_1_1_${{ matrix.arch }} \
            bash ci/build_linux_artifacts.sh ${{ matrix.arch }}-unknown-linux-musl
      - uses: softprops/action-gh-release@v1
        with:
          draft: true
          files: node/dist/vectordb-linux*.tgz
          fail_on_unmatched_files: true

  release:
    needs: [rust, node, node-macos, node-linux]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v3
      - name: Publish to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          python -m twine upload --non-interactive \
            --skip-existing \
            --repository testpypi python/dist/*
      - name: Publish to NPM
        run: |
          for filename in node/dist/*.tgz; do
            npm publish --dry-run $filename
          done
      - name: Publish to crates.io
        env:
          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: |
          cargo publish --dry-run --no-verify rust/target/vectordb-*.crate
      # - uses: softprops/action-gh-release@v1
      #   with:
      #     draft: false
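The NPM step above is a plain loop over tarballs. A hedged TypeScript equivalent, assuming the artifacts were already downloaded into node/dist/ as in the workflow:

```ts
// Sketch of the "Publish to NPM" loop from the release job (dry-run only).
import { execFileSync } from "child_process";
import { readdirSync } from "fs";
import { join } from "path";

const dist = "node/dist";
for (const file of readdirSync(dist).filter((f) => f.endsWith(".tgz"))) {
  // Same as the workflow's `npm publish --dry-run $filename`.
  execFileSync("npm", ["publish", "--dry-run", join(dist, file)], {
    stdio: "inherit",
  });
}
```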
.gitignore (vendored, 2 changes)

@@ -4,6 +4,8 @@
 **/__pycache__
 .DS_Store
 
+.vscode
+
 rust/target
 rust/Cargo.lock
 
Cargo.lock (generated, 11 changes)

@@ -1052,7 +1052,6 @@ dependencies = [
 "paste",
 "petgraph",
 "rand",
 "regex",
 "uuid",
]

@@ -1646,9 +1645,9 @@ dependencies = [

[[package]]
name = "lance"
-version = "0.4.17"
+version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86dda8185bd1ffae7b910c1f68035af23be9b717c52e9cc4de176cd30b47f772"
+checksum = "fc96cf89139af6f439a0e28ccd04ddf81be795b79fda3105b7a8952fadeb778e"
dependencies = [
 "accelerate-src",
 "arrow",

@@ -1685,7 +1684,6 @@ dependencies = [
 "rand",
 "reqwest",
 "shellexpand",
 "snafu",
 "sqlparser-lance",
 "tokio",
 "url",

@@ -3358,13 +3356,12 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"

[[package]]
name = "vectordb"
-version = "0.0.1"
+version = "0.1.2"
dependencies = [
 "arrow-array",
 "arrow-data",
 "arrow-schema",
 "lance",
-"object_store",
 "rand",
 "tempfile",
 "tokio",

@@ -3372,7 +3369,7 @@ dependencies = [

[[package]]
name = "vectordb-node"
-version = "0.1.0"
+version = "0.1.2"
dependencies = [
 "arrow-array",
 "arrow-ipc",
ci/build_linux_artifacts.sh (new file, 86 lines)

@@ -0,0 +1,86 @@
#!/bin/bash
# Builds the Linux artifacts (node binaries).
# Usage: ./build_linux_artifacts.sh [target]
# Targets supported:
# - x86_64-unknown-linux-gnu:centos
# - aarch64-unknown-linux-gnu:centos
# - aarch64-unknown-linux-musl
# - x86_64-unknown-linux-musl

# TODO: refactor this into a Docker container we can pull

set -e

setup_dependencies() {
    echo "Installing system dependencies..."
    if [[ $1 == *musl ]]; then
        # musllinux
        apk add openssl-dev
    else
        # manylinux2014
        yum install -y openssl-devel unzip
    fi

    if [[ $1 == x86_64* ]]; then
        ARCH=x86_64
    else
        # gnu target
        ARCH=aarch_64
    fi

    # Install new enough protobuf (yum-provided is old)
    PB_REL=https://github.com/protocolbuffers/protobuf/releases
    PB_VERSION=23.1
    curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
    unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local
}

install_node() {
    echo "Installing node..."
    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
    source "$HOME"/.bashrc

    if [[ $1 == *musl ]]; then
        # This node version is 15, we need 16 or higher:
        # apk add nodejs-current npm
        # So instead we install from source (nvm doesn't provide binaries for musl):
        nvm install -s --no-progress 17
    else
        nvm install --no-progress 17 # latest that supports glibc 2.17
    fi
}

install_rust() {
    echo "Installing rust..."
    curl https://sh.rustup.rs -sSf | bash -s -- -y
    export PATH="$PATH:/root/.cargo/bin"
}

build_node_binary() {
    echo "Building node library for $1..."
    pushd node

    npm ci

    if [[ $1 == *musl ]]; then
        # This is needed for cargo to allow building cdylibs with musl
        export RUSTFLAGS="-C target-feature=-crt-static"
    fi
    # We don't pass in target, since the native target here already matches
    # and openblas-src doesn't do well with cross-compilation.
    npm run build-release
    npm run pack-build

    popd
}

TARGET=${1:-x86_64-unknown-linux-gnu}
# Others:
# aarch64-unknown-linux-gnu
# x86_64-unknown-linux-musl
# aarch64-unknown-linux-musl

setup_dependencies $TARGET
install_node $TARGET
install_rust
build_node_binary $TARGET
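One detail the script encodes: protoc's release archives name the ARM architecture "aarch_64", not "aarch64". A small TypeScript illustration of that mapping (the helper function is hypothetical):

```ts
// Isolate the arch-naming quirk handled in setup_dependencies():
// protoc release zips spell ARM with an underscore.
function protocArch(rustTarget: string): string {
  return rustTarget.startsWith("x86_64") ? "x86_64" : "aarch_64";
}

// e.g. protoc-23.1-linux-aarch_64.zip
console.log(`protoc-23.1-linux-${protocArch("aarch64-unknown-linux-gnu")}.zip`);
```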
ci/build_macos_artifacts.sh (new file, 38 lines)

@@ -0,0 +1,38 @@
# Builds the macOS artifacts (node binaries).
# Usage: ./build_macos_artifacts.sh [target]
# Targets supported: x86_64-apple-darwin aarch64-apple-darwin

prebuild_rust() {
    # Building here for the sake of easier debugging.
    pushd rust/ffi/node

    for target in $1
    do
        echo "Building rust library for $target"
        export RUST_BACKTRACE=1
        cargo build --release --target $target
    done

    popd
}

build_node_binaries() {
    pushd node

    for target in $1
    do
        echo "Building node library for $target"
        npm run build-release -- --target $target
        npm run pack-build -- --target $target
    done

    popd
}

if [ -n "$1" ]; then
    targets=$1
else
    targets="x86_64-apple-darwin aarch64-apple-darwin"
fi

prebuild_rust $targets
build_node_binaries $targets
ci/release_process.md (new file, 90 lines)

@@ -0,0 +1,90 @@
# How to release

This is for the Rust crate and Node module. For now, the Python module is
released separately.

The release is started by bumping the versions and pushing a new tag. To do this
automatically, use the `make_release_commit` GitHub action.

When the tag is pushed, GitHub Actions will start building the libraries and
will upload them to a draft release.

While those jobs are running, edit the release notes as needed. For example,
bring relevant new features and bugfixes to the top of the notes and move the
testing and CI changes to the bottom.

Once the jobs have finished, the release will be marked as non-draft and the
artifacts will be published to crates.io, NPM, and PyPI.

## Manual process

You can also build the artifacts locally on a MacOS machine.

### Build the MacOS release libraries

One-time setup:

```shell
rustup target add x86_64-apple-darwin aarch64-apple-darwin
```

To build:

```shell
bash ci/build_macos_artifacts.sh
```

### Build the Linux release libraries

To build a Linux library, we need to use docker with a different build script:

```shell
ARCH=aarch64
docker run \
    -v $(pwd):/io -w /io \
    quay.io/pypa/manylinux2014_$ARCH \
    bash ci/build_linux_artifacts.sh $ARCH-unknown-linux-gnu
```

You can change `ARCH` to `x86_64`.

Similar script for musl binaries (not yet working):

```shell
ARCH=aarch64
docker run \
    --user $(id -u) \
    -v $(pwd):/io -w /io \
    quay.io/pypa/musllinux_1_1_$ARCH \
    bash ci/build_linux_artifacts.sh $ARCH-unknown-linux-musl
```

<!--

For debugging, use these snippets:

```shell
ARCH=aarch64
docker run -it \
    -v $(pwd):/io -w /io \
    quay.io/pypa/manylinux2014_$ARCH \
    bash
```

```shell
ARCH=aarch64
docker run -it \
    -v $(pwd):/io -w /io \
    quay.io/pypa/musllinux_1_1_$ARCH \
    bash
```

Note: musllinux_1_1 is Alpine Linux 3.12
-->

```
docker run \
    -v $(pwd):/io -w /io \
    quay.io/pypa/musllinux_1_1_aarch64 \
    bash alpine_repro.sh
```
@@ -6,10 +6,9 @@ to make this available for JS as well.
 
 ## Installation
 
-To use full text search, you must install optional dependency tantivy-py:
+To use full text search, you must install the fts optional dependencies:
 
-    # tantivy 0.19.2
-    pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
+`pip install lancedb[fts]`
 
 
 ## Quickstart
node/.npmignore (new file, 2 lines)

@@ -0,0 +1,2 @@
gen_test_data.py
index.node
@@ -8,6 +8,10 @@ A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb)
 npm install vectordb
 ```
 
+This will download the appropriate native library for your platform. We currently
+support x86_64 Linux, aarch64 Linux, Intel MacOS, and ARM (M1/M2) MacOS. We do not
+yet support Windows or musl-based Linux (such as Alpine Linux).
+
 ## Usage
 
 ### Basic Example
@@ -24,6 +28,19 @@ The [examples](./examples) folder contains complete examples.
 
 ## Development
 
+Build and install the rust library with:
+
+```bash
+npm run build
+npm run pack-build
+npm install --no-save ./dist/vectordb-*.tgz
+```
+
+`npm run build` builds the Rust library, `npm run pack-build` packages the Rust
+binary into an npm module called `@vectordb/<platform>` (for example,
+`@vectordb/darwin-arm64`), and then `npm install ...` installs that
+module.
+
 The LanceDB javascript is built with npm:
 
 ```bash
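To make the Development section concrete, here is a hedged TypeScript usage sketch of the built package, using the connect/createTable/search API that appears in the diffs below; the database path is illustrative:

```ts
// Minimal end-to-end example against a local LanceDB directory.
import * as lancedb from "vectordb";

async function main(): Promise<void> {
  const db = await lancedb.connect("data/sample-lancedb");
  const table = await db.createTable("vectors", [
    { id: 1, vector: [0.1, 0.2] },
    { id: 2, vector: [1.1, 1.2] },
  ]);
  // Nearest-neighbor search on the "vector" column.
  const results = await table.search([0.1, 0.3]).limit(2).execute();
  console.log(results.length);
}

main().catch(console.error);
```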
@@ -12,29 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+const { currentTarget } = require('@neon-rs/load');
+
 let nativeLib;
 
-function getPlatformLibrary() {
-  if (process.platform === "darwin" && process.arch == "arm64") {
-    return require('./aarch64-apple-darwin.node');
-  } else if (process.platform === "darwin" && process.arch == "x64") {
-    return require('./x86_64-apple-darwin.node');
-  } else if (process.platform === "linux" && process.arch == "x64") {
-    return require('./x86_64-unknown-linux-gnu.node');
-  } else {
-    throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`)
-  }
-}
-
 try {
-  nativeLib = require('./index.node')
+  nativeLib = require(`@vectordb/${currentTarget()}`);
 } catch (e) {
-  if (e.code === "MODULE_NOT_FOUND") {
-    nativeLib = getPlatformLibrary();
-  } else {
-    throw new Error('vectordb: failed to load native library. Please file a bug report at https://github.com/lancedb/lancedb/issues');
-  }
+  throw new Error(`vectordb: failed to load native library.
+You may need to run \`npm install @vectordb/${currentTarget()}\`.
+
+If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
+
+Source error: ${e}`);
 }
 
-module.exports = nativeLib
+// Dynamic require for runtime.
+module.exports = nativeLib;
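For context on the new loader: @neon-rs/load's currentTarget() resolves a platform slug, and the matching @vectordb/&lt;slug&gt; package is required at runtime. A rough, hand-rolled TypeScript approximation for the platforms named above (the slug table is an assumption for illustration, not the library's actual logic):

```ts
// Approximate the slug resolution the loader delegates to @neon-rs/load.
function targetPackage(): string {
  const key = `${process.platform}-${process.arch}`;
  const slugs: Record<string, string> = {
    "darwin-arm64": "darwin-arm64",
    "darwin-x64": "darwin-x64",
    "linux-x64": "linux-x64-gnu", // glibc assumed; musl would be linux-x64-musl
  };
  const slug = slugs[key];
  if (slug === undefined) throw new Error(`unsupported platform ${key}`);
  return `@vectordb/${slug}`;
}

console.log(targetPackage()); // the package the runtime require() would load
```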
node/package-lock.json (generated, 45 changes)

@@ -7,12 +7,26 @@
     "": {
       "name": "vectordb",
       "version": "0.1.1",
+      "cpu": [
+        "x64",
+        "arm64"
+      ],
       "license": "Apache-2.0",
+      "os": [
+        "darwin",
+        "linux"
+      ],
       "dependencies": {
         "@apache-arrow/ts": "^12.0.0",
+        "@neon-rs/load": "^0.0.74",
+        "@vectordb/darwin-arm64": "0.1.1",
+        "@vectordb/darwin-x64": "0.1.1",
+        "@vectordb/linux-x64-gnu": "0.1.1",
+        "@vectordb/linux-x64-musl": "0.1.1",
         "apache-arrow": "^12.0.0"
       },
       "devDependencies": {
+        "@neon-rs/cli": "^0.0.74",
         "@types/chai": "^4.3.4",
         "@types/mocha": "^10.0.1",
         "@types/node": "^18.16.2",
@@ -30,6 +44,12 @@
         "ts-node": "^10.9.1",
         "ts-node-dev": "^2.0.0",
         "typescript": "*"
       },
+      "optionalDependencies": {
+        "@vectordb/darwin-arm64": "0.1.1",
+        "@vectordb/darwin-x64": "0.1.1",
+        "@vectordb/linux-x64-gnu": "0.1.1",
+        "@vectordb/linux-x64-musl": "0.1.1"
+      }
     },
     "node_modules/@apache-arrow/ts": {
@@ -197,6 +217,20 @@
         "@jridgewell/sourcemap-codec": "^1.4.10"
       }
     },
+    "node_modules/@neon-rs/cli": {
+      "version": "0.0.74",
+      "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
+      "integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
+      "dev": true,
+      "bin": {
+        "neon": "index.js"
+      }
+    },
+    "node_modules/@neon-rs/load": {
+      "version": "0.0.74",
+      "resolved": "https://registry.npmjs.org/@neon-rs/load/-/load-0.0.74.tgz",
+      "integrity": "sha512-/cPZD907UNz55yrc/ud4wDgQKtU1TvkD9jeqZWG6J4IMmZkp6zgjkQcKA8UvpkZlcpPHvc8J17sGzLFbP/LUYg=="
+    },
     "node_modules/@nodelib/fs.scandir": {
       "version": "2.1.5",
       "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -4191,6 +4225,17 @@
         "@jridgewell/sourcemap-codec": "^1.4.10"
       }
     },
+    "@neon-rs/cli": {
+      "version": "0.0.74",
+      "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
+      "integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
+      "dev": true
+    },
+    "@neon-rs/load": {
+      "version": "0.0.74",
+      "resolved": "https://registry.npmjs.org/@neon-rs/load/-/load-0.0.74.tgz",
+      "integrity": "sha512-/cPZD907UNz55yrc/ud4wDgQKtU1TvkD9jeqZWG6J4IMmZkp6zgjkQcKA8UvpkZlcpPHvc8J17sGzLFbP/LUYg=="
+    },
     "@nodelib/fs.scandir": {
       "version": "2.1.5",
       "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
node/package.json

@@ -1,15 +1,18 @@
 {
   "name": "vectordb",
-  "version": "0.1.1",
+  "version": "0.1.2",
   "description": " Serverless, low-latency vector database for AI applications",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "scripts": {
     "tsc": "tsc -b",
-    "build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json-render-diagnostics",
+    "build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json",
     "build-release": "npm run build -- --release",
     "cross-release": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cross build --message-format=json --release -p vectordb-node",
     "test": "mocha -recursive dist/test",
-    "lint": "eslint src --ext .js,.ts"
+    "lint": "eslint src --ext .js,.ts",
+    "pack-build": "neon pack-build",
+    "check-npm": "printenv && which node && which npm && npm --version"
   },
   "repository": {
@@ -24,6 +27,7 @@
   "author": "Lance Devs",
   "license": "Apache-2.0",
   "devDependencies": {
+    "@neon-rs/cli": "^0.0.74",
     "@types/chai": "^4.3.4",
     "@types/mocha": "^10.0.1",
     "@types/node": "^18.16.2",
@@ -44,6 +48,33 @@
   },
   "dependencies": {
     "@apache-arrow/ts": "^12.0.0",
+    "@neon-rs/load": "^0.0.74",
     "apache-arrow": "^12.0.0"
   },
+  "os": [
+    "darwin",
+    "linux"
+  ],
+  "cpu": [
+    "x64",
+    "arm64"
+  ],
+  "neon": {
+    "targets": {
+      "x86_64-apple-darwin": "@vectordb/darwin-x64",
+      "aarch64-apple-darwin": "@vectordb/darwin-arm64",
+      "x86_64-unknown-linux-gnu": "@vectordb/linux-x64-gnu",
+      "x86_64-unknown-linux-musl": "@vectordb/linux-x64-musl",
+      "aarch64-unknown-linux-gnu": "@vectordb/linux-arm64-gnu",
+      "aarch64-unknown-linux-musl": "@vectordb/linux-arm64-musl"
+    }
+  },
+  "optionalDependencies": {
+    "@vectordb/darwin-arm64": "0.1.2",
+    "@vectordb/darwin-x64": "0.1.2",
+    "@vectordb/linux-x64-gnu": "0.1.2",
+    "@vectordb/linux-x64-musl": "0.1.2",
+    "@vectordb/linux-arm64-gnu": "0.1.2",
+    "@vectordb/linux-arm64-musl": "0.1.2"
+  }
 }
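A short TypeScript sketch of what the os/cpu fields above enforce at install time (the check itself is illustrative; npm performs it internally when resolving the optionalDependencies):

```ts
// Mirror npm's platform gate for this package: outside these lists, npm
// refuses to install, so no prebuilt @vectordb/* binary is ever expected.
import { platform, arch } from "process";

const supportedOs = ["darwin", "linux"];
const supportedCpu = ["x64", "arm64"];

if (!supportedOs.includes(platform) || !supportedCpu.includes(arch)) {
  console.warn(`vectordb has no native build for ${platform}/${arch}`);
}
```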
@@ -15,16 +15,15 @@
 import {
   Field,
   Float32,
-  List, type ListBuilder,
+  List,
   makeBuilder,
   RecordBatchFileWriter,
   Table, Utf8,
   type Vector,
   vectorFromArray
 } from 'apache-arrow'
-import { type EmbeddingFunction } from './index'
 
-export function convertToTable<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Table {
+export function convertToTable (data: Array<Record<string, unknown>>): Table {
   if (data.length === 0) {
     throw new Error('At least one record needs to be provided')
   }
@@ -34,7 +33,11 @@ export function convertToTable<T> (data: Array<Record<string, unknown>>, embeddi
 
   for (const columnsKey of columns) {
     if (columnsKey === 'vector') {
-      const listBuilder = newVectorListBuilder()
+      const children = new Field<Float32>('item', new Float32())
+      const list = new List(children)
+      const listBuilder = makeBuilder({
+        type: list
+      })
       const vectorSize = (data[0].vector as any[]).length
       for (const datum of data) {
         if ((datum[columnsKey] as any[]).length !== vectorSize) {
@@ -49,14 +52,6 @@ export function convertToTable<T> (data: Array<Record<string, unknown>>, embeddi
       for (const datum of data) {
         values.push(datum[columnsKey])
       }
-
-      if (columnsKey === embeddings?.sourceColumn) {
-        const vectors = embeddings.embed(values as T[])
-        const listBuilder = newVectorListBuilder()
-        vectors.map(v => listBuilder.append(v))
-        records.vector = listBuilder.finish().toVector()
-      }
 
       if (typeof values[0] === 'string') {
         // `vectorFromArray` converts strings into dictionary vectors, forcing it back to a string column
         records[columnsKey] = vectorFromArray(values, new Utf8())
@@ -69,17 +64,8 @@ export function convertToTable<T> (data: Array<Record<string, unknown>>, embeddi
   return new Table(records)
 }
 
-// Creates a new Arrow ListBuilder that stores a Vector column
-function newVectorListBuilder (): ListBuilder<Float32, any> {
-  const children = new Field<Float32>('item', new Float32())
-  const list = new List(children)
-  return makeBuilder({
-    type: list
-  })
-}
-
-export async function fromRecordsToBuffer<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Buffer> {
-  const table = convertToTable(data, embeddings)
+export async function fromRecordsToBuffer (data: Array<Record<string, unknown>>): Promise<Buffer> {
+  const table = convertToTable(data)
   const writer = RecordBatchFileWriter.writeAll(table)
   return Buffer.from(await writer.toUint8Array())
 }
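The List&lt;Float32&gt; builder pattern that this diff inlines can be shown standalone. A hedged sketch using the same apache-arrow calls as the source:

```ts
// Build a List<Float32> "vector" column with makeBuilder, one append per row.
import { Field, Float32, List, makeBuilder } from "apache-arrow";

const item = new Field("item", new Float32());
const builder = makeBuilder({ type: new List(item) });

for (const vector of [[0.1, 0.2], [1.1, 1.2]]) {
  builder.append(vector);
}
// finish().toVector() yields the Arrow vector used as a table column.
const column = builder.finish().toVector();
console.log(column.length); // 2 rows, each a two-element float list
```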
@@ -28,8 +28,7 @@ const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSe
  * @param uri The uri of the database.
  */
 export async function connect (uri: string): Promise<Connection> {
-  const db = await databaseNew(uri)
-  return new Connection(db, uri)
+  return new Connection(uri)
 }
 
 /**
@@ -39,9 +38,9 @@ export class Connection {
   private readonly _uri: string
   private readonly _db: any
 
-  constructor (db: any, uri: string) {
+  constructor (uri: string) {
     this._uri = uri
-    this._db = db
+    this._db = databaseNew(uri)
   }
 
   get uri (): string {
@@ -56,50 +55,17 @@ export class Connection {
   }
 
   /**
-   * Open a table in the database.
-   *
-   * @param name The name of the table.
-   */
-  async openTable (name: string): Promise<Table>
-  /**
-   * Open a table in the database.
-   *
-   * @param name The name of the table.
-   * @param embeddings An embedding function to use on this Table
-   */
-  async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
-  async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
+   * Open a table in the database.
+   * @param name The name of the table.
+   */
+  async openTable (name: string): Promise<Table> {
     const tbl = await databaseOpenTable.call(this._db, name)
-    if (embeddings !== undefined) {
-      return new Table(tbl, name, embeddings)
-    } else {
-      return new Table(tbl, name)
-    }
+    return new Table(tbl, name)
   }
 
   /**
-   * Creates a new Table and initialize it with new data.
-   *
-   * @param name The name of the table.
-   * @param data Non-empty Array of Records to be inserted into the Table
-   */
-
-  async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
-  /**
-   * Creates a new Table and initialize it with new data.
-   *
-   * @param name The name of the table.
-   * @param data Non-empty Array of Records to be inserted into the Table
-   * @param embeddings An embedding function to use on this Table
-   */
-  async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
-  async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
-    const tbl = await tableCreate.call(this._db, name, await fromRecordsToBuffer(data, embeddings))
-    if (embeddings !== undefined) {
-      return new Table(tbl, name, embeddings)
-    } else {
-      return new Table(tbl, name)
-    }
+   * Creates a new Table and initialize it with new data.
+   *
+   * @param name The name of the table.
+   * @param data Non-empty Array of Records to be inserted into the Table
+   */
+  async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
+    await tableCreate.call(this._db, name, await fromRecordsToBuffer(data))
+    return await this.openTable(name)
   }
 
   async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
@@ -109,22 +75,16 @@ export class Connection {
   }
 }
 
-export class Table<T = number[]> {
+/**
+ * A table in a LanceDB database.
+ */
+export class Table {
   private readonly _tbl: any
   private readonly _name: string
-  private readonly _embeddings?: EmbeddingFunction<T>
 
-  constructor (tbl: any, name: string)
-  /**
-   * @param tbl
-   * @param name
-   * @param embeddings An embedding function to use when interacting with this table
-   */
-  constructor (tbl: any, name: string, embeddings: EmbeddingFunction<T>)
-  constructor (tbl: any, name: string, embeddings?: EmbeddingFunction<T>) {
+  constructor (tbl: any, name: string) {
     this._tbl = tbl
     this._name = name
-    this._embeddings = embeddings
   }
 
   get name (): string {
@@ -132,16 +92,10 @@ export class Table<T = number[]> {
   }
 
   /**
-   * Creates a search query to find the nearest neighbors of the given search term
-   * @param query The query search term
-   */
-  search (query: T): Query {
-    let queryVector: number[]
-    if (this._embeddings !== undefined) {
-      queryVector = this._embeddings.embed([query])[0]
-    } else {
-      queryVector = query as number[]
-    }
+   * Create a search query to find the nearest neighbors of the given query vector.
+   * @param queryVector The query vector.
+   */
+  search (queryVector: number[]): Query {
     return new Query(this._tbl, queryVector)
   }
 
@@ -152,7 +106,7 @@ export class Table<T = number[]> {
    * @return The number of rows added to the table
    */
   async add (data: Array<Record<string, unknown>>): Promise<number> {
-    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Append.toString())
+    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Append.toString())
  }
 
   /**
@@ -162,14 +116,9 @@ export class Table<T = number[]> {
    * @return The number of rows added to the table
    */
   async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
-    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Overwrite.toString())
+    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Overwrite.toString())
   }
 
   /**
    * Create an ANN index on this Table vector index.
    *
    * @param indexParams The parameters of this Index, @see VectorIndexParams.
    */
   async create_index (indexParams: VectorIndexParams): Promise<any> {
     return tableCreateVectorIndex.call(this._tbl, indexParams)
   }
@@ -319,21 +268,6 @@ export enum WriteMode {
   Append = 'append'
 }
 
-/**
- * An embedding function that automatically creates vector representation for a given column.
- */
-export interface EmbeddingFunction<T> {
-  /**
-   * The name of the column that will be used as input for the Embedding Function.
-   */
-  sourceColumn: string
-
-  /**
-   * Creates a vector representation for the given values.
-   */
-  embed: (data: T[]) => number[][]
-}
-
 /**
  * Distance metrics type.
 */
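A hedged TypeScript usage sketch of the simplified Connection/Table API above; the ivf_pq parameters are the illustrative values used in the test suite, and the path is hypothetical:

```ts
// Open an existing table and build an ANN index on its "vector" column.
import * as lancedb from "vectordb";

async function demo(): Promise<void> {
  const db = await lancedb.connect("data/sample-lancedb");
  const table = await db.openTable("vectors");
  await table.create_index({
    type: "ivf_pq",
    column: "vector",
    num_partitions: 2,
    max_iters: 2,
  });
}

demo().catch(console.error);
```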
(deleted file, 52 lines)

@@ -1,52 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// IO tests

import { describe } from 'mocha'
import { assert } from 'chai'

import * as lancedb from '../index'

describe('LanceDB S3 client', function () {
  if (process.env.TEST_S3_BASE_URL != null) {
    const baseUri = process.env.TEST_S3_BASE_URL
    it('should have a valid url', async function () {
      const uri = `${baseUri}/valid_url`
      const table = await createTestDB(uri, 2, 20)
      const con = await lancedb.connect(uri)
      assert.equal(con.uri, uri)

      const results = await table.search([0.1, 0.3]).limit(5).execute()
      assert.equal(results.length, 5)
    })
  } else {
    describe.skip('Skip S3 test', function () {})
  }
})

async function createTestDB (uri: string, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
  const con = await lancedb.connect(uri)

  const data = []
  for (let i = 0; i < numRows; i++) {
    const vector = []
    for (let j = 0; j < numDimensions; j++) {
      vector.push(i + (j * 0.1))
    }
    data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
  }

  return await con.createTable('vectors', data)
}
@@ -17,7 +17,7 @@ import { assert } from 'chai'
 import { track } from 'temp'
 
 import * as lancedb from '../index'
-import { type EmbeddingFunction, MetricType, Query } from '../index'
+import { MetricType, Query } from '../index'
 
 describe('LanceDB client', function () {
   describe('when creating a connection to lancedb', function () {
@@ -140,39 +140,6 @@ describe('LanceDB client', function () {
       await table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 })
     }).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow
   })
 
-  describe('when using a custom embedding function', function () {
-    class TextEmbedding implements EmbeddingFunction<string> {
-      sourceColumn: string
-
-      constructor (targetColumn: string) {
-        this.sourceColumn = targetColumn
-      }
-
-      _embedding_map = new Map<string, number[]>([
-        ['foo', [2.1, 2.2]],
-        ['bar', [3.1, 3.2]]
-      ])
-
-      embed (data: string[]): number[][] {
-        return data.map(datum => this._embedding_map.get(datum) ?? [0.0, 0.0])
-      }
-    }
-
-    it('should encode the original data into embeddings', async function () {
-      const dir = await track().mkdir('lancejs')
-      const con = await lancedb.connect(dir)
-      const embeddings = new TextEmbedding('name')
-
-      const data = [
-        { price: 10, name: 'foo' },
-        { price: 50, name: 'bar' }
-      ]
-      const table = await con.createTable('vectors', data, embeddings)
-      const results = await table.search('foo').execute()
-      assert.equal(results.length, 2)
-    })
-  })
 })
 
 describe('Query object', function () {
@@ -16,13 +16,7 @@ import os
 from typing import List, Tuple
 
 import pyarrow as pa
-
-try:
-    import tantivy
-except ImportError:
-    raise ImportError(
-        "Please install tantivy-py `pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985` to use the full text search feature."
-    )
+import tantivy
 
 from .table import LanceTable
 
@@ -153,7 +153,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
             import tantivy
         except ImportError:
             raise ImportError(
-                "Please install tantivy-py `pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985` to use the full text search feature."
+                "You need to install the `lancedb[fts]` extra to use this method."
             )
 
         from .fts import search_index
@@ -1,7 +1,7 @@
 [project]
 name = "lancedb"
-version = "0.1.4"
-dependencies = ["pylance>=0.4.17", "ratelimiter", "retry", "tqdm"]
+version = "0.1.2"
+dependencies = ["pylance>=0.4.6", "ratelimiter", "retry", "tqdm"]
 description = "lancedb"
 authors = [
     { name = "LanceDB Devs", email = "dev@lancedb.com" },
@@ -45,6 +45,10 @@ dev = [
 docs = [
     "mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"
 ]
+fts = [
+    # tantivy 0.19.2
+    "tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985"
+]
 
 [build-system]
 requires = [
@@ -14,6 +14,7 @@ import sys
 
 import numpy as np
 import pyarrow as pa
+
 from lancedb.embeddings import with_embeddings
 
 
@@ -13,13 +13,13 @@
 import os
 import random
 
-import lancedb.fts
 import numpy as np
 import pandas as pd
 import pytest
 import tantivy
 
 import lancedb as ldb
+import lancedb.fts
 
 
 @pytest.fixture
@@ -17,6 +17,7 @@ import pandas as pd
 import pandas.testing as tm
 import pyarrow as pa
 import pytest
+
 from lancedb.query import LanceQueryBuilder
 
 
@@ -16,6 +16,7 @@ from pathlib import Path
 import pandas as pd
 import pyarrow as pa
 import pytest
+
 from lancedb.table import LanceTable
 
 
@@ -1,6 +1,6 @@
 [package]
 name = "vectordb-node"
-version = "0.1.0"
+version = "0.1.2"
 description = "Serverless, low-latency vector database for AI applications"
 license = "Apache-2.0"
 edition = "2018"
@@ -15,7 +15,7 @@ arrow-ipc = "37.0"
 arrow-schema = "37.0"
 once_cell = "1"
 futures = "0.3"
-lance = "0.4.17"
+lance = "0.4.3"
 vectordb = { path = "../../vectordb" }
 tokio = { version = "1.23", features = ["rt-multi-thread"] }
 neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }
@@ -39,7 +39,7 @@ pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsP
     let add_result = table
         .lock()
         .unwrap()
-        .create_index(&index_params_builder)
+        .create_idx(&index_params_builder)
         .await;
 
     deferred.settle_with(&channel, move |mut cx| {
@@ -56,46 +56,23 @@ fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
     RUNTIME.get_or_try_init(|| Runtime::new().or_else(|err| cx.throw_error(err.to_string())))
 }
 
-fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
+fn database_new(mut cx: FunctionContext) -> JsResult<JsBox<JsDatabase>> {
     let path = cx.argument::<JsString>(0)?.value(&mut cx);
-
-    let rt = runtime(&mut cx)?;
-    let channel = cx.channel();
-    let (deferred, promise) = cx.promise();
-
-    rt.spawn(async move {
-        let database = Database::connect(&path).await;
-
-        deferred.settle_with(&channel, move |mut cx| {
-            let db = JsDatabase {
-                database: Arc::new(database.or_else(|err| cx.throw_error(err.to_string()))?),
-            };
-            Ok(cx.boxed(db))
-        });
-    });
-    Ok(promise)
+    let db = JsDatabase {
+        database: Arc::new(Database::connect(path).or_else(|err| cx.throw_error(err.to_string()))?),
+    };
+    Ok(cx.boxed(db))
 }
 
-fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
+fn database_table_names(mut cx: FunctionContext) -> JsResult<JsArray> {
     let db = cx
         .this()
         .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
-
-    let rt = runtime(&mut cx)?;
-    let (deferred, promise) = cx.promise();
-    let channel = cx.channel();
-    let database = db.database.clone();
-
-    rt.spawn(async move {
-        let tables_rst = database.table_names().await;
-
-        deferred.settle_with(&channel, move |mut cx| {
-            let tables = tables_rst.or_else(|err| cx.throw_error(err.to_string()))?;
-            let table_names = convert::vec_str_to_array(&tables, &mut cx);
-            table_names
-        });
-    });
-    Ok(promise)
+    let tables = db
+        .database
+        .table_names()
+        .or_else(|err| cx.throw_error(err.to_string()))?;
+    convert::vec_str_to_array(&tables, &mut cx)
 }
 
 fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
@@ -110,7 +87,7 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
 
     let (deferred, promise) = cx.promise();
     rt.spawn(async move {
-        let table_rst = database.open_table(&table_name).await;
+        let table_rst = database.open_table(table_name).await;
 
         deferred.settle_with(&channel, move |mut cx| {
             let table = Arc::new(Mutex::new(
@@ -209,7 +186,7 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
 
     rt.block_on(async move {
         let batch_reader: Box<dyn RecordBatchReader> = Box::new(RecordBatchBuffer::new(batches));
-        let table_rst = database.create_table(&table_name, batch_reader).await;
+        let table_rst = database.create_table(table_name, batch_reader).await;
 
         deferred.settle_with(&channel, move |mut cx| {
             let table = Arc::new(Mutex::new(
@@ -1,6 +1,6 @@
 [package]
 name = "vectordb"
-version = "0.0.1"
+version = "0.1.2"
 edition = "2021"
 description = "Serverless, low-latency vector database for AI applications"
 license = "Apache-2.0"
@@ -12,9 +12,7 @@ repository = "https://github.com/lancedb/lancedb"
 arrow-array = "37.0"
 arrow-data = "37.0"
 arrow-schema = "37.0"
-object_store = "0.5.6"
-
-lance = "0.4.17"
+lance = "0.4.3"
 tokio = { version = "1.23", features = ["rt-multi-thread"] }
 
 [dev-dependencies]
@@ -12,19 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::fs::create_dir_all;
-use std::path::Path;
-
 use arrow_array::RecordBatchReader;
-use lance::io::object_store::ObjectStore;
+use std::fs::create_dir_all;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
 
 use crate::error::Result;
 use crate::table::Table;
 
 pub struct Database {
-    object_store: ObjectStore,
-
-    pub(crate) uri: String,
+    pub(crate) path: Arc<PathBuf>,
 }
 
 const LANCE_EXTENSION: &str = "lance";
@@ -40,17 +37,12 @@
     /// # Returns
     ///
     /// * A [Database] object.
-    pub async fn connect(uri: &str) -> Result<Database> {
-        let object_store = ObjectStore::new(uri).await?;
-        if object_store.is_local() {
-            let path = Path::new(uri);
-            if !path.try_exists()? {
-                create_dir_all(&path)?;
-            }
+    pub fn connect<P: AsRef<Path>>(path: P) -> Result<Database> {
+        if !path.as_ref().try_exists()? {
+            create_dir_all(&path)?;
         }
         Ok(Database {
-            uri: uri.to_string(),
-            object_store,
+            path: Arc::new(path.as_ref().to_path_buf()),
         })
     }
@@ -59,13 +51,12 @@
     /// # Returns
     ///
     /// * A [Vec<String>] with all table names.
-    pub async fn table_names(&self) -> Result<Vec<String>> {
+    pub fn table_names(&self) -> Result<Vec<String>> {
         let f = self
-            .object_store
-            .read_dir("/")
-            .await?
-            .iter()
-            .map(|fname| Path::new(fname))
+            .path
+            .read_dir()?
+            .flatten()
+            .map(|dir_entry| dir_entry.path())
             .filter(|path| {
                 let is_lance = path
                     .extension()
@@ -85,10 +76,10 @@
 
     pub async fn create_table(
         &self,
-        name: &str,
+        name: String,
         batches: Box<dyn RecordBatchReader>,
     ) -> Result<Table> {
-        Table::create(&self.uri, name, batches).await
+        Table::create(self.path.clone(), name, batches).await
     }
 
     /// Open a table in the database.
@@ -99,8 +90,8 @@
     /// # Returns
     ///
     /// * A [Table] object.
-    pub async fn open_table(&self, name: &str) -> Result<Table> {
-        Table::open(&self.uri, name).await
+    pub async fn open_table(&self, name: String) -> Result<Table> {
+        Table::open(self.path.clone(), name).await
     }
 }
@@ -114,10 +105,10 @@
     #[tokio::test]
     async fn test_connect() {
         let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
-        let db = Database::connect(uri).await.unwrap();
+        let path_buf = tmp_dir.into_path();
+        let db = Database::connect(&path_buf);
 
-        assert_eq!(db.uri, uri);
+        assert_eq!(db.unwrap().path.as_path(), path_buf.as_path())
     }
 
     #[tokio::test]
@@ -127,16 +118,10 @@
         create_dir_all(tmp_dir.path().join("table2.lance")).unwrap();
         create_dir_all(tmp_dir.path().join("invalidlance")).unwrap();
 
-        let uri = tmp_dir.path().to_str().unwrap();
-        let db = Database::connect(uri).await.unwrap();
-        let tables = db.table_names().await.unwrap();
+        let db = Database::connect(&tmp_dir.into_path()).unwrap();
+        let tables = db.table_names().unwrap();
         assert_eq!(tables.len(), 2);
         assert!(tables.contains(&String::from("table1")));
         assert!(tables.contains(&String::from("table2")));
     }
 
     #[tokio::test]
     async fn test_connect_s3() {
         // let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
     }
 }
@@ -41,15 +41,3 @@ impl From<lance::Error> for Error {
         Self::Lance(e.to_string())
     }
 }
-
-impl From<object_store::Error> for Error {
-    fn from(e: object_store::Error) -> Self {
-        Self::IO(e.to_string())
-    }
-}
-
-impl From<object_store::path::Error> for Error {
-    fn from(e: object_store::path::Error) -> Self {
-        Self::IO(e.to_string())
-    }
-}
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::path::Path;
+use std::path::PathBuf;
 use std::sync::Arc;
 
 use arrow_array::{Float32Array, RecordBatchReader};
@@ -24,21 +24,16 @@ use crate::index::vector::VectorIndexBuilder;
 use crate::query::Query;
 
 pub const VECTOR_COLUMN_NAME: &str = "vector";
-
 pub const LANCE_FILE_EXTENSION: &str = "lance";
 
 /// A table in a LanceDB database.
 pub struct Table {
     name: String,
-    uri: String,
+    path: String,
     dataset: Arc<Dataset>,
 }
 
 impl std::fmt::Display for Table {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "Table({})", self.name)
     }
 }
 
 impl Table {
@@ -50,21 +45,18 @@
     /// # Returns
     ///
     /// * A [Table] object.
-    pub async fn open(base_uri: &str, name: &str) -> Result<Self> {
-        let path = Path::new(base_uri);
-
-        let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
-        let uri = table_uri
-            .as_path()
+    pub async fn open(base_path: Arc<PathBuf>, name: String) -> Result<Self> {
+        let ds_path = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
+        let ds_uri = ds_path
             .to_str()
-            .ok_or(Error::IO(format!("Invalid table name: {}", name)))?;
-
-        let dataset = Dataset::open(&uri).await?;
-        Ok(Table {
-            name: name.to_string(),
-            uri: uri.to_string(),
+            .ok_or(Error::IO(format!("Unable to find table {}", name)))?;
+        let dataset = Dataset::open(ds_uri).await?;
+        let table = Table {
+            name,
+            path: ds_uri.to_string(),
             dataset: Arc::new(dataset),
-        })
+        };
+        Ok(table)
     }
 
     /// Creates a new Table
@@ -79,28 +71,25 @@
     ///
     /// * A [Table] object.
     pub async fn create(
-        base_uri: &str,
-        name: &str,
+        base_path: Arc<PathBuf>,
+        name: String,
         mut batches: Box<dyn RecordBatchReader>,
     ) -> Result<Self> {
-        let base_path = Path::new(base_uri);
-        let table_uri = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
-        let uri = table_uri
-            .as_path()
+        let ds_path = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
+        let path = ds_path
            .to_str()
-            .ok_or(Error::IO(format!("Invalid table name: {}", name)))?
-            .to_string();
+            .ok_or(Error::IO(format!("Unable to find table {}", name)))?;
 
         let dataset =
-            Arc::new(Dataset::write(&mut batches, &uri, Some(WriteParams::default())).await?);
+            Arc::new(Dataset::write(&mut batches, path, Some(WriteParams::default())).await?);
         Ok(Table {
-            name: name.to_string(),
-            uri,
+            name,
+            path: path.to_string(),
             dataset,
         })
     }
 
     /// Create index on the table.
-    pub async fn create_index(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
+    pub async fn create_idx(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
         use lance::index::DatasetIndexExt;
 
         let dataset = self
@@ -136,7 +125,8 @@
         let mut params = WriteParams::default();
         params.mode = write_mode.unwrap_or(WriteMode::Append);
 
-        self.dataset = Arc::new(Dataset::write(&mut batches, &self.uri, Some(params)).await?);
+        self.dataset =
+            Arc::new(Dataset::write(&mut batches, self.path.as_str(), Some(params)).await?);
         Ok(batches.count())
     }
@@ -161,8 +151,6 @@
 
 #[cfg(test)]
 mod tests {
-    use std::sync::Arc;
-
     use arrow_array::{
         Array, FixedSizeListArray, Float32Array, Int32Array, RecordBatch, RecordBatchReader,
     };
@@ -173,52 +161,53 @@
     use lance::index::vector::ivf::IvfBuildParams;
     use lance::index::vector::pq::PQBuildParams;
     use rand::Rng;
+    use std::sync::Arc;
     use tempfile::tempdir;
 
     use super::*;
     use crate::error::Result;
     use crate::index::vector::IvfPQIndexBuilder;
     use crate::table::Table;
 
     #[tokio::test]
     async fn test_new_table_not_exists() {
         let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
+        let path_buf = tmp_dir.into_path();
 
-        let table = Table::open(&uri, "test").await;
+        let table = Table::open(Arc::new(path_buf), "test".to_string()).await;
         assert!(table.is_err());
     }
 
     #[tokio::test]
     async fn test_open() {
         let tmp_dir = tempdir().unwrap();
-        let dataset_path = tmp_dir.path().join("test.lance");
-        let uri = tmp_dir.path().to_str().unwrap();
+        let path_buf = tmp_dir.into_path();
 
         let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
-        Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
+        Dataset::write(
+            &mut batches,
+            path_buf.join("test.lance").to_str().unwrap(),
+            None,
+        )
         .await
         .unwrap();
 
-        let table = Table::open(uri, "test").await.unwrap();
+        let table = Table::open(Arc::new(path_buf), "test".to_string())
+            .await
+            .unwrap();
 
         assert_eq!(table.name, "test")
     }
 
+    #[test]
+    fn test_object_store_path() {
+        use std::path::Path as StdPath;
+        let p = StdPath::new("s3://bucket/path/to/file");
+        let c = p.join("subfile");
+        assert_eq!(c.to_str().unwrap(), "s3://bucket/path/to/file/subfile");
+    }
+
     #[tokio::test]
     async fn test_add() {
         let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
+        let path_buf = tmp_dir.into_path();
 
         let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
         let schema = batches.schema().clone();
-        let mut table = Table::create(&uri, "test", batches).await.unwrap();
+        let mut table = Table::create(Arc::new(path_buf), "test".to_string(), batches)
+            .await
+            .unwrap();
         assert_eq!(table.count_rows().await.unwrap(), 10);
 
         let new_batches: Box<dyn RecordBatchReader> =
@@ -236,11 +225,13 @@
     #[tokio::test]
     async fn test_add_overwrite() {
         let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
+        let path_buf = tmp_dir.into_path();
 
         let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
         let schema = batches.schema().clone();
-        let mut table = Table::create(uri, "test", batches).await.unwrap();
+        let mut table = Table::create(Arc::new(path_buf), "test".to_string(), batches)
+            .await
+            .unwrap();
         assert_eq!(table.count_rows().await.unwrap(), 10);
 
         let new_batches: Box<dyn RecordBatchReader> =
@@ -261,16 +252,21 @@
     #[tokio::test]
     async fn test_search() {
         let tmp_dir = tempdir().unwrap();
-        let dataset_path = tmp_dir.path().join("test.lance");
-        let uri = tmp_dir.path().to_str().unwrap();
+        let path_buf = tmp_dir.into_path();
 
         let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
-        Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
+        Dataset::write(
+            &mut batches,
+            path_buf.join("test.lance").to_str().unwrap(),
+            None,
+        )
        .await
        .unwrap();
 
-        let table = Table::open(uri, "test").await.unwrap();
+        let table = Table::open(Arc::new(path_buf), "test".to_string())
+            .await
+            .unwrap();
 
         let vector = Float32Array::from_iter_values([0.1, 0.2]);
         let query = table.search(vector.clone());
         assert_eq!(vector, query.query_vector);
@@ -295,7 +291,7 @@
         use arrow_array::Float32Array;
 
         let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
+        let path_buf = tmp_dir.into_path();
 
         let dimension = 16;
         let schema = Arc::new(ArrowSchema::new(vec![Field::new(
@@ -322,7 +318,9 @@
         .unwrap()]);
 
         let reader: Box<dyn RecordBatchReader + Send> = Box::new(batches);
-        let mut table = Table::create(uri, "test", reader).await.unwrap();
+        let mut table = Table::create(Arc::new(path_buf), "test".to_string(), reader)
+            .await
+            .unwrap();
 
         let mut i = IvfPQIndexBuilder::new();
 
@@ -332,7 +330,7 @@
             .ivf_params(IvfBuildParams::new(256))
             .pq_params(PQBuildParams::default());
 
-        table.create_index(index_builder).await.unwrap();
+        table.create_idx(index_builder).await.unwrap();
 
         assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1);
         assert_eq!(table.count_rows().await.unwrap(), 512);