Compare commits

...

29 Commits

Author SHA1 Message Date
Lance Release
0e4050e706 [python] Bump version: 0.1.14 → 0.1.15 2023-07-25 18:58:44 +00:00
Rob Meng
147796ffcd bump lance version for vectordb, fix minor bugs in lancedb remote client (#365) 2023-07-24 21:30:57 -04:00
Lance Release
6fd465ceef Updating package-lock.json 2023-07-24 20:02:35 +00:00
Lance Release
e2e5a0fb83 Updating package-lock.json 2023-07-24 19:27:32 +00:00
Lance Release
ff8d5a6d51 Bump version: 0.1.17 → 0.1.18 2023-07-24 19:27:17 +00:00
Will Jones
8829988ada ci: build node in manylinux docker container (#350)
Closes #359

TODO:
 * [x] test in a sample of Linux distro docker containers
2023-07-24 11:31:47 -07:00
gsilvestrin
80a32be121 bugfix(node): make WriteMode optional when specifying embeddings (#336) 2023-07-24 11:26:43 -07:00
Rob Meng
8325979bb8 dont print apikey in remote client toString, add hostoverride to python client (#353) 2023-07-23 18:44:00 -04:00
lindt
ed5ff5a482 [docs] typo fix (#352)
Co-authored-by: Stefan Rohe <think@eduroam152-169.nbk.vse.cz>
2023-07-22 11:18:58 +02:00
Lance Release
2c9371dcc4 Updating package-lock.json 2023-07-21 23:18:22 +00:00
Lance Release
6d5621da4a Updating package-lock.json 2023-07-21 22:39:21 +00:00
Lance Release
380c1572f3 Bump version: 0.1.16 → 0.1.17 2023-07-21 22:39:06 +00:00
gsilvestrin
4383848d53 feat(node): Add Linux ARM build (#348) 2023-07-21 15:33:02 -07:00
gsilvestrin
473c43860c bugfix: Set Github token when pushing changes (#351) 2023-07-21 15:31:44 -07:00
gsilvestrin
17cf244e53 Updating package-lock.json (#347) 2023-07-20 14:44:10 -07:00
Leon Yee
0b60694df4 [docs] typo fix (#346) 2023-07-20 14:33:56 -07:00
Lance Release
600da476e8 Updating package-lock.json 2023-07-20 20:24:54 +00:00
Lance Release
458217783c Bump version: 0.1.15 → 0.1.16 2023-07-20 20:24:37 +00:00
gsilvestrin
21b1a71a6b bugfix(node): Don't persist credentials on make-release-commit.yml (#345) 2023-07-20 13:24:06 -07:00
gsilvestrin
2d899675e8 bugfix(node): Make release task can't push to repo (#344) 2023-07-20 13:15:29 -07:00
Lance Release
1cbfc1bbf4 [python] Bump version: 0.1.13 → 0.1.14 2023-07-20 20:06:15 +00:00
gsilvestrin
a2bb497135 feat(node) Move native packages to @lancedb NPM org (#341)
- Move native packages to @lancedb org
- Move package-lock.json update to a reusable action and created a target to run it manually.
2023-07-20 12:54:39 -07:00
Will Jones
0cf40c8da3 fix: only use util function to build filesystem (#339) 2023-07-20 10:41:50 -07:00
Rob Meng
8233c689c3 fix remote SDK (#342) 2023-07-20 02:01:13 -04:00
gsilvestrin
6e24e731b8 Updating package-lock.json (#338) 2023-07-18 21:10:18 -07:00
Lance Release
f4ce86e12c [python] Bump version: 0.1.12 → 0.1.13 2023-07-19 03:09:50 +00:00
Lance Release
0664eaec82 Bump version: 0.1.14 → 0.1.15 2023-07-19 02:54:10 +00:00
Lei Xu
63acdc2069 [Python] Support pydantic v1 as well (#337)
Support both Pydantic v1 and v2 (breaking changes)
2023-07-18 19:53:09 -07:00
Rob Meng
a636bb1075 add support for host override (#335) 2023-07-18 21:21:39 -04:00
38 changed files with 768 additions and 333 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.14
current_version = 0.1.18
commit = True
message = Bump version: {current_version} → {new_version}
tag = True

View File

@@ -25,38 +25,35 @@ jobs:
bump-version:
runs-on: ubuntu-latest
steps:
- name: Check out main
uses: actions/checkout@v3
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- name: Set git configs for bumpversion
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Bump version, create tag and commit
run: |
pip install bump2version
bumpversion --verbose ${{ inputs.part }}
- name: Update package-lock.json file
run: |
npm install
git add package-lock.json
# Add this change to the commit created by bumpversion
git commit --amend --no-edit
working-directory: node
- name: Push new version and tag
if: ${{ inputs.dry_run }} == "false"
uses: ad-m/github-push-action@master
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
branch: main
tags: true
- name: Check out main
uses: actions/checkout@v3
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- name: Set git configs for bumpversion
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Bump version, create tag and commit
run: |
pip install bump2version
bumpversion --verbose ${{ inputs.part }}
- name: Push new version and tag
  # The comparison has to live inside the expression braces. Written as
  # `${{ inputs.dry_run }} == "false"`, only the input is substituted and the
  # remaining text is a non-empty string, which is always truthy, so the push
  # ran even for dry runs. Both the boolean and the string form of the input
  # are accepted because the input declaration is not visible from here.
  if: ${{ inputs.dry_run == false || inputs.dry_run == 'false' }}
  uses: ad-m/github-push-action@master
  with:
    github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
    branch: main
    tags: true
# Regenerate node/package-lock.json only when the release is actually pushed.
- uses: ./.github/workflows/update_package_lock
  if: ${{ inputs.dry_run == false || inputs.dry_run == 'false' }}
  with:
    github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}

View File

@@ -70,7 +70,7 @@ jobs:
npm run tsc
npm run build
npm run pack-build
npm install --no-save ./dist/vectordb-*.tgz
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test
@@ -101,7 +101,7 @@ jobs:
npm run tsc
npm run build
npm run pack-build
npm install --no-save ./dist/vectordb-*.tgz
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test

View File

@@ -46,75 +46,51 @@ jobs:
matrix:
target: [x86_64-apple-darwin, aarch64-apple-darwin]
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install system dependencies
run: brew install protobuf
- name: Install npm dependencies
run: |
cd node
npm ci
- name: Install rustup target
if: ${{ matrix.target == 'aarch64-apple-darwin' }}
run: rustup target add aarch64-apple-darwin
- name: Build MacOS native node modules
run: bash ci/build_macos_artifacts.sh ${{ matrix.target }}
- name: Upload Darwin Artifacts
uses: actions/upload-artifact@v3
with:
name: darwin-native
path: |
node/dist/vectordb-darwin*.tgz
- name: Checkout
uses: actions/checkout@v3
- name: Install system dependencies
run: brew install protobuf
- name: Install npm dependencies
run: |
cd node
npm ci
- name: Install rustup target
if: ${{ matrix.target == 'aarch64-apple-darwin' }}
run: rustup target add aarch64-apple-darwin
- name: Build MacOS native node modules
run: bash ci/build_macos_artifacts.sh ${{ matrix.target }}
- name: Upload Darwin Artifacts
uses: actions/upload-artifact@v3
with:
name: native-darwin
path: |
node/dist/lancedb-vectordb-darwin*.tgz
node-linux:
name: node-linux (${{ matrix.arch}}-unknown-linux-${{ matrix.libc }})
runs-on: ubuntu-latest
# Display name for each matrix entry; the target triple is wrapped in balanced parentheses.
name: node-linux (${{ matrix.config.arch }}-unknown-linux-gnu)
runs-on: ${{ matrix.config.runner }}
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
strategy:
fail-fast: false
matrix:
libc:
- gnu
# TODO: re-enable musl once we have refactored to pre-built containers
# Right now we have to build node from source which is too expensive.
# - musl
arch:
- x86_64
# Building on aarch64 is too slow for now
# - aarch64
config:
- arch: x86_64
runner: ubuntu-latest
- arch: aarch64
runner: buildjet-4vcpu-ubuntu-2204-arm
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Change owner to root (for npm)
# The docker container is run as root, so we need the files to be owned by root
# Otherwise npm is a nightmare: https://github.com/npm/cli/issues/3773
run: sudo chown -R root:root .
- name: Set up QEMU
if: ${{ matrix.arch == 'aarch64' }}
uses: docker/setup-qemu-action@v2
with:
platforms: arm64
- name: Build Linux GNU native node modules
if: ${{ matrix.libc == 'gnu' }}
run: |
docker run \
-v $(pwd):/io -w /io \
rust:1.70-bookworm \
bash ci/build_linux_artifacts.sh ${{ matrix.arch }}-unknown-linux-gnu
- name: Build musl Linux native node modules
if: ${{ matrix.libc == 'musl' }}
run: |
docker run --platform linux/arm64/v8 \
-v $(pwd):/io -w /io \
quay.io/pypa/musllinux_1_1_${{ matrix.arch }} \
bash ci/build_linux_artifacts.sh ${{ matrix.arch }}-unknown-linux-musl
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v3
with:
name: linux-native
path: |
node/dist/vectordb-linux*.tgz
- name: Checkout
uses: actions/checkout@v3
- name: Build Linux Artifacts
run: |
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }}
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v3
with:
name: native-linux
path: |
node/dist/lancedb-vectordb-linux*.tgz
node-windows:
runs-on: windows-2022
@@ -145,12 +121,12 @@ jobs:
- name: Upload Windows Artifacts
uses: actions/upload-artifact@v3
with:
name: windows-native
name: native-windows
path: |
node/dist/vectordb-win32*.tgz
node/dist/lancedb-vectordb-win32*.tgz
release:
needs: [node, node-macos, node-linux]
needs: [node, node-macos, node-linux, node-windows]
runs-on: ubuntu-latest
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -170,3 +146,18 @@ jobs:
for filename in *.tgz; do
npm publish $filename
done
update-package-lock:
needs: [release]
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- uses: ./.github/workflows/update_package_lock
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}

View File

@@ -0,0 +1,33 @@
# Composite action: runs `npm install` in ./node, commits the resulting
# package-lock.json as "Lance Release" and pushes the commit to main.
# It has no checkout step of its own, so the calling job must check out the
# repository first.
name: update_package_lock
description: "Update node's package.lock"
inputs:
  github_token:
    required: true
    description: "github token for the repo"
  dry_run:
    required: false
    default: "false"
    description: 'Set to "true" to create the commit locally without pushing it'
runs:
  using: "composite"
  steps:
    - uses: actions/setup-node@v3
      with:
        node-version: 20
    - name: Set git configs
      shell: bash
      run: |
        git config user.name 'Lance Release'
        git config user.email 'lance-dev@lancedb.com'
    - name: Update package-lock.json file
      working-directory: ./node
      run: |
        npm install
        git add package-lock.json
        git commit -m "Updating package-lock.json"
      shell: bash
    - name: Push changes
      # The comparison must sit inside the expression braces; the previous
      # `${{ inputs.dry_run }} == "false"` form always evaluated to a non-empty
      # (truthy) string and referenced an input this action never declared.
      # Callers that do not pass dry_run get the "false" default and still push.
      if: ${{ inputs.dry_run == 'false' }}
      uses: ad-m/github-push-action@master
      with:
        github_token: ${{ inputs.github_token }}
        branch: main
        tags: true

View File

@@ -0,0 +1,19 @@
# Manually triggered workflow that checks out main and delegates to the local
# update_package_lock composite action to refresh node/package-lock.json.
name: Update package-lock.json
on:
# Started by hand only; no other trigger is configured.
workflow_dispatch:
jobs:
# NOTE(review): the job id is "publish" although nothing is published here - confirm whether a rename is wanted.
publish:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
ref: main
# Credentials are not stored in the checkout; the action pushes with the token passed to it below.
persist-credentials: false
fetch-depth: 0
lfs: true
- uses: ./.github/workflows/update_package_lock
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}

View File

@@ -6,11 +6,11 @@ members = [
resolver = "2"
[workspace.dependencies]
lance = "=0.5.8"
lance = "=0.5.9"
arrow-array = "42.0"
arrow-data = "42.0"
arrow-schema = "42.0"
arrow-ipc = "42.0"
half = { "version" = "2.2.1", default-features = false }
half = { "version" = "=2.2.1", default-features = false }
object_store = "0.6.1"

83
ci/build_linux_artifacts.sh Normal file → Executable file
View File

@@ -1,72 +1,19 @@
#!/bin/bash
# Builds the Linux artifacts (node binaries).
# Usage: ./build_linux_artifacts.sh [target]
# Targets supported:
# - x86_64-unknown-linux-gnu:centos
# - aarch64-unknown-linux-gnu:centos
# - aarch64-unknown-linux-musl
# - x86_64-unknown-linux-musl
# TODO: refactor this into a Docker container we can pull
set -e
ARCH=${1:-x86_64}
setup_dependencies() {
echo "Installing system dependencies..."
if [[ $1 == *musl ]]; then
# musllinux
apk add openssl-dev
else
# rust / debian
apt update
apt install -y libssl-dev protobuf-compiler
fi
}
# We pass down the current user so that when we later mount the local files
# into the container, the files are accessible by the current user.
pushd ci/manylinux_node
docker build \
-t lancedb-node-manylinux \
--build-arg="ARCH=$ARCH" \
--build-arg="DOCKER_USER=$(id -u)" \
--progress=plain \
.
popd
install_node() {
echo "Installing node..."
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
source "$HOME"/.bashrc
if [[ $1 == *musl ]]; then
# This node version is 15, we need 16 or higher:
# apk add nodejs-current npm
# So instead we install from source (nvm doesn't provide binaries for musl):
nvm install -s --no-progress 17
else
nvm install --no-progress 17 # latest that supports glibc 2.17
fi
}
build_node_binary() {
echo "Building node library for $1..."
pushd node
npm ci
if [[ $1 == *musl ]]; then
# This is needed for cargo to allow build cdylibs with musl
export RUSTFLAGS="-C target-feature=-crt-static"
fi
# Cargo can run out of memory while pulling dependencies, especially when running
# in QEMU. This is a workaround for that.
export CARGO_NET_GIT_FETCH_WITH_CLI=true
# We don't pass in target, since the native target here already matches
# We need to pass OPENSSL_LIB_DIR and OPENSSL_INCLUDE_DIR for static build to work https://github.com/sfackler/rust-openssl/issues/877
OPENSSL_STATIC=1 OPENSSL_LIB_DIR=/usr/lib/x86_64-linux-gnu OPENSSL_INCLUDE_DIR=/usr/include/openssl/ npm run build-release
npm run pack-build
popd
}
TARGET=${1:-x86_64-unknown-linux-gnu}
# Others:
# aarch64-unknown-linux-gnu
# x86_64-unknown-linux-musl
# aarch64-unknown-linux-musl
setup_dependencies $TARGET
install_node $TARGET
build_node_binary $TARGET
docker run \
-v $(pwd):/io -w /io \
lancedb-node-manylinux \
bash ci/manylinux_node/build.sh $ARCH

View File

@@ -0,0 +1,31 @@
# Many linux dockerfile with Rust, Node, and Lance dependencies installed.
# This container allows building the node modules native libraries in an
# environment with a very old glibc, so that we are compatible with a wide
# range of linux distributions.
#
# Build args:
#   ARCH        - suffix of the manylinux2014 base image; also forwarded to the
#                 install scripts below.
#   DOCKER_USER - uid given to the unprivileged build user
#                 (ci/build_linux_artifacts.sh passes `$(id -u)`).
ARG ARCH=x86_64
FROM quay.io/pypa/manylinux2014_${ARCH}
# Declared again because an ARG defined before FROM is not available to the
# instructions that follow FROM.
ARG ARCH=x86_64
# NOTE(review): this value is handed to `adduser --uid` below, so the
# non-numeric default presumably only works when the build arg is supplied -
# confirm.
ARG DOCKER_USER=default_user
# Install static openssl
# (stdout of the install script is discarded)
COPY install_openssl.sh install_openssl.sh
RUN ./install_openssl.sh ${ARCH} > /dev/null
# Protobuf is also installed as root.
COPY install_protobuf.sh install_protobuf.sh
RUN ./install_protobuf.sh ${ARCH}
ENV DOCKER_USER=${DOCKER_USER}
# Create a group and user
RUN echo ${ARCH} && adduser --user-group --create-home --uid ${DOCKER_USER} build_user
# We switch to the user to install Rust and Node, since those like to be
# installed at the user level.
USER ${DOCKER_USER}
# The script is copied into the user's home directory and run from there.
COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
RUN cp /prepare_manylinux_node.sh $HOME/ && \
cd $HOME && \
./prepare_manylinux_node.sh ${ARCH}

19
ci/manylinux_node/build.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/bin/bash
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
#
# Usage: build.sh [ARCH]
#   ARCH  Architecture being built; defaults to x86_64. Any other value
#         selects the non-x86_64 OpenSSL library directory.
set -e

ARCH=${1:-x86_64}

# Point the build at the OpenSSL files under /usr/local. The library directory
# is lib64 on x86_64 and lib for everything else.
case "$ARCH" in
    x86_64) export OPENSSL_LIB_DIR=/usr/local/lib64/ ;;
    *)      export OPENSSL_LIB_DIR=/usr/local/lib/ ;;
esac
export OPENSSL_STATIC=1
export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl

# Load the user's shell setup before running npm. Quoted so that a home
# directory containing whitespace is not word-split.
source "$HOME/.bashrc"

cd node
npm ci
npm run build-release
npm run pack-build

View File

@@ -0,0 +1,26 @@
#!/bin/bash
# Builds openssl from source so we can statically link to it
# this is to avoid the error we get with the system installation:
# /usr/bin/ld: <library>: version node not found for symbol SSLeay@@OPENSSL_1.0.1
# /usr/bin/ld: failed to set dynamic section sizes: Bad value
#
# Usage: install_openssl.sh ARCH
#   ARCH  Values starting with "x86_64" configure linux-x86_64; anything else
#         configures linux-aarch64.
set -e

# Only the tagged release is built, so a shallow clone avoids downloading the
# full project history.
git clone -b OpenSSL_1_1_1u \
    --single-branch \
    --depth 1 \
    https://github.com/openssl/openssl.git

pushd openssl

if [[ $1 == x86_64* ]]; then
    ARCH=linux-x86_64
else
    # Every non-x86_64 argument is treated as aarch64
    ARCH=linux-aarch64
fi

# no-shared: build static libraries only.
./Configure no-shared "$ARCH"
# Use all available cores; a serial build of OpenSSL is needlessly slow.
make -j"$(nproc)"
make install

# Restore the caller's working directory.
popd

View File

@@ -0,0 +1,15 @@
#!/bin/bash
# Installs protobuf compiler. Should be run as root.
#
# Usage: install_protobuf.sh ARCH
#   ARCH  Values starting with "x86_64" download the x86_64 release asset;
#         anything else downloads the aarch_64 asset.
set -e

if [[ $1 == x86_64* ]]; then
    ARCH=x86_64
else
    # Every non-x86_64 argument selects the aarch_64 asset
    ARCH=aarch_64
fi

PB_REL=https://github.com/protocolbuffers/protobuf/releases
PB_VERSION=23.1
PB_ZIP="protoc-$PB_VERSION-linux-$ARCH.zip"

# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page, so a bad URL stops the script here rather than in unzip.
curl --fail -LO "$PB_REL/download/v$PB_VERSION/$PB_ZIP"
unzip "$PB_ZIP" -d /usr/local

View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Installs Node (through nvm) and Rust (through rustup) for the current user.
set -e

# Installs nvm, loads it into this shell and installs Node 16.
install_node() {
    echo "Installing node..."

    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
    # Re-read .bashrc so the nvm command is available below.
    source "$HOME"/.bashrc

    nvm install --no-progress 16
}

# Installs the Rust toolchain non-interactively (-y) with rustup.
install_rust() {
    echo "Installing rust..."
    curl https://sh.rustup.rs -sSf | bash -s -- -y
    # The Dockerfile runs this script as the unprivileged build user, so cargo
    # lives under that user's home directory rather than /root.
    export PATH="$PATH:$HOME/.cargo/bin"
}

install_node
install_rust

View File

@@ -1,6 +1,6 @@
# Vector embedding search using TransformersJS
## Embed and query data from LacneDB using TransformersJS
## Embed and query data from LanceDB using TransformersJS
<img id="splash" width="400" alt="transformersjs" src="https://github.com/lancedb/lancedb/assets/43097991/88a31e30-3d6f-4eef-9216-4b7c688f1b4f">

View File

@@ -181,7 +181,7 @@
"id": "c3852dd3",
"metadata": {},
"source": [
"# Generating emebeddings from our docs\n",
"# Generating embeddings from our docs\n",
"\n",
"Now that we have our raw documents loaded, we need to pre-process them to generate embeddings:"
]

View File

@@ -17,7 +17,7 @@ const { currentTarget } = require('@neon-rs/load');
let nativeLib;
try {
nativeLib = require(`vectordb-${currentTarget()}`);
nativeLib = require(`@lancedb/vectordb-${currentTarget()}`);
} catch (e) {
try {
// Might be developing locally, so try that. But don't expose that error
@@ -25,7 +25,7 @@ try {
nativeLib = require("./index.node");
} catch {
throw new Error(`vectordb: failed to load native library.
You may need to run \`npm install vectordb-${currentTarget()}\`.
You may need to run \`npm install @lancedb/vectordb-${currentTarget()}\`.
If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues

332
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.1.14",
"version": "0.1.18",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.1.14",
"version": "0.1.18",
"cpu": [
"x64",
"arm64"
@@ -24,7 +24,7 @@
"axios": "^1.4.0"
},
"devDependencies": {
"@neon-rs/cli": "^0.0.74",
"@neon-rs/cli": "^0.0.160",
"@types/chai": "^4.3.4",
"@types/chai-as-promised": "^7.1.5",
"@types/mocha": "^10.0.1",
@@ -51,11 +51,11 @@
"typescript": "*"
},
"optionalDependencies": {
"vectordb-darwin-arm64": "0.1.14",
"vectordb-darwin-x64": "0.1.14",
"vectordb-linux-arm64-gnu": "0.1.14",
"vectordb-linux-x64-gnu": "0.1.14",
"vectordb-win32-x64-msvc": "0.1.14"
"@lancedb/vectordb-darwin-arm64": "0.1.18",
"@lancedb/vectordb-darwin-x64": "0.1.18",
"@lancedb/vectordb-linux-arm64-gnu": "0.1.18",
"@lancedb/vectordb-linux-x64-gnu": "0.1.18",
"@lancedb/vectordb-win32-x64-msvc": "0.1.18"
}
},
"node_modules/@apache-arrow/ts": {
@@ -85,6 +85,97 @@
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz",
"integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg=="
},
"node_modules/@cargo-messages/android-arm-eabi": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/android-arm-eabi/-/android-arm-eabi-0.0.160.tgz",
"integrity": "sha512-PTgCEmBHEPKJbxwlHVXB3aGES+NqpeBvn6hJNYWIkET3ZQCSJnScMlIDQXEkWndK7J+hW3Or3H32a93B/MbbfQ==",
"cpu": [
"arm"
],
"dev": true,
"optional": true,
"os": [
"android"
]
},
"node_modules/@cargo-messages/darwin-arm64": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/darwin-arm64/-/darwin-arm64-0.0.160.tgz",
"integrity": "sha512-YSVUuc8TUTi/XmZVg9KrH0bDywKLqC1zeTyZYAYDDmqVDZW9KeTnbBUECKRs56iyHeO+kuEkVW7MKf7j2zb/FA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@cargo-messages/darwin-x64": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/darwin-x64/-/darwin-x64-0.0.160.tgz",
"integrity": "sha512-U+YlAR+9tKpBljnNPWMop5YhvtwfIPQSAaUYN2llteC7ZNU5/cv8CGT1vm7uFNxr2LeGuAtRbzIh2gUmTV8mng==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@cargo-messages/linux-arm-gnueabihf": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/linux-arm-gnueabihf/-/linux-arm-gnueabihf-0.0.160.tgz",
"integrity": "sha512-wqAelTzVv1E7Ls4aviqUbem5xjzCaJQxQtVnLhv6pf1k0UyEHCS2WdufFFmWcojGe7QglI4uve3KTe01MKYj0A==",
"cpu": [
"arm"
],
"dev": true,
"optional": true,
"os": [
"linux"
]
},
"node_modules/@cargo-messages/linux-x64-gnu": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/linux-x64-gnu/-/linux-x64-gnu-0.0.160.tgz",
"integrity": "sha512-LQ6e7O7YYkWfDNIi/53q2QG/+lZok72LOG+NKDVCrrY4TYUcrTqWAybOV6IlkVntKPnpx8YB95umSQGeVuvhpQ==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"linux"
]
},
"node_modules/@cargo-messages/win32-arm64-msvc": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/win32-arm64-msvc/-/win32-arm64-msvc-0.0.160.tgz",
"integrity": "sha512-VDMBhyun02gIDwmEhkYP1W9Z0tYqn4drgY5Iua1qV2tYOU58RVkWhzUYxM9rzYbnwKZlltgM46J/j5QZ3VaFrA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"win32"
]
},
"node_modules/@cargo-messages/win32-x64-msvc": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/win32-x64-msvc/-/win32-x64-msvc-0.0.160.tgz",
"integrity": "sha512-vnoglDxF6zj0W/Co9D0H/bgnrhUuO5EumIf9v3ujLtBH94rAX11JsXh/FgC/8wQnQSsLyWSq70YxNS2wdETxjA==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"win32"
]
},
"node_modules/@cspotcode/source-map-support": {
"version": "0.8.1",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
@@ -223,13 +314,82 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.18.tgz",
"integrity": "sha512-vu8MCFgaAAGmTJF+4RaoApROMpRVVgrCk+V9my4adAfWkkXbSmtxiDgiIwwL1VqdGb8UwzGn3kVbNW7idE1ojA==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.18.tgz",
"integrity": "sha512-ZU30bd6frRyKJ515ow972PlqO2wIiNT4Ohor9+KbUwl/VKDyAwKOKG8cWhRJXTxk0k1oqpiJ6+Q28TcYJ0sSAw==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.18.tgz",
"integrity": "sha512-2UroC026bUYwyciSRonYlXei0SoYbKgfWpozxYOu7GgBAV2CQQtaAPgWJTEl6ZiCNeBmBTx+j0h3+ydUfZA73Q==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.18.tgz",
"integrity": "sha512-DoQBskl22JAJFZh219ZOJ6o+f1niTZp0qRYngHa/kTIpLKzHWQ0OTtMCz32VBAjAsKjSLNxHE8rrT/S6tvS7KQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.18.tgz",
"integrity": "sha512-a/kUM3V6rWuXS80pPECYxKfCUAnq56Of/GPCvnAkpk9C9ldyX10iff4aA6DiPHjEk9V2ytqDfJKl9N3QcMLKLA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
]
},
"node_modules/@neon-rs/cli": {
"version": "0.0.74",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
"integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
"integrity": "sha512-GQjzHPJVTOARbX3nP/fAWqBq7JlQ8XgfYlCa+iwzIXf0LC1EyfJTX+vqGD/36b9lKoyY01Z/aDUB9o/qF6ztHA==",
"dev": true,
"bin": {
"neon": "index.js"
},
"optionalDependencies": {
"@cargo-messages/android-arm-eabi": "0.0.160",
"@cargo-messages/darwin-arm64": "0.0.160",
"@cargo-messages/darwin-x64": "0.0.160",
"@cargo-messages/linux-arm-gnueabihf": "0.0.160",
"@cargo-messages/linux-x64-gnu": "0.0.160",
"@cargo-messages/win32-arm64-msvc": "0.0.160",
"@cargo-messages/win32-x64-msvc": "0.0.160"
}
},
"node_modules/@neon-rs/load": {
@@ -4297,42 +4457,6 @@
"integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==",
"dev": true
},
"node_modules/vectordb-darwin-arm64": {
"version": "0.1.14",
"resolved": "https://registry.npmjs.org/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.14.tgz",
"integrity": "sha512-5doSFMUR4scxseo73thCxScmO3Wpb+cqPsIa7+2uneTEtBSViMbkw/1mGTC+rV4NTCnxhoiqHk9pJzZVeDMkPg==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/vectordb-darwin-x64": {
"version": "0.1.14",
"resolved": "https://registry.npmjs.org/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.14.tgz",
"integrity": "sha512-x+qVaKNhAG65HdENL6GRJjxl1hZ7erRm3a2rhplyYoQyzuRPPBILeWzxkE01G1fb0+47dehe7Q4f/8BDaghcCQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/vectordb-linux-x64-gnu": {
"version": "0.1.14",
"resolved": "https://registry.npmjs.org/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.14.tgz",
"integrity": "sha512-hvA2YYwTZK92k6nPH99Jn5N0CwagDOdnwMmjtCpzFOEYK7dY/2kcTOoQNlBwwNP9MYvgN6jdFD/Cwkih1X/qjA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/vscode-oniguruma": {
"version": "1.7.0",
"resolved": "https://registry.npmjs.org/vscode-oniguruma/-/vscode-oniguruma-1.7.0.tgz",
@@ -4578,6 +4702,55 @@
}
}
},
"@cargo-messages/android-arm-eabi": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/android-arm-eabi/-/android-arm-eabi-0.0.160.tgz",
"integrity": "sha512-PTgCEmBHEPKJbxwlHVXB3aGES+NqpeBvn6hJNYWIkET3ZQCSJnScMlIDQXEkWndK7J+hW3Or3H32a93B/MbbfQ==",
"dev": true,
"optional": true
},
"@cargo-messages/darwin-arm64": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/darwin-arm64/-/darwin-arm64-0.0.160.tgz",
"integrity": "sha512-YSVUuc8TUTi/XmZVg9KrH0bDywKLqC1zeTyZYAYDDmqVDZW9KeTnbBUECKRs56iyHeO+kuEkVW7MKf7j2zb/FA==",
"dev": true,
"optional": true
},
"@cargo-messages/darwin-x64": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/darwin-x64/-/darwin-x64-0.0.160.tgz",
"integrity": "sha512-U+YlAR+9tKpBljnNPWMop5YhvtwfIPQSAaUYN2llteC7ZNU5/cv8CGT1vm7uFNxr2LeGuAtRbzIh2gUmTV8mng==",
"dev": true,
"optional": true
},
"@cargo-messages/linux-arm-gnueabihf": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/linux-arm-gnueabihf/-/linux-arm-gnueabihf-0.0.160.tgz",
"integrity": "sha512-wqAelTzVv1E7Ls4aviqUbem5xjzCaJQxQtVnLhv6pf1k0UyEHCS2WdufFFmWcojGe7QglI4uve3KTe01MKYj0A==",
"dev": true,
"optional": true
},
"@cargo-messages/linux-x64-gnu": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/linux-x64-gnu/-/linux-x64-gnu-0.0.160.tgz",
"integrity": "sha512-LQ6e7O7YYkWfDNIi/53q2QG/+lZok72LOG+NKDVCrrY4TYUcrTqWAybOV6IlkVntKPnpx8YB95umSQGeVuvhpQ==",
"dev": true,
"optional": true
},
"@cargo-messages/win32-arm64-msvc": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/win32-arm64-msvc/-/win32-arm64-msvc-0.0.160.tgz",
"integrity": "sha512-VDMBhyun02gIDwmEhkYP1W9Z0tYqn4drgY5Iua1qV2tYOU58RVkWhzUYxM9rzYbnwKZlltgM46J/j5QZ3VaFrA==",
"dev": true,
"optional": true
},
"@cargo-messages/win32-x64-msvc": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/win32-x64-msvc/-/win32-x64-msvc-0.0.160.tgz",
"integrity": "sha512-vnoglDxF6zj0W/Co9D0H/bgnrhUuO5EumIf9v3ujLtBH94rAX11JsXh/FgC/8wQnQSsLyWSq70YxNS2wdETxjA==",
"dev": true,
"optional": true
},
"@cspotcode/source-map-support": {
"version": "0.8.1",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
@@ -4678,11 +4851,50 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"@lancedb/vectordb-darwin-arm64": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.18.tgz",
"integrity": "sha512-vu8MCFgaAAGmTJF+4RaoApROMpRVVgrCk+V9my4adAfWkkXbSmtxiDgiIwwL1VqdGb8UwzGn3kVbNW7idE1ojA==",
"optional": true
},
"@lancedb/vectordb-darwin-x64": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.18.tgz",
"integrity": "sha512-ZU30bd6frRyKJ515ow972PlqO2wIiNT4Ohor9+KbUwl/VKDyAwKOKG8cWhRJXTxk0k1oqpiJ6+Q28TcYJ0sSAw==",
"optional": true
},
"@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.18.tgz",
"integrity": "sha512-2UroC026bUYwyciSRonYlXei0SoYbKgfWpozxYOu7GgBAV2CQQtaAPgWJTEl6ZiCNeBmBTx+j0h3+ydUfZA73Q==",
"optional": true
},
"@lancedb/vectordb-linux-x64-gnu": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.18.tgz",
"integrity": "sha512-DoQBskl22JAJFZh219ZOJ6o+f1niTZp0qRYngHa/kTIpLKzHWQ0OTtMCz32VBAjAsKjSLNxHE8rrT/S6tvS7KQ==",
"optional": true
},
"@lancedb/vectordb-win32-x64-msvc": {
"version": "0.1.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.18.tgz",
"integrity": "sha512-a/kUM3V6rWuXS80pPECYxKfCUAnq56Of/GPCvnAkpk9C9ldyX10iff4aA6DiPHjEk9V2ytqDfJKl9N3QcMLKLA==",
"optional": true
},
"@neon-rs/cli": {
"version": "0.0.74",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
"integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
"dev": true
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
"integrity": "sha512-GQjzHPJVTOARbX3nP/fAWqBq7JlQ8XgfYlCa+iwzIXf0LC1EyfJTX+vqGD/36b9lKoyY01Z/aDUB9o/qF6ztHA==",
"dev": true,
"requires": {
"@cargo-messages/android-arm-eabi": "0.0.160",
"@cargo-messages/darwin-arm64": "0.0.160",
"@cargo-messages/darwin-x64": "0.0.160",
"@cargo-messages/linux-arm-gnueabihf": "0.0.160",
"@cargo-messages/linux-x64-gnu": "0.0.160",
"@cargo-messages/win32-arm64-msvc": "0.0.160",
"@cargo-messages/win32-x64-msvc": "0.0.160"
}
},
"@neon-rs/load": {
"version": "0.0.74",
@@ -7638,24 +7850,6 @@
"integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==",
"dev": true
},
"vectordb-darwin-arm64": {
"version": "0.1.14",
"resolved": "https://registry.npmjs.org/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.14.tgz",
"integrity": "sha512-5doSFMUR4scxseo73thCxScmO3Wpb+cqPsIa7+2uneTEtBSViMbkw/1mGTC+rV4NTCnxhoiqHk9pJzZVeDMkPg==",
"optional": true
},
"vectordb-darwin-x64": {
"version": "0.1.14",
"resolved": "https://registry.npmjs.org/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.14.tgz",
"integrity": "sha512-x+qVaKNhAG65HdENL6GRJjxl1hZ7erRm3a2rhplyYoQyzuRPPBILeWzxkE01G1fb0+47dehe7Q4f/8BDaghcCQ==",
"optional": true
},
"vectordb-linux-x64-gnu": {
"version": "0.1.14",
"resolved": "https://registry.npmjs.org/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.14.tgz",
"integrity": "sha512-hvA2YYwTZK92k6nPH99Jn5N0CwagDOdnwMmjtCpzFOEYK7dY/2kcTOoQNlBwwNP9MYvgN6jdFD/Cwkih1X/qjA==",
"optional": true
},
"vscode-oniguruma": {
"version": "1.7.0",
"resolved": "https://registry.npmjs.org/vscode-oniguruma/-/vscode-oniguruma-1.7.0.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.1.14",
"version": "0.1.18",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -27,7 +27,7 @@
"author": "Lance Devs",
"license": "Apache-2.0",
"devDependencies": {
"@neon-rs/cli": "^0.0.74",
"@neon-rs/cli": "^0.0.160",
"@types/chai": "^4.3.4",
"@types/chai-as-promised": "^7.1.5",
"@types/mocha": "^10.0.1",
@@ -70,18 +70,18 @@
],
"neon": {
"targets": {
"x86_64-apple-darwin": "vectordb-darwin-x64",
"aarch64-apple-darwin": "vectordb-darwin-arm64",
"x86_64-unknown-linux-gnu": "vectordb-linux-x64-gnu",
"aarch64-unknown-linux-gnu": "vectordb-linux-arm64-gnu",
"x86_64-pc-windows-msvc": "vectordb-win32-x64-msvc"
"x86_64-apple-darwin": "@lancedb/vectordb-darwin-x64",
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
}
},
"optionalDependencies": {
"vectordb-darwin-arm64": "0.1.14",
"vectordb-darwin-x64": "0.1.14",
"vectordb-linux-arm64-gnu": "0.1.14",
"vectordb-linux-x64-gnu": "0.1.14",
"vectordb-win32-x64-msvc": "0.1.14"
"@lancedb/vectordb-darwin-arm64": "0.1.18",
"@lancedb/vectordb-darwin-x64": "0.1.18",
"@lancedb/vectordb-linux-arm64-gnu": "0.1.18",
"@lancedb/vectordb-linux-x64-gnu": "0.1.18",
"@lancedb/vectordb-win32-x64-msvc": "0.1.18"
}
}

View File

@@ -26,3 +26,9 @@ export interface EmbeddingFunction<T> {
*/
embed: (data: T[]) => Promise<number[][]>
}
/**
 * Type guard reporting whether `value` can be used as an {@link EmbeddingFunction}.
 *
 * The check is structural: `value` must expose a string `sourceColumn` and a
 * callable `embed`. The number of own properties is deliberately NOT checked,
 * because `Object.keys` only lists own enumerable properties; a class-based
 * embedding function keeps `embed` on its prototype and may carry extra
 * fields, so a key count would wrongly reject it.
 *
 * @param value - Any value, including `null` or `undefined`.
 * @returns `true` when `value` has the shape of an embedding function.
 */
export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
  // Property access on null/undefined throws, so rule those out first.
  if (value === null || value === undefined) {
    return false
  }
  return typeof value.sourceColumn === 'string' &&
    typeof value.embed === 'function'
}

View File

@@ -20,10 +20,12 @@ import { fromRecordsToBuffer } from './arrow'
import type { EmbeddingFunction } from './embedding/embedding_function'
import { RemoteConnection } from './remote'
import { Query } from './query'
import { isEmbeddingFunction } from './embedding/embedding_function'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete } = require('../native.js')
export { Query }
export type { EmbeddingFunction }
export { OpenAIEmbeddingFunction } from './embedding/openai'
@@ -44,6 +46,9 @@ export interface ConnectionOptions {
apiKey?: string
// Region to connect
region?: string
// override the host for the remote connections
hostOverride?: string
}
/**
@@ -97,10 +102,35 @@ export interface Connection {
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
* @param {WriteMode} mode - The write mode to use when creating the table.
*/
createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
/**
* Creates a new Table and initialize it with new data.
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
* @param {WriteOptions} options - The write options to use when creating the table.
*/
createTable (name: string, data: Array<Record<string, unknown>>, options: WriteOptions): Promise<Table>
/**
* Creates a new Table and initialize it with new data.
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
* @param {EmbeddingFunction} embeddings - An embedding function to use on this table
*/
createTable<T>(name: string, data: Array<Record<string, unknown>>, mode?: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
/**
* Creates a new Table and initialize it with new data.
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
* @param {EmbeddingFunction} embeddings - An embedding function to use on this table
* @param {WriteOptions} options - The write options to use when creating the table.
*/
createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>, options: WriteOptions): Promise<Table<T>>
createTableArrow(name: string, table: ArrowTable): Promise<Table>
@@ -234,32 +264,19 @@ export class LocalConnection implements Connection {
}
}
/**
* Creates a new Table and initialize it with new data.
*
* @param name The name of the table.
* @param data Non-empty Array of Records to be inserted into the Table
* @param mode The write mode to use when creating the table.
*/
async createTable (name: string, data: Array<Record<string, unknown>>, mode?: WriteMode): Promise<Table>
async createTable (name: string, data: Array<Record<string, unknown>>, mode: WriteMode): Promise<Table>
/**
* Creates a new Table and initialize it with new data.
*
* @param name The name of the table.
* @param data Non-empty Array of Records to be inserted into the Table
* @param mode The write mode to use when creating the table.
* @param embeddings An embedding function to use on this Table
*/
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
if (mode === undefined) {
mode = WriteMode.Create
async createTable<T> (name: string, data: Array<Record<string, unknown>>, optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>, opt?: WriteOptions): Promise<Table<T>> {
let writeOptions: WriteOptions = new DefaultWriteOptions()
if (opt !== undefined && isWriteOptions(opt)) {
writeOptions = opt
} else if (optsOrEmbedding !== undefined && isWriteOptions(optsOrEmbedding)) {
writeOptions = optsOrEmbedding
}
const createArgs = [this._db, name, await fromRecordsToBuffer(data, embeddings), mode.toLowerCase()]
let embeddings: undefined | EmbeddingFunction<T>
if (optsOrEmbedding !== undefined && isEmbeddingFunction(optsOrEmbedding)) {
embeddings = optsOrEmbedding
}
const createArgs = [this._db, name, await fromRecordsToBuffer(data, embeddings), writeOptions.writeMode?.toString()]
if (this._options.awsCredentials !== undefined) {
createArgs.push(this._options.awsCredentials.accessKeyId)
createArgs.push(this._options.awsCredentials.secretKey)
@@ -456,6 +473,23 @@ export enum WriteMode {
Append = 'append'
}
/**
* Write options when creating a Table.
*
* Accepted by `createTable` either directly after the data argument or, when an
* embedding function is supplied, as the argument following that function.
*/
export interface WriteOptions {
/**
* A {@link WriteMode} to use on this operation.
* Optional; `createTable` starts from {@link DefaultWriteOptions} when no options object is passed.
*/
writeMode?: WriteMode
}
/**
* Default {@link WriteOptions}: `writeMode` is {@link WriteMode.Create}.
* `createTable` instantiates this class as its starting options and replaces it
* only when the caller passes an object recognised by `isWriteOptions`.
*/
export class DefaultWriteOptions implements WriteOptions {
writeMode = WriteMode.Create
}
/**
 * Type guard reporting whether `value` is a {@link WriteOptions} object.
 *
 * A match is a non-null object whose only own enumerable key is `writeMode`,
 * holding either `undefined` or a string (the runtime representation of a
 * {@link WriteMode}). An empty object does not match, so callers can tell
 * "no options" apart from "options present".
 *
 * Requiring the single key to be `writeMode` matters: counting keys alone
 * would also accept unrelated values such as `{ foo: 1 }`, `[1]` or a
 * one-character string, whose `writeMode` is simply `undefined`.
 *
 * @param value - Any value, including `null` or `undefined`.
 * @returns `true` when `value` has the shape of a write-options object.
 */
export function isWriteOptions (value: any): value is WriteOptions {
  // Object.keys throws on null/undefined and treats strings as index maps.
  if (value === null || typeof value !== 'object') {
    return false
  }
  const ownKeys = Object.keys(value)
  return ownKeys.length === 1 &&
    ownKeys[0] === 'writeMode' &&
    (value.writeMode === undefined || typeof value.writeMode === 'string')
}
/**
* Distance metrics type.
*/

View File

@@ -18,9 +18,15 @@ import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
export class HttpLancedbClient {
private readonly _url: string
private readonly _apiKey: () => string
public constructor (url: string, private readonly _apiKey: string) {
public constructor (
url: string,
apiKey: string,
private readonly _dbName?: string
) {
this._url = url
this._apiKey = () => apiKey
}
get uri (): string {
@@ -37,7 +43,7 @@ export class HttpLancedbClient {
filter?: string
): Promise<ArrowTable<any>> {
const response = await axios.post(
`${this._url}/v1/table/${tableName}`,
`${this._url}/v1/table/${tableName}/query/`,
{
vector,
k,
@@ -49,7 +55,8 @@ export class HttpLancedbClient {
{
headers: {
'Content-Type': 'application/json',
'x-api-key': this._apiKey
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
},
responseType: 'arraybuffer',
timeout: 10000
@@ -79,7 +86,7 @@ export class HttpLancedbClient {
{
headers: {
'Content-Type': 'application/json',
'x-api-key': this._apiKey
'x-api-key': this._apiKey()
},
params,
timeout: 10000

View File

@@ -37,8 +37,13 @@ export class RemoteConnection implements Connection {
}
this._dbName = opts.uri.slice('db://'.length)
const server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
this._client = new HttpLancedbClient(server, opts.apiKey)
let server: string
if (opts.hostOverride === undefined) {
server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
} else {
server = opts.hostOverride
}
this._client = new HttpLancedbClient(server, opts.apiKey, opts.hostOverride === undefined ? undefined : this._dbName)
}
get uri (): string {

View File

@@ -18,8 +18,7 @@ import * as chai from 'chai'
import * as chaiAsPromised from 'chai-as-promised'
import * as lancedb from '../index'
import { type AwsCredentials, type EmbeddingFunction, MetricType, WriteMode } from '../index'
import { Query } from '../query'
import { type AwsCredentials, type EmbeddingFunction, MetricType, Query, WriteMode, DefaultWriteOptions, isWriteOptions } from '../index'
const expect = chai.expect
const assert = chai.assert
@@ -145,7 +144,7 @@ describe('LanceDB client', function () {
]
const tableName = 'overwrite'
await con.createTable(tableName, data, WriteMode.Create)
await con.createTable(tableName, data, { writeMode: WriteMode.Create })
const newData = [
{ id: 1, vector: [0.1, 0.2], price: 10 },
@@ -155,7 +154,7 @@ describe('LanceDB client', function () {
await expect(con.createTable(tableName, newData)).to.be.rejectedWith(Error, 'already exists')
const table = await con.createTable(tableName, newData, WriteMode.Overwrite)
const table = await con.createTable(tableName, newData, { writeMode: WriteMode.Overwrite })
assert.equal(table.name, tableName)
assert.equal(await table.countRows(), 3)
})
@@ -260,7 +259,7 @@ describe('LanceDB client', function () {
{ price: 10, name: 'foo' },
{ price: 50, name: 'bar' }
]
const table = await con.createTable('vectors', data, WriteMode.Create, embeddings)
const table = await con.createTable('vectors', data, embeddings, { writeMode: WriteMode.Create })
const results = await table.search('foo').execute()
assert.equal(results.length, 2)
})
@@ -318,3 +317,20 @@ describe('Drop table', function () {
assert.deepEqual(await con.tableNames(), ['t2'])
})
})
// Table-driven checks for the `isWriteOptions` type guard: each row is
// [test title, candidate value, expected guard result].
describe('WriteOptions', function () {
  context('#isWriteOptions', function () {
    const scenarios: Array<[string, unknown, boolean]> = [
      ['should not match empty object', {}, false],
      ['should match write options', { writeMode: WriteMode.Create }, true],
      ['should match undefined write mode', { writeMode: undefined }, true],
      ['should match default write options', new DefaultWriteOptions(), true]
    ]

    for (const [title, candidate, expected] of scenarios) {
      it(title, function () {
        assert.equal(isWriteOptions(candidate), expected)
      })
    }
  })
})

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.12
current_version = 0.1.15
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True

View File

@@ -19,7 +19,11 @@ from .schema import vector
def connect(
uri: URI, *, api_key: Optional[str] = None, region: str = "us-west-2"
uri: URI,
*,
api_key: Optional[str] = None,
region: str = "us-west-2",
host_override: Optional[str] = None,
) -> DBConnection:
"""Connect to a LanceDB database.
@@ -55,5 +59,5 @@ def connect(
if isinstance(uri, str) and uri.startswith("db://"):
if api_key is None:
raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
return RemoteDBConnection(uri, api_key, region)
return RemoteDBConnection(uri, api_key, region, host_override)
return LanceDBConnection(uri)

View File

@@ -327,6 +327,6 @@ class LanceDBConnection(DBConnection):
name: str
The name of the table.
"""
filesystem, path = pa.fs.FileSystem.from_uri(self.uri)
filesystem, path = fs_from_uri(self.uri)
table_path = os.path.join(path, name + ".lance")
filesystem.delete_dir(table_path)

View File

@@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pydantic adapter for LanceDB"""
"""Pydantic (v1 / v2) adapter for LanceDB"""
from __future__ import annotations
@@ -19,11 +19,19 @@ import inspect
import sys
import types
from abc import ABC, abstractmethod
from typing import Any, List, Type, Union, _GenericAlias
from typing import Any, Callable, Dict, Generator, List, Type, Union, _GenericAlias
import numpy as np
import pyarrow as pa
import pydantic
from pydantic_core import CoreSchema, core_schema
import semver
PYDANTIC_VERSION = semver.Version.parse(pydantic.__version__)
try:
from pydantic_core import CoreSchema, core_schema
except ImportError:
if PYDANTIC_VERSION >= (2,):
raise
class FixedSizeListMixin(ABC):
@@ -73,6 +81,9 @@ def vector(
# TODO: make a public parameterized type.
class FixedSizeList(list, FixedSizeListMixin):
def __repr__(self):
return f"FixedSizeList(dim={dim})"
@staticmethod
def dim() -> int:
return dim
@@ -94,6 +105,25 @@ def vector(
),
)
@classmethod
def __get_validators__(cls) -> Generator[Callable, None, None]:
yield cls.validate
# For pydantic v1
@classmethod
def validate(cls, v):
    """Validate a candidate vector value and return it unchanged.

    ``v`` must be a ``list``, ``range`` or ``numpy.ndarray`` holding exactly
    ``dim`` elements, where ``dim`` is the dimension captured from the
    enclosing scope.

    Raises:
        TypeError: if ``v`` has an unsupported type, or has the wrong number
            of elements. The two cases carry different messages so that a
            length mismatch is not reported as a type problem.
    """
    if not isinstance(v, (list, range, np.ndarray)):
        raise TypeError("A list of numbers or numpy.ndarray is needed")
    if len(v) != dim:
        # Same exception type as the type check, so existing handlers still match.
        raise TypeError(
            f"A list of numbers or numpy.ndarray with exactly {dim} elements "
            f"is needed, got {len(v)}"
        )
    return v
if PYDANTIC_VERSION < (2, 0):
@classmethod
def __modify_schema__(cls, field_schema: Dict[str, Any]):
field_schema["items"] = {"type": "number"}
field_schema["maxItems"] = dim
field_schema["minItems"] = dim
return FixedSizeList
@@ -120,11 +150,20 @@ def _py_type_to_arrow_type(py_type: Type[Any]) -> pa.DataType:
)
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
fields = []
for name, field in model.model_fields.items():
fields.append(_pydantic_to_field(name, field))
return fields
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
    """Convert every field declared on ``model`` with ``_pydantic_to_field``.

    The field mapping is read from ``model.__fields__`` when the installed
    pydantic major version is below 2, and from ``model.model_fields``
    otherwise. Results keep the mapping's iteration order.
    """
    if PYDANTIC_VERSION.major < 2:
        declared = model.__fields__
    else:
        declared = model.model_fields

    converted = []
    for field_name, field_info in declared.items():
        converted.append(_pydantic_to_field(field_name, field_info))
    return converted
def _pydantic_to_arrow_type(field: pydantic.fields.FieldInfo) -> pa.DataType:

View File

@@ -48,11 +48,16 @@ class RestfulLanceDBClient:
db_name: str
region: str
api_key: Credential
host_override: Optional[str] = attr.field(default=None)
closed: bool = attr.field(default=False, init=False)
@functools.cached_property
def session(self) -> aiohttp.ClientSession:
url = f"https://{self.db_name}.{self.region}.api.lancedb.com"
url = (
self.host_override
or f"https://{self.db_name}.{self.region}.api.lancedb.com"
)
return aiohttp.ClientSession(url)
async def close(self):
@@ -66,6 +71,8 @@ class RestfulLanceDBClient:
}
if self.region == "local": # Local test mode
headers["Host"] = f"{self.db_name}.{self.region}.api.lancedb.com"
if self.host_override:
headers["x-lancedb-database"] = self.db_name
return headers
@staticmethod
@@ -98,7 +105,7 @@ class RestfulLanceDBClient:
async def post(
self,
uri: str,
data: Union[Dict[str, Any], BaseModel, bytes],
data: Optional[Union[Dict[str, Any], BaseModel, bytes]] = None,
params: Optional[Dict[str, Any]] = None,
content_type: Optional[str] = None,
deserialize: Callable = lambda resp: resp.json(),
@@ -141,5 +148,7 @@ class RestfulLanceDBClient:
@_check_not_closed
async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
"""Query a table."""
tbl = await self.post(f"/v1/table/{table_name}/", query, deserialize=_read_ipc)
tbl = await self.post(
f"/v1/table/{table_name}/query/", query, deserialize=_read_ipc
)
return VectorQueryResult(tbl)

View File

@@ -13,7 +13,7 @@
import asyncio
import uuid
from typing import List
from typing import List, Optional
from urllib.parse import urlparse
import pyarrow as pa
@@ -30,14 +30,22 @@ from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient
class RemoteDBConnection(DBConnection):
"""A connection to a remote LanceDB database."""
def __init__(self, db_url: str, api_key: str, region: str):
def __init__(
self,
db_url: str,
api_key: str,
region: str,
host_override: Optional[str] = None,
):
"""Connect to a remote LanceDB database."""
parsed = urlparse(db_url)
if parsed.scheme != "db":
raise ValueError(f"Invalid scheme: {parsed.scheme}, only accepts db://")
self.db_name = parsed.netloc
self.api_key = api_key
self._client = RestfulLanceDBClient(self.db_name, region, api_key)
self._client = RestfulLanceDBClient(
self.db_name, region, api_key, host_override
)
try:
self._loop = asyncio.get_running_loop()
except RuntimeError:
@@ -95,7 +103,7 @@ class RemoteDBConnection(DBConnection):
self._loop.run_until_complete(
self._client.post(
f"/v1/table/{name}/create",
f"/v1/table/{name}/create/",
data=data,
params={"request_id": request_id},
content_type=ARROW_STREAM_CONTENT_TYPE,

View File

@@ -33,13 +33,13 @@ class RemoteTable(Table):
self._name = name
def __repr__(self) -> str:
return f"RemoteTable({self._conn.db_name}.{self.name})"
return f"RemoteTable({self._conn.db_name}.{self._name})"
@cached_property
def schema(self) -> pa.Schema:
"""Return the schema of the table."""
resp = self._conn._loop.run_until_complete(
self._conn._client.get(f"/v1/table/{self._name}/describe")
self._conn._client.post(f"/v1/table/{self._name}/describe/")
)
schema = json_to_schema(resp["schema"])
return schema
@@ -73,7 +73,7 @@ class RemoteTable(Table):
self._conn._loop.run_until_complete(
self._conn._client.post(
f"/v1/table/{self._name}/insert",
f"/v1/table/{self._name}/insert/",
data=payload,
params={"request_id": request_id, "mode": mode},
content_type=ARROW_STREAM_CONTENT_TYPE,

View File

@@ -23,12 +23,12 @@ import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.fs
from lance import LanceDataset
from lance.vector import vec_to_table
from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query
from .util import fs_from_uri
def _sanitize_data(data, schema, on_bad_vectors, fill_value):
@@ -527,7 +527,7 @@ class LanceTable(Table):
@classmethod
def open(cls, db, name):
tbl = cls(db, name)
fs, path = pa.fs.FileSystem.from_uri(tbl._dataset_uri)
fs, path = fs_from_uri(tbl._dataset_uri)
file_info = fs.get_file_info(path)
if file_info.type != pa.fs.FileType.Directory:
raise FileNotFoundError(

View File

@@ -71,7 +71,8 @@ def fs_from_uri(uri: str) -> Tuple[pa_fs.FileSystem, str]:
Get a PyArrow FileSystem from a URI, handling extra environment variables.
"""
if get_uri_scheme(uri) == "s3":
if os.environ["AWS_ENDPOINT"]:
uri += "?endpoint_override=" + os.environ["AWS_ENDPOINT"]
fs = pa_fs.S3FileSystem(endpoint_override=os.environ.get("AWS_ENDPOINT"))
path = get_uri_location(uri)
return fs, path
return pa_fs.FileSystem.from_uri(uri)

View File

@@ -1,7 +1,7 @@
[project]
name = "lancedb"
version = "0.1.12"
dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic>=2", "attr"]
version = "0.1.15"
dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
description = "lancedb"
authors = [
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
@@ -52,3 +52,6 @@ requires = [
"wheel",
]
build-backend = "setuptools.build_meta"
[tool.isort]
profile = "black"

View File

@@ -20,7 +20,7 @@ import pyarrow as pa
import pydantic
import pytest
from lancedb.pydantic import pydantic_to_schema, vector
from lancedb.pydantic import PYDANTIC_VERSION, pydantic_to_schema, vector
@pytest.mark.skipif(
@@ -111,10 +111,16 @@ def test_fixed_size_list_field():
li: List[int]
data = TestModel(vec=list(range(16)), li=[1, 2, 3])
assert json.loads(data.model_dump_json()) == {
"vec": list(range(16)),
"li": [1, 2, 3],
}
if PYDANTIC_VERSION >= (2,):
assert json.loads(data.model_dump_json()) == {
"vec": list(range(16)),
"li": [1, 2, 3],
}
else:
assert data.dict() == {
"vec": list(range(16)),
"li": [1, 2, 3],
}
schema = pydantic_to_schema(TestModel)
assert schema == pa.schema(
@@ -124,7 +130,11 @@ def test_fixed_size_list_field():
]
)
json_schema = TestModel.model_json_schema()
if PYDANTIC_VERSION >= (2,):
json_schema = TestModel.model_json_schema()
else:
json_schema = TestModel.schema()
assert json_schema == {
"properties": {
"vec": {

View File

@@ -1,6 +1,6 @@
[package]
name = "vectordb-node"
version = "0.1.14"
version = "0.1.18"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
edition = "2018"

View File

@@ -26,7 +26,7 @@ pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> RecordBatch {
.column_by_name("vector")
.cloned()
.expect("vector column is missing");
// TODO: we should just consume the underlaying js buffer in the future instead of this arrow around a bunch of times
// TODO: we should just consume the underlying js buffer in the future instead of this arrow around a bunch of times
let arr = as_list_array(column.as_ref());
let list_size = arr.values().len() / record_batch.num_rows();
let r =

View File

@@ -54,7 +54,7 @@ struct JsTable {
impl Finalize for JsTable {}
// TODO: object_store didn't export this type so I copied it.
// Make a requiest to object_store to export this type
// Make a request to object_store to export this type
#[derive(Debug)]
pub struct StaticCredentialProvider<T> {
credential: Arc<T>,

View File

@@ -1,6 +1,6 @@
[package]
name = "vectordb"
version = "0.1.14"
version = "0.1.18"
edition = "2021"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"