Mirror of https://github.com/lancedb/lancedb.git (synced 2025-12-23 05:19:58 +00:00)
chore: remove vectordb package (#2564)
```shell
git rm -r rust/ffi
git rm -r node
git rm ci/build_windows_artifacts.ps1
git rm ci/build_windows_artifacts_nodejs.ps1
git rm ci/build_linux_artifacts.sh
git rm ci/build_macos_artifacts.sh
git rm -r ci/manylinux_node
git rm .github/workflows/node.yml
```
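The deleted paths cover the legacy `vectordb` client end to end: the Rust FFI crate, the Node package itself, and the CI scripts that built its prebuilt binaries. A quick way to confirm nothing else still references them, as a minimal sketch assuming a POSIX shell at the repository root:

```shell
# Hypothetical post-removal check; expects no output.
# Matches any surviving reference to the removed paths or build scripts.
git grep -nE 'rust/ffi|ci/manylinux_node|build_(linux|macos|windows)_artifacts'
```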
Version-bump configuration (the `[[tool.bumpversion.files]]` entries for the removed node package):

```diff
@@ -50,11 +50,6 @@ pre_commit_hooks = [
 optional_value = "final"
 values = ["beta", "final"]
 
-[[tool.bumpversion.files]]
-filename = "node/package.json"
-replace = "\"version\": \"{new_version}\","
-search = "\"version\": \"{current_version}\","
-
 [[tool.bumpversion.files]]
 filename = "nodejs/package.json"
 replace = "\"version\": \"{new_version}\","
@@ -66,39 +61,8 @@ glob = "nodejs/npm/*/package.json"
 replace = "\"version\": \"{new_version}\","
 search = "\"version\": \"{current_version}\","
 
-# vectodb node binary packages
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
-search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
-search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
-search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
-search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
-search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
-
 # Cargo files
 # ------------
-[[tool.bumpversion.files]]
-filename = "rust/ffi/node/Cargo.toml"
-replace = "\nversion = \"{new_version}\""
-search = "\nversion = \"{current_version}\""
-
 [[tool.bumpversion.files]]
 filename = "rust/lancedb/Cargo.toml"
 replace = "\nversion = \"{new_version}\""
```
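Each `[[tool.bumpversion.files]]` block gives bump-my-version a literal search/replace pair to apply in one file on release; deleting the `node/` entries stops the tool from trying to rewrite version pins in files that no longer exist. To see what a bump would touch, a sketch assuming bump-my-version is installed and reads this config:

```shell
# Dry-run a patch bump and log each configured file and substitution
# without writing anything.
bump-my-version bump patch --dry-run --verbose
```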
.github/workflows/node.yml (vendored): 147 lines deleted.

```diff
@@ -1,147 +0,0 @@
-name: Node
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    paths:
-      - node/**
-      - rust/ffi/node/**
-      - .github/workflows/node.yml
-      - docker-compose.yml
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-env:
-  # Disable full debug symbol generation to speed up CI build and keep memory down
-  # "1" means line tables only, which is useful for panic tracebacks.
-  #
-  # Use native CPU to accelerate tests if possible, especially for f16
-  # target-cpu=haswell fixes failing ci build
-  RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
-  RUST_BACKTRACE: "1"
-
-jobs:
-  linux:
-    name: Linux (Node ${{ matrix.node-version }})
-    timeout-minutes: 30
-    strategy:
-      matrix:
-        node-version: [ "18", "20" ]
-    runs-on: "ubuntu-22.04"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - uses: actions/setup-node@v3
-        with:
-          node-version: ${{ matrix.node-version }}
-          cache: 'npm'
-          cache-dependency-path: node/package-lock.json
-      - uses: Swatinem/rust-cache@v2
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run build
-          npm run pack-build
-          npm install --no-save ./dist/lancedb-vectordb-*.tgz
-          # Remove index.node to test with dependency installed
-          rm index.node
-      - name: Test
-        run: npm run test
-
-  macos:
-    timeout-minutes: 30
-    runs-on: "macos-13"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: 'npm'
-          cache-dependency-path: node/package-lock.json
-      - uses: Swatinem/rust-cache@v2
-      - name: Install dependencies
-        run: brew install protobuf
-      - name: Build
-        run: |
-          npm ci
-          npm run build
-          npm run pack-build
-          npm install --no-save ./dist/lancedb-vectordb-*.tgz
-          # Remove index.node to test with dependency installed
-          rm index.node
-      - name: Test
-        run: |
-          npm run test
-
-  aws-integtest:
-    timeout-minutes: 45
-    runs-on: "ubuntu-22.04"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    env:
-      AWS_ACCESS_KEY_ID: ACCESSKEY
-      AWS_SECRET_ACCESS_KEY: SECRETKEY
-      AWS_DEFAULT_REGION: us-west-2
-      # this one is for s3
-      AWS_ENDPOINT: http://localhost:4566
-      # this one is for dynamodb
-      DYNAMODB_ENDPOINT: http://localhost:4566
-      ALLOW_HTTP: true
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: 'npm'
-          cache-dependency-path: node/package-lock.json
-      - name: start local stack
-        run: docker compose -f ../docker-compose.yml up -d --wait
-      - name: create s3
-        run: aws s3 mb s3://lancedb-integtest --endpoint $AWS_ENDPOINT
-      - name: create ddb
-        run: |
-          aws dynamodb create-table \
-            --table-name lancedb-integtest \
-            --attribute-definitions '[{"AttributeName": "base_uri", "AttributeType": "S"}, {"AttributeName": "version", "AttributeType": "N"}]' \
-            --key-schema '[{"AttributeName": "base_uri", "KeyType": "HASH"}, {"AttributeName": "version", "KeyType": "RANGE"}]' \
-            --provisioned-throughput '{"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}' \
-            --endpoint-url $DYNAMODB_ENDPOINT
-      - uses: Swatinem/rust-cache@v2
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run build
-          npm run pack-build
-          npm install --no-save ./dist/lancedb-vectordb-*.tgz
-          # Remove index.node to test with dependency installed
-          rm index.node
-      - name: Test
-        run: npm run integration-test
```
.github/workflows/npm-publish.yml (vendored): 197 lines removed.

```diff
@@ -365,200 +365,3 @@ jobs:
             ARGS="$ARGS --tag preview"
           fi
           npm publish $ARGS
-
-  # ----------------------------------------------------------------------------
-  # vectordb release (legacy)
-  # ----------------------------------------------------------------------------
-  # TODO: delete this when we drop vectordb
-  node:
-    name: vectordb Typescript
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: "npm"
-          cache-dependency-path: node/package-lock.json
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run tsc
-          npm pack
-      - name: Upload Linux Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-package
-          path: |
-            node/vectordb-*.tgz
-
-  node-macos:
-    name: vectordb ${{ matrix.config.arch }}
-    strategy:
-      matrix:
-        config:
-          - arch: x86_64-apple-darwin
-            runner: macos-13
-          - arch: aarch64-apple-darwin
-            # xlarge is implicitly arm64.
-            runner: macos-14
-    runs-on: ${{ matrix.config.runner }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Install system dependencies
-        run: brew install protobuf
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build MacOS native node modules
-        run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
-      - name: Upload Darwin Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-darwin-${{ matrix.config.arch }}
-          path: |
-            node/dist/lancedb-vectordb-darwin*.tgz
-
-  node-linux-gnu:
-    name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
-    runs-on: ${{ matrix.config.runner }}
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - arch: x86_64
-            runner: ubuntu-latest
-          - arch: aarch64
-            # For successful fat LTO builds, we need a large runner to avoid OOM errors.
-            runner: warp-ubuntu-latest-arm64-4x
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      # To avoid OOM errors on ARM, we create a swap file.
-      - name: Configure aarch64 build
-        if: ${{ matrix.config.arch == 'aarch64' }}
-        run: |
-          free -h
-          sudo fallocate -l 16G /swapfile
-          sudo chmod 600 /swapfile
-          sudo mkswap /swapfile
-          sudo swapon /swapfile
-          echo "/swapfile swap swap defaults 0 0" >> sudo /etc/fstab
-          # print info
-          swapon --show
-          free -h
-      - name: Build Linux Artifacts
-        run: |
-          bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
-      - name: Upload Linux Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-linux-${{ matrix.config.arch }}-gnu
-          path: |
-            node/dist/lancedb-vectordb-linux*.tgz
-
-  node-windows:
-    name: vectordb ${{ matrix.target }}
-    runs-on: windows-2022
-    strategy:
-      fail-fast: false
-      matrix:
-        target: [x86_64-pc-windows-msvc]
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Install Protoc v21.12
-        working-directory: C:\
-        run: |
-          New-Item -Path 'C:\protoc' -ItemType Directory
-          Set-Location C:\protoc
-          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
-          7z x protoc.zip
-          Add-Content $env:GITHUB_PATH "C:\protoc\bin"
-        shell: powershell
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build Windows native node modules
-        run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
-      - name: Upload Windows Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-windows
-          path: |
-            node/dist/lancedb-vectordb-win32*.tgz
-
-  release:
-    name: vectordb NPM Publish
-    needs: [node, node-macos, node-linux-gnu, node-windows]
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-    # Only runs on tags that matches the make-release action
-    if: startsWith(github.ref, 'refs/tags/v')
-    steps:
-      - uses: actions/download-artifact@v4
-        with:
-          pattern: node-*
-      - name: Display structure of downloaded files
-        run: ls -R
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          registry-url: "https://registry.npmjs.org"
-      - name: Publish to NPM
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        run: |
-          # Tag beta as "preview" instead of default "latest". See lancedb
-          # npm publish step for more info.
-          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
-            PUBLISH_ARGS="--tag preview"
-          fi
-
-          mv */*.tgz .
-          for filename in *.tgz; do
-            npm publish $PUBLISH_ARGS $filename
-          done
-      - name: Deprecate
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        # We need to deprecate the old package to avoid confusion.
-        # Each time we publish a new version, it gets undeprecated.
-        run: npm deprecate vectordb "Use @lancedb/lancedb instead."
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          ref: main
-      - name: Update package-lock.json
-        run: |
-          git config user.name 'Lance Release'
-          git config user.email 'lance-dev@lancedb.com'
-          bash ci/update_lockfiles.sh
-      - name: Push new commit
-        uses: ad-m/github-push-action@master
-        with:
-          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
-          branch: main
-      - name: Notify Slack Action
-        uses: ravsamhq/notify-slack-action@2.3.0
-        if: ${{ always() }}
-        with:
-          status: ${{ job.status }}
-          notify_when: "failure"
-          notification_title: "{workflow} is failing"
-        env:
-          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
```
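The removed `release` job ends with a Deprecate step because, as its comments note, `npm deprecate` only marks versions that already exist: each fresh publish produces an undeprecated version, so the notice has to be re-applied after every release. The ordering, as a minimal sketch assuming npm credentials are configured:

```shell
# Publish first; the new version starts out undeprecated...
npm publish
# ...then re-apply the deprecation notice to the whole package.
npm deprecate vectordb "Use @lancedb/lancedb instead."
```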
Cargo.lock (generated): 129 lines changed.

```diff
@@ -1480,7 +1480,7 @@ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
 dependencies = [
  "glob",
  "libc",
- "libloading 0.8.8",
+ "libloading",
 ]
 
 [[package]]
@@ -1573,15 +1573,6 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
 
-[[package]]
-name = "conv"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299"
-dependencies = [
- "custom_derive",
-]
-
 [[package]]
 name = "convert_case"
 version = "0.6.0"
@@ -1797,12 +1788,6 @@ dependencies = [
  "syn 2.0.103",
 ]
 
-[[package]]
-name = "custom_derive"
-version = "0.1.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9"
-
 [[package]]
 name = "darling"
 version = "0.20.11"
@@ -4430,7 +4415,7 @@ dependencies = [
  "regex",
  "reqwest",
  "rstest",
- "semver 1.0.26",
+ "semver",
  "serde",
  "serde_json",
  "serde_with",
@@ -4459,31 +4444,6 @@ dependencies = [
  "tokio",
 ]
 
-[[package]]
-name = "lancedb-node"
-version = "0.21.2"
-dependencies = [
- "arrow-array",
- "arrow-ipc",
- "arrow-schema",
- "async-trait",
- "chrono",
- "conv",
- "env_logger",
- "futures",
- "half",
- "lance",
- "lance-index",
- "lance-linalg",
- "lancedb",
- "lzma-sys",
- "neon",
- "object_store",
- "once_cell",
- "snafu",
- "tokio",
-]
-
 [[package]]
 name = "lancedb-nodejs"
 version = "0.21.2"
@@ -4607,16 +4567,6 @@ version = "0.2.174"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
 
-[[package]]
-name = "libloading"
-version = "0.6.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883"
-dependencies = [
- "cfg-if",
- "winapi",
-]
-
 [[package]]
 name = "libloading"
 version = "0.8.8"
@@ -4995,7 +4945,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "semver 1.0.26",
+ "semver",
  "syn 2.0.103",
 ]
 
@@ -5005,48 +4955,7 @@ version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "427802e8ec3a734331fec1035594a210ce1ff4dc5bc1950530920ab717964ea3"
 dependencies = [
- "libloading 0.8.8",
-]
-
-[[package]]
-name = "neon"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28e15415261d880aed48122e917a45e87bb82cf0260bb6db48bbab44b7464373"
-dependencies = [
- "neon-build",
- "neon-macros",
- "neon-runtime",
- "semver 0.9.0",
- "smallvec",
-]
-
-[[package]]
-name = "neon-build"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bac98a702e71804af3dacfde41edde4a16076a7bbe889ae61e56e18c5b1c811"
-
-[[package]]
-name = "neon-macros"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7288eac8b54af7913c60e0eb0e2a7683020dffa342ab3fd15e28f035ba897cf"
-dependencies = [
- "quote",
- "syn 1.0.109",
- "syn-mid",
-]
-
-[[package]]
-name = "neon-runtime"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4676720fa8bb32c64c3d9f49c47a47289239ec46b4bdb66d0913cc512cb0daca"
-dependencies = [
- "cfg-if",
- "libloading 0.6.7",
- "smallvec",
+ "libloading",
 ]
 
 [[package]]
@@ -6728,7 +6637,7 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
 dependencies = [
- "semver 1.0.26",
+ "semver",
 ]
 
 [[package]]
@@ -6993,27 +6902,12 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "semver"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
-dependencies = [
- "semver-parser",
-]
-
 [[package]]
 name = "semver"
 version = "1.0.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
 
-[[package]]
-name = "semver-parser"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
-
 [[package]]
 name = "seq-macro"
 version = "0.3.6"
@@ -7413,17 +7307,6 @@ dependencies = [
  "unicode-ident",
 ]
 
-[[package]]
-name = "syn-mid"
-version = "0.5.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fea305d57546cc8cd04feb14b62ec84bf17f50e3f7b12560d7bfa9265f39d9ed"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 1.0.109",
-]
-
 [[package]]
 name = "sync_wrapper"
 version = "1.0.2"
@@ -8059,7 +7942,7 @@ checksum = "90b70b37e9074642bc5f60bb23247fd072a84314ca9e71cdf8527593406a0dd3"
 dependencies = [
  "gemm 0.18.2",
  "half",
- "libloading 0.8.8",
+ "libloading",
  "memmap2 0.9.5",
  "num",
  "num-traits",
```
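A side effect visible throughout this Cargo.lock diff: dropping `neon` removes the old `libloading 0.6.7` and `semver 0.9.0` entries, leaving a single version of each crate, so Cargo stops qualifying dependency references with a version (`"libloading 0.8.8"` collapses to `"libloading"`). One way to verify only one version remains, as a sketch assuming a checkout of this commit:

```shell
# Show every crate in the graph that depends on libloading; a single
# version in the output confirms the duplicate is gone.
cargo tree -i libloading
```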
Root workspace Cargo.toml:

```diff
@@ -1,6 +1,5 @@
 [workspace]
 members = [
-    "rust/ffi/node",
     "rust/lancedb",
     "nodejs",
     "python",
```
ci/build_linux_artifacts.sh (deleted):

```diff
@@ -1,22 +0,0 @@
-#!/bin/bash
-set -e
-ARCH=${1:-x86_64}
-TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
-
-# We pass down the current user so that when we later mount the local files
-# into the container, the files are accessible by the current user.
-pushd ci/manylinux_node
-docker build \
-    -t lancedb-node-manylinux \
-    --build-arg="ARCH=$ARCH" \
-    --build-arg="DOCKER_USER=$(id -u)" \
-    --progress=plain \
-    .
-popd
-
-# We turn on memory swap to avoid OOM killer
-docker run \
-    -v $(pwd):/io -w /io \
-    --memory-swap=-1 \
-    lancedb-node-manylinux \
-    bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE
```
ci/build_macos_artifacts.sh (deleted):

```diff
@@ -1,34 +0,0 @@
-# Builds the macOS artifacts (node binaries).
-# Usage: ./ci/build_macos_artifacts.sh [target]
-# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
-set -e
-
-prebuild_rust() {
-    # Building here for the sake of easier debugging.
-    pushd rust/ffi/node
-    echo "Building rust library for $1"
-    export RUST_BACKTRACE=1
-    cargo build --release --target $1
-    popd
-}
-
-build_node_binaries() {
-    pushd node
-    echo "Building node library for $1"
-    npm run build-release -- --target $1
-    npm run pack-build -- --target $1
-    popd
-}
-
-if [ -n "$1" ]; then
-    targets=$1
-else
-    targets="x86_64-apple-darwin aarch64-apple-darwin"
-fi
-
-echo "Building artifacts for targets: $targets"
-for target in $targets
-do
-    prebuild_rust $target
-    build_node_binaries $target
-done
```
ci/build_windows_artifacts.ps1 (deleted):

```diff
@@ -1,42 +0,0 @@
-# Builds the Windows artifacts (node binaries).
-# Usage: .\ci\build_windows_artifacts.ps1 [target]
-# Targets supported:
-# - x86_64-pc-windows-msvc
-# - i686-pc-windows-msvc
-# - aarch64-pc-windows-msvc
-
-function Prebuild-Rust {
-    param (
-        [string]$target
-    )
-
-    # Building here for the sake of easier debugging.
-    Push-Location -Path "rust/ffi/node"
-    Write-Host "Building rust library for $target"
-    $env:RUST_BACKTRACE=1
-    cargo build --release --target $target
-    Pop-Location
-}
-
-function Build-NodeBinaries {
-    param (
-        [string]$target
-    )
-
-    Push-Location -Path "node"
-    Write-Host "Building node library for $target"
-    npm run build-release -- --target $target
-    npm run pack-build -- --target $target
-    Pop-Location
-}
-
-$targets = $args[0]
-if (-not $targets) {
-    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
-}
-
-Write-Host "Building artifacts for targets: $targets"
-foreach ($target in $targets) {
-    Prebuild-Rust $target
-    Build-NodeBinaries $target
-}
```
ci/build_windows_artifacts_nodejs.ps1 (deleted):

```diff
@@ -1,42 +0,0 @@
-# Builds the Windows artifacts (nodejs binaries).
-# Usage: .\ci\build_windows_artifacts_nodejs.ps1 [target]
-# Targets supported:
-# - x86_64-pc-windows-msvc
-# - i686-pc-windows-msvc
-# - aarch64-pc-windows-msvc
-
-function Prebuild-Rust {
-    param (
-        [string]$target
-    )
-
-    # Building here for the sake of easier debugging.
-    Push-Location -Path "rust/lancedb"
-    Write-Host "Building rust library for $target"
-    $env:RUST_BACKTRACE=1
-    cargo build --release --target $target
-    Pop-Location
-}
-
-function Build-NodeBinaries {
-    param (
-        [string]$target
-    )
-
-    Push-Location -Path "nodejs"
-    Write-Host "Building nodejs library for $target"
-    $env:RUST_TARGET=$target
-    npm run build-release
-    Pop-Location
-}
-
-$targets = $args[0]
-if (-not $targets) {
-    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
-}
-
-Write-Host "Building artifacts for targets: $targets"
-foreach ($target in $targets) {
-    Prebuild-Rust $target
-    Build-NodeBinaries $target
-}
```
ci/manylinux_node/Dockerfile (deleted):

```diff
@@ -1,27 +0,0 @@
-# Many linux dockerfile with Rust, Node, and Lance dependencies installed.
-# This container allows building the node modules native libraries in an
-# environment with a very old glibc, so that we are compatible with a wide
-# range of linux distributions.
-ARG ARCH=x86_64
-
-FROM quay.io/pypa/manylinux_2_28_${ARCH}
-
-ARG ARCH=x86_64
-ARG DOCKER_USER=default_user
-
-# Protobuf is also installed as root.
-COPY install_protobuf.sh install_protobuf.sh
-RUN ./install_protobuf.sh ${ARCH}
-
-ENV DOCKER_USER=${DOCKER_USER}
-# Create a group and user, but only if it doesn't exist
-RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
-
-# We switch to the user to install Rust and Node, since those like to be
-# installed at the user level.
-USER ${DOCKER_USER}
-
-COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
-RUN cp /prepare_manylinux_node.sh $HOME/ && \
-    cd $HOME && \
-    ./prepare_manylinux_node.sh ${ARCH}
```
ci/manylinux_node/build_vectordb.sh (deleted):

```diff
@@ -1,13 +0,0 @@
-#!/bin/bash
-# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
-set -e
-ARCH=${1:-x86_64}
-TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
-
-#Alpine doesn't have .bashrc
-FILE=$HOME/.bashrc && test -f $FILE && source $FILE
-
-cd node
-npm ci
-npm run build-release
-npm run pack-build -- -t $TARGET_TRIPLE
```
ci/manylinux_node/install_protobuf.sh (deleted):

```diff
@@ -1,15 +0,0 @@
-#!/bin/bash
-# Installs protobuf compiler. Should be run as root.
-set -e
-
-if [[ $1 == x86_64* ]]; then
-    ARCH=x86_64
-else
-    # gnu target
-    ARCH=aarch_64
-fi
-
-PB_REL=https://github.com/protocolbuffers/protobuf/releases
-PB_VERSION=23.1
-curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
-unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local
```
ci/manylinux_node/prepare_manylinux_node.sh (deleted):

```diff
@@ -1,21 +0,0 @@
-#!/bin/bash
-set -e
-
-install_node() {
-    echo "Installing node..."
-
-    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
-
-    source "$HOME"/.bashrc
-
-    nvm install --no-progress 18
-}
-
-install_rust() {
-    echo "Installing rust..."
-    curl https://sh.rustup.rs -sSf | bash -s -- -y
-    export PATH="$PATH:/root/.cargo/bin"
-}
-
-install_node
-install_rust
```
node/.eslintrc.js (deleted):

```diff
@@ -1,22 +0,0 @@
-module.exports = {
-  env: {
-    browser: true,
-    es2021: true
-  },
-  extends: 'standard-with-typescript',
-  overrides: [
-  ],
-  parserOptions: {
-    project: './tsconfig.json',
-    ecmaVersion: 'latest',
-    sourceType: 'module'
-  },
-  rules: {
-    "@typescript-eslint/method-signature-style": "off",
-    "@typescript-eslint/quotes": "off",
-    "@typescript-eslint/semi": "off",
-    "@typescript-eslint/explicit-function-return-type": "off",
-    "@typescript-eslint/space-before-function-paren": "off",
-    "@typescript-eslint/indent": "off",
-  }
-}
```
An ignore file under node/ (deleted):

```diff
@@ -1,4 +0,0 @@
-gen_test_data.py
-index.node
-dist/lancedb*.tgz
-vectordb*.tgz
```
node/CHANGELOG.md (deleted):

```diff
@@ -1,64 +0,0 @@
-# Changelog
-
-All notable changes to this project will be documented in this file.
-
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-## [0.1.5] - 2023-06-00
-
-### Added
-
-- Support for macOS X86
-
-## [0.1.4] - 2023-06-03
-
-### Added
-
-- Select / Project query API
-
-### Changed
-
-- Deprecated created_index in favor of createIndex
-
-## [0.1.3] - 2023-06-01
-
-### Added
-
-- Support S3 and Google Cloud Storage
-- Embedding functions support
-- OpenAI embedding function
-
-## [0.1.2] - 2023-05-27
-
-### Added
-
-- Append records API
-- Extra query params to to nodejs client
-- Create_index API
-
-### Fixed
-
-- bugfix: string columns should be converted to Utf8Array (#94)
-
-## [0.1.1] - 2023-05-16
-
-### Added
-
-- create_table API
-- limit parameter for queries
-- Typescript / JavaScript examples
-- Linux support
-
-## [0.1.0] - 2023-05-16
-
-### Added
-
-- Initial JavaScript / Node.js library for LanceDB
-- Read-only api to query LanceDB datasets
-- Supports macOS arm only
-
-## [pre-0.1.0]
-
-- Various prototypes / test builds
```
node/README.md (deleted):

````diff
@@ -1,66 +0,0 @@
-# LanceDB
-
-A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb).
-
-**DEPRECATED: This library is deprecated. Please use the new client,
-[@lancedb/lancedb](https://www.npmjs.com/package/@lancedb/lancedb).**
-
-## Installation
-
-```bash
-npm install vectordb
-```
-
-This will download the appropriate native library for your platform. We currently
-support:
-
-* Linux (x86_64 and aarch64)
-* MacOS (Intel and ARM/M1/M2)
-* Windows (x86_64 only)
-
-We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
-
-## Usage
-
-### Basic Example
-
-```javascript
-const lancedb = require('vectordb');
-const db = await lancedb.connect('data/sample-lancedb');
-const table = await db.createTable("my_table",
-    [{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
-     { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
-const results = await table.search([0.1, 0.3]).limit(20).execute();
-console.log(results);
-```
-
-The [examples](./examples) folder contains complete examples.
-
-## Development
-
-To build everything fresh:
-
-```bash
-npm install
-npm run build
-```
-
-Then you should be able to run the tests with:
-
-```bash
-npm test
-```
-
-### Fix lints
-
-To run the linter and have it automatically fix all errors
-
-```bash
-npm run lint -- --fix
-```
-
-To build documentation
-
-```bash
-npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
-```
````
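The deleted README already pointed users at the replacement client. For comparison, the same basic example on `@lancedb/lancedb`, as a minimal sketch assuming the current API, where queries are consumed with `toArray()` instead of the old `execute()`:

```javascript
// Migration sketch; assumes `npm install @lancedb/lancedb`.
const lancedb = require("@lancedb/lancedb");

async function main() {
  const db = await lancedb.connect("data/sample-lancedb");
  const table = await db.createTable("my_table", [
    { id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
    { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 },
  ]);
  // The new query builder materializes results with toArray().
  const results = await table.search([0.1, 0.3]).limit(20).toArray();
  console.log(results);
}

main().then(() => console.log("All done!"));
```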
Deleted example: OpenAI embeddings (index.js under node/examples):

```diff
@@ -1,41 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-async function example () {
-  const lancedb = require('vectordb')
-  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
-  const apiKey = process.env.OPENAI_API_KEY
-  // The embedding function will create embeddings for the 'text' column (text in this case)
-  const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)
-
-  const db = await lancedb.connect('data/sample-lancedb')
-
-  const data = [
-    { id: 1, text: 'Black T-Shirt', price: 10 },
-    { id: 2, text: 'Leather Jacket', price: 50 }
-  ]
-
-  const table = await db.createTable('vectors', data, embedding)
-  console.log(await db.tableNames())
-
-  const results = await table
-    .search('keeps me warm')
-    .limit(1)
-    .execute()
-  console.log(results[0].text)
-}
-
-example().then(_ => { console.log('All done!') })
```
Its package.json (vectordb-example-js-openai, deleted):

```diff
@@ -1,15 +0,0 @@
-{
-  "name": "vectordb-example-js-openai",
-  "version": "1.0.0",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "vectordb": "file:../..",
-    "openai": "^3.2.1"
-  }
-}
```
Deleted example: transformers.js embeddings (index.js under node/examples):

```diff
@@ -1,66 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-async function example() {
-  const lancedb = require('vectordb')
-
-  // Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
-  const { pipeline } = await import('@xenova/transformers')
-  const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
-
-  // Create embedding function from pipeline which returns a list of vectors from batch
-  // sourceColumn is the name of the column in the data to be embedded
-  //
-  // Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
-  const embed_fun = {}
-  embed_fun.sourceColumn = 'text'
-  embed_fun.embed = async function (batch) {
-    let result = []
-    for (let text of batch) {
-      const res = await pipe(text, { pooling: 'mean', normalize: true })
-      result.push(Array.from(res['data']))
-    }
-    return (result)
-  }
-
-  // Link a folder and create a table with data
-  const db = await lancedb.connect('data/sample-lancedb')
-
-  const data = [
-    { id: 1, text: 'Cherry', type: 'fruit' },
-    { id: 2, text: 'Carrot', type: 'vegetable' },
-    { id: 3, text: 'Potato', type: 'vegetable' },
-    { id: 4, text: 'Apple', type: 'fruit' },
-    { id: 5, text: 'Banana', type: 'fruit' }
-  ]
-
-  const table = await db.createTable('food_table', data, embed_fun)
-
-  // Query the table
-  const results = await table
-    .search("a sweet fruit to eat")
-    .metricType("cosine")
-    .limit(2)
-    .execute()
-  console.log(results.map(r => r.text))
-}
-
-example().then(_ => { console.log("Done!") })
```
Its package.json (vectordb-example-js-transformers, deleted):

```diff
@@ -1,16 +0,0 @@
-{
-  "name": "vectordb-example-js-transformers",
-  "version": "1.0.0",
-  "description": "Example for using transformers.js with lancedb",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "@xenova/transformers": "^2.4.1",
-    "vectordb": "file:../.."
-  }
-}
```
Deleted example: YouTube-transcript Q&A over OpenAI completions (index.js under node/examples):

```diff
@@ -1,122 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-const lancedb = require('vectordb')
-const fs = require('fs/promises')
-const readline = require('readline/promises')
-const { stdin: input, stdout: output } = require('process')
-const { Configuration, OpenAIApi } = require('openai')
-
-// Download file from XYZ
-const INPUT_FILE_NAME = 'data/youtube-transcriptions_sample.jsonl';
-
-(async () => {
-  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
-  const apiKey = process.env.OPENAI_API_KEY
-  // The embedding function will create embeddings for the 'context' column
-  const embedFunction = new lancedb.OpenAIEmbeddingFunction('context', apiKey)
-
-  // Connects to LanceDB
-  const db = await lancedb.connect('data/youtube-lancedb')
-
-  // Open the vectors table or create one if it does not exist
-  let tbl
-  if ((await db.tableNames()).includes('vectors')) {
-    tbl = await db.openTable('vectors', embedFunction)
-  } else {
-    tbl = await createEmbeddingsTable(db, embedFunction)
-  }
-
-  // Use OpenAI Completion API to generate and answer based on the context that LanceDB provides
-  const configuration = new Configuration({ apiKey })
-  const openai = new OpenAIApi(configuration)
-  const rl = readline.createInterface({ input, output })
-  try {
-    while (true) {
-      const query = await rl.question('Prompt: ')
-      const results = await tbl
-        .search(query)
-        .select(['title', 'text', 'context'])
-        .limit(3)
-        .execute()
-
-      // console.table(results)
-
-      const response = await openai.createCompletion({
-        model: 'text-davinci-003',
-        prompt: createPrompt(query, results),
-        max_tokens: 400,
-        temperature: 0,
-        top_p: 1,
-        frequency_penalty: 0,
-        presence_penalty: 0
-      })
-      console.log(response.data.choices[0].text)
-    }
-  } catch (err) {
-    console.log('Error: ', err)
-  } finally {
-    rl.close()
-  }
-  process.exit(1)
-})()
-
-async function createEmbeddingsTable (db, embedFunction) {
-  console.log(`Creating embeddings from ${INPUT_FILE_NAME}`)
-  // read the input file into a JSON array, skipping empty lines
-  const lines = (await fs.readFile(INPUT_FILE_NAME, 'utf-8'))
-    .toString()
-    .split('\n')
-    .filter(line => line.length > 0)
-    .map(line => JSON.parse(line))
-
-  const data = contextualize(lines, 20, 'video_id')
-  return await db.createTable('vectors', data, embedFunction)
-}
-
-// Each transcript has a small text column, we include previous transcripts in order to
-// have more context information when creating embeddings
-function contextualize (rows, contextSize, groupColumn) {
-  const grouped = []
-  rows.forEach(row => {
-    if (!grouped[row[groupColumn]]) {
-      grouped[row[groupColumn]] = []
-    }
-    grouped[row[groupColumn]].push(row)
-  })
-
-  const data = []
-  Object.keys(grouped).forEach(key => {
-    for (let i = 0; i < grouped[key].length; i++) {
-      const start = i - contextSize > 0 ? i - contextSize : 0
-      grouped[key][i].context = grouped[key].slice(start, i + 1).map(r => r.text).join(' ')
-    }
-    data.push(...grouped[key])
-  })
-  return data
-}
-
-// Creates a prompt by aggregating all relevant contexts
-function createPrompt (query, context) {
-  let prompt =
-    'Answer the question based on the context below.\n\n' +
-    'Context:\n'
-
-  // need to make sure our prompt is not larger than max size
-  prompt = prompt + context.map(c => c.context).join('\n\n---\n\n').substring(0, 3750)
-  prompt = prompt + `\n\nQuestion: ${query}\nAnswer:`
-  return prompt
-}
```
Its package.json (deleted; note it reuses the vectordb-example-js-openai name verbatim):

```diff
@@ -1,15 +0,0 @@
-{
-  "name": "vectordb-example-js-openai",
-  "version": "1.0.0",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "vectordb": "file:../..",
-    "openai": "^3.2.1"
-  }
-}
```
Deleted example: basic JavaScript usage (index.js under node/examples):

```diff
@@ -1,36 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-async function example () {
-  const lancedb = require('vectordb')
-  const db = await lancedb.connect('data/sample-lancedb')
-
-  const data = [
-    { id: 1, vector: [0.1, 0.2], price: 10 },
-    { id: 2, vector: [1.1, 1.2], price: 50 }
-  ]
-
-  const table = await db.createTable('vectors', data)
-  console.log(await db.tableNames())
-
-  const results = await table
-    .search([0.1, 0.3])
-    .limit(20)
-    .execute()
-  console.log(results)
-}
-
-example()
```
Its package.json (vectordb-example-js, deleted):

```diff
@@ -1,14 +0,0 @@
-{
-  "name": "vectordb-example-js",
-  "version": "1.0.0",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "vectordb": "file:../.."
-  }
-}
```
Deleted example: TypeScript package.json (vectordb-example-ts):

```diff
@@ -1,22 +0,0 @@
-{
-  "name": "vectordb-example-ts",
-  "version": "1.0.0",
-  "description": "",
-  "main": "dist/index.js",
-  "types": "dist/index.d.ts",
-  "scripts": {
-    "tsc": "tsc -b",
-    "build": "tsc"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "devDependencies": {
-    "@types/node": "^18.16.2",
-    "ts-node": "^10.9.1",
-    "ts-node-dev": "^2.0.0",
-    "typescript": "*"
-  },
-  "dependencies": {
-    "vectordb": "file:../.."
-  }
-}
```
Its TypeScript entry point (deleted, src/index.ts):

```diff
@@ -1,35 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import * as vectordb from 'vectordb';
-
-async function example () {
-  const db = await vectordb.connect('data/sample-lancedb')
-
-  const data = [
-    { id: 1, vector: [0.1, 0.2], price: 10 },
-    { id: 2, vector: [1.1, 1.2], price: 50 }
-  ]
-
-  const table = await db.createTable('vectors', data)
-  console.log(await db.tableNames())
-
-  const results = await table
-    .search([0.1, 0.3])
-    .limit(20)
-    .execute()
-  console.log(results)
-}
-
-example().then(_ => { console.log ("All done!") })
```
Its tsconfig.json (deleted):

```diff
@@ -1,10 +0,0 @@
-{
-  "include": ["src/**/*.ts"],
-  "compilerOptions": {
-    "target": "es2016",
-    "module": "commonjs",
-    "declaration": true,
-    "outDir": "./dist",
-    "strict": true
-  }
-}
```
node/native.js (deleted; the native-library loader referenced by the package's lint script):

```diff
@@ -1,36 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-const { currentTarget } = require('@neon-rs/load')
-
-let nativeLib
-
-try {
-  // When developing locally, give preference to the local built library
-  nativeLib = require('./index.node')
-} catch {
-  try {
-    nativeLib = require(`@lancedb/vectordb-${currentTarget()}`)
-  } catch (e) {
-    throw new Error(`vectordb: failed to load native library.
-You may need to run \`npm install @lancedb/vectordb-${currentTarget()}\`.
-
-If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
-
-Source error: ${e}`)
-  }
-}
-
-// Dynamic require for runtime.
-module.exports = nativeLib
```
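native.js is the piece that made the five platform packages work: `@neon-rs/load`'s `currentTarget()` maps the running OS/CPU to a platform suffix (for example `darwin-arm64`, matching the `neon.targets` table in the package.json below), and the loader requires the corresponding `@lancedb/vectordb-*` optional dependency, falling back to a locally built `index.node` during development. To see the suffix for a given machine, a sketch assuming `@neon-rs/load` is installed:

```shell
# Prints the platform suffix used to pick the prebuilt binary package.
node -e "console.log(require('@neon-rs/load').currentTarget())"
```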
node/package-lock.json (generated, 5239 lines): file diff suppressed because it is too large.
node/package.json (deleted):

```diff
@@ -1,98 +0,0 @@
-{
-  "name": "vectordb",
-  "version": "0.21.2",
-  "description": " Serverless, low-latency vector database for AI applications",
-  "private": false,
-  "main": "dist/index.js",
-  "types": "dist/index.d.ts",
-  "scripts": {
-    "tsc": "tsc -b",
-    "build": "npm run tsc && cargo-cp-artifact --artifact cdylib lancedb_node index.node -- cargo build -p lancedb-node --message-format=json",
-    "build-release": "npm run build -- --release",
-    "test": "npm run tsc && mocha -recursive dist/test",
-    "integration-test": "npm run tsc && mocha -recursive dist/integration_test",
-    "lint": "eslint native.js src --ext .js,.ts",
-    "clean": "rm -rf node_modules *.node dist/",
-    "pack-build": "neon pack-build",
-    "check-npm": "printenv && which node && which npm && npm --version"
-  },
-  "repository": {
-    "type": "git",
-    "url": "https://github.com/lancedb/lancedb.git"
-  },
-  "homepage": "https://lancedb.github.io/lancedb/",
-  "bugs": {
-    "url": "https://github.com/lancedb/lancedb/issues"
-  },
-  "keywords": [
-    "data-format",
-    "data-science",
-    "machine-learning",
-    "data-analytics"
-  ],
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "devDependencies": {
-    "@neon-rs/cli": "^0.0.160",
-    "@types/chai": "^4.3.4",
-    "@types/chai-as-promised": "^7.1.5",
-    "@types/mocha": "^10.0.1",
-    "@types/node": "^18.16.2",
-    "@types/sinon": "^10.0.15",
-    "@types/temp": "^0.9.1",
-    "@types/uuid": "^9.0.3",
-    "@typescript-eslint/eslint-plugin": "^5.59.1",
-    "apache-arrow-old": "npm:apache-arrow@13.0.0",
-    "cargo-cp-artifact": "^0.1",
-    "chai": "^4.3.7",
-    "chai-as-promised": "^7.1.1",
-    "eslint": "^8.39.0",
-    "eslint-config-standard-with-typescript": "^34.0.1",
-    "eslint-plugin-import": "^2.26.0",
-    "eslint-plugin-n": "^15.7.0",
-    "eslint-plugin-promise": "^6.1.1",
-    "mocha": "^10.2.0",
-    "openai": "^4.24.1",
-    "sinon": "^15.1.0",
-    "temp": "^0.9.4",
-    "ts-node": "^10.9.1",
-    "ts-node-dev": "^2.0.0",
-    "typedoc": "^0.24.7",
-    "typedoc-plugin-markdown": "^3.15.3",
-    "typescript": "^5.1.0",
-    "uuid": "^9.0.0"
-  },
-  "dependencies": {
-    "@neon-rs/load": "^0.0.74",
-    "axios": "^1.4.0"
-  },
-  "peerDependencies": {
-    "@apache-arrow/ts": "^14.0.2",
-    "apache-arrow": "^14.0.2"
-  },
-  "os": [
-    "darwin",
-    "linux",
-    "win32"
-  ],
-  "cpu": [
-    "x64",
-    "arm64"
-  ],
-  "neon": {
-    "targets": {
-      "x86_64-apple-darwin": "@lancedb/vectordb-darwin-x64",
-      "aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
-      "x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
-      "aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
-      "x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
-    }
-  },
-  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.21.2",
-    "@lancedb/vectordb-darwin-arm64": "0.21.2",
-    "@lancedb/vectordb-linux-x64-gnu": "0.21.2",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.21.2",
-    "@lancedb/vectordb-win32-x64-msvc": "0.21.2"
-  }
-}
```
@@ -1,635 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import {
  Field,
  makeBuilder,
  RecordBatchFileWriter,
  Utf8,
  type Vector,
  FixedSizeList,
  vectorFromArray,
  Schema,
  Table as ArrowTable,
  RecordBatchStreamWriter,
  List,
  RecordBatch,
  makeData,
  Struct,
  type Float,
  DataType,
  Binary,
  Float32
} from "apache-arrow";
import { type EmbeddingFunction } from "./index";
import { sanitizeSchema } from "./sanitize";

/*
 * Options to control how a column should be converted to a vector array
 */
export class VectorColumnOptions {
  /** Vector column type. */
  type: Float = new Float32();

  constructor(values?: Partial<VectorColumnOptions>) {
    Object.assign(this, values);
  }
}

/** Options to control the makeArrowTable call. */
export class MakeArrowTableOptions {
  /*
   * Schema of the data.
   *
   * If this is not provided then the data type will be inferred from the
   * JS type. Integer numbers will become int64, floating point numbers
   * will become float64 and arrays will become variable sized lists with
   * the data type inferred from the first element in the array.
   *
   * The schema must be specified if there are no records (e.g. to make
   * an empty table)
   */
  schema?: Schema;

  /*
   * Mapping from vector column name to expected type
   *
   * Lance expects vector columns to be fixed size list arrays (i.e. tensors).
   * However, `makeArrowTable` will not infer this by default (it creates
   * variable size list arrays). This field can be used to indicate that a column
   * should be treated as a vector column and converted to a fixed size list.
   *
   * The keys should be the names of the vector columns. The value specifies the
   * expected data type of the vector columns.
   *
   * If `schema` is provided then this field is ignored.
   *
   * By default, the column named "vector" will be assumed to be a float32
   * vector column.
   */
  vectorColumns: Record<string, VectorColumnOptions> = {
    vector: new VectorColumnOptions()
  };

  embeddings?: EmbeddingFunction<any>;

  /**
   * If true then string columns will be encoded with dictionary encoding
   *
   * Set this to true if your string columns tend to repeat the same values
   * often. For more precise control use the `schema` property to specify the
   * data type for individual columns.
   *
   * If `schema` is provided then this property is ignored.
   */
  dictionaryEncodeStrings: boolean = false;

  constructor(values?: Partial<MakeArrowTableOptions>) {
    Object.assign(this, values);
  }
}
/**
 * An enhanced version of the {@link makeTable} function from Apache Arrow
 * that supports nested fields and embeddings columns.
 *
 * This function converts an array of Record<String, any> (row-major JS objects)
 * to an Arrow Table (a columnar structure)
 *
 * Note that it currently does not support nulls.
 *
 * If a schema is provided then it will be used to determine the resulting array
 * types. Fields will also be reordered to fit the order defined by the schema.
 *
 * If a schema is not provided then the types will be inferred and the field order
 * will be controlled by the order of properties in the first record.
 *
 * If the input is empty then a schema must be provided to create an empty table.
 *
 * When a schema is not specified then data types will be inferred. The inference
 * rules are as follows:
 *
 *  - boolean => Bool
 *  - number => Float64
 *  - String => Utf8
 *  - Buffer => Binary
 *  - Record<String, any> => Struct
 *  - Array<any> => List
 *
 * @param data input data
 * @param options options to control the makeArrowTable call.
 *
 * @example
 *
 * ```ts
 * import { fromTableToBuffer, makeArrowTable } from "../arrow";
 * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
 *
 * const schema = new Schema([
 *   new Field("a", new Int32()),
 *   new Field("b", new Float32()),
 *   new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
 * ]);
 * const table = makeArrowTable([
 *   { a: 1, b: 2, c: [1, 2, 3] },
 *   { a: 4, b: 5, c: [4, 5, 6] },
 *   { a: 7, b: 8, c: [7, 8, 9] },
 * ], { schema });
 * ```
 *
 * By default it assumes that the column named `vector` is a vector column
 * and it will be converted into a fixed size list array of type float32.
 * The `vectorColumns` option can be used to support other vector column
 * names and data types.
 *
 * ```ts
 * const schema = new Schema([
 *   new Field("a", new Float64()),
 *   new Field("b", new Float64()),
 *   new Field(
 *     "vector",
 *     new FixedSizeList(3, new Field("item", new Float32()))
 *   ),
 * ]);
 * const table = makeArrowTable([
 *   { a: 1, b: 2, vector: [1, 2, 3] },
 *   { a: 4, b: 5, vector: [4, 5, 6] },
 *   { a: 7, b: 8, vector: [7, 8, 9] },
 * ]);
 * assert.deepEqual(table.schema, schema);
 * ```
 *
 * You can specify the vector column types and names using the options as well
 *
 * ```typescript
 * const schema = new Schema([
 *   new Field('a', new Float64()),
 *   new Field('b', new Float64()),
 *   new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
 *   new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
 * ]);
 * const table = makeArrowTable([
 *   { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
 *   { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
 *   { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
 * ], {
 *   vectorColumns: {
 *     vec1: { type: new Float16() },
 *     vec2: { type: new Float16() }
 *   }
 * })
 * assert.deepEqual(table.schema, schema)
 * ```
 */
export function makeArrowTable(
  data: Array<Record<string, any>>,
  options?: Partial<MakeArrowTableOptions>
): ArrowTable {
  if (
    data.length === 0 &&
    (options?.schema === undefined || options?.schema === null)
  ) {
    throw new Error("At least one record or a schema needs to be provided");
  }

  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
  if (opt.schema !== undefined && opt.schema !== null) {
    opt.schema = sanitizeSchema(opt.schema);
    opt.schema = validateSchemaEmbeddings(opt.schema, data, opt.embeddings);
  }

  const columns: Record<string, Vector> = {};
  // TODO: sample dataset to find missing columns
  // Prefer the field ordering of the schema, if present
  const columnNames =
    opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
  for (const colName of columnNames) {
    if (
      data.length !== 0 &&
      !Object.prototype.hasOwnProperty.call(data[0], colName)
    ) {
      // The field is present in the schema, but not in the data, skip it
      continue;
    }
    // Extract a single column from the records (transpose from row-major to col-major)
    let values = data.map((datum) => datum[colName]);

    // By default (type === undefined) arrow will infer the type from the JS type
    let type;
    if (opt.schema !== undefined) {
      // If there is a schema provided, then use that for the type instead
      type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
      if (DataType.isInt(type) && type.bitWidth === 64) {
        // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
        values = values.map((v) => {
          if (v === null) {
            return v;
          }
          return BigInt(v);
        });
      }
    } else {
      // Otherwise, check to see if this column is one of the vector columns
      // defined by opt.vectorColumns and, if so, use the fixed size list type
      const vectorColumnOptions = opt.vectorColumns[colName];
      if (vectorColumnOptions !== undefined) {
        type = newVectorType(values[0].length, vectorColumnOptions.type);
      }
    }

    try {
      // Convert an Array of JS values to an arrow vector
      columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
    } catch (error: unknown) {
      // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
      throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
    }
  }

  if (opt.schema != null) {
    // `new ArrowTable(columns)` infers a schema which may sometimes have
    // incorrect nullability (it assumes nullable=true if there are 0 rows)
    //
    // `new ArrowTable(schema, columns)` will also fail because it will create a
    // batch with an inferred schema and then complain that the batch schema
    // does not match the provided schema.
    //
    // To work around this we first create a table with the wrong schema and
    // then patch the schema of the batches so we can use
    // `new ArrowTable(schema, batches)` which does not do any schema inference
    const firstTable = new ArrowTable(columns);
    const batchesFixed = firstTable.batches.map(
      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
      (batch) => new RecordBatch(opt.schema!, batch.data)
    );
    return new ArrowTable(opt.schema, batchesFixed);
  } else {
    return new ArrowTable(columns);
  }
}
/**
 * Create an empty Arrow table with the provided schema
 */
export function makeEmptyTable(schema: Schema): ArrowTable {
  return makeArrowTable([], { schema });
}

// Helper function to convert Array<Array<any>> to a variable sized list array
function makeListVector(lists: any[][]): Vector<any> {
  if (lists.length === 0 || lists[0].length === 0) {
    throw Error("Cannot infer list vector from empty array or empty list");
  }
  const sampleList = lists[0];
  let inferredType;
  try {
    const sampleVector = makeVector(sampleList);
    inferredType = sampleVector.type;
  } catch (error: unknown) {
    // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
    throw Error(`Cannot infer list vector. Cannot infer inner type: ${error}`);
  }

  const listBuilder = makeBuilder({
    type: new List(new Field("item", inferredType, true))
  });
  for (const list of lists) {
    listBuilder.append(list);
  }
  return listBuilder.finish().toVector();
}

// Helper function to convert an Array of JS values to an Arrow Vector
function makeVector(
  values: any[],
  type?: DataType,
  stringAsDictionary?: boolean
): Vector<any> {
  if (type !== undefined) {
    // No need for inference, let Arrow create it
    return vectorFromArray(values, type);
  }
  if (values.length === 0) {
    throw Error(
      "makeVector requires at least one value or the type must be specified"
    );
  }
  const sampleValue = values.find((val) => val !== null && val !== undefined);
  if (sampleValue === undefined) {
    throw Error(
      "makeVector cannot infer the type if all values are null or undefined"
    );
  }
  if (Array.isArray(sampleValue)) {
    // Default Arrow inference doesn't handle list types
    return makeListVector(values);
  } else if (Buffer.isBuffer(sampleValue)) {
    // Default Arrow inference doesn't handle Buffer
    return vectorFromArray(values, new Binary());
  } else if (
    !(stringAsDictionary ?? false) &&
    (typeof sampleValue === "string" || sampleValue instanceof String)
  ) {
    // If the type is string then don't use Arrow's default inference unless dictionaries are requested
    // because it will always use dictionary encoding for strings
    return vectorFromArray(values, new Utf8());
  } else {
    // Convert a JS array of values to an arrow vector
    return vectorFromArray(values);
  }
}
async function applyEmbeddings<T>(
  table: ArrowTable,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
): Promise<ArrowTable> {
  if (embeddings == null) {
    return table;
  }
  if (schema !== undefined && schema !== null) {
    schema = sanitizeSchema(schema);
  }

  // Convert from ArrowTable to Record<String, Vector>
  const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
    const name = table.schema.fields[idx].name;
    // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
    const vec = table.getChildAt(idx)!;
    return [name, vec];
  });
  const newColumns = Object.fromEntries(colEntries);

  const sourceColumn = newColumns[embeddings.sourceColumn];
  const destColumn = embeddings.destColumn ?? "vector";
  const innerDestType = embeddings.embeddingDataType ?? new Float32();
  if (sourceColumn === undefined) {
    throw new Error(
      `Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`
    );
  }

  if (table.numRows === 0) {
    if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
      // We have an empty table and it already has the embedding column so no work needs to be done
      // Note: we don't return an error like we do below because this is a common occurrence. For example,
      // if we call convertToTable with 0 records and a schema that includes the embedding
      return table;
    }
    if (embeddings.embeddingDimension !== undefined) {
      const destType = newVectorType(
        embeddings.embeddingDimension,
        innerDestType
      );
      newColumns[destColumn] = makeVector([], destType);
    } else if (schema != null) {
      const destField = schema.fields.find((f) => f.name === destColumn);
      if (destField != null) {
        newColumns[destColumn] = makeVector([], destField.type);
      } else {
        throw new Error(
          `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`
        );
      }
    } else {
      throw new Error(
        "Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`"
      );
    }
  } else {
    if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
      throw new Error(
        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`
      );
    }
    if (table.batches.length > 1) {
      throw new Error(
        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch"
      );
    }
    const values = sourceColumn.toArray();
    const vectors = await embeddings.embed(values as T[]);
    if (vectors.length !== values.length) {
      throw new Error(
        "Embedding function did not return an embedding for each input element"
      );
    }
    const destType = newVectorType(vectors[0].length, innerDestType);
    newColumns[destColumn] = makeVector(vectors, destType);
  }

  const newTable = new ArrowTable(newColumns);
  if (schema != null) {
    if (schema.fields.find((f) => f.name === destColumn) === undefined) {
      throw new Error(
        `When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`
      );
    }
    return alignTable(newTable, schema);
  }
  return newTable;
}
/*
 * Convert an Array of records into an Arrow Table, optionally applying an
 * embeddings function to it.
 *
 * This function calls `makeArrowTable` first to create the Arrow Table.
 * Any provided `makeTableOptions` (e.g. a schema) will be passed on to
 * that call.
 *
 * The embedding function will be passed a column of values (based on the
 * `sourceColumn` of the embedding function) and expects to receive back
 * number[][] which will be converted into a fixed size list column. By
 * default this will be a fixed size list of Float32 but that can be
 * customized by the `embeddingDataType` property of the embedding function.
 *
 * If a schema is provided in `makeTableOptions` then it should include the
 * embedding columns. If no schema is provided then embedding columns will
 * be placed at the end of the table, after all of the input columns.
 */
export async function convertToTable<T>(
  data: Array<Record<string, unknown>>,
  embeddings?: EmbeddingFunction<T>,
  makeTableOptions?: Partial<MakeArrowTableOptions>
): Promise<ArrowTable> {
  const table = makeArrowTable(data, makeTableOptions);
  return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
}
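A hedged sketch of calling `convertToTable` with an embedding function; the `embeddingFn` object below is hypothetical, constructed only to satisfy the `EmbeddingFunction` interface, and is not part of this diff:

```ts
const embeddingFn = {
  sourceColumn: 'text',
  // A trivial stand-in embedder: every input maps to a 3-dim zero vector.
  embed: async (values: string[]) => values.map(() => [0, 0, 0])
}
const table = await convertToTable(
  [{ text: 'hello' }, { text: 'world' }],
  embeddingFn
)
// The result keeps the original "text" column and appends a fixed size
// list "vector" column holding the embeddings.
```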
// Creates the Arrow Type for a Vector column with dimension `dim`
function newVectorType<T extends Float>(
  dim: number,
  innerType: T
): FixedSizeList<T> {
  // Somewhere we always default to have the elements nullable, so we need to set it to true
  // otherwise we often get schema mismatches because the stored data always has schema with nullable elements
  const children = new Field<T>("item", innerType, true);
  return new FixedSizeList(dim, children);
}
/**
 * Serialize an Array of records into a buffer using the Arrow IPC File serialization
 *
 * This function will call `convertToTable` and pass on `embeddings` and `schema`
 *
 * `schema` is required if data is empty
 */
export async function fromRecordsToBuffer<T>(
  data: Array<Record<string, unknown>>,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
): Promise<Buffer> {
  if (schema !== undefined && schema !== null) {
    schema = sanitizeSchema(schema);
  }
  const table = await convertToTable(data, embeddings, { schema, embeddings });
  const writer = RecordBatchFileWriter.writeAll(table);
  return Buffer.from(await writer.toUint8Array());
}

/**
 * Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
 *
 * This function will call `convertToTable` and pass on `embeddings` and `schema`
 *
 * `schema` is required if data is empty
 */
export async function fromRecordsToStreamBuffer<T>(
  data: Array<Record<string, unknown>>,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
): Promise<Buffer> {
  if (schema !== null && schema !== undefined) {
    schema = sanitizeSchema(schema);
  }
  const table = await convertToTable(data, embeddings, { schema });
  const writer = RecordBatchStreamWriter.writeAll(table);
  return Buffer.from(await writer.toUint8Array());
}

/**
 * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
 *
 * This function will apply `embeddings` to the table in a manner similar to
 * `convertToTable`.
 *
 * `schema` is required if the table is empty
 */
export async function fromTableToBuffer<T>(
  table: ArrowTable,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
): Promise<Buffer> {
  if (schema !== null && schema !== undefined) {
    schema = sanitizeSchema(schema);
  }
  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
  const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings);
  return Buffer.from(await writer.toUint8Array());
}

/**
 * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
 *
 * This function will apply `embeddings` to the table in a manner similar to
 * `convertToTable`.
 *
 * `schema` is required if the table is empty
 */
export async function fromTableToStreamBuffer<T>(
  table: ArrowTable,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
): Promise<Buffer> {
  if (schema !== null && schema !== undefined) {
    schema = sanitizeSchema(schema);
  }
  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
  const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
  return Buffer.from(await writer.toUint8Array());
}
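A hedged sketch of round-tripping records through the IPC file format, pairing the helper above with `tableFromIPC` from apache-arrow:

```ts
import { tableFromIPC } from 'apache-arrow'

// Serialize two records to an IPC file buffer, then parse it back.
const buf = await fromRecordsToBuffer([{ a: 1 }, { a: 2 }])
const roundTripped = tableFromIPC(buf)
console.log(roundTripped.numRows) // 2
```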
function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
  const alignedChildren = [];
  for (const field of schema.fields) {
    const indexInBatch = batch.schema.fields?.findIndex(
      (f) => f.name === field.name
    );
    if (indexInBatch < 0) {
      throw new Error(
        `The column ${field.name} was not found in the Arrow Table`
      );
    }
    alignedChildren.push(batch.data.children[indexInBatch]);
  }
  const newData = makeData({
    type: new Struct(schema.fields),
    length: batch.numRows,
    nullCount: batch.nullCount,
    children: alignedChildren
  });
  return new RecordBatch(schema, newData);
}

function alignTable(table: ArrowTable, schema: Schema): ArrowTable {
  const alignedBatches = table.batches.map((batch) =>
    alignBatch(batch, schema)
  );
  return new ArrowTable(schema, alignedBatches);
}

// Creates an empty Arrow Table
export function createEmptyTable(schema: Schema): ArrowTable {
  return new ArrowTable(sanitizeSchema(schema));
}

function validateSchemaEmbeddings(
  schema: Schema<any>,
  data: Array<Record<string, unknown>>,
  embeddings: EmbeddingFunction<any> | undefined
) {
  const fields = [];
  const missingEmbeddingFields = [];

  // First we check if the field is a `FixedSizeList`.
  // Then we check if the data contains the field;
  // if it does not, we add it to the list of missing embedding fields.
  // Finally, we check if those missing embedding fields are `this._embeddings`;
  // if they are not, we throw an error.
  for (const field of schema.fields) {
    if (field.type instanceof FixedSizeList) {
      if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
        missingEmbeddingFields.push(field);
      } else {
        fields.push(field);
      }
    } else {
      fields.push(field);
    }
  }

  if (missingEmbeddingFields.length > 0 && embeddings === undefined) {
    throw new Error(
      `Table has embeddings: "${missingEmbeddingFields
        .map((f) => f.name)
        .join(",")}", but no embedding function was provided`
    );
  }

  return new Schema(fields, schema.metadata);
}
@@ -1,68 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { type Float } from 'apache-arrow'

/**
 * An embedding function that automatically creates vector representation for a given column.
 */
export interface EmbeddingFunction<T> {
  /**
   * The name of the column that will be used as input for the Embedding Function.
   */
  sourceColumn: string

  /**
   * The data type of the embedding
   *
   * The embedding function should return `number`. This will be converted into
   * an Arrow float array. By default this will be Float32 but this property can
   * be used to control the conversion.
   */
  embeddingDataType?: Float

  /**
   * The dimension of the embedding
   *
   * This is optional; normally it can be determined by looking at the results of
   * `embed`. If this is not specified, and there is an attempt to apply the embedding
   * to an empty table, then that process will fail.
   */
  embeddingDimension?: number

  /**
   * The name of the column that will contain the embedding
   *
   * By default this is "vector"
   */
  destColumn?: string

  /**
   * Should the source column be excluded from the resulting table
   *
   * By default the source column is included. Set this to true and
   * only the embedding will be stored.
   */
  excludeSource?: boolean

  /**
   * Creates a vector representation for the given values.
   */
  embed: (data: T[]) => Promise<number[][]>
}

export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
  return typeof value.sourceColumn === 'string' &&
    typeof value.embed === 'function'
}
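For reference, only `sourceColumn` and `embed` are required by the interface above; the other members are optional. A minimal sketch of a conforming implementation (a hypothetical deterministic embedder, useful for tests, not part of this diff):

```ts
import { type EmbeddingFunction } from './embedding_function'

// Maps every input string to a fixed 4-dimensional pseudo-vector.
class FakeEmbeddingFunction implements EmbeddingFunction<string> {
  sourceColumn = 'text'
  embeddingDimension = 4

  async embed (data: string[]): Promise<number[][]> {
    return data.map(s => {
      const v = [0, 0, 0, 0]
      for (let i = 0; i < s.length; i++) {
        v[i % 4] += s.charCodeAt(i) / 255
      }
      return v
    })
  }
}
```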
@@ -1,57 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { type EmbeddingFunction } from '../index'
import type OpenAI from 'openai'

export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
  private readonly _openai: OpenAI
  private readonly _modelName: string

  constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
    /**
     * @type {import("openai").default}
     */
    let Openai
    try {
      // eslint-disable-next-line @typescript-eslint/no-var-requires
      Openai = require('openai')
    } catch {
      throw new Error('please install openai@^4.24.1 using npm install openai')
    }

    this.sourceColumn = sourceColumn
    const configuration = {
      apiKey: openAIKey
    }

    this._openai = new Openai(configuration)
    this._modelName = modelName
  }

  async embed (data: string[]): Promise<number[][]> {
    const response = await this._openai.embeddings.create({
      model: this._modelName,
      input: data
    })

    const embeddings: number[][] = []
    for (let i = 0; i < response.data.length; i++) {
      embeddings.push(response.data[i].embedding)
    }
    return embeddings
  }

  sourceColumn: string
}
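A hedged usage sketch of the class above; reading the key from an environment variable is an assumption for illustration, and the default model is `text-embedding-ada-002` as set in the constructor:

```ts
const embeddings = new OpenAIEmbeddingFunction('text', process.env.OPENAI_API_KEY ?? '')
const vectors = await embeddings.embed(['hello world'])
// Each element of `vectors` is one embedding; the dimension is a property
// of the model (1536 for text-embedding-ada-002), not of this class.
console.log(vectors[0].length)
```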
node/src/index.ts: 1399 lines removed (diff suppressed because it is too large)
@@ -1,155 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { describe } from 'mocha'
import * as chai from 'chai'
import { assert } from 'chai'
import * as chaiAsPromised from 'chai-as-promised'
import { v4 as uuidv4 } from 'uuid'

import * as lancedb from '../index'
import { tmpdir } from 'os'
import * as fs from 'fs'
import * as path from 'path'

chai.use(chaiAsPromised)

describe('LanceDB AWS Integration test', function () {
  it('s3+ddb schema is processed correctly', async function () {
    this.timeout(15000)

    // WARNING: specifying engine is NOT a publicly supported feature in lancedb yet
    // THE API WILL CHANGE
    const conn = await lancedb.connect('s3://lancedb-integtest?engine=ddb&ddbTableName=lancedb-integtest')
    const data = [{ vector: Array(128).fill(1.0) }]

    const tableName = uuidv4()
    let table = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })

    const futs = [table.add(data), table.add(data), table.add(data), table.add(data), table.add(data)]
    await Promise.allSettled(futs)

    table = await conn.openTable(tableName)
    assert.equal(await table.countRows(), 6)
  })
})

describe('LanceDB Mirrored Store Integration test', function () {
  it('s3://...?mirroredStore=... param is processed correctly', async function () {
    this.timeout(600000)

    const dir = await fs.promises.mkdtemp(path.join(tmpdir(), 'lancedb-mirror-'))
    console.log(dir)
    const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
    const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 1 }))
    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 2 }))
    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 3 }))

    const tableName = uuidv4()

    // try create table and check if it's mirrored
    const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })

    const mirroredPath = path.join(dir, `${tableName}.lance`)

    const files = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
    // there should be three dirs
    assert.equal(files.length, 3, 'files after table creation')
    assert.isTrue(files[0].isDirectory())
    assert.isTrue(files[1].isDirectory())

    const transactionFiles = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
    assert.equal(transactionFiles.length, 1, 'transactionFiles after table creation')
    assert.isTrue(transactionFiles[0].name.endsWith('.txn'))

    const versionFiles = await fs.promises.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true })
    assert.equal(versionFiles.length, 1, 'versionFiles after table creation')
    assert.isTrue(versionFiles[0].name.endsWith('.manifest'))

    const dataFiles = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
    assert.equal(dataFiles.length, 1, 'dataFiles after table creation')
    assert.isTrue(dataFiles[0].name.endsWith('.lance'))

    // try create index and check if it's mirrored
    await t.createIndex({ column: 'vector', type: 'ivf_pq' })

    const filesAfterIndex = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
    // there should be four dirs
    assert.equal(filesAfterIndex.length, 4, 'filesAfterIndex')
    assert.isTrue(filesAfterIndex[0].isDirectory())
    assert.isTrue(filesAfterIndex[1].isDirectory())
    assert.isTrue(filesAfterIndex[2].isDirectory())

    // Two TXs now
    const transactionFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
    assert.equal(transactionFilesAfterIndex.length, 2, 'transactionFilesAfterIndex')
    assert.isTrue(transactionFilesAfterIndex[0].name.endsWith('.txn'))
    assert.isTrue(transactionFilesAfterIndex[1].name.endsWith('.txn'))

    const dataFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
    assert.equal(dataFilesAfterIndex.length, 1, 'dataFilesAfterIndex')
    assert.isTrue(dataFilesAfterIndex[0].name.endsWith('.lance'))

    const indicesFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
    assert.equal(indicesFiles.length, 1, 'indicesFiles')
    assert.isTrue(indicesFiles[0].isDirectory())

    const indexFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFiles[0].name), { withFileTypes: true })
    console.log(`DEBUG indexFiles in ${indicesFiles[0].name}:`, indexFiles.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
    assert.equal(indexFiles.length, 2, 'indexFiles')
    const fileNames = indexFiles.map(f => f.name).sort()
    assert.isTrue(fileNames.includes('auxiliary.idx'), 'auxiliary.idx should be present')
    assert.isTrue(fileNames.includes('index.idx'), 'index.idx should be present')
    assert.isTrue(indexFiles.every(f => f.isFile()), 'all index files should be files')

    // try delete and check if it's mirrored
    await t.delete('id = 0')

    const filesAfterDelete = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
    // there should be five dirs
    assert.equal(filesAfterDelete.length, 5, 'filesAfterDelete')
    assert.isTrue(filesAfterDelete[0].isDirectory())
    assert.isTrue(filesAfterDelete[1].isDirectory())
    assert.isTrue(filesAfterDelete[2].isDirectory())
    assert.isTrue(filesAfterDelete[3].isDirectory())
    assert.isTrue(filesAfterDelete[4].isDirectory())

    // Three TXs now
    const transactionFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
    assert.equal(transactionFilesAfterDelete.length, 3, 'transactionFilesAfterDelete')
    assert.isTrue(transactionFilesAfterDelete[0].name.endsWith('.txn'))
    assert.isTrue(transactionFilesAfterDelete[1].name.endsWith('.txn'))

    const dataFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
    assert.equal(dataFilesAfterDelete.length, 1, 'dataFilesAfterDelete')
    assert.isTrue(dataFilesAfterDelete[0].name.endsWith('.lance'))

    const indicesFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
    assert.equal(indicesFilesAfterDelete.length, 1, 'indicesFilesAfterDelete')
    assert.isTrue(indicesFilesAfterDelete[0].isDirectory())

    const indexFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFilesAfterDelete[0].name), { withFileTypes: true })
    console.log(`DEBUG indexFilesAfterDelete in ${indicesFilesAfterDelete[0].name}:`, indexFilesAfterDelete.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
    assert.equal(indexFilesAfterDelete.length, 2, 'indexFilesAfterDelete')
    const fileNamesAfterDelete = indexFilesAfterDelete.map(f => f.name).sort()
    assert.isTrue(fileNamesAfterDelete.includes('auxiliary.idx'), 'auxiliary.idx should be present after delete')
    assert.isTrue(fileNamesAfterDelete.includes('index.idx'), 'index.idx should be present after delete')
    assert.isTrue(indexFilesAfterDelete.every(f => f.isFile()), 'all index files should be files after delete')

    const deletionFiles = await fs.promises.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true })
    assert.equal(deletionFiles.length, 1, 'deletionFiles')
    assert.isTrue(deletionFiles[0].name.endsWith('.arrow'))
  })
})
@@ -1,58 +0,0 @@
// Copyright 2024 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * Middleware for Remote LanceDB Connection or Table
 */
export interface HttpMiddleware {
  /**
   * A callback that can be used to instrument the behavior of http requests to remote
   * tables. It can be used to add headers, modify the request, or even short-circuit
   * the request and return a response without making the request to the remote endpoint.
   * It can also be used to modify the response from the remote endpoint.
   *
   * @param {RemoteRequest} req - Request to the remote endpoint
   * @param {onRemoteRequestNext} next - Callback to advance the middleware chain
   */
  onRemoteRequest(
    req: RemoteRequest,
    next: (req: RemoteRequest) => Promise<RemoteResponse>,
  ): Promise<RemoteResponse>
};

export enum Method {
  GET,
  POST
}

/**
 * A LanceDB Remote HTTP Request
 */
export interface RemoteRequest {
  uri: string
  method: Method
  headers: Map<string, string>
  params?: Map<string, string>
  body?: any
}

/**
 * A LanceDB Remote HTTP Response
 */
export interface RemoteResponse {
  status: number
  statusText: string
  headers: Map<string, string>
  body: () => Promise<any>
}
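A sketch of a conforming middleware built against the interfaces above; it injects a hypothetical tracing header and then advances the chain:

```ts
const tracingMiddleware: HttpMiddleware = {
  async onRemoteRequest (req, next) {
    // Add a request id header before handing off to the next middleware
    // (the header name is an illustrative assumption, not a LanceDB API).
    req.headers.set('x-request-id', Math.random().toString(36).slice(2))
    return await next(req)
  }
}
```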
@@ -1,163 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Vector, tableFromIPC } from 'apache-arrow'
import { type EmbeddingFunction } from './embedding/embedding_function'
import { type MetricType } from '.'

// eslint-disable-next-line @typescript-eslint/no-var-requires
const { tableSearch } = require('../native.js')

/**
 * A builder for nearest neighbor queries for LanceDB.
 */
export class Query<T = number[]> {
  private readonly _query?: T
  private readonly _tbl?: any
  private _queryVector?: number[]
  private _limit?: number
  private _refineFactor?: number
  private _nprobes: number
  private _select?: string[]
  private _filter?: string
  private _metricType?: MetricType
  private _prefilter: boolean
  private _fastSearch: boolean
  protected readonly _embeddings?: EmbeddingFunction<T>

  constructor (query?: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
    this._tbl = tbl
    this._query = query
    this._limit = 10
    this._nprobes = 20
    this._refineFactor = undefined
    this._select = undefined
    this._filter = undefined
    this._metricType = undefined
    this._embeddings = embeddings
    this._prefilter = false
    this._fastSearch = false
  }

  /**
   * Sets the number of results that will be returned.
   * The default value is 10.
   * @param value number of results
   */
  limit (value: number): Query<T> {
    this._limit = value
    return this
  }

  /**
   * Refine the results by reading extra elements and re-ranking them in memory.
   * @param value refine factor to use in this query.
   */
  refineFactor (value: number): Query<T> {
    this._refineFactor = value
    return this
  }

  /**
   * The number of probes used. A higher number makes search more accurate but also slower.
   * @param value The number of probes used.
   */
  nprobes (value: number): Query<T> {
    this._nprobes = value
    return this
  }

  /**
   * A filter statement to be applied to this query.
   * @param value A filter in the same format used by a sql WHERE clause.
   */
  filter (value: string): Query<T> {
    this._filter = value
    return this
  }

  where = this.filter

  /** Return only the specified columns.
   *
   * @param value Only select the specified columns. If not specified, all columns will be returned.
   */
  select (value: string[]): Query<T> {
    this._select = value
    return this
  }

  /**
   * The MetricType used for this Query.
   * @param value The metric to use. @see MetricType for the different options
   */
  metricType (value: MetricType): Query<T> {
    this._metricType = value
    return this
  }

  prefilter (value: boolean): Query<T> {
    this._prefilter = value
    return this
  }

  /**
   * Skip searching un-indexed data. This can make search faster, but will miss
   * any data that is not yet indexed.
   */
  fastSearch (value: boolean): Query<T> {
    this._fastSearch = value
    return this
  }

  /**
   * Execute the query and return the results as an Array of Objects
   */
  async execute<T = Record<string, unknown>> (): Promise<T[]> {
    if (this._query !== undefined) {
      if (this._embeddings !== undefined) {
        this._queryVector = (await this._embeddings.embed([this._query]))[0]
      } else {
        this._queryVector = this._query as number[]
      }
    }

    const isElectron = this.isElectron()
    const buffer = await tableSearch.call(this._tbl, this, isElectron)
    const data = tableFromIPC(buffer)

    return data.toArray().map((entry: Record<string, unknown>) => {
      const newObject: Record<string, unknown> = {}
      Object.keys(entry).forEach((key: string) => {
        if (entry[key] instanceof Vector) {
          // toJSON() returns the f16 array correctly
          newObject[key] = (entry[key] as any).toJSON()
        } else {
          newObject[key] = entry[key] as any
        }
      })
      return newObject as unknown as T
    })
  }

  // See https://github.com/electron/electron/issues/2288
  private isElectron (): boolean {
    try {
      // eslint-disable-next-line no-prototype-builtins
      return (process?.versions?.hasOwnProperty('electron') || navigator?.userAgent?.toLowerCase()?.includes(' electron'))
    } catch (e) {
      return false
    }
  }
}
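A hedged sketch of the fluent builder defined above; `tbl` is assumed to be an open table handle from this package whose `search` method returns a `Query`:

```ts
const results = await tbl.search([0.1, 0.3, 0.5])
  .limit(20)          // return at most 20 rows (default is 10)
  .nprobes(40)        // trade speed for accuracy (default is 20)
  .filter('id > 10')  // SQL WHERE-style predicate
  .select(['id', 'text'])
  .execute()
```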
@@ -1,302 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import axios, { type AxiosError, type AxiosResponse, type ResponseType } from 'axios'

import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'

import { type RemoteResponse, type RemoteRequest, Method } from '../middleware'
import type { MetricType } from '..'

interface HttpLancedbClientMiddleware {
  onRemoteRequest(
    req: RemoteRequest,
    next: (req: RemoteRequest) => Promise<RemoteResponse>,
  ): Promise<RemoteResponse>
}

/**
 * Invoke the middleware chain and at the end call the remote endpoint
 */
async function callWithMiddlewares (
  req: RemoteRequest,
  middlewares: HttpLancedbClientMiddleware[],
  opts?: MiddlewareInvocationOptions
): Promise<RemoteResponse> {
  async function call (
    i: number,
    req: RemoteRequest
  ): Promise<RemoteResponse> {
    // if we have reached the end of the middleware chain, make the request
    if (i > middlewares.length) {
      const headers = Object.fromEntries(req.headers.entries())
      const params = Object.fromEntries(req.params?.entries() ?? [])
      const timeout = opts?.timeout
      let res
      if (req.method === Method.POST) {
        res = await axios.post(
          req.uri,
          req.body,
          {
            headers,
            params,
            timeout,
            responseType: opts?.responseType
          }
        )
      } else {
        res = await axios.get(
          req.uri,
          {
            headers,
            params,
            timeout
          }
        )
      }

      return toLanceRes(res)
    }

    // call next middleware in chain
    return await middlewares[i - 1].onRemoteRequest(
      req,
      async (req) => {
        return await call(i + 1, req)
      }
    )
  }

  return await call(1, req)
}

interface MiddlewareInvocationOptions {
  responseType?: ResponseType
  timeout?: number
}

/**
 * Marshall the library response into a LanceDB response
 */
function toLanceRes (res: AxiosResponse): RemoteResponse {
  const headers = new Map()
  for (const h in res.headers) {
    headers.set(h, res.headers[h])
  }

  return {
    status: res.status,
    statusText: res.statusText,
    headers,
    body: async () => {
      return res.data
    }
  }
}

async function decodeErrorData (
  res: RemoteResponse,
  responseType?: ResponseType
): Promise<string> {
  const errorData = await res.body()
  if (responseType === 'arraybuffer') {
    return new TextDecoder().decode(errorData)
  } else {
    if (typeof errorData === 'object') {
      return JSON.stringify(errorData)
    }

    return errorData
  }
}

export class HttpLancedbClient {
  private readonly _url: string
  private readonly _apiKey: () => string
  private readonly _middlewares: HttpLancedbClientMiddleware[]
  private readonly _timeout: number | undefined

  public constructor (
    url: string,
    apiKey: string,
    timeout?: number,
    private readonly _dbName?: string
  ) {
    this._url = url
    this._apiKey = () => apiKey
    this._middlewares = []
    this._timeout = timeout
  }

  get uri (): string {
    return this._url
  }

  public async search (
    tableName: string,
    vector: number[],
    k: number,
    nprobes: number,
    prefilter: boolean,
    refineFactor?: number,
    columns?: string[],
    filter?: string,
    metricType?: MetricType,
    fastSearch?: boolean
  ): Promise<ArrowTable<any>> {
    const result = await this.post(
      `/v1/table/${tableName}/query/`,
      {
        vector,
        k,
        nprobes,
        refine_factor: refineFactor,
        columns,
        filter,
        prefilter,
        metric: metricType,
        fast_search: fastSearch
      },
      undefined,
      undefined,
      'arraybuffer'
    )
    const table = tableFromIPC(await result.body())
    return table
  }

  /**
   * Send a GET request.
   */
  public async get (path: string, params?: Record<string, string>): Promise<RemoteResponse> {
    const req = {
      uri: `${this._url}${path}`,
      method: Method.GET,
      headers: new Map(Object.entries({
        'Content-Type': 'application/json',
        'x-api-key': this._apiKey(),
        ...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
      })),
      params: new Map(Object.entries(params ?? {}))
    }

    let response
    try {
      response = await callWithMiddlewares(req, this._middlewares)
      return response
    } catch (err: any) {
      console.error(serializeErrorAsJson(err))
      if (err.response === undefined) {
        throw new Error(`Network Error: ${err.message as string}`)
      }

      response = toLanceRes(err.response)
    }

    if (response.status !== 200) {
      const errorData = await decodeErrorData(response)
      throw new Error(
        `Server Error, status: ${response.status}, ` +
        `message: ${response.statusText}: ${errorData}`
      )
    }

    return response
  }

  /**
   * Send a POST request.
   */
  public async post (
    path: string,
    data?: any,
    params?: Record<string, string>,
    content?: string | undefined,
    responseType?: ResponseType | undefined
  ): Promise<RemoteResponse> {
    const req = {
      uri: `${this._url}${path}`,
      method: Method.POST,
      headers: new Map(Object.entries({
        'Content-Type': content ?? 'application/json',
        'x-api-key': this._apiKey(),
        ...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
      })),
      params: new Map(Object.entries(params ?? {})),
      body: data
    }

    let response
    try {
      response = await callWithMiddlewares(req, this._middlewares, {
        responseType,
        timeout: this._timeout
      })
      // return response
    } catch (err: any) {
      console.error(serializeErrorAsJson(err))

      if (err.response === undefined) {
        throw new Error(`Network Error: ${err.message as string}`)
      }
      response = toLanceRes(err.response)
    }

    if (response.status !== 200) {
      const errorData = await decodeErrorData(response, responseType)
      throw new Error(
        `Server Error, status: ${response.status}, ` +
        `message: ${response.statusText}: ${errorData}`
      )
    }

    return response
  }

  /**
   * Instrument this client with middleware
   * @param mw - The middleware that instruments the client
   * @returns - an instance of this client instrumented with the middleware
|
|
||||||
*/
|
|
||||||
public withMiddleware (mw: HttpLancedbClientMiddleware): HttpLancedbClient {
|
|
||||||
const wrapped = this.clone()
|
|
||||||
wrapped._middlewares.push(mw)
|
|
||||||
return wrapped
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Make a clone of this client
|
|
||||||
*/
|
|
||||||
private clone (): HttpLancedbClient {
|
|
||||||
const clone = new HttpLancedbClient(this._url, this._apiKey(), this._timeout, this._dbName)
|
|
||||||
for (const mw of this._middlewares) {
|
|
||||||
clone._middlewares.push(mw)
|
|
||||||
}
|
|
||||||
return clone
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function serializeErrorAsJson(err: AxiosError) {
|
|
||||||
const error = JSON.parse(JSON.stringify(err, Object.getOwnPropertyNames(err)))
|
|
||||||
error.response = err.response != null
|
|
||||||
? JSON.parse(JSON.stringify(
|
|
||||||
err.response,
|
|
||||||
// config contains the request data, too noisy
|
|
||||||
Object.getOwnPropertyNames(err.response).filter(prop => prop !== 'config')
|
|
||||||
))
|
|
||||||
: null
|
|
||||||
return JSON.stringify({ error })
|
|
||||||
}
|
|
||||||
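For context on the chain above: `callWithMiddlewares` walks the middleware list recursively, handing each middleware the request plus a `next` continuation, and falls through to axios once the index runs past the end of the list. Below is a minimal sketch of a conforming middleware; the `onRemoteRequest(req, next)` shape is taken from the call above, but since the concrete interface import is not shown in this file, the object is typed structurally here and the names are illustrative.

```ts
// Hypothetical logging middleware, typed structurally against the
// onRemoteRequest(req, next) shape used by callWithMiddlewares above.
const loggingMiddleware = {
  async onRemoteRequest (
    req: any,
    next: (req: any) => Promise<any>
  ): Promise<any> {
    console.log(`-> ${req.method} ${req.uri}`)
    const res = await next(req) // continue down the chain toward axios
    console.log(`<- ${res.status} ${res.statusText}`)
    return res
  }
}

// withMiddleware returns a clone, so the original client is untouched:
// const instrumented = client.withMiddleware(loggingMiddleware)
```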
@@ -1,567 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import {
  type EmbeddingFunction,
  type Table,
  type VectorIndexParams,
  type Connection,
  type ConnectionOptions,
  type CreateTableOptions,
  type VectorIndex,
  type WriteOptions,
  type IndexStats,
  type UpdateArgs,
  type UpdateSqlArgs,
  makeArrowTable,
  type MergeInsertArgs,
  type ColumnAlteration
} from '../index'
import { Query } from '../query'

import { Vector, Table as ArrowTable } from 'apache-arrow'
import { HttpLancedbClient } from './client'
import { isEmbeddingFunction } from '../embedding/embedding_function'
import {
  createEmptyTable,
  fromRecordsToStreamBuffer,
  fromTableToStreamBuffer
} from '../arrow'
import { toSQL, TTLCache } from '../util'
import { type HttpMiddleware } from '../middleware'

/**
 * Remote connection.
 */
export class RemoteConnection implements Connection {
  private _client: HttpLancedbClient
  private readonly _dbName: string
  private readonly _tableCache = new TTLCache(300_000)

  constructor (opts: ConnectionOptions) {
    if (!opts.uri.startsWith('db://')) {
      throw new Error(`Invalid remote DB URI: ${opts.uri}`)
    }
    if (opts.apiKey == null || opts.apiKey === '') {
      opts = Object.assign({}, opts, { apiKey: process.env.LANCEDB_API_KEY })
    }
    if (opts.apiKey === undefined || opts.region === undefined) {
      throw new Error(
        'An API key and region must be passed for remote connections. ' +
        'The API key can also be set through the LANCEDB_API_KEY env variable.')
    }

    this._dbName = opts.uri.slice('db://'.length)
    let server: string
    if (opts.hostOverride === undefined) {
      server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
    } else {
      server = opts.hostOverride
    }
    this._client = new HttpLancedbClient(
      server,
      opts.apiKey,
      opts.timeout,
      opts.hostOverride === undefined ? undefined : this._dbName
    )
  }

  get uri (): string {
    // add the db:// prefix back
    return 'db://' + this._client.uri
  }

  async tableNames (
    pageToken: string = '',
    limit: number = 10
  ): Promise<string[]> {
    const response = await this._client.get('/v1/table/', {
      limit: `${limit}`,
      page_token: pageToken
    })
    const body = await response.body()
    for (const table of body.tables) {
      this._tableCache.set(table, true)
    }
    return body.tables
  }

  async openTable (name: string): Promise<Table>
  async openTable<T>(
    name: string,
    embeddings: EmbeddingFunction<T>
  ): Promise<Table<T>>
  async openTable<T>(
    name: string,
    embeddings?: EmbeddingFunction<T>
  ): Promise<Table<T>> {
    // check if the table exists
    if (this._tableCache.get(name) === undefined) {
      await this._client.post(`/v1/table/${encodeURIComponent(name)}/describe/`)
      this._tableCache.set(name, true)
    }

    if (embeddings !== undefined) {
      return new RemoteTable(this._client, name, embeddings)
    } else {
      return new RemoteTable(this._client, name)
    }
  }

  async createTable<T>(
    nameOrOpts: string | CreateTableOptions<T>,
    data?: Array<Record<string, unknown>> | ArrowTable,
    optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>,
    opt?: WriteOptions
  ): Promise<Table<T>> {
    // Logic copied from LocalConnection; refactor these to a base class + connectionImpl pattern
    let schema
    let embeddings: undefined | EmbeddingFunction<T>
    let tableName: string
    if (typeof nameOrOpts === 'string') {
      if (
        optsOrEmbedding !== undefined &&
        isEmbeddingFunction(optsOrEmbedding)
      ) {
        embeddings = optsOrEmbedding
      }
      tableName = nameOrOpts
    } else {
      schema = nameOrOpts.schema
      embeddings = nameOrOpts.embeddingFunction
      tableName = nameOrOpts.name
      if (data === undefined) {
        data = nameOrOpts.data
      }
    }

    let buffer: Buffer

    function isEmpty (
      data: Array<Record<string, unknown>> | ArrowTable<any>
    ): boolean {
      if (data instanceof ArrowTable) {
        return data.numRows === 0
      }
      return data.length === 0
    }

    if (data === undefined || isEmpty(data)) {
      if (schema === undefined) {
        throw new Error('Either data or schema needs to be defined')
      }
      buffer = await fromTableToStreamBuffer(createEmptyTable(schema))
    } else if (data instanceof ArrowTable) {
      buffer = await fromTableToStreamBuffer(data, embeddings)
    } else {
      // data is Array<Record<...>>
      buffer = await fromRecordsToStreamBuffer(data, embeddings)
    }

    const res = await this._client.post(
      `/v1/table/${encodeURIComponent(tableName)}/create/`,
      buffer,
      undefined,
      'application/vnd.apache.arrow.stream'
    )
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${res.statusText}: ${await res.body()}`
      )
    }

    this._tableCache.set(tableName, true)
    if (embeddings === undefined) {
      return new RemoteTable(this._client, tableName)
    } else {
      return new RemoteTable(this._client, tableName, embeddings)
    }
  }

  async dropTable (name: string): Promise<void> {
    await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`)
    this._tableCache.delete(name)
  }

  withMiddleware (middleware: HttpMiddleware): Connection {
    const wrapped = this.clone()
    wrapped._client = wrapped._client.withMiddleware(middleware)
    return wrapped
  }

  private clone (): RemoteConnection {
    const clone: RemoteConnection = Object.create(RemoteConnection.prototype)
    return Object.assign(clone, this)
  }
}
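A hedged usage sketch for the class above: in the published package a `RemoteConnection` was normally obtained through the top-level `connect()` helper, but constructing one directly shows which `ConnectionOptions` fields the constructor actually reads. This assumes `uri`, `apiKey`, and `region` are the only options needed here, that the import path resolves to this module, and that the fluent `limit()` call comes from the base `Query` class; table names and vectors are illustrative.

```ts
import { RemoteConnection } from './remote'

async function demo (): Promise<void> {
  const conn = new RemoteConnection({
    uri: 'db://my-database', // must start with db://
    apiKey: process.env.LANCEDB_API_KEY ?? '',
    region: 'us-east-1' // ignored for the URL when hostOverride is set
  })

  console.log(await conn.tableNames())

  const table = await conn.openTable('my-table')
  const hits = await table.search([0.1, 0.2, 0.3]).limit(5).execute()
  console.log(hits)
}

demo().catch(console.error)
```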

export class RemoteQuery<T = number[]> extends Query<T> {
  constructor (
    query: T,
    private readonly _client: HttpLancedbClient,
    private readonly _name: string,
    embeddings?: EmbeddingFunction<T>
  ) {
    super(query, undefined, embeddings)
  }

  // TODO: refactor this to a base class + queryImpl pattern
  async execute<T = Record<string, unknown>>(): Promise<T[]> {
    const embeddings = this._embeddings
    const query = (this as any)._query
    let queryVector: number[]

    if (embeddings !== undefined) {
      queryVector = (await embeddings.embed([query]))[0]
    } else {
      queryVector = query as number[]
    }

    const data = await this._client.search(
      this._name,
      queryVector,
      (this as any)._limit,
      (this as any)._nprobes,
      (this as any)._prefilter,
      (this as any)._refineFactor,
      (this as any)._select,
      (this as any)._filter,
      (this as any)._metricType,
      (this as any)._fastSearch
    )

    return data.toArray().map((entry: Record<string, unknown>) => {
      const newObject: Record<string, unknown> = {}
      Object.keys(entry).forEach((key: string) => {
        if (entry[key] instanceof Vector) {
          newObject[key] = (entry[key] as any).toArray()
        } else {
          newObject[key] = entry[key] as any
        }
      })
      return newObject as unknown as T
    })
  }
}

// we are using extend until the next version release;
// Table and Connection have both been refactored to interfaces
export class RemoteTable<T = number[]> implements Table<T> {
  private _client: HttpLancedbClient
  private readonly _embeddings?: EmbeddingFunction<T>
  private readonly _name: string

  constructor (client: HttpLancedbClient, name: string)
  constructor (
    client: HttpLancedbClient,
    name: string,
    embeddings: EmbeddingFunction<T>
  )
  constructor (
    client: HttpLancedbClient,
    name: string,
    embeddings?: EmbeddingFunction<T>
  ) {
    this._client = client
    this._name = name
    this._embeddings = embeddings
  }

  get name (): string {
    return this._name
  }

  get schema (): Promise<any> {
    return this._client
      .post(`/v1/table/${encodeURIComponent(this._name)}/describe/`)
      .then(async (res) => {
        if (res.status !== 200) {
          throw new Error(
            `Server Error, status: ${res.status}, ` +
            // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
            `message: ${res.statusText}: ${await res.body()}`
          )
        }
        return (await res.body())?.schema
      })
  }

  search (query: T): Query<T> {
    return new RemoteQuery(query, this._client, encodeURIComponent(this._name))
  }

  filter (where: string): Query<T> {
    throw new Error('Not implemented')
  }

  async mergeInsert (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs): Promise<void> {
    let tbl: ArrowTable
    if (data instanceof ArrowTable) {
      tbl = data
    } else {
      tbl = makeArrowTable(data, await this.schema)
    }

    const queryParams: any = {
      on
    }
    if (args.whenMatchedUpdateAll !== false && args.whenMatchedUpdateAll !== null && args.whenMatchedUpdateAll !== undefined) {
      queryParams.when_matched_update_all = 'true'
      if (typeof args.whenMatchedUpdateAll === 'string') {
        queryParams.when_matched_update_all_filt = args.whenMatchedUpdateAll
      }
    } else {
      queryParams.when_matched_update_all = 'false'
    }
    if (args.whenNotMatchedInsertAll ?? false) {
      queryParams.when_not_matched_insert_all = 'true'
    } else {
      queryParams.when_not_matched_insert_all = 'false'
    }
    if (args.whenNotMatchedBySourceDelete !== false && args.whenNotMatchedBySourceDelete !== null && args.whenNotMatchedBySourceDelete !== undefined) {
      queryParams.when_not_matched_by_source_delete = 'true'
      if (typeof args.whenNotMatchedBySourceDelete === 'string') {
        queryParams.when_not_matched_by_source_delete_filt = args.whenNotMatchedBySourceDelete
      }
    } else {
      queryParams.when_not_matched_by_source_delete = 'false'
    }

    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
    const res = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/merge_insert/`,
      buffer,
      queryParams,
      'application/vnd.apache.arrow.stream'
    )
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${res.statusText}: ${await res.body()}`
      )
    }
  }

  async add (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
    let tbl: ArrowTable
    if (data instanceof ArrowTable) {
      tbl = data
    } else {
      tbl = makeArrowTable(data, await this.schema)
    }

    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
    const res = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/insert/`,
      buffer,
      {
        mode: 'append'
      },
      'application/vnd.apache.arrow.stream'
    )
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${res.statusText}: ${await res.body()}`
      )
    }
    return tbl.numRows
  }

  async overwrite (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
    let tbl: ArrowTable
    if (data instanceof ArrowTable) {
      tbl = data
    } else {
      tbl = makeArrowTable(data)
    }
    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
    const res = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/insert/`,
      buffer,
      {
        mode: 'overwrite'
      },
      'application/vnd.apache.arrow.stream'
    )
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${res.statusText}: ${await res.body()}`
      )
    }
    return tbl.numRows
  }

  async createIndex (indexParams: VectorIndexParams): Promise<void> {
    const unsupportedParams = [
      'index_name',
      'num_partitions',
      'max_iters',
      'use_opq',
      'num_sub_vectors',
      'num_bits',
      'max_opq_iters',
      'replace'
    ]
    for (const param of unsupportedParams) {
      // eslint-disable-next-line @typescript-eslint/strict-boolean-expressions
      if (indexParams[param as keyof VectorIndexParams]) {
        throw new Error(`${param} is not supported for remote connections`)
      }
    }

    const column = indexParams.column ?? 'vector'
    const indexType = 'vector'
    const metricType = indexParams.metric_type ?? 'L2'
    const indexCacheSize = indexParams.index_cache_size ?? null

    const data = {
      column,
      index_type: indexType,
      metric_type: metricType,
      index_cache_size: indexCacheSize
    }
    const res = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/create_index/`,
      data
    )
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${res.statusText}: ${await res.body()}`
      )
    }
  }

  async createScalarIndex (column: string): Promise<void> {
    const indexType = 'scalar'

    const data = {
      column,
      index_type: indexType,
      replace: true
    }
    const res = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/create_scalar_index/`,
      data
    )
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${res.statusText}: ${await res.body()}`
      )
    }
  }

  async dropIndex (index_name: string): Promise<void> {
    const res = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(index_name)}/drop/`
    )
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${res.statusText}: ${await res.body()}`
      )
    }
  }

  async countRows (filter?: string): Promise<number> {
    const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
      predicate: filter
    })
    return (await result.body())
  }

  async delete (filter: string): Promise<void> {
    await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/delete/`, {
      predicate: filter
    })
  }

  async update (args: UpdateArgs | UpdateSqlArgs): Promise<void> {
    let filter: string | null
    let updates: Record<string, string>

    if ('valuesSql' in args) {
      filter = args.where ?? null
      updates = args.valuesSql
    } else {
      filter = args.where ?? null
      updates = {}
      for (const [key, value] of Object.entries(args.values)) {
        updates[key] = toSQL(value)
      }
    }
    await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/update/`, {
      predicate: filter,
      updates: Object.entries(updates).map(([key, value]) => [key, value])
    })
  }

  async listIndices (): Promise<VectorIndex[]> {
    const results = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/index/list/`
    )
    return (await results.body()).indexes?.map((index: any) => ({
      columns: index.columns,
      name: index.index_name,
      uuid: index.index_uuid,
      status: index.status
    }))
  }

  async indexStats (indexName: string): Promise<IndexStats> {
    const results = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/index/${indexName}/stats/`
    )
    const body = await results.body()
    return {
      numIndexedRows: body?.num_indexed_rows,
      numUnindexedRows: body?.num_unindexed_rows,
      indexType: body?.index_type,
      distanceType: body?.distance_type
    }
  }

  async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
    throw new Error('Add columns is not yet supported in LanceDB Cloud.')
  }

  async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
    throw new Error('Alter columns is not yet supported in LanceDB Cloud.')
  }

  async dropColumns (columnNames: string[]): Promise<void> {
    throw new Error('Drop columns is not yet supported in LanceDB Cloud.')
  }

  withMiddleware (middleware: HttpMiddleware): Table<T> {
    const wrapped = this.clone()
    wrapped._client = wrapped._client.withMiddleware(middleware)
    return wrapped
  }

  private clone (): RemoteTable<T> {
    const clone: RemoteTable<T> = Object.create(RemoteTable.prototype)
    return Object.assign(clone, this)
  }
}
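As a hedged sketch of how the `mergeInsert` flag translation above behaves (reusing the hypothetical `conn` from the earlier connection example; the table, rows, and column names are illustrative): boolean flags become `'true'`/`'false'` query parameters, and a string passed for `whenMatchedUpdateAll` or `whenNotMatchedBySourceDelete` additionally becomes the corresponding `_filt` parameter.

```ts
const users = await conn.openTable('users')

// Rows matched on `id` are updated, unmatched source rows are inserted,
// and target-only rows are kept.
await users.mergeInsert('id', [
  { id: 1, name: 'alice' },
  { id: 2, name: 'bob' }
], {
  whenMatchedUpdateAll: true, // -> when_matched_update_all=true
  whenNotMatchedInsertAll: true, // -> when_not_matched_insert_all=true
  whenNotMatchedBySourceDelete: false // -> when_not_matched_by_source_delete=false
})

// Passing a string instead of `true` adds a conditional filter, e.g.
// whenMatchedUpdateAll: 'target.updated_at < source.updated_at'
// -> when_matched_update_all=true & when_matched_update_all_filt=<the filter>
```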
@@ -1,508 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The utilities in this file help sanitize data from the user's arrow
// library into the types expected by vectordb's arrow library. Node
// generally allows for multiple versions of the same library (and sometimes
// even multiple copies of the same version) to be installed at the same
// time. However, arrow-js uses instanceof, which expects that the input
// comes from the exact same library instance. This is not always the case,
// and so we must sanitize the input to ensure that it is compatible.

import {
  Field,
  Utf8,
  FixedSizeBinary,
  FixedSizeList,
  Schema,
  List,
  Struct,
  Float,
  Bool,
  Date_,
  Decimal,
  type DataType,
  Dictionary,
  Binary,
  Float32,
  Interval,
  Map_,
  Duration,
  Union,
  Time,
  Timestamp,
  Type,
  Null,
  Int,
  type Precision,
  type DateUnit,
  Int8,
  Int16,
  Int32,
  Int64,
  Uint8,
  Uint16,
  Uint32,
  Uint64,
  Float16,
  Float64,
  DateDay,
  DateMillisecond,
  DenseUnion,
  SparseUnion,
  TimeNanosecond,
  TimeMicrosecond,
  TimeMillisecond,
  TimeSecond,
  TimestampNanosecond,
  TimestampMicrosecond,
  TimestampMillisecond,
  TimestampSecond,
  IntervalDayTime,
  IntervalYearMonth,
  DurationNanosecond,
  DurationMicrosecond,
  DurationMillisecond,
  DurationSecond
} from "apache-arrow";
import type { IntBitWidth, TimeBitWidth } from "apache-arrow/type";

function sanitizeMetadata(
  metadataLike?: unknown
): Map<string, string> | undefined {
  if (metadataLike === undefined || metadataLike === null) {
    return undefined;
  }
  if (!(metadataLike instanceof Map)) {
    throw Error("Expected metadata, if present, to be a Map<string, string>");
  }
  for (const item of metadataLike) {
    if (typeof item[0] !== "string" || typeof item[1] !== "string") {
      throw Error(
        "Expected metadata, if present, to be a Map<string, string> but it had non-string keys or values"
      );
    }
  }
  return metadataLike as Map<string, string>;
}

function sanitizeInt(typeLike: object) {
  if (
    !("bitWidth" in typeLike) ||
    typeof typeLike.bitWidth !== "number" ||
    !("isSigned" in typeLike) ||
    typeof typeLike.isSigned !== "boolean"
  ) {
    throw Error(
      "Expected an Int Type to have a `bitWidth` and `isSigned` property"
    );
  }
  return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
}

function sanitizeFloat(typeLike: object) {
  if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
    throw Error("Expected a Float Type to have a `precision` property");
  }
  return new Float(typeLike.precision as Precision);
}

function sanitizeDecimal(typeLike: object) {
  if (
    !("scale" in typeLike) ||
    typeof typeLike.scale !== "number" ||
    !("precision" in typeLike) ||
    typeof typeLike.precision !== "number" ||
    !("bitWidth" in typeLike) ||
    typeof typeLike.bitWidth !== "number"
  ) {
    throw Error(
      "Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties"
    );
  }
  return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
}

function sanitizeDate(typeLike: object) {
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
    throw Error("Expected a Date type to have a `unit` property");
  }
  return new Date_(typeLike.unit as DateUnit);
}

function sanitizeTime(typeLike: object) {
  if (
    !("unit" in typeLike) ||
    typeof typeLike.unit !== "number" ||
    !("bitWidth" in typeLike) ||
    typeof typeLike.bitWidth !== "number"
  ) {
    throw Error(
      "Expected a Time type to have `unit` and `bitWidth` properties"
    );
  }
  return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
}

function sanitizeTimestamp(typeLike: object) {
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
    throw Error("Expected a Timestamp type to have a `unit` property");
  }
  let timezone = null;
  if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
    timezone = typeLike.timezone;
  }
  return new Timestamp(typeLike.unit, timezone);
}

function sanitizeTypedTimestamp(
  typeLike: object,
  Datatype:
    | typeof TimestampNanosecond
    | typeof TimestampMicrosecond
    | typeof TimestampMillisecond
    | typeof TimestampSecond
) {
  let timezone = null;
  if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
    timezone = typeLike.timezone;
  }
  return new Datatype(timezone);
}

function sanitizeInterval(typeLike: object) {
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
    throw Error("Expected an Interval type to have a `unit` property");
  }
  return new Interval(typeLike.unit);
}

function sanitizeList(typeLike: object) {
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a List type to have an array-like `children` property"
    );
  }
  if (typeLike.children.length !== 1) {
    throw Error("Expected a List type to have exactly one child");
  }
  return new List(sanitizeField(typeLike.children[0]));
}

function sanitizeStruct(typeLike: object) {
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a Struct type to have an array-like `children` property"
    );
  }
  return new Struct(typeLike.children.map((child) => sanitizeField(child)));
}

function sanitizeUnion(typeLike: object) {
  if (
    !("typeIds" in typeLike) ||
    !("mode" in typeLike) ||
    typeof typeLike.mode !== "number"
  ) {
    throw Error(
      "Expected a Union type to have `typeIds` and `mode` properties"
    );
  }
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a Union type to have an array-like `children` property"
    );
  }

  return new Union(
    typeLike.mode,
    typeLike.typeIds as any,
    typeLike.children.map((child) => sanitizeField(child))
  );
}

function sanitizeTypedUnion(
  typeLike: object,
  UnionType: typeof DenseUnion | typeof SparseUnion
) {
  if (!("typeIds" in typeLike)) {
    throw Error(
      "Expected a DenseUnion/SparseUnion type to have a `typeIds` property"
    );
  }
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a DenseUnion/SparseUnion type to have an array-like `children` property"
    );
  }

  return new UnionType(
    typeLike.typeIds as any,
    typeLike.children.map((child) => sanitizeField(child))
  );
}

function sanitizeFixedSizeBinary(typeLike: object) {
  if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
    throw Error(
      "Expected a FixedSizeBinary type to have a `byteWidth` property"
    );
  }
  return new FixedSizeBinary(typeLike.byteWidth);
}

function sanitizeFixedSizeList(typeLike: object) {
  if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
    throw Error("Expected a FixedSizeList type to have a `listSize` property");
  }
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a FixedSizeList type to have an array-like `children` property"
    );
  }
  if (typeLike.children.length !== 1) {
    throw Error("Expected a FixedSizeList type to have exactly one child");
  }
  return new FixedSizeList(
    typeLike.listSize,
    sanitizeField(typeLike.children[0])
  );
}

function sanitizeMap(typeLike: object) {
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a Map type to have an array-like `children` property"
    );
  }
  if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") {
    throw Error("Expected a Map type to have a `keysSorted` property");
  }
  return new Map_(
    typeLike.children.map((field) => sanitizeField(field)) as any,
    typeLike.keysSorted
  );
}

function sanitizeDuration(typeLike: object) {
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
    throw Error("Expected a Duration type to have a `unit` property");
  }
  return new Duration(typeLike.unit);
}

function sanitizeDictionary(typeLike: object) {
  if (!("id" in typeLike) || typeof typeLike.id !== "number") {
    throw Error("Expected a Dictionary type to have an `id` property");
  }
  if (!("indices" in typeLike) || typeof typeLike.indices !== "object") {
    throw Error("Expected a Dictionary type to have an `indices` property");
  }
  if (!("dictionary" in typeLike) || typeof typeLike.dictionary !== "object") {
    throw Error("Expected a Dictionary type to have a `dictionary` property");
  }
  if (!("isOrdered" in typeLike) || typeof typeLike.isOrdered !== "boolean") {
    throw Error("Expected a Dictionary type to have an `isOrdered` property");
  }
  return new Dictionary(
    sanitizeType(typeLike.dictionary),
    sanitizeType(typeLike.indices) as any,
    typeLike.id,
    typeLike.isOrdered
  );
}

function sanitizeType(typeLike: unknown): DataType<any> {
  if (typeof typeLike !== "object" || typeLike === null) {
    throw Error("Expected a Type but object was null/undefined");
  }
  if (!("typeId" in typeLike)) {
    throw Error("Expected a Type to have a `typeId` property");
  }
  let typeId: Type;
  if (typeof typeLike.typeId === "function") {
    typeId = (typeLike.typeId as () => unknown)() as Type;
  } else if (typeof typeLike.typeId === "number") {
    typeId = typeLike.typeId as Type;
  } else {
    throw Error("Type's typeId property was not a function or number");
  }

  switch (typeId) {
    case Type.NONE:
      throw Error("Received a Type with a typeId of NONE");
    case Type.Null:
      return new Null();
    case Type.Int:
      return sanitizeInt(typeLike);
    case Type.Float:
      return sanitizeFloat(typeLike);
    case Type.Binary:
      return new Binary();
    case Type.Utf8:
      return new Utf8();
    case Type.Bool:
      return new Bool();
    case Type.Decimal:
      return sanitizeDecimal(typeLike);
    case Type.Date:
      return sanitizeDate(typeLike);
    case Type.Time:
      return sanitizeTime(typeLike);
    case Type.Timestamp:
      return sanitizeTimestamp(typeLike);
    case Type.Interval:
      return sanitizeInterval(typeLike);
    case Type.List:
      return sanitizeList(typeLike);
    case Type.Struct:
      return sanitizeStruct(typeLike);
    case Type.Union:
      return sanitizeUnion(typeLike);
    case Type.FixedSizeBinary:
      return sanitizeFixedSizeBinary(typeLike);
    case Type.FixedSizeList:
      return sanitizeFixedSizeList(typeLike);
    case Type.Map:
      return sanitizeMap(typeLike);
    case Type.Duration:
      return sanitizeDuration(typeLike);
    case Type.Dictionary:
      return sanitizeDictionary(typeLike);
    case Type.Int8:
      return new Int8();
    case Type.Int16:
      return new Int16();
    case Type.Int32:
      return new Int32();
    case Type.Int64:
      return new Int64();
    case Type.Uint8:
      return new Uint8();
    case Type.Uint16:
      return new Uint16();
    case Type.Uint32:
      return new Uint32();
    case Type.Uint64:
      return new Uint64();
    case Type.Float16:
      return new Float16();
    case Type.Float32:
      return new Float32();
    case Type.Float64:
      return new Float64();
    case Type.DateMillisecond:
      return new DateMillisecond();
    case Type.DateDay:
      return new DateDay();
    case Type.TimeNanosecond:
      return new TimeNanosecond();
    case Type.TimeMicrosecond:
      return new TimeMicrosecond();
    case Type.TimeMillisecond:
      return new TimeMillisecond();
    case Type.TimeSecond:
      return new TimeSecond();
    case Type.TimestampNanosecond:
      return sanitizeTypedTimestamp(typeLike, TimestampNanosecond);
    case Type.TimestampMicrosecond:
      return sanitizeTypedTimestamp(typeLike, TimestampMicrosecond);
    case Type.TimestampMillisecond:
      return sanitizeTypedTimestamp(typeLike, TimestampMillisecond);
    case Type.TimestampSecond:
      return sanitizeTypedTimestamp(typeLike, TimestampSecond);
    case Type.DenseUnion:
      return sanitizeTypedUnion(typeLike, DenseUnion);
    case Type.SparseUnion:
      return sanitizeTypedUnion(typeLike, SparseUnion);
    case Type.IntervalDayTime:
      return new IntervalDayTime();
    case Type.IntervalYearMonth:
      return new IntervalYearMonth();
    case Type.DurationNanosecond:
      return new DurationNanosecond();
    case Type.DurationMicrosecond:
      return new DurationMicrosecond();
    case Type.DurationMillisecond:
      return new DurationMillisecond();
    case Type.DurationSecond:
      return new DurationSecond();
  }
}

function sanitizeField(fieldLike: unknown): Field {
  if (fieldLike instanceof Field) {
    return fieldLike;
  }
  if (typeof fieldLike !== "object" || fieldLike === null) {
    throw Error("Expected a Field but object was null/undefined");
  }
  if (
    !("type" in fieldLike) ||
    !("name" in fieldLike) ||
    !("nullable" in fieldLike)
  ) {
    throw Error(
      "The field passed in is missing a `type`/`name`/`nullable` property"
    );
  }
  const type = sanitizeType(fieldLike.type);
  const name = fieldLike.name;
  if (!(typeof name === "string")) {
    throw Error("The field passed in had a non-string `name` property");
  }
  const nullable = fieldLike.nullable;
  if (!(typeof nullable === "boolean")) {
    throw Error("The field passed in had a non-boolean `nullable` property");
  }
  let metadata;
  if ("metadata" in fieldLike) {
    metadata = sanitizeMetadata(fieldLike.metadata);
  }
  return new Field(name, type, nullable, metadata);
}

/**
 * Convert something schemaLike into a Schema instance
 *
 * This method is often needed even when the caller is using a Schema
 * instance because they might be using a different instance of apache-arrow
 * than lancedb is using.
 */
export function sanitizeSchema(schemaLike: unknown): Schema {
  if (schemaLike instanceof Schema) {
    return schemaLike;
  }
  if (typeof schemaLike !== "object" || schemaLike === null) {
    throw Error("Expected a Schema but object was null/undefined");
  }
  if (!("fields" in schemaLike)) {
    throw Error(
      "The schema passed in does not appear to be a schema (no 'fields' property)"
    );
  }
  let metadata;
  if ("metadata" in schemaLike) {
    metadata = sanitizeMetadata(schemaLike.metadata);
  }
  if (!Array.isArray(schemaLike.fields)) {
    throw Error(
      "The schema passed in had a 'fields' property but it was not an array"
    );
  }
  const sanitizedFields = schemaLike.fields.map((field) =>
    sanitizeField(field)
  );
  return new Schema(sanitizedFields, metadata);
}
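A short sketch of why this file exists, using the `apache-arrow-old` alias that the test suite below also imports: a `Schema` built by a second copy of arrow-js fails `instanceof` checks against this copy, and `sanitizeSchema` rebuilds it field by field with the local classes. The `./sanitize` import path is an assumption for illustration.

```ts
import { Schema } from 'apache-arrow'
import {
  Field as OldField,
  Schema as OldSchema,
  Utf8 as OldUtf8
} from 'apache-arrow-old'
import { sanitizeSchema } from './sanitize'

const foreign = new OldSchema([new OldField('name', new OldUtf8(), true)])
console.log(foreign instanceof Schema) // false: different class identity
console.log(sanitizeSchema(foreign) instanceof Schema) // true: rebuilt with local classes
```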
@@ -1,360 +0,0 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { describe } from 'mocha'
import { assert, expect, use as chaiUse } from 'chai'
import * as chaiAsPromised from 'chai-as-promised'

import { convertToTable, fromTableToBuffer, makeArrowTable, makeEmptyTable } from '../arrow'
import {
  Field,
  FixedSizeList,
  Float16,
  Float32,
  Int32,
  tableFromIPC,
  Schema,
  Float64,
  type Table,
  Binary,
  Bool,
  Utf8,
  Struct,
  List,
  DataType,
  Dictionary,
  Int64,
  MetadataVersion
} from 'apache-arrow'
import {
  Dictionary as OldDictionary,
  Field as OldField,
  FixedSizeList as OldFixedSizeList,
  Float32 as OldFloat32,
  Int32 as OldInt32,
  Struct as OldStruct,
  Schema as OldSchema,
  TimestampNanosecond as OldTimestampNanosecond,
  Utf8 as OldUtf8
} from 'apache-arrow-old'
import { type EmbeddingFunction } from '../embedding/embedding_function'

chaiUse(chaiAsPromised)

function sampleRecords (): Array<Record<string, any>> {
  return [
    {
      binary: Buffer.alloc(5),
      boolean: false,
      number: 7,
      string: 'hello',
      struct: { x: 0, y: 0 },
      list: ['anime', 'action', 'comedy']
    }
  ]
}

// Helper method to verify various ways to create a table
async function checkTableCreation (tableCreationMethod: (records: any, recordsReversed: any, schema: Schema) => Promise<Table>): Promise<void> {
  const records = sampleRecords()
  const recordsReversed = [{
    list: ['anime', 'action', 'comedy'],
    struct: { x: 0, y: 0 },
    string: 'hello',
    number: 7,
    boolean: false,
    binary: Buffer.alloc(5)
  }]
  const schema = new Schema([
    new Field('binary', new Binary(), false),
    new Field('boolean', new Bool(), false),
    new Field('number', new Float64(), false),
    new Field('string', new Utf8(), false),
    new Field('struct', new Struct([
      new Field('x', new Float64(), false),
      new Field('y', new Float64(), false)
    ])),
    new Field('list', new List(new Field('item', new Utf8(), false)), false)
  ])

  const table = await tableCreationMethod(records, recordsReversed, schema)
  schema.fields.forEach((field, idx) => {
    const actualField = table.schema.fields[idx]
    assert.isFalse(actualField.nullable)
    assert.equal(table.getChild(field.name)?.type.toString(), field.type.toString())
    assert.equal(table.getChildAt(idx)?.type.toString(), field.type.toString())
  })
}

describe('The function makeArrowTable', function () {
  it('will use data types from a provided schema instead of inference', async function () {
    const schema = new Schema([
      new Field('a', new Int32()),
      new Field('b', new Float32()),
      new Field('c', new FixedSizeList(3, new Field('item', new Float16()))),
      new Field('d', new Int64())
    ])
    const table = makeArrowTable(
      [
        { a: 1, b: 2, c: [1, 2, 3], d: 9 },
        { a: 4, b: 5, c: [4, 5, 6], d: 10 },
        { a: 7, b: 8, c: [7, 8, 9], d: null }
      ],
      { schema }
    )

    const buf = await fromTableToBuffer(table)
    assert.isAbove(buf.byteLength, 0)

    const actual = tableFromIPC(buf)
    assert.equal(actual.numRows, 3)
    const actualSchema = actual.schema
    assert.deepEqual(actualSchema, schema)
  })

  it('will assume the column `vector` is FixedSizeList<Float32> by default', async function () {
    const schema = new Schema([
      new Field('a', new Float64()),
      new Field('b', new Float64()),
      new Field(
        'vector',
        new FixedSizeList(3, new Field('item', new Float32(), true))
      )
    ])
    const table = makeArrowTable([
      { a: 1, b: 2, vector: [1, 2, 3] },
      { a: 4, b: 5, vector: [4, 5, 6] },
      { a: 7, b: 8, vector: [7, 8, 9] }
    ])

    const buf = await fromTableToBuffer(table)
    assert.isAbove(buf.byteLength, 0)

    const actual = tableFromIPC(buf)
    assert.equal(actual.numRows, 3)
    const actualSchema = actual.schema
    assert.deepEqual(actualSchema, schema)
  })

  it('can support multiple vector columns', async function () {
    const schema = new Schema([
      new Field('a', new Float64()),
      new Field('b', new Float64()),
      new Field('vec1', new FixedSizeList(3, new Field('item', new Float16(), true))),
      new Field('vec2', new FixedSizeList(3, new Field('item', new Float16(), true)))
    ])
    const table = makeArrowTable(
      [
        { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
        { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
        { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
      ],
      {
        vectorColumns: {
          vec1: { type: new Float16() },
          vec2: { type: new Float16() }
        }
      }
    )

    const buf = await fromTableToBuffer(table)
    assert.isAbove(buf.byteLength, 0)

    const actual = tableFromIPC(buf)
    assert.equal(actual.numRows, 3)
    const actualSchema = actual.schema
    assert.deepEqual(actualSchema, schema)
  })

  it('will allow different vector column types', async function () {
    const table = makeArrowTable(
      [
        { fp16: [1], fp32: [1], fp64: [1] }
      ],
      {
        vectorColumns: {
          fp16: { type: new Float16() },
          fp32: { type: new Float32() },
          fp64: { type: new Float64() }
        }
      }
    )

    assert.equal(table.getChild('fp16')?.type.children[0].type.toString(), new Float16().toString())
    assert.equal(table.getChild('fp32')?.type.children[0].type.toString(), new Float32().toString())
    assert.equal(table.getChild('fp64')?.type.children[0].type.toString(), new Float64().toString())
  })

  it('will use dictionary encoded strings if asked', async function () {
    const table = makeArrowTable([{ str: 'hello' }])
    assert.isTrue(DataType.isUtf8(table.getChild('str')?.type))

    const tableWithDict = makeArrowTable([{ str: 'hello' }], { dictionaryEncodeStrings: true })
    assert.isTrue(DataType.isDictionary(tableWithDict.getChild('str')?.type))

    const schema = new Schema([
      new Field('str', new Dictionary(new Utf8(), new Int32()))
    ])

    const tableWithDict2 = makeArrowTable([{ str: 'hello' }], { schema })
    assert.isTrue(DataType.isDictionary(tableWithDict2.getChild('str')?.type))
  })

  it('will infer data types correctly', async function () {
    await checkTableCreation(async (records) => makeArrowTable(records))
  })

  it('will allow a schema to be provided', async function () {
    await checkTableCreation(async (records, _, schema) => makeArrowTable(records, { schema }))
  })

  it('will use the field order of any provided schema', async function () {
    await checkTableCreation(async (_, recordsReversed, schema) => makeArrowTable(recordsReversed, { schema }))
  })

  it('will make an empty table', async function () {
    await checkTableCreation(async (_, __, schema) => makeArrowTable([], { schema }))
  })
})

class DummyEmbedding implements EmbeddingFunction<string> {
  public readonly sourceColumn = 'string'
  public readonly embeddingDimension = 2
  public readonly embeddingDataType = new Float16()

  async embed (data: string[]): Promise<number[][]> {
    return data.map(
      () => [0.0, 0.0]
    )
  }
}

class DummyEmbeddingWithNoDimension implements EmbeddingFunction<string> {
  public readonly sourceColumn = 'string'

  async embed (data: string[]): Promise<number[][]> {
    return data.map(
      () => [0.0, 0.0]
    )
  }
}

describe('convertToTable', function () {
  it('will infer data types correctly', async function () {
    await checkTableCreation(async (records) => await convertToTable(records))
  })

  it('will allow a schema to be provided', async function () {
    await checkTableCreation(async (records, _, schema) => await convertToTable(records, undefined, { schema }))
  })

  it('will use the field order of any provided schema', async function () {
    await checkTableCreation(async (_, recordsReversed, schema) => await convertToTable(recordsReversed, undefined, { schema }))
  })

  it('will make an empty table', async function () {
    await checkTableCreation(async (_, __, schema) => await convertToTable([], undefined, { schema }))
  })

  it('will apply embeddings', async function () {
    const records = sampleRecords()
    const table = await convertToTable(records, new DummyEmbedding())
    assert.isTrue(DataType.isFixedSizeList(table.getChild('vector')?.type))
    assert.equal(table.getChild('vector')?.type.children[0].type.toString(), new Float16().toString())
  })

  it('will fail if missing the embedding source column', async function () {
    return await expect(convertToTable([{ id: 1 }], new DummyEmbedding())).to.be.rejectedWith("'string' was not present")
  })

  it('will use embeddingDimension if the embedding is missing from the table', async function () {
    const schema = new Schema([
      new Field('string', new Utf8(), false)
    ])
    // Simulate getting an empty Arrow table (minus embedding) from some other source
    // In other words, we aren't starting with records
    const table = makeEmptyTable(schema)

    // If the embedding specifies the dimension we are fine
    await fromTableToBuffer(table, new DummyEmbedding())

    // We can also supply a schema and should be ok
    const schemaWithEmbedding = new Schema([
      new Field('string', new Utf8(), false),
      new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
    ])
    await fromTableToBuffer(table, new DummyEmbeddingWithNoDimension(), schemaWithEmbedding)

    // Otherwise we will get an error
    return await expect(fromTableToBuffer(table, new DummyEmbeddingWithNoDimension())).to.be.rejectedWith('does not specify `embeddingDimension`')
  })

  it('will apply embeddings to an empty table', async function () {
    const schema = new Schema([
      new Field('string', new Utf8(), false),
      new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
    ])
    const table = await convertToTable([], new DummyEmbedding(), { schema })
    assert.isTrue(DataType.isFixedSizeList(table.getChild('vector')?.type))
    assert.equal(table.getChild('vector')?.type.children[0].type.toString(), new Float16().toString())
  })

  it('will complain if embeddings present but schema missing embedding column', async function () {
    const schema = new Schema([
      new Field('string', new Utf8(), false)
    ])
    return await expect(convertToTable([], new DummyEmbedding(), { schema })).to.be.rejectedWith('column vector was missing')
  })

  it('will provide a nice error if run twice', async function () {
    const records = sampleRecords()
    const table = await convertToTable(records, new DummyEmbedding())
    // fromTableToBuffer will try and apply the embeddings again
    return await expect(fromTableToBuffer(table, new DummyEmbedding())).to.be.rejectedWith('already existed')
  })
})

describe('makeEmptyTable', function () {
  it('will make an empty table', async function () {
    await checkTableCreation(async (_, __, schema) => makeEmptyTable(schema))
  })
})

describe('when using two versions of arrow', function () {
  it('can still import data', async function () {
    const schema = new OldSchema([
      new OldField('id', new OldInt32()),
      new OldField('vector', new OldFixedSizeList(1024, new OldField("item", new OldFloat32(), true))),
      new OldField('struct', new OldStruct([
        new OldField('nested', new OldDictionary(new OldUtf8(), new OldInt32(), 1, true)),
        new OldField('ts_with_tz', new OldTimestampNanosecond("some_tz")),
|
|
||||||
new OldField('ts_no_tz', new OldTimestampNanosecond(null))
|
|
||||||
]))
|
|
||||||
]) as any
|
|
||||||
// We use arrow version 13 to emulate a "foreign arrow" and this version doesn't have metadataVersion
|
|
||||||
// In theory, this wouldn't matter. We don't rely on that property. However, it causes deepEqual to
|
|
||||||
// fail so we patch it back in
|
|
||||||
schema.metadataVersion = MetadataVersion.V5
|
|
||||||
const table = makeArrowTable(
|
|
||||||
[],
|
|
||||||
{ schema }
|
|
||||||
)
|
|
||||||
|
|
||||||
const buf = await fromTableToBuffer(table)
|
|
||||||
assert.isAbove(buf.byteLength, 0)
|
|
||||||
const actual = tableFromIPC(buf)
|
|
||||||
const actualSchema = actual.schema
|
|
||||||
assert.deepEqual(actualSchema, schema)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
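Condensing what the suites above exercise: records flow through makeArrowTable (or convertToTable, which can also apply an embedding), and fromTableToBuffer then yields the Arrow IPC bytes handed to the native layer. A minimal sketch; the import path is an assumption, not a verified public entry point:

```ts
import { makeArrowTable, fromTableToBuffer } from '../arrow' // path is an assumption

async function demo (): Promise<void> {
  // Strings default to Utf8; dictionary encoding is opt-in, as tested above.
  const table = makeArrowTable(
    [{ id: 1, str: 'hello' }],
    { dictionaryEncodeStrings: true }
  )
  const buf = await fromTableToBuffer(table) // Arrow IPC bytes for the FFI boundary
  console.log(buf.byteLength)
}
```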
@@ -1,55 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { describe } from 'mocha'
import { assert } from 'chai'

import { OpenAIEmbeddingFunction } from '../../embedding/openai'
import { isEmbeddingFunction } from '../../embedding/embedding_function'

// eslint-disable-next-line @typescript-eslint/no-var-requires
const OpenAIApi = require('openai')
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { stub } = require('sinon')

describe('OpenAIEmbeddings', function () {
  const stubValue = {
    data: [
      {
        embedding: Array(1536).fill(1.0)
      },
      {
        embedding: Array(1536).fill(2.0)
      }
    ]
  }

  describe('#embed', function () {
    it('should create vector embeddings', async function () {
      const openAIStub = stub(OpenAIApi.Embeddings.prototype, 'create').returns(stubValue)
      const f = new OpenAIEmbeddingFunction('text', 'sk-key')
      const vectors = await f.embed(['abc', 'def'])
      assert.isTrue(openAIStub.calledOnce)
      assert.equal(vectors.length, 2)
      assert.deepEqual(vectors[0], stubValue.data[0].embedding)
      assert.deepEqual(vectors[1], stubValue.data[1].embedding)
    })
  })

  describe('isEmbeddingFunction', function () {
    it('should match the isEmbeddingFunction guard', function () {
      assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key')))
    })
  })
})
@@ -1,76 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// IO tests

import { describe } from 'mocha'
import { assert } from 'chai'

import * as lancedb from '../index'
import { type ConnectionOptions } from '../index'

describe('LanceDB S3 client', function () {
  if (process.env.TEST_S3_BASE_URL != null) {
    const baseUri = process.env.TEST_S3_BASE_URL
    it('should have a valid url', async function () {
      const opts = { uri: `${baseUri}/valid_url` }
      const table = await createTestDB(opts, 2, 20)
      const con = await lancedb.connect(opts)
      assert.equal(con.uri, opts.uri)

      const results = await table.search([0.1, 0.3]).limit(5).execute()
      assert.equal(results.length, 5)
    }).timeout(10_000)
  } else {
    describe.skip('Skip S3 test', function () {})
  }

  if (process.env.TEST_S3_BASE_URL != null && process.env.TEST_AWS_ACCESS_KEY_ID != null && process.env.TEST_AWS_SECRET_ACCESS_KEY != null) {
    const baseUri = process.env.TEST_S3_BASE_URL
    it('use custom credentials', async function () {
      const opts: ConnectionOptions = {
        uri: `${baseUri}/custom_credentials`,
        awsCredentials: {
          accessKeyId: process.env.TEST_AWS_ACCESS_KEY_ID as string,
          secretKey: process.env.TEST_AWS_SECRET_ACCESS_KEY as string
        }
      }
      const table = await createTestDB(opts, 2, 20)
      console.log(table)
      const con = await lancedb.connect(opts)
      console.log(con)
      assert.equal(con.uri, opts.uri)

      const results = await table.search([0.1, 0.3]).limit(5).execute()
      assert.equal(results.length, 5)
    }).timeout(10_000)
  } else {
    describe.skip('Skip S3 test', function () {})
  }
})

async function createTestDB (opts: ConnectionOptions, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
  const con = await lancedb.connect(opts)

  const data = []
  for (let i = 0; i < numRows; i++) {
    const vector = []
    for (let j = 0; j < numDimensions; j++) {
      vector.push(i + (j * 0.1))
    }
    data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
  }

  return await con.createTable('vectors_2', data)
}
File diff suppressed because it is too large
@@ -1,45 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { toSQL } from '../util'
import * as chai from 'chai'

const expect = chai.expect

describe('toSQL', function () {
  it('should turn string to SQL expression', function () {
    expect(toSQL('foo')).to.equal("'foo'")
  })

  it('should turn number to SQL expression', function () {
    expect(toSQL(123)).to.equal('123')
  })

  it('should turn boolean to SQL expression', function () {
    expect(toSQL(true)).to.equal('TRUE')
  })

  it('should turn null to SQL expression', function () {
    expect(toSQL(null)).to.equal('NULL')
  })

  it('should turn Date to SQL expression', function () {
    const date = new Date('05 October 2011 14:48 UTC')
    expect(toSQL(date)).to.equal("'2011-10-05T14:48:00.000Z'")
  })

  it('should turn array to SQL expression', function () {
    expect(toSQL(['foo', 'bar', true, 1])).to.equal("['foo', 'bar', TRUE, 1]")
  })
})
@@ -1,77 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

export type Literal = string | number | boolean | null | Date | Literal[]

export function toSQL (value: Literal): string {
  if (typeof value === 'string') {
    return `'${value}'`
  }

  if (typeof value === 'number') {
    return value.toString()
  }

  if (typeof value === 'boolean') {
    return value ? 'TRUE' : 'FALSE'
  }

  if (value === null) {
    return 'NULL'
  }

  if (value instanceof Date) {
    return `'${value.toISOString()}'`
  }

  if (Array.isArray(value)) {
    return `[${value.map(toSQL).join(', ')}]`
  }

  // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`)
}

export class TTLCache {
  private readonly cache: Map<string, { value: any, expires: number }>

  /**
   * @param ttl Time to live in milliseconds
   */
  constructor (private readonly ttl: number) {
    this.cache = new Map()
  }

  get (key: string): any | undefined {
    const entry = this.cache.get(key)
    if (entry === undefined) {
      return undefined
    }

    if (entry.expires < Date.now()) {
      this.cache.delete(key)
      return undefined
    }

    return entry.value
  }

  set (key: string, value: any): void {
    this.cache.set(key, { value, expires: Date.now() + this.ttl })
  }

  delete (key: string): void {
    this.cache.delete(key)
  }
}
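The toSQL half of this module is covered by the tests above; TTLCache is not, so here is a minimal usage sketch (the 100 ms TTL and the import path are illustrative assumptions):

```ts
import { TTLCache } from '../util' // path is an assumption

const cache = new TTLCache(100) // entries live for 100 ms
cache.set('token', 'abc123')
console.log(cache.get('token')) // 'abc123'

setTimeout(() => {
  // Past the TTL, the entry is lazily evicted on the next read.
  console.log(cache.get('token')) // undefined
}, 150)
```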
@@ -1,14 +0,0 @@
{
  "include": [
    "src/**/*.ts",
    "src/*.ts"
  ],
  "compilerOptions": {
    "target": "ES2020",
    "module": "commonjs",
    "declaration": true,
    "outDir": "./dist",
    "strict": true,
    "sourceMap": true
  }
}
@@ -1,42 +0,0 @@
[package]
name = "lancedb-node"
version = "0.21.2"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true
repository.workspace = true
keywords.workspace = true
categories.workspace = true
exclude = ["index.node"]
rust-version = "1.75"

[lib]
crate-type = ["cdylib"]

[dependencies]
arrow-array = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
chrono = { workspace = true }
conv = "0.3.3"
once_cell = "1"
futures = "0.3"
half = { workspace = true }
lance = { workspace = true }
lance-index = { workspace = true }
lance-linalg = { workspace = true }
lancedb = { path = "../../lancedb" }
tokio = { version = "1.23", features = ["rt-multi-thread"] }
neon = { version = "0.10.1", default-features = false, features = [
    "channel-api",
    "napi-6",
    "promise-api",
    "task-api",
] }
object_store = { workspace = true, features = ["aws"] }
snafu = { workspace = true }
async-trait = "0"
env_logger = "0"

# Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "*", features = ["static"] }
@@ -1,3 +0,0 @@
The LanceDB node bridge (lancedb-node) allows JavaScript applications to access LanceDB datasets.

It is built using [Neon](https://neon-bindings.com). See the node project for examples of how it is used and tested.
@@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use std::io::Cursor;
use std::ops::Deref;

use arrow_array::RecordBatch;
use arrow_ipc::reader::FileReader;
use arrow_ipc::writer::FileWriter;
use arrow_schema::SchemaRef;

use crate::error::Result;

pub fn arrow_buffer_to_record_batch(slice: &[u8]) -> Result<(Vec<RecordBatch>, SchemaRef)> {
    let mut batches: Vec<RecordBatch> = Vec::new();
    let file_reader = FileReader::try_new(Cursor::new(slice), None)?;
    let schema = file_reader.schema();
    for b in file_reader {
        let record_batch = b?;
        batches.push(record_batch);
    }
    Ok((batches, schema))
}

pub fn record_batch_to_buffer(batches: Vec<RecordBatch>) -> Result<Vec<u8>> {
    if batches.is_empty() {
        return Ok(Vec::new());
    }

    let schema = batches.first().unwrap().schema();
    let mut fr = FileWriter::try_new(Vec::new(), schema.deref())?;
    for batch in batches.iter() {
        fr.write(batch)?
    }
    fr.finish()?;
    Ok(fr.into_inner()?)
}
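These two helpers fix the FFI contract: buffers crossing the boundary use the Arrow IPC *file* format (FileReader/FileWriter), not the streaming format. A sketch of the matching JS side using apache-arrow, which is what the tests earlier in this diff pass across:

```ts
import { tableFromArrays, tableToIPC, tableFromIPC } from 'apache-arrow'

// Serialize with the 'file' variant so the Rust FileReader can parse it.
const table = tableFromArrays({ id: Int32Array.from([1, 2, 3]) })
const buf = tableToIPC(table, 'file') // Uint8Array handed across the FFI boundary

// The reverse direction mirrors record_batch_to_buffer:
const roundTripped = tableFromIPC(buf)
console.log(roundTripped.numRows) // 3
```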
@@ -1,42 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use neon::prelude::*;
use neon::types::buffer::TypedArray;

use crate::error::ResultExt;

pub fn vec_str_to_array<'a, C: Context<'a>>(vec: &[String], cx: &mut C) -> JsResult<'a, JsArray> {
    let a = JsArray::new(cx, vec.len() as u32);
    for (i, s) in vec.iter().enumerate() {
        let v = cx.string(s);
        a.set(cx, i as u32, v)?;
    }
    Ok(a)
}

pub fn js_array_to_vec(array: &JsArray, cx: &mut FunctionContext) -> Vec<f32> {
    let mut query_vec: Vec<f32> = Vec::new();
    for i in 0..array.len(cx) {
        let entry: Handle<JsNumber> = array.get(cx, i).unwrap();
        query_vec.push(entry.value(cx) as f32);
    }
    query_vec
}

// Creates a new JsBuffer from a Rust buffer, with special handling for Electron
pub fn new_js_buffer<'a>(
    buffer: Vec<u8>,
    cx: &mut TaskContext<'a>,
    is_electron: bool,
) -> NeonResult<Handle<'a, JsBuffer>> {
    if is_electron {
        // Electron does not support `external`: https://github.com/neon-bindings/neon/pull/937
        let mut js_buffer = JsBuffer::new(cx, buffer.len()).or_throw(cx)?;
        let buffer_data = js_buffer.as_mut_slice(cx);
        buffer_data.copy_from_slice(buffer.as_slice());
        Ok(js_buffer)
    } else {
        Ok(JsBuffer::external(cx, buffer))
    }
}
@@ -1,86 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use arrow_schema::ArrowError;
use neon::context::Context;
use neon::prelude::NeonResult;
use snafu::Snafu;

#[derive(Debug, Snafu)]
pub enum Error {
    #[allow(dead_code)]
    #[snafu(display("column '{name}' is missing"))]
    MissingColumn { name: String },
    #[snafu(display("{name}: {message}"))]
    OutOfRange { name: String, message: String },
    #[allow(dead_code)]
    #[snafu(display("{index_type} is not a valid index type"))]
    InvalidIndexType { index_type: String },

    #[snafu(display("{message}"))]
    LanceDB { message: String },
    #[snafu(display("{message}"))]
    Neon { message: String },
}

pub type Result<T> = std::result::Result<T, Error>;

impl From<lancedb::error::Error> for Error {
    fn from(e: lancedb::error::Error) -> Self {
        Self::LanceDB {
            message: e.to_string(),
        }
    }
}

impl From<lance::Error> for Error {
    fn from(e: lance::Error) -> Self {
        Self::LanceDB {
            message: e.to_string(),
        }
    }
}

impl From<ArrowError> for Error {
    fn from(value: ArrowError) -> Self {
        Self::LanceDB {
            message: value.to_string(),
        }
    }
}

impl From<neon::result::Throw> for Error {
    fn from(value: neon::result::Throw) -> Self {
        Self::Neon {
            message: value.to_string(),
        }
    }
}

impl<T> From<std::sync::mpsc::SendError<T>> for Error {
    fn from(value: std::sync::mpsc::SendError<T>) -> Self {
        Self::Neon {
            message: value.to_string(),
        }
    }
}

/// ResultExt is used to transform a [`Result`] into a [`NeonResult`],
/// so it can be returned as a JavaScript error.
/// Copied from [Neon](https://github.com/neon-bindings/neon/blob/4c2e455a9e6814f1ba0178616d63caec7f4df317/crates/neon/src/result/mod.rs#L88)
pub trait ResultExt<T> {
    fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T>;
}

/// Implement ResultExt for the std Result so it can be used with any Result type
impl<T, E> ResultExt<T> for std::result::Result<T, E>
where
    E: std::fmt::Display,
{
    fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T> {
        match self {
            Ok(value) => Ok(value),
            Err(error) => cx.throw_error(error.to_string()),
        }
    }
}
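Because every failure funnels through or_throw, the JS caller only ever sees ordinary exceptions whose message is the Rust error's Display output. A hedged sketch of the consumer side (the binding names follow the exports registered in lib.rs further down; `native` and `db` are placeholders):

```ts
// `native` is the loaded addon and `db` a boxed database (see the lib.rs sketch below).
async function tryOpen (native: any, db: any): Promise<void> {
  try {
    await native.databaseOpenTable.call(db, 'no_such_table')
  } catch (err) {
    // The message is the Rust Display output, e.g. a lancedb::Error
    // mapped into Error::LanceDB above and thrown via or_throw.
    console.error((err as Error).message)
  }
}
```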
@@ -1,5 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

pub mod scalar;
pub mod vector;
@@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use lancedb::index::{scalar::BTreeIndexBuilder, Index};
use neon::{
    context::{Context, FunctionContext},
    result::JsResult,
    types::{JsBoolean, JsBox, JsPromise, JsString},
};

use crate::{error::ResultExt, runtime, table::JsTable};

pub fn table_create_scalar_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
    let column = cx.argument::<JsString>(0)?.value(&mut cx);
    let replace = cx.argument::<JsBoolean>(1)?.value(&mut cx);

    let rt = runtime(&mut cx)?;

    let (deferred, promise) = cx.promise();
    let channel = cx.channel();
    let table = js_table.table.clone();

    rt.spawn(async move {
        let idx_result = table
            .create_index(&[column], Index::BTree(BTreeIndexBuilder::default()))
            .replace(replace)
            .execute()
            .await;

        deferred.settle_with(&channel, move |mut cx| {
            idx_result.or_throw(&mut cx)?;
            Ok(cx.undefined())
        });
    });
    Ok(promise)
}
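The binding reads the boxed table from `this` and takes two positional arguments. A hypothetical sketch of the JS call shape (the export name follows lib.rs; `native` and `nativeTable` are placeholders):

```ts
async function createScalarIndex (native: any, nativeTable: any): Promise<void> {
  // BTree is the only index type this binding builds; arguments are
  // (column, replace), matching cx.argument(0) and cx.argument(1) above.
  await native.tableCreateScalarIndex.call(nativeTable, 'id', true)
}
```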
@@ -1,77 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use lancedb::index::vector::IvfPqIndexBuilder;
use lancedb::index::Index;
use lancedb::DistanceType;
use neon::context::FunctionContext;
use neon::prelude::*;
use std::convert::TryFrom;

use crate::error::ResultExt;
use crate::neon_ext::js_object_ext::JsObjectExt;
use crate::runtime;
use crate::table::JsTable;

pub fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
    let index_params = cx.argument::<JsObject>(0)?;

    let rt = runtime(&mut cx)?;

    let (deferred, promise) = cx.promise();
    let channel = cx.channel();
    let table = js_table.table.clone();

    let column_name = index_params
        .get_opt::<JsString, _, _>(&mut cx, "column")?
        .map(|s| s.value(&mut cx))
        .unwrap_or("vector".to_string()); // Backward compatibility

    let replace = index_params
        .get_opt::<JsBoolean, _, _>(&mut cx, "replace")?
        .map(|r| r.value(&mut cx));

    let tbl = table.clone();
    let ivf_pq_builder = get_index_params_builder(&mut cx, index_params).or_throw(&mut cx)?;

    let mut index_builder = tbl.create_index(&[column_name], Index::IvfPq(ivf_pq_builder));
    if let Some(replace) = replace {
        index_builder = index_builder.replace(replace);
    }

    rt.spawn(async move {
        let idx_result = index_builder.execute().await;
        deferred.settle_with(&channel, move |mut cx| {
            idx_result.or_throw(&mut cx)?;
            Ok(cx.boxed(JsTable::from(table)))
        });
    });
    Ok(promise)
}

fn get_index_params_builder(
    cx: &mut FunctionContext,
    obj: Handle<JsObject>,
) -> crate::error::Result<IvfPqIndexBuilder> {
    if obj.get_opt::<JsString, _, _>(cx, "index_name")?.is_some() {
        return Err(crate::error::Error::LanceDB {
            message: "Setting the index_name is no longer supported".to_string(),
        });
    }
    let mut builder = IvfPqIndexBuilder::default();
    if let Some(metric_type) = obj.get_opt::<JsString, _, _>(cx, "metric_type")? {
        let distance_type = DistanceType::try_from(metric_type.value(cx).as_str())?;
        builder = builder.distance_type(distance_type);
    }
    if let Some(np) = obj.get_opt_u32(cx, "num_partitions")? {
        builder = builder.num_partitions(np);
    }
    if let Some(ns) = obj.get_opt_u32(cx, "num_sub_vectors")? {
        builder = builder.num_sub_vectors(ns);
    }
    if let Some(max_iters) = obj.get_opt_u32(cx, "max_iters")? {
        builder = builder.max_iterations(max_iters);
    }
    Ok(builder)
}
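Everything get_index_params_builder reads comes from one options object. Reconstructing its shape from the property reads above (the values shown are illustrative; `native` and `nativeTable` are placeholders):

```ts
// Keys read by table_create_vector_index / get_index_params_builder above.
// All are optional; `column` falls back to 'vector', and passing `index_name`
// is rejected outright.
const indexParams = {
  column: 'vector',
  metric_type: 'cosine', // parsed via DistanceType::try_from
  num_partitions: 256,
  num_sub_vectors: 16,
  max_iters: 50,
  replace: true
}

async function createVectorIndex (native: any, nativeTable: any): Promise<void> {
  await native.tableCreateVectorIndex.call(nativeTable, indexParams)
}
```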
@@ -1,174 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use neon::prelude::*;
use once_cell::sync::OnceCell;
use tokio::runtime::Runtime;

use lancedb::connect;
use lancedb::connection::Connection;

use crate::error::ResultExt;
use crate::query::JsQuery;
use crate::table::JsTable;

mod arrow;
mod convert;
mod error;
mod index;
mod neon_ext;
mod query;
mod table;

struct JsDatabase {
    database: Connection,
}

impl Finalize for JsDatabase {}

fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
    static RUNTIME: OnceCell<Runtime> = OnceCell::new();
    static LOG: OnceCell<()> = OnceCell::new();

    LOG.get_or_init(env_logger::init);

    RUNTIME.get_or_try_init(|| Runtime::new().or_throw(cx))
}

fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let path = cx.argument::<JsString>(0)?.value(&mut cx);
    let read_consistency_interval = cx
        .argument_opt(2)
        .and_then(|arg| arg.downcast::<JsNumber, _>(&mut cx).ok())
        .map(|v| v.value(&mut cx))
        .map(std::time::Duration::from_secs_f64);

    let storage_options_js = cx.argument::<JsArray>(1)?.to_vec(&mut cx)?;
    let mut storage_options: Vec<(String, String)> = Vec::with_capacity(storage_options_js.len());
    for handle in storage_options_js {
        let obj = handle.downcast::<JsArray, _>(&mut cx).unwrap();
        let key = obj.get::<JsString, _, _>(&mut cx, 0)?.value(&mut cx);
        let value = obj.get::<JsString, _, _>(&mut cx, 1)?.value(&mut cx);

        storage_options.push((key, value));
    }

    let rt = runtime(&mut cx)?;
    let channel = cx.channel();
    let (deferred, promise) = cx.promise();

    let mut conn_builder = connect(&path).storage_options(storage_options);

    if let Some(interval) = read_consistency_interval {
        conn_builder = conn_builder.read_consistency_interval(interval);
    }
    rt.spawn(async move {
        let database = conn_builder.execute().await;

        deferred.settle_with(&channel, move |mut cx| {
            let db = JsDatabase {
                database: database.or_throw(&mut cx)?,
            };
            Ok(cx.boxed(db))
        });
    });
    Ok(promise)
}

fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let db = cx
        .this()
        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;

    let rt = runtime(&mut cx)?;
    let (deferred, promise) = cx.promise();
    let channel = cx.channel();
    let database = db.database.clone();

    rt.spawn(async move {
        let tables_rst = database.table_names().execute().await;

        deferred.settle_with(&channel, move |mut cx| {
            let tables = tables_rst.or_throw(&mut cx)?;
            convert::vec_str_to_array(&tables, &mut cx)
        });
    });
    Ok(promise)
}

fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let db = cx
        .this()
        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
    let table_name = cx.argument::<JsString>(0)?.value(&mut cx);

    let rt = runtime(&mut cx)?;
    let channel = cx.channel();
    let database = db.database.clone();

    let (deferred, promise) = cx.promise();
    rt.spawn(async move {
        let table_rst = database.open_table(&table_name).execute().await;

        deferred.settle_with(&channel, move |mut cx| {
            let js_table = JsTable::from(table_rst.or_throw(&mut cx)?);
            Ok(cx.boxed(js_table))
        });
    });
    Ok(promise)
}

fn database_drop_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let db = cx
        .this()
        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
    let table_name = cx.argument::<JsString>(0)?.value(&mut cx);

    let rt = runtime(&mut cx)?;
    let channel = cx.channel();
    let database = db.database.clone();

    let (deferred, promise) = cx.promise();
    rt.spawn(async move {
        let result = database.drop_table(&table_name).await;
        deferred.settle_with(&channel, move |mut cx| {
            result.or_throw(&mut cx)?;
            Ok(cx.null())
        });
    });
    Ok(promise)
}

#[neon::main]
fn main(mut cx: ModuleContext) -> NeonResult<()> {
    cx.export_function("databaseNew", database_new)?;
    cx.export_function("databaseTableNames", database_table_names)?;
    cx.export_function("databaseOpenTable", database_open_table)?;
    cx.export_function("databaseDropTable", database_drop_table)?;
    cx.export_function("tableSearch", JsQuery::js_search)?;
    cx.export_function("tableCreate", JsTable::js_create)?;
    cx.export_function("tableAdd", JsTable::js_add)?;
    cx.export_function("tableCountRows", JsTable::js_count_rows)?;
    cx.export_function("tableDelete", JsTable::js_delete)?;
    cx.export_function("tableUpdate", JsTable::js_update)?;
    cx.export_function("tableMergeInsert", JsTable::js_merge_insert)?;
    cx.export_function("tableCleanupOldVersions", JsTable::js_cleanup)?;
    cx.export_function("tableCompactFiles", JsTable::js_compact)?;
    cx.export_function("tableListIndices", JsTable::js_list_indices)?;
    cx.export_function("tableIndexStats", JsTable::js_index_stats)?;
    cx.export_function(
        "tableCreateScalarIndex",
        index::scalar::table_create_scalar_index,
    )?;
    cx.export_function(
        "tableCreateVectorIndex",
        index::vector::table_create_vector_index,
    )?;
    cx.export_function("tableSchema", JsTable::js_schema)?;
    cx.export_function("tableAddColumns", JsTable::js_add_columns)?;
    cx.export_function("tableAlterColumns", JsTable::js_alter_columns)?;
    cx.export_function("tableDropColumns", JsTable::js_drop_columns)?;
    cx.export_function("tableDropIndex", JsTable::js_drop_index)?;
    Ok(())
}
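For orientation, a hedged sketch of how the vectordb package drove these exports (the addon filename and path are assumptions; the argument layout follows database_new above):

```ts
// eslint-disable-next-line @typescript-eslint/no-var-requires
const native = require('./index.node') // addon path is an assumption

async function listTables (): Promise<string[]> {
  // databaseNew(path, storage options as [key, value] pairs,
  //             optional read-consistency interval in seconds)
  const db = await native.databaseNew('/tmp/lancedb-demo', [], 5)
  // Exported methods take the boxed database as `this` (cx.this() on the Rust side).
  return await native.databaseTableNames.call(db)
}
```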
@@ -1,4 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

pub mod js_object_ext;
@@ -1,72 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use crate::error::{Error, Result};
use neon::prelude::*;

// Extends neon's [JsObject] with helper functions to extract properties
pub trait JsObjectExt {
    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>>;
    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize>;
    #[allow(dead_code)]
    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>>;
}

impl JsObjectExt for JsObject {
    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>> {
        let val_opt = self
            .get_opt::<JsNumber, _, _>(cx, key)?
            .map(|s| f64_to_u32_safe(s.value(cx), key));
        val_opt.transpose()
    }

    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize> {
        let val = self.get::<JsNumber, _, _>(cx, key)?.value(cx);
        f64_to_usize_safe(val, key)
    }

    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>> {
        let val_opt = self
            .get_opt::<JsNumber, _, _>(cx, key)?
            .map(|s| f64_to_usize_safe(s.value(cx), key));
        val_opt.transpose()
    }
}

fn f64_to_u32_safe(n: f64, key: &str) -> Result<u32> {
    use conv::*;

    n.approx_as::<u32>().map_err(|e| match e {
        FloatError::NegOverflow(_) => Error::OutOfRange {
            name: key.into(),
            message: "must be > 0".to_string(),
        },
        FloatError::PosOverflow(_) => Error::OutOfRange {
            name: key.into(),
            message: format!("must be < {}", u32::MAX),
        },
        FloatError::NotANumber(_) => Error::OutOfRange {
            name: key.into(),
            message: "not a valid number".to_string(),
        },
    })
}

fn f64_to_usize_safe(n: f64, key: &str) -> Result<usize> {
    use conv::*;

    n.approx_as::<usize>().map_err(|e| match e {
        FloatError::NegOverflow(_) => Error::OutOfRange {
            name: key.into(),
            message: "must be > 0".to_string(),
        },
        FloatError::PosOverflow(_) => Error::OutOfRange {
            name: key.into(),
            message: format!("must be < {}", usize::MAX),
        },
        FloatError::NotANumber(_) => Error::OutOfRange {
            name: key.into(),
            message: "not a valid number".to_string(),
        },
    })
}
@@ -1,138 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use std::convert::TryFrom;
use std::ops::Deref;

use futures::{TryFutureExt, TryStreamExt};
use lancedb::query::{ExecutableQuery, QueryBase, Select};
use lancedb::DistanceType;
use neon::context::FunctionContext;
use neon::handle::Handle;
use neon::prelude::*;

use crate::arrow::record_batch_to_buffer;
use crate::error::ResultExt;
use crate::neon_ext::js_object_ext::JsObjectExt;
use crate::table::JsTable;
use crate::{convert, runtime};

pub struct JsQuery {}

impl JsQuery {
    pub(crate) fn js_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
        let query_obj = cx.argument::<JsObject>(0)?;

        let limit = query_obj
            .get_opt::<JsNumber, _, _>(&mut cx, "_limit")?
            .map(|value| {
                let limit = value.value(&mut cx);
                if limit <= 0.0 {
                    panic!("Limit must be a positive integer");
                }
                limit as u64
            });
        let select = query_obj
            .get_opt::<JsArray, _, _>(&mut cx, "_select")?
            .map(|arr| {
                let js_array = arr.deref();
                let mut projection_vec: Vec<String> = Vec::new();
                for i in 0..js_array.len(&mut cx) {
                    let entry: Handle<JsString> = js_array.get(&mut cx, i).unwrap();
                    projection_vec.push(entry.value(&mut cx));
                }
                projection_vec
            });

        let prefilter = query_obj
            .get::<JsBoolean, _, _>(&mut cx, "_prefilter")?
            .value(&mut cx);

        let fast_search = query_obj
            .get_opt::<JsBoolean, _, _>(&mut cx, "_fastSearch")?
            .map(|val| val.value(&mut cx));

        let is_electron = cx
            .argument::<JsBoolean>(1)
            .or_throw(&mut cx)?
            .value(&mut cx);

        let rt = runtime(&mut cx)?;

        let (deferred, promise) = cx.promise();
        let channel = cx.channel();
        let table = js_table.table.clone();

        let mut builder = table.query();
        if let Some(filter) = query_obj
            .get_opt::<JsString, _, _>(&mut cx, "_filter")?
            .map(|s| s.value(&mut cx))
        {
            builder = builder.only_if(filter);
        }
        if let Some(select) = select {
            builder = builder.select(Select::columns(select.as_slice()));
        }
        if let Some(limit) = limit {
            builder = builder.limit(limit as usize);
        };
        if let Some(true) = fast_search {
            builder = builder.fast_search();
        }

        let query_vector = query_obj.get_opt::<JsArray, _, _>(&mut cx, "_queryVector")?;
        if let Some(query) = query_vector.map(|q| convert::js_array_to_vec(q.deref(), &mut cx)) {
            let mut vector_builder = builder.nearest_to(query).unwrap();
            if let Some(distance_type) = query_obj
                .get_opt::<JsString, _, _>(&mut cx, "_metricType")?
                .map(|s| s.value(&mut cx))
                .map(|s| DistanceType::try_from(s.as_str()).unwrap())
            {
                vector_builder = vector_builder.distance_type(distance_type);
            }

            let nprobes = query_obj.get_usize(&mut cx, "_nprobes").or_throw(&mut cx)?;
            vector_builder = vector_builder.nprobes(nprobes);

            if !prefilter {
                vector_builder = vector_builder.postfilter();
            }
            rt.spawn(async move {
                let results = vector_builder
                    .execute()
                    .and_then(|stream| {
                        stream
                            .try_collect::<Vec<_>>()
                            .map_err(lancedb::error::Error::from)
                    })
                    .await;

                deferred.settle_with(&channel, move |mut cx| {
                    let results = results.or_throw(&mut cx)?;
                    let buffer = record_batch_to_buffer(results).or_throw(&mut cx)?;
                    convert::new_js_buffer(buffer, &mut cx, is_electron)
                });
            });
        } else {
            rt.spawn(async move {
                let results = builder
                    .execute()
                    .and_then(|stream| {
                        stream
                            .try_collect::<Vec<_>>()
                            .map_err(lancedb::error::Error::from)
                    })
                    .await;

                deferred.settle_with(&channel, move |mut cx| {
                    let results = results.or_throw(&mut cx)?;
                    let buffer = record_batch_to_buffer(results).or_throw(&mut cx)?;
                    convert::new_js_buffer(buffer, &mut cx, is_electron)
                });
            });
        };

        Ok(promise)
    }
}
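js_search pulls every setting off a single query object, plus an isElectron flag as the second argument. Its shape, reconstructed from the property reads above (`native` and `nativeTable` are placeholders):

```ts
// _prefilter is always read (with `get`), and _nprobes is required whenever a
// _queryVector is supplied (read with get_usize). The rest are optional.
const query = {
  _queryVector: [0.1, 0.3],
  _limit: 10,
  _select: ['id', 'name'],
  _filter: 'price > 10',
  _prefilter: true,
  _nprobes: 20,
  _metricType: 'l2',
  _fastSearch: false
}

async function search (native: any, nativeTable: any): Promise<Buffer> {
  // Resolves to an Arrow IPC buffer (see record_batch_to_buffer in arrow.rs above).
  return await native.tableSearch.call(nativeTable, query, /* isElectron */ false)
}
```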
@@ -1,645 +0,0 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
||||||
|
|
||||||
use std::ops::Deref;
|
|
||||||
|
|
||||||
use arrow_array::{RecordBatch, RecordBatchIterator};
|
|
||||||
use lance::dataset::optimize::CompactionOptions;
|
|
||||||
use lance::dataset::{ColumnAlteration, NewColumnTransform, WriteMode, WriteParams};
|
|
||||||
use lancedb::table::{OptimizeAction, WriteOptions};
|
|
||||||
|
|
||||||
use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
|
|
||||||
use lancedb::table::Table as LanceDbTable;
|
|
||||||
use neon::prelude::*;
|
|
||||||
use neon::types::buffer::TypedArray;
|
|
||||||
|
|
||||||
use crate::error::ResultExt;
|
|
||||||
use crate::{convert, runtime, JsDatabase};
|
|
||||||
|
|
||||||
pub struct JsTable {
|
|
||||||
pub table: LanceDbTable,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Finalize for JsTable {}
|
|
||||||
|
|
||||||
impl From<LanceDbTable> for JsTable {
|
|
||||||
fn from(table: LanceDbTable) -> Self {
|
|
||||||
Self { table }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl JsTable {
|
|
||||||
pub(crate) fn js_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|
||||||
let db = cx
|
|
||||||
.this()
|
|
||||||
.downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
|
|
||||||
let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
|
|
||||||
let buffer = cx.argument::<JsBuffer>(1)?;
|
|
||||||
let (batches, schema) =
|
|
||||||
arrow_buffer_to_record_batch(buffer.as_slice(&cx)).or_throw(&mut cx)?;
|
|
||||||
|
|
||||||
// Write mode
|
|
||||||
let mode = match cx.argument::<JsString>(2)?.value(&mut cx).as_str() {
|
|
||||||
"overwrite" => WriteMode::Overwrite,
|
|
||||||
"append" => WriteMode::Append,
|
|
||||||
"create" => WriteMode::Create,
|
|
||||||
_ => {
|
|
||||||
return cx.throw_error("Table::create only supports 'overwrite' and 'create' modes")
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let params = WriteParams {
|
|
||||||
mode,
|
|
||||||
..WriteParams::default()
|
|
||||||
};
|
|
||||||
|
|
||||||
let rt = runtime(&mut cx)?;
|
|
||||||
let channel = cx.channel();
|
|
||||||
|
|
||||||
let (deferred, promise) = cx.promise();
|
|
||||||
let database = db.database.clone();
|
|
||||||
|
|
||||||
rt.spawn(async move {
|
|
||||||
let batch_reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
|
|
||||||
let table_rst = database
|
|
||||||
.create_table(&table_name, batch_reader)
|
|
||||||
.write_options(WriteOptions {
|
|
||||||
lance_write_params: Some(params),
|
|
||||||
})
|
|
||||||
.execute()
|
|
||||||
.await;
|
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
|
||||||
let table = table_rst.or_throw(&mut cx)?;
|
|
||||||
Ok(cx.boxed(Self::from(table)))
|
|
||||||
});
|
|
||||||
});
|
|
||||||
Ok(promise)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn js_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|
||||||
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
|
||||||
let buffer = cx.argument::<JsBuffer>(0)?;
|
|
||||||
let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
|
|
||||||
let (batches, schema) =
|
|
||||||
arrow_buffer_to_record_batch(buffer.as_slice(&cx)).or_throw(&mut cx)?;
|
|
||||||
let rt = runtime(&mut cx)?;
|
|
||||||
let channel = cx.channel();
|
|
||||||
let table = js_table.table.clone();
|
|
||||||
|
|
||||||
let (deferred, promise) = cx.promise();
|
|
||||||
let write_mode = match write_mode.as_str() {
|
|
||||||
"create" => WriteMode::Create,
|
|
||||||
"append" => WriteMode::Append,
|
|
||||||
"overwrite" => WriteMode::Overwrite,
|
|
||||||
s => return cx.throw_error(format!("invalid write mode {}", s)),
|
|
||||||
};
|
|
||||||
|
|
||||||
let params = WriteParams {
|
|
||||||
mode: write_mode,
|
|
||||||
..WriteParams::default()
|
|
||||||
};
|
|
||||||
|
|
||||||
rt.spawn(async move {
|
|
||||||
let batch_reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
|
|
||||||
let add_result = table
|
|
||||||
.add(batch_reader)
|
|
||||||
.write_options(WriteOptions {
|
|
||||||
lance_write_params: Some(params),
|
|
||||||
})
|
|
||||||
.execute()
|
|
||||||
.await;
|
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
|
||||||
add_result.or_throw(&mut cx)?;
|
|
||||||
Ok(cx.boxed(Self::from(table)))
|
|
||||||
});
|
|
||||||
});
|
|
||||||
Ok(promise)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn js_count_rows(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|
||||||
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
|
||||||
let filter = cx
|
|
||||||
.argument_opt(0)
|
|
||||||
.and_then(|filt| {
|
|
||||||
if filt.is_a::<JsUndefined, _>(&mut cx) || filt.is_a::<JsNull, _>(&mut cx) {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(
|
|
||||||
filt.downcast_or_throw::<JsString, _>(&mut cx)
|
|
||||||
.map(|js_filt| js_filt.deref().value(&mut cx)),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.transpose()?;
|
|
||||||
let rt = runtime(&mut cx)?;
|
|
||||||
let (deferred, promise) = cx.promise();
|
|
||||||
let channel = cx.channel();
|
|
||||||
let table = js_table.table.clone();
|
|
||||||
|
|
||||||
rt.spawn(async move {
|
|
||||||
let num_rows_result = table.count_rows(filter).await;
|
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
|
||||||
let num_rows = num_rows_result.or_throw(&mut cx)?;
|
|
||||||
Ok(cx.number(num_rows as f64))
|
|
||||||
});
|
|
||||||
});
|
|
||||||
Ok(promise)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn js_delete(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|
||||||
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
|
||||||
let rt = runtime(&mut cx)?;
|
|
||||||
let (deferred, promise) = cx.promise();
|
|
||||||
let predicate = cx.argument::<JsString>(0)?.value(&mut cx);
|
|
||||||
let channel = cx.channel();
|
|
||||||
let table = js_table.table.clone();
|
|
||||||
|
|
||||||
rt.spawn(async move {
|
|
||||||
let delete_result = table.delete(&predicate).await;
|
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
|
||||||
delete_result.or_throw(&mut cx)?;
|
|
||||||
Ok(cx.boxed(Self::from(table)))
|
|
||||||
})
|
|
||||||
});
|
|
||||||
Ok(promise)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn js_merge_insert(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|
||||||
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
|
||||||
let rt = runtime(&mut cx)?;
|
|
||||||
let (deferred, promise) = cx.promise();
|
|
||||||
let channel = cx.channel();
|
|
||||||
let table = js_table.table.clone();
|
|
||||||
|
|
||||||
let key = cx.argument::<JsString>(0)?.value(&mut cx);
|
|
||||||
let mut builder = table.merge_insert(&[&key]);
|
|
||||||
if cx.argument::<JsBoolean>(1)?.value(&mut cx) {
|
|
||||||
let filter = cx.argument_opt(2).unwrap();
|
|
||||||
if filter.is_a::<JsNull, _>(&mut cx) {
|
|
||||||
builder.when_matched_update_all(None);
|
|
||||||
} else {
|
|
||||||
let filter = filter
|
|
||||||
.downcast_or_throw::<JsString, _>(&mut cx)?
|
|
||||||
.deref()
|
|
||||||
.value(&mut cx);
|
|
||||||
builder.when_matched_update_all(Some(filter));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if cx.argument::<JsBoolean>(3)?.value(&mut cx) {
|
|
||||||
builder.when_not_matched_insert_all();
|
|
||||||
}
|
|
||||||
if cx.argument::<JsBoolean>(4)?.value(&mut cx) {
|
|
||||||
let filter = cx.argument_opt(5).unwrap();
|
|
||||||
if filter.is_a::<JsNull, _>(&mut cx) {
|
|
||||||
builder.when_not_matched_by_source_delete(None);
|
|
||||||
} else {
|
|
||||||
let filter = filter
|
|
||||||
.downcast_or_throw::<JsString, _>(&mut cx)?
|
|
||||||
.deref()
|
|
||||||
.value(&mut cx);
|
|
||||||
builder.when_not_matched_by_source_delete(Some(filter));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let buffer = cx.argument::<JsBuffer>(6)?;
|
|
||||||
let (batches, schema) =
|
|
||||||
arrow_buffer_to_record_batch(buffer.as_slice(&cx)).or_throw(&mut cx)?;
|
|
||||||
|
|
||||||
rt.spawn(async move {
|
|
||||||
let new_data = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
|
|
||||||
let merge_insert_result = builder.execute(Box::new(new_data)).await;
|
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
|
||||||
merge_insert_result.or_throw(&mut cx)?;
|
|
||||||
Ok(cx.boxed(Self::from(table)))
|
|
||||||
})
|
|
||||||
});
|
|
||||||
Ok(promise)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn js_update(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|
||||||
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
|
||||||
let table = js_table.table.clone();
|
|
||||||
|
|
||||||
let rt = runtime(&mut cx)?;
|
|
||||||
let (deferred, promise) = cx.promise();
|
|
||||||
let channel = cx.channel();
|
|
||||||
|
|
||||||
// create a vector of updates from the passed map
|
|
||||||
let updates_arg = cx.argument::<JsObject>(1)?;
|
|
||||||
let properties = updates_arg.get_own_property_names(&mut cx)?;
|
|
||||||
let mut updates: Vec<(String, String)> =
|
|
||||||
Vec::with_capacity(properties.len(&mut cx) as usize);
|
|
||||||
|
|
||||||
let len_properties = properties.len(&mut cx);
|
|
||||||
for i in 0..len_properties {
|
|
||||||
let property = properties
|
|
||||||
.get_value(&mut cx, i)?
|
|
||||||
.downcast_or_throw::<JsString, _>(&mut cx)?;
|
|
||||||
|
|
||||||
let value = updates_arg
|
|
||||||
.get_value(&mut cx, property)?
|
|
||||||
.downcast_or_throw::<JsString, _>(&mut cx)?;
|
|
||||||
|
|
||||||
let property = property.value(&mut cx);
|
|
||||||
let value = value.value(&mut cx);
|
|
||||||
updates.push((property, value));
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the filter/predicate if the user passed one
|
|
||||||
let predicate = cx.argument_opt(0);
|
|
||||||
let predicate = predicate.unwrap().downcast::<JsString, _>(&mut cx);
|
|
||||||
let predicate = match predicate {
|
|
||||||
Ok(_) => {
|
|
||||||
let val = predicate.map(|s| s.value(&mut cx)).unwrap();
|
|
||||||
Some(val)
|
|
||||||
}
|
|
||||||
Err(_) => {
|
|
||||||
// if the predicate is not string, check it's null otherwise an invalid
|
|
||||||
// type was passed
|
|
||||||
cx.argument::<JsNull>(0)?;
|
|
||||||
None
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
rt.spawn(async move {
|
|
||||||
let updates_arg = updates
|
|
||||||
.iter()
|
|
||||||
.map(|(k, v)| (k.as_str(), v.as_str()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let predicate = predicate.as_deref();
|
|
||||||
|
|
||||||
let mut update_op = table.update();
|
|
||||||
if let Some(predicate) = predicate {
|
|
||||||
update_op = update_op.only_if(predicate);
|
|
||||||
}
|
|
||||||
for (column, value) in updates_arg {
|
|
||||||
update_op = update_op.column(column, value);
|
|
||||||
}
|
|
||||||
let update_result = update_op.execute().await;
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
|
||||||
update_result.or_throw(&mut cx)?;
|
|
||||||
Ok(cx.boxed(Self::from(table)))
|
|
||||||
})
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(promise)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn js_cleanup(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|
||||||
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
|
|
||||||
let rt = runtime(&mut cx)?;
|
|
||||||
let (deferred, promise) = cx.promise();
|
|
||||||
let table = js_table.table.clone();
|
|
||||||
let channel = cx.channel();
|
|
||||||
|
|
||||||
let older_than: i64 = cx
|
|
||||||
.argument_opt(0)
|
|
||||||
.and_then(|val| val.downcast::<JsNumber, _>(&mut cx).ok())
|
|
||||||
.map(|val| val.value(&mut cx) as i64)
|
|
||||||
.unwrap_or_else(|| 2 * 7 * 24 * 60); // 2 weeks
|
|
||||||
let older_than = chrono::Duration::try_minutes(older_than).unwrap();
|
|
||||||
let delete_unverified: Option<bool> = Some(
|
|
||||||
cx.argument_opt(1)
|
|
||||||
.and_then(|val| val.downcast::<JsBoolean, _>(&mut cx).ok())
|
|
||||||
.map(|val| val.value(&mut cx))
|
|
||||||
.unwrap_or_default(),
|
|
||||||
);
|
|
||||||
let error_if_tagged_old_versions: Option<bool> = Some(
|
|
||||||
cx.argument_opt(2)
|
|
||||||
.and_then(|val| val.downcast::<JsBoolean, _>(&mut cx).ok())
|
|
||||||
.map(|val| val.value(&mut cx))
|
|
||||||
.unwrap_or_default(),
|
|
||||||
);
|
|
||||||
|
|
||||||
rt.spawn(async move {
|
|
||||||
let stats = table
|
|
||||||
.optimize(OptimizeAction::Prune {
|
|
||||||
older_than: Some(older_than),
|
|
||||||
delete_unverified,
|
|
||||||
error_if_tagged_old_versions,
|
|
||||||
})
|
|
||||||
.await;
|
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
|
||||||
let stats = stats.or_throw(&mut cx)?;
|
|
||||||
|
|
||||||
let prune_stats = stats.prune.as_ref().expect("Prune stats missing");
|
|
||||||
let output_metrics = JsObject::new(&mut cx);
|
|
||||||
let bytes_removed = cx.number(prune_stats.bytes_removed as f64);
|
|
||||||
output_metrics.set(&mut cx, "bytesRemoved", bytes_removed)?;
|
|
||||||
|
|
||||||
let old_versions = cx.number(prune_stats.old_versions as f64);
|
|
||||||
output_metrics.set(&mut cx, "oldVersions", old_versions)?;
|
|
||||||
|
|
||||||
let output_table = cx.boxed(Self::from(table));
|
|
||||||
|
|
||||||
let output = JsObject::new(&mut cx);
|
|
||||||
output.set(&mut cx, "metrics", output_metrics)?;
|
|
||||||
output.set(&mut cx, "newTable", output_table)?;
|
|
||||||
|
|
||||||
Ok(output)
|
|
||||||
})
|
|
||||||
});
|
|
||||||
Ok(promise)
|
|
||||||
}
|
|
||||||
|
|
||||||
    pub(crate) fn js_compact(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
        let rt = runtime(&mut cx)?;
        let (deferred, promise) = cx.promise();
        let table = js_table.table.clone();
        let channel = cx.channel();

        let js_options = cx.argument::<JsObject>(0)?;
        let mut options = CompactionOptions::default();

        if let Some(target_rows) =
            js_options.get_opt::<JsNumber, _, _>(&mut cx, "targetRowsPerFragment")?
        {
            options.target_rows_per_fragment = target_rows.value(&mut cx) as usize;
        }
        if let Some(max_per_group) =
            js_options.get_opt::<JsNumber, _, _>(&mut cx, "maxRowsPerGroup")?
        {
            options.max_rows_per_group = max_per_group.value(&mut cx) as usize;
        }
        if let Some(materialize_deletions) =
            js_options.get_opt::<JsBoolean, _, _>(&mut cx, "materializeDeletions")?
        {
            options.materialize_deletions = materialize_deletions.value(&mut cx);
        }
        if let Some(materialize_deletions_threshold) =
            js_options.get_opt::<JsNumber, _, _>(&mut cx, "materializeDeletionsThreshold")?
        {
            options.materialize_deletions_threshold =
                materialize_deletions_threshold.value(&mut cx) as f32;
        }
        if let Some(num_threads) = js_options.get_opt::<JsNumber, _, _>(&mut cx, "numThreads")? {
            options.num_threads = Some(num_threads.value(&mut cx) as usize);
        }

        rt.spawn(async move {
            let stats = table
                .optimize(OptimizeAction::Compact {
                    options,
                    remap_options: None,
                })
                .await;

            deferred.settle_with(&channel, move |mut cx| {
                let stats = stats.or_throw(&mut cx)?;
                let stats = stats.compaction.as_ref().expect("Compact stats missing");

                let output_metrics = JsObject::new(&mut cx);
                let fragments_removed = cx.number(stats.fragments_removed as f64);
                output_metrics.set(&mut cx, "fragmentsRemoved", fragments_removed)?;

                let fragments_added = cx.number(stats.fragments_added as f64);
                output_metrics.set(&mut cx, "fragmentsAdded", fragments_added)?;

                let files_removed = cx.number(stats.files_removed as f64);
                output_metrics.set(&mut cx, "filesRemoved", files_removed)?;

                let files_added = cx.number(stats.files_added as f64);
                output_metrics.set(&mut cx, "filesAdded", files_added)?;

                let output_table = cx.boxed(Self::from(table));

                let output = JsObject::new(&mut cx);
                output.set(&mut cx, "metrics", output_metrics)?;
                output.set(&mut cx, "newTable", output_table)?;

                Ok(output)
            })
        });
        Ok(promise)
    }
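
    // Illustrative sketch, not part of the original file: the compaction the
    // binding above configures, using the same public CompactionOptions
    // fields. The function name and option values are hypothetical.
    #[allow(dead_code)]
    async fn compact_sketch(table: &lancedb::Table) -> lancedb::Result<()> {
        let options = CompactionOptions {
            target_rows_per_fragment: 1024 * 1024,
            ..CompactionOptions::default()
        };
        let stats = table
            .optimize(OptimizeAction::Compact {
                options,
                remap_options: None,
            })
            .await?;
        let compaction = stats.compaction.expect("compact stats missing");
        println!(
            "fragments: -{} +{}",
            compaction.fragments_removed, compaction.fragments_added
        );
        Ok(())
    }
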
    pub(crate) fn js_list_indices(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
        let rt = runtime(&mut cx)?;
        let (deferred, promise) = cx.promise();
        // let predicate = cx.argument::<JsString>(0)?.value(&mut cx);
        let channel = cx.channel();
        let table = js_table.table.clone();

        rt.spawn(async move {
            let indices = table.as_native().unwrap().load_indices().await;

            deferred.settle_with(&channel, move |mut cx| {
                let indices = indices.or_throw(&mut cx)?;

                let output = JsArray::new(&mut cx, indices.len() as u32);
                for (i, index) in indices.iter().enumerate() {
                    let js_index = JsObject::new(&mut cx);
                    let index_name = cx.string(index.index_name.clone());
                    js_index.set(&mut cx, "name", index_name)?;

                    let index_uuid = cx.string(index.index_uuid.clone());
                    js_index.set(&mut cx, "uuid", index_uuid)?;

                    let js_index_columns = JsArray::new(&mut cx, index.columns.len() as u32);
                    for (j, column) in index.columns.iter().enumerate() {
                        let js_column = cx.string(column.clone());
                        js_index_columns.set(&mut cx, j as u32, js_column)?;
                    }
                    js_index.set(&mut cx, "columns", js_index_columns)?;

                    output.set(&mut cx, i as u32, js_index)?;
                }

                Ok(output)
            })
        });
        Ok(promise)
    }
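
    // Illustrative sketch, not part of the original file: listing indices via
    // the native table handle, mirroring the loop above. The field names
    // (index_name, index_uuid, columns) come from the binding; the function
    // name is hypothetical, and the as_native().unwrap() assumes a local
    // (non-remote) table, exactly as the binding itself does.
    #[allow(dead_code)]
    async fn list_indices_sketch(table: &lancedb::Table) -> lancedb::Result<()> {
        let indices = table.as_native().unwrap().load_indices().await?;
        for index in &indices {
            println!(
                "{} ({}) on {:?}",
                index.index_name, index.index_uuid, index.columns
            );
        }
        Ok(())
    }
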
    pub(crate) fn js_index_stats(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
        let rt = runtime(&mut cx)?;
        let (deferred, promise) = cx.promise();
        let index_name = cx.argument::<JsString>(0)?.value(&mut cx);
        let channel = cx.channel();
        let table = js_table.table.clone();

        rt.spawn(async move {
            let load_stats = table.index_stats(index_name).await;

            deferred.settle_with(&channel, move |mut cx| {
                let stats = load_stats.or_throw(&mut cx)?;

                if let Some(stats) = stats {
                    let output = JsObject::new(&mut cx);
                    let num_indexed_rows = cx.number(stats.num_indexed_rows as f64);
                    output.set(&mut cx, "numIndexedRows", num_indexed_rows)?;
                    let num_unindexed_rows = cx.number(stats.num_unindexed_rows as f64);
                    output.set(&mut cx, "numUnindexedRows", num_unindexed_rows)?;
                    if let Some(distance_type) = stats.distance_type {
                        let distance_type = cx.string(distance_type.to_string());
                        output.set(&mut cx, "distanceType", distance_type)?;
                    }
                    let index_type = cx.string(stats.index_type.to_string());
                    output.set(&mut cx, "indexType", index_type)?;

                    if let Some(num_indices) = stats.num_indices {
                        let num_indices = cx.number(num_indices as f64);
                        output.set(&mut cx, "numIndices", num_indices)?;
                    }

                    Ok(output.as_value(&mut cx))
                } else {
                    Ok(JsNull::new(&mut cx).as_value(&mut cx))
                }
            })
        });

        Ok(promise)
    }
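
    // Illustrative sketch, not part of the original file: fetching stats for
    // one index, as js_index_stats does above. index_stats returns None when
    // the named index does not exist, which the binding maps to JS null. The
    // function and index names are hypothetical.
    #[allow(dead_code)]
    async fn index_stats_sketch(table: &lancedb::Table) -> lancedb::Result<()> {
        let index_name = String::from("vector_idx");
        if let Some(stats) = table.index_stats(index_name).await? {
            println!(
                "indexed: {}, unindexed: {}, type: {}",
                stats.num_indexed_rows, stats.num_unindexed_rows, stats.index_type
            );
        }
        Ok(())
    }
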
    pub(crate) fn js_schema(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
        let rt = runtime(&mut cx)?;
        let (deferred, promise) = cx.promise();
        let channel = cx.channel();
        let table = js_table.table.clone();

        let is_electron = cx
            .argument::<JsBoolean>(0)
            .or_throw(&mut cx)?
            .value(&mut cx);

        rt.spawn(async move {
            let schema = table.schema().await;
            deferred.settle_with(&channel, move |mut cx| {
                let schema = schema.or_throw(&mut cx)?;
                let batches = vec![RecordBatch::new_empty(schema)];
                let buffer = record_batch_to_buffer(batches).or_throw(&mut cx)?;
                convert::new_js_buffer(buffer, &mut cx, is_electron)
            })
        });
        Ok(promise)
    }
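
    // Illustrative sketch, not part of the original file: js_schema above
    // ships the schema to JS as an empty Arrow record batch in an IPC buffer;
    // in Rust the schema is available directly. The function name is
    // hypothetical.
    #[allow(dead_code)]
    async fn schema_sketch(table: &lancedb::Table) -> lancedb::Result<()> {
        let schema = table.schema().await?;
        for field in schema.fields() {
            println!("{}: {}", field.name(), field.data_type());
        }
        Ok(())
    }
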
    pub(crate) fn js_add_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let expressions = cx
            .argument::<JsArray>(0)?
            .to_vec(&mut cx)?
            .into_iter()
            .map(|val| {
                let obj = val.downcast_or_throw::<JsObject, _>(&mut cx)?;
                let name = obj.get::<JsString, _, _>(&mut cx, "name")?.value(&mut cx);
                let sql = obj
                    .get::<JsString, _, _>(&mut cx, "valueSql")?
                    .value(&mut cx);
                Ok((name, sql))
            })
            .collect::<NeonResult<Vec<(String, String)>>>()?;

        let transforms = NewColumnTransform::SqlExpressions(expressions);

        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
        let rt = runtime(&mut cx)?;

        let (deferred, promise) = cx.promise();
        let channel = cx.channel();
        let table = js_table.table.clone();

        rt.spawn(async move {
            let result = table.add_columns(transforms, None).await;
            deferred.settle_with(&channel, move |mut cx| {
                result.or_throw(&mut cx)?;
                Ok(cx.undefined())
            })
        });

        Ok(promise)
    }
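
    // Illustrative sketch, not part of the original file: adding a column
    // from a SQL expression, as js_add_columns does above with the
    // { name, valueSql } objects it receives. The column name and expression
    // are hypothetical.
    #[allow(dead_code)]
    async fn add_columns_sketch(table: &lancedb::Table) -> lancedb::Result<()> {
        let transforms = NewColumnTransform::SqlExpressions(vec![(
            "double_price".to_string(),
            "price * 2".to_string(),
        )]);
        table.add_columns(transforms, None).await?;
        Ok(())
    }
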
    pub(crate) fn js_alter_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let alterations = cx
            .argument::<JsArray>(0)?
            .to_vec(&mut cx)?
            .into_iter()
            .map(|val| {
                let obj = val.downcast_or_throw::<JsObject, _>(&mut cx)?;
                let path = obj.get::<JsString, _, _>(&mut cx, "path")?.value(&mut cx);
                let rename = obj
                    .get_opt::<JsString, _, _>(&mut cx, "rename")?
                    .map(|val| val.value(&mut cx));
                let nullable = obj
                    .get_opt::<JsBoolean, _, _>(&mut cx, "nullable")?
                    .map(|val| val.value(&mut cx));
                // TODO: support data type here. Will need to do some serialization/deserialization

                if rename.is_none() && nullable.is_none() {
                    return cx
                        .throw_error("At least one of 'rename' or 'nullable' must be provided");
                }

                Ok(ColumnAlteration {
                    path,
                    rename,
                    nullable,
                    // TODO: wire up this field
                    data_type: None,
                })
            })
            .collect::<NeonResult<Vec<ColumnAlteration>>>()?;

        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
        let rt = runtime(&mut cx)?;

        let (deferred, promise) = cx.promise();
        let channel = cx.channel();
        let table = js_table.table.clone();

        rt.spawn(async move {
            let result = table.alter_columns(&alterations).await;
            deferred.settle_with(&channel, move |mut cx| {
                result.or_throw(&mut cx)?;
                Ok(cx.undefined())
            })
        });

        Ok(promise)
    }
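
    // Illustrative sketch, not part of the original file: renaming a column
    // and relaxing its nullability with the same ColumnAlteration struct the
    // binding above fills in. The path and new name are hypothetical.
    #[allow(dead_code)]
    async fn alter_columns_sketch(table: &lancedb::Table) -> lancedb::Result<()> {
        table
            .alter_columns(&[ColumnAlteration {
                path: "price".to_string(),
                rename: Some("cost".to_string()),
                nullable: Some(true),
                data_type: None,
            }])
            .await?;
        Ok(())
    }
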
    pub(crate) fn js_drop_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let columns = cx
            .argument::<JsArray>(0)?
            .to_vec(&mut cx)?
            .into_iter()
            .map(|val| {
                Ok(val
                    .downcast_or_throw::<JsString, _>(&mut cx)?
                    .value(&mut cx))
            })
            .collect::<NeonResult<Vec<String>>>()?;

        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
        let rt = runtime(&mut cx)?;

        let (deferred, promise) = cx.promise();
        let channel = cx.channel();
        let table = js_table.table.clone();

        rt.spawn(async move {
            let col_refs = columns.iter().map(|s| s.as_str()).collect::<Vec<_>>();
            let result = table.drop_columns(&col_refs).await;
            deferred.settle_with(&channel, move |mut cx| {
                result.or_throw(&mut cx)?;
                Ok(cx.undefined())
            })
        });

        Ok(promise)
    }
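
    // Illustrative sketch, not part of the original file: dropping columns by
    // name, matching the &[&str] slice js_drop_columns builds above. The
    // column name is hypothetical.
    #[allow(dead_code)]
    async fn drop_columns_sketch(table: &lancedb::Table) -> lancedb::Result<()> {
        table.drop_columns(&["cost"]).await?;
        Ok(())
    }
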
    pub(crate) fn js_drop_index(_cx: FunctionContext) -> JsResult<JsPromise> {
        todo!("not implemented")
    }
}