Mirror of https://github.com/lancedb/lancedb.git (synced 2025-12-23 13:29:57 +00:00)

Compare commits: `docs/quick...` → `python-v0...` (268 commits)
SHA1 (author and date columns did not survive the capture):

5cbbaa2e4a, 1b6bd2498e, 285da9db1d, ad8306c96b, 3594538509, 917aabd077, 5ec12c9971, d0ce489b21,
d7e02c8181, 70958f6366, 1ac745eb18, 1357fe8aa1, 0d78929893, 9e2a68541e, 1aa0fd16e7, fec2a05629,
79a1cd60ee, 88807a59a4, e0e7e01ea8, a416ebc11d, f941054baf, 1a81c46505, 82b25a71e9, 13c613d45f,
e07389a36c, e7e9e80b1d, 247fb58400, 504bdc471c, d617cdef4a, 356d7046fd, 48e5caabda, d6cc68f671,
55eacfa685, 222e3264ab, 13505026cb, b0800b4b71, 1befebf614, 1ab60fae7f, e921c90c1b, 05a4ea646a,
ebbeeff4e0, 407ca53f92, ff71d7e552, 2261eb95a0, 5b397e410b, b5a39bffec, 5e1e9add07, 97e9938dfe,
1d4b92e01e, 4c9fc3044b, 0ebc8d45a8, f7d78c3420, 6ea6884260, b1d791a299, 8da74dcb37, 3c7419b392,
e612686fdb, e77d57a5b6, 9391ad1450, 79960b254e, d19c64e29b, 06d5612443, 45f96f4151, f744b785f8,
2e3f745820, 683aaed716, 48f7b20daa, 4dd399ca29, e6f1da31dc, a9ea785b15, cc38453391, 47747287b6,
0847e666a0, 981f8427e6, f6846004ca, faf8973624, fabe37274f, 6839ac3509, b88422e515, 8d60685ede,
04285a4a4e, d4a41b5663, adc3daa462, acbfa6c012, d602e9f98c, ad09234d59, 0c34ffb252, d9f333d828,
bb809abd4b, c87530f7a3, 1eb1beecd6, ce550e6c45, d3bae1f3a3, dcf53c4506, 941eada703, ed640a76d9,
296205ef96, 16beaaa656, 4ff87b1f4a, 0532ef2358, dcf7334c1f, 8ffe992a6f, 9d683e4f0b, 0a1ea1858d,
7d0127b376, 02595dc475, f23327af79, c7afa724dd, c359cec504, fe76496a59, 67ec1fe75c, 70d9b04ba5,
b0d4a79c35, f79295c697, 381fad9b65, 055bf91d3e, 050f0086b8, 10fa23e0d6, 43d9fc28b0, f45f0d0431,
b9e3c36d82, 3cd7dd3375, 12d4ce4cfe, 3d1f102087, 81afd8a42f, c2aa03615a, d2c6759e7f, 94fb9f364a,
fbff244ed8, 7e7466d224, cceaf27d79, 7a15337e03, 96c66fd087, 0579303602, 75edb8756c, 88283110f4,
b3a637fdeb, ce24457531, 087fe6343d, ab8cbe62dd, f076bb41f4, 902fb83d54, 779118339f, 03b62599d7,
4c999fb651, 6d23d32ab5, 704cec34e1, a300a238db, a41ff1df0a, 77b005d849, 167fccc427, 2bffbcefa5,
905552f993, e4898c9313, cab36d94b2, b64252d4fd, 6fc006072c, d4bb59b542, 6b2dd6de51, dbccd9e4f1,
b12ebfed4c, 1dadb2aefa, eb9784d7f2, ba755626cc, 7760799cb8, 4beb2d2877, a00b8595d1, 9c8314b4fd,
c625b6f2b2, bec8fe6547, dc1150c011, afaefc6264, cb70ff8cee, cbb5a841b1, c72f6770fd, e5a80a5e86,
8d0a7fad1f, b80d4d0134, 9645fe52c2, b77314168d, e08d45e090, 2e3ddb8382, 627ca4c810, f8dae4ffe9,
9eb6119468, 59b57e30ed, fec8d58f06, 84ded9d678, 65696d9713, e2f2ea32e4, d5f2eca754, 7fa455a8a5,
8f42b5874e, 274f19f560, fbcbc75b5b, 008f389bd0, 91af6518d9, af6819762c, 7acece493d, 20e017fedc,
74e578b3c8, d92d9eb3d2, b6cdce7bc9, 316b406265, 8825c7c1dd, 81c85ff702, 570f2154d5, 0525c055fc,
38d11291da, 258e682574, d7afa600b8, 5c7303ab2e, 5895ef4039, 0528cd858a, 6582f43422, 5c7f63388d,
d0bc671cac, d37e17593d, cb726d370e, 23ee132546, 7fa090d330, 07bc1c5397, d7a9dbb9fc, 00487afc7d,
1902d65aad, c4fbb65b8e, 875ed7ae6f, 95a46a57ba, 51561e31a0, 7b19120578, 745c34a6a9, db8fa2454d,
a67a7b4b42, 496846e532, dadcfebf8e, 67033dbd7f, 05a85cfc2a, 40c5d3d72b, 198f0f80c6, e3f2fd3892,
f401ccc599, 81b59139f8, 1026781ab6, 9c699b8cd9, 34bec59bc3, a5fbbf0d66, b42721167b, 543dec9ff0,
04f962f6b0, 19e896ff69, 272e4103b2, 75c257ebb6, 9ee152eb42, c9ae1b1737, 89dc80c42a, 7b020ac799,
529e774bbb, 7c12239305, d83424d6b4, 8bf89f887c, b2160b2304, 1bb82597be, e4eee38b3c, 64fc2be503,
dc8054e90d, 1684940946, 695813463c, ed594b0f76
Changed files:

```diff
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.19.1-beta.1"
+current_version = "0.22.2-beta.2"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
@@ -50,11 +50,6 @@ pre_commit_hooks = [
 optional_value = "final"
 values = ["beta", "final"]
 
-[[tool.bumpversion.files]]
-filename = "node/package.json"
-replace = "\"version\": \"{new_version}\","
-search = "\"version\": \"{current_version}\","
-
 [[tool.bumpversion.files]]
 filename = "nodejs/package.json"
 replace = "\"version\": \"{new_version}\","
@@ -66,39 +61,8 @@ glob = "nodejs/npm/*/package.json"
 replace = "\"version\": \"{new_version}\","
 search = "\"version\": \"{current_version}\","
 
-# vectodb node binary packages
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
-search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
-search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
-search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
-search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
-search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
-
 # Cargo files
 # ------------
-[[tool.bumpversion.files]]
-filename = "rust/ffi/node/Cargo.toml"
-replace = "\nversion = \"{new_version}\""
-search = "\nversion = \"{current_version}\""
-
 [[tool.bumpversion.files]]
 filename = "rust/lancedb/Cargo.toml"
 replace = "\nversion = \"{new_version}\""
```
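The bump-my-version `parse` pattern above appears only partially in the hunk (the `major` and `minor` groups). As a sanity check, here is a hedged Python sketch of how such a pattern decomposes the new `current_version`; the `patch` group and the pre-release tail are assumptions filled in for illustration, not the exact pattern from the file.

```python
import re

# Hypothetical completion of the verbose-mode parse pattern shown in the diff
# above; only the major/minor groups appear in the hunk, so the patch group
# and the beta suffix below are illustrative assumptions.
VERSION_RE = re.compile(
    r"""(?x)
    (?P<major>0|[1-9]\d*)\.
    (?P<minor>0|[1-9]\d*)\.
    (?P<patch>0|[1-9]\d*)
    (?:-(?P<pre_label>beta)\.(?P<pre_number>\d+))?  # assumed pre-release suffix
    """
)

match = VERSION_RE.match("0.22.2-beta.2")  # the new current_version
assert match is not None
print(match.groupdict())
# -> {'major': '0', 'minor': '22', 'patch': '2', 'pre_label': 'beta', 'pre_number': '2'}
```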
**`.github/actions/create-failure-issue/action.yml`** (vendored, new file, 45 lines)

```diff
@@ -0,0 +1,45 @@
+name: Create Failure Issue
+description: Creates a GitHub issue if any jobs in the workflow failed
+
+inputs:
+  job-results:
+    description: 'JSON string of job results from needs context'
+    required: true
+  workflow-name:
+    description: 'Name of the workflow'
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Check for failures and create issue
+      shell: bash
+      env:
+        JOB_RESULTS: ${{ inputs.job-results }}
+        WORKFLOW_NAME: ${{ inputs.workflow-name }}
+        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        GH_TOKEN: ${{ github.token }}
+      run: |
+        # Check if any job failed
+        if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
+          echo "Detected job failures, creating issue..."
+
+          # Extract failed job names
+          FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')
+
+          # Create issue with workflow name, failed jobs, and run URL
+          gh issue create \
+            --title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
+            --body "The workflow **$WORKFLOW_NAME** failed during execution.
+
+          **Failed jobs:** $FAILED_JOBS
+
+          **Run URL:** $RUN_URL
+
+          Please investigate the failed jobs and address any issues." \
+            --label "ci"
+
+          echo "Issue created successfully"
+        else
+          echo "No job failures detected, skipping issue creation"
+        fi
```
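To make the two `jq` filters in this action concrete, here is a small Python sketch that mimics them on a made-up `needs` payload; the job names and the workflow title below are illustrative, not values from the repository.

```python
import json

# A made-up example of what `toJSON(needs)` hands to the action as JOB_RESULTS.
job_results = json.loads("""
{
  "build":   {"result": "success"},
  "publish": {"result": "failure"},
  "docs":    {"result": "failure"}
}
""")

# Mirrors: jq -e 'to_entries | any(.value.result == "failure")'
any_failed = any(job["result"] == "failure" for job in job_results.values())

# Mirrors: jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")'
failed_jobs = ", ".join(name for name, job in job_results.items()
                        if job["result"] == "failure")

if any_failed:
    # The same string shape the action passes to `gh issue create --title`.
    print(f"My Workflow Failed ({failed_jobs})")  # -> My Workflow Failed (publish, docs)
```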
**`.github/workflows/cargo-publish.yml`** (vendored, 24 lines changed)

```diff
@@ -5,8 +5,8 @@ on:
     tags-ignore:
       # We don't publish pre-releases for Rust. Crates.io is just a source
       # distribution, so we don't need to publish pre-releases.
-      - 'v*-beta*'
-      - '*-v*' # for example, python-vX.Y.Z
+      - "v*-beta*"
+      - "*-v*" # for example, python-vX.Y.Z
 
 env:
   # This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,6 +19,8 @@ env:
 jobs:
   build:
     runs-on: ubuntu-22.04
+    permissions:
+      id-token: write
     timeout-minutes: 30
     # Only runs on tags that matches the make-release action
     if: startsWith(github.ref, 'refs/tags/v')
@@ -31,6 +33,22 @@ jobs:
         run: |
           sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
+      - uses: rust-lang/crates-io-auth-action@v1
+        id: auth
       - name: Publish the package
         run: |
-          cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
```
**`.github/workflows/docs.yml`** (vendored, 14 lines changed)

```diff
@@ -56,22 +56,12 @@ jobs:
         with:
           node-version: 20
           cache: 'npm'
-          cache-dependency-path: node/package-lock.json
+          cache-dependency-path: docs/package-lock.json
       - name: Install node dependencies
-        working-directory: node
+        working-directory: nodejs
         run: |
           sudo apt update
           sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build node
-        working-directory: node
-        run: |
-          npm ci
-          npm run build
-          npm run tsc
-      - name: Create markdown files
-        working-directory: node
-        run: |
-          npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
       - name: Build docs
         working-directory: docs
         run: |
```
**`.github/workflows/docs_test.yml`** (vendored, 51 lines changed)

```diff
@@ -24,7 +24,8 @@ env:
 jobs:
   test-python:
     name: Test doc python code
-    runs-on: ubuntu-24.04
+    runs-on: warp-ubuntu-2204-x64-8x
+    timeout-minutes: 60
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -58,51 +59,3 @@ jobs:
       run: |
         cd docs/test/python
         for d in *; do cd "$d"; echo "$d".py; python "$d".py; cd ..; done
-  test-node:
-    name: Test doc nodejs code
-    runs-on: ubuntu-24.04
-    timeout-minutes: 60
-    strategy:
-      fail-fast: false
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - name: Print CPU capabilities
-        run: cat /proc/cpuinfo
-      - name: Set up Node
-        uses: actions/setup-node@v4
-        with:
-          node-version: 20
-      - name: Install protobuf
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler
-      - name: Install dependecies needed for ubuntu
-        run: |
-          sudo apt install -y libssl-dev
-          rustup update && rustup default
-      - name: Rust cache
-        uses: swatinem/rust-cache@v2
-      - name: Install node dependencies
-        run: |
-          sudo swapoff -a
-          sudo fallocate -l 8G /swapfile
-          sudo chmod 600 /swapfile
-          sudo mkswap /swapfile
-          sudo swapon /swapfile
-          sudo swapon --show
-          cd node
-          npm ci
-          npm run build-release
-          cd ../docs
-          npm install
-      - name: Test
-        env:
-          LANCEDB_URI: ${{ secrets.LANCEDB_URI }}
-          LANCEDB_DEV_API_KEY: ${{ secrets.LANCEDB_DEV_API_KEY }}
-        run: |
-          cd docs
-          npm t
```
**`.github/workflows/java-publish.yml`** (vendored, 15 lines changed)

```diff
@@ -43,7 +43,6 @@ jobs:
       - uses: Swatinem/rust-cache@v2
       - uses: actions-rust-lang/setup-rust-toolchain@v1
         with:
-          toolchain: "1.81.0"
           cache-workspaces: "./java/core/lancedb-jni"
       # Disable full debug symbol generation to speed up CI build and keep memory down
       # "1" means line tables only, which is useful for panic tracebacks.
@@ -112,3 +111,17 @@ jobs:
         env:
           SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
           SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [linux-arm64, linux-x86, macos-arm64]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
```
**`.github/workflows/java.yml`** (vendored, 7 lines changed)

```diff
@@ -35,6 +35,9 @@ jobs:
       - uses: Swatinem/rust-cache@v2
         with:
           workspaces: java/core/lancedb-jni
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          components: rustfmt
       - name: Run cargo fmt
         run: cargo fmt --check
         working-directory: ./java/core/lancedb-jni
@@ -68,6 +71,9 @@ jobs:
       - uses: Swatinem/rust-cache@v2
         with:
           workspaces: java/core/lancedb-jni
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          components: rustfmt
       - name: Run cargo fmt
         run: cargo fmt --check
         working-directory: ./java/core/lancedb-jni
@@ -110,4 +116,3 @@ jobs:
           -Djdk.reflect.useDirectMethodHandle=false \
           -Dio.netty.tryReflectionSetAccessible=true"
         JAVA_HOME=$JAVA_17 mvn clean test
-
```
**`.github/workflows/make-release-commit.yml`** (vendored, 9 lines changed)

```diff
@@ -84,6 +84,7 @@ jobs:
         run: |
           pip install bump-my-version PyGithub packaging
           bash ci/bump_version.sh ${{ inputs.type }} ${{ inputs.bump-minor }} v $COMMIT_BEFORE_BUMP
+          bash ci/update_lockfiles.sh --amend
       - name: Push new version tag
         if: ${{ !inputs.dry_run }}
         uses: ad-m/github-push-action@master
@@ -92,11 +93,3 @@ jobs:
           github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
           branch: ${{ github.ref }}
           tags: true
-      - uses: ./.github/workflows/update_package_lock
-        if: ${{ !inputs.dry_run && inputs.other }}
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-      - uses: ./.github/workflows/update_package_lock_nodejs
-        if: ${{ !inputs.dry_run && inputs.other }}
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
```
**`.github/workflows/node.yml`** (vendored, deleted, 147 lines)

```diff
@@ -1,147 +0,0 @@
-name: Node
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    paths:
-      - node/**
-      - rust/ffi/node/**
-      - .github/workflows/node.yml
-      - docker-compose.yml
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-env:
-  # Disable full debug symbol generation to speed up CI build and keep memory down
-  # "1" means line tables only, which is useful for panic tracebacks.
-  #
-  # Use native CPU to accelerate tests if possible, especially for f16
-  # target-cpu=haswell fixes failing ci build
-  RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
-  RUST_BACKTRACE: "1"
-
-jobs:
-  linux:
-    name: Linux (Node ${{ matrix.node-version }})
-    timeout-minutes: 30
-    strategy:
-      matrix:
-        node-version: [ "18", "20" ]
-    runs-on: "ubuntu-22.04"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - uses: actions/setup-node@v3
-        with:
-          node-version: ${{ matrix.node-version }}
-          cache: 'npm'
-          cache-dependency-path: node/package-lock.json
-      - uses: Swatinem/rust-cache@v2
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run build
-          npm run pack-build
-          npm install --no-save ./dist/lancedb-vectordb-*.tgz
-          # Remove index.node to test with dependency installed
-          rm index.node
-      - name: Test
-        run: npm run test
-  macos:
-    timeout-minutes: 30
-    runs-on: "macos-13"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: 'npm'
-          cache-dependency-path: node/package-lock.json
-      - uses: Swatinem/rust-cache@v2
-      - name: Install dependencies
-        run: brew install protobuf
-      - name: Build
-        run: |
-          npm ci
-          npm run build
-          npm run pack-build
-          npm install --no-save ./dist/lancedb-vectordb-*.tgz
-          # Remove index.node to test with dependency installed
-          rm index.node
-      - name: Test
-        run: |
-          npm run test
-  aws-integtest:
-    timeout-minutes: 45
-    runs-on: "ubuntu-22.04"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    env:
-      AWS_ACCESS_KEY_ID: ACCESSKEY
-      AWS_SECRET_ACCESS_KEY: SECRETKEY
-      AWS_DEFAULT_REGION: us-west-2
-      # this one is for s3
-      AWS_ENDPOINT: http://localhost:4566
-      # this one is for dynamodb
-      DYNAMODB_ENDPOINT: http://localhost:4566
-      ALLOW_HTTP: true
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: 'npm'
-          cache-dependency-path: node/package-lock.json
-      - name: start local stack
-        run: docker compose -f ../docker-compose.yml up -d --wait
-      - name: create s3
-        run: aws s3 mb s3://lancedb-integtest --endpoint $AWS_ENDPOINT
-      - name: create ddb
-        run: |
-          aws dynamodb create-table \
-            --table-name lancedb-integtest \
-            --attribute-definitions '[{"AttributeName": "base_uri", "AttributeType": "S"}, {"AttributeName": "version", "AttributeType": "N"}]' \
-            --key-schema '[{"AttributeName": "base_uri", "KeyType": "HASH"}, {"AttributeName": "version", "KeyType": "RANGE"}]' \
-            --provisioned-throughput '{"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}' \
-            --endpoint-url $DYNAMODB_ENDPOINT
-      - uses: Swatinem/rust-cache@v2
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run build
-          npm run pack-build
-          npm install --no-save ./dist/lancedb-vectordb-*.tgz
-          # Remove index.node to test with dependency installed
-          rm index.node
-      - name: Test
-        run: npm run integration-test
```
**`.github/workflows/nodejs.yml`** (vendored, 10 lines changed)

```diff
@@ -6,6 +6,7 @@ on:
       - main
   pull_request:
     paths:
+      - Cargo.toml
       - nodejs/**
       - .github/workflows/nodejs.yml
       - docker-compose.yml
@@ -47,6 +48,9 @@ jobs:
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          components: rustfmt, clippy
       - name: Lint
         run: |
           cargo fmt --all -- --check
@@ -76,7 +80,7 @@ jobs:
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'
-          cache-dependency-path: node/package-lock.json
+          cache-dependency-path: nodejs/package-lock.json
       - uses: Swatinem/rust-cache@v2
       - name: Install dependencies
         run: |
@@ -113,7 +117,7 @@ jobs:
           set -e
           npm ci
           npm run docs
-          if ! git diff --exit-code; then
+          if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
            echo "Docs need to be updated"
            echo "Run 'npm run docs', fix any warnings, and commit the changes."
            exit 1
@@ -134,7 +138,7 @@ jobs:
         with:
           node-version: 20
           cache: 'npm'
-          cache-dependency-path: node/package-lock.json
+          cache-dependency-path: nodejs/package-lock.json
       - uses: Swatinem/rust-cache@v2
       - name: Install dependencies
         run: |
```
**`.github/workflows/npm-publish.yml`** (vendored, 205 lines changed)

```diff
@@ -365,202 +365,17 @@ jobs:
             ARGS="$ARGS --tag preview"
           fi
           npm publish $ARGS
-  # ----------------------------------------------------------------------------
-  # vectordb release (legacy)
-  # ----------------------------------------------------------------------------
-  # TODO: delete this when we drop vectordb
-  node:
-    name: vectordb Typescript
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: "npm"
-          cache-dependency-path: node/package-lock.json
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run tsc
-          npm pack
-      - name: Upload Linux Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-package
-          path: |
-            node/vectordb-*.tgz
-
-  node-macos:
-    name: vectordb ${{ matrix.config.arch }}
-    strategy:
-      matrix:
-        config:
-          - arch: x86_64-apple-darwin
-            runner: macos-13
-          - arch: aarch64-apple-darwin
-            # xlarge is implicitly arm64.
-            runner: macos-14
-    runs-on: ${{ matrix.config.runner }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Install system dependencies
-        run: brew install protobuf
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build MacOS native node modules
-        run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
-      - name: Upload Darwin Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-darwin-${{ matrix.config.arch }}
-          path: |
-            node/dist/lancedb-vectordb-darwin*.tgz
-
-  node-linux-gnu:
-    name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
-    runs-on: ${{ matrix.config.runner }}
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - arch: x86_64
-            runner: ubuntu-latest
-          - arch: aarch64
-            # For successful fat LTO builds, we need a large runner to avoid OOM errors.
-            runner: warp-ubuntu-latest-arm64-4x
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      # To avoid OOM errors on ARM, we create a swap file.
-      - name: Configure aarch64 build
-        if: ${{ matrix.config.arch == 'aarch64' }}
-        run: |
-          free -h
-          sudo fallocate -l 16G /swapfile
-          sudo chmod 600 /swapfile
-          sudo mkswap /swapfile
-          sudo swapon /swapfile
-          echo "/swapfile swap swap defaults 0 0" >> sudo /etc/fstab
-          # print info
-          swapon --show
-          free -h
-      - name: Build Linux Artifacts
-        run: |
-          bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
-      - name: Upload Linux Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-linux-${{ matrix.config.arch }}-gnu
-          path: |
-            node/dist/lancedb-vectordb-linux*.tgz
-
-  node-windows:
-    name: vectordb ${{ matrix.target }}
-    runs-on: windows-2022
-    strategy:
-      fail-fast: false
-      matrix:
-        target: [x86_64-pc-windows-msvc]
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Install Protoc v21.12
-        working-directory: C:\
-        run: |
-          New-Item -Path 'C:\protoc' -ItemType Directory
-          Set-Location C:\protoc
-          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
-          7z x protoc.zip
-          Add-Content $env:GITHUB_PATH "C:\protoc\bin"
-        shell: powershell
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build Windows native node modules
-        run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
-      - name: Upload Windows Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-windows
-          path: |
-            node/dist/lancedb-vectordb-win32*.tgz
-
-  release:
-    name: vectordb NPM Publish
-    needs: [node, node-macos, node-linux-gnu, node-windows]
-    runs-on: ubuntu-latest
-    # Only runs on tags that matches the make-release action
-    if: startsWith(github.ref, 'refs/tags/v')
-    steps:
-      - uses: actions/download-artifact@v4
-        with:
-          pattern: node-*
-      - name: Display structure of downloaded files
-        run: ls -R
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          registry-url: "https://registry.npmjs.org"
-      - name: Publish to NPM
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        run: |
-          # Tag beta as "preview" instead of default "latest". See lancedb
-          # npm publish step for more info.
-          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
-            PUBLISH_ARGS="--tag preview"
-          fi
-
-          mv */*.tgz .
-          for filename in *.tgz; do
-            npm publish $PUBLISH_ARGS $filename
-          done
-      - name: Deprecate
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        # We need to deprecate the old package to avoid confusion.
-        # Each time we publish a new version, it gets undeprecated.
-        run: npm deprecate vectordb "Use @lancedb/lancedb instead."
-      - name: Notify Slack Action
-        uses: ravsamhq/notify-slack-action@2.3.0
-        if: ${{ always() }}
-        with:
-          status: ${{ job.status }}
-          notify_when: "failure"
-          notification_title: "{workflow} is failing"
-        env:
-          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
-
-  update-package-lock:
-    if: startsWith(github.ref, 'refs/tags/v')
-    needs: [release]
+  report-failure:
+    name: Report Workflow Failure
     runs-on: ubuntu-latest
+    needs: [build-lancedb, test-lancedb, publish]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
     permissions:
-      contents: write
+      contents: read
+      issues: write
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
         with:
-          ref: main
-          token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
-          fetch-depth: 0
-          lfs: true
-      - uses: ./.github/workflows/update_package_lock
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
```
**`.github/workflows/pypi-publish.yml`** (vendored, 18 lines changed)

```diff
@@ -56,7 +56,7 @@ jobs:
       pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
       fury_token: ${{ secrets.FURY_TOKEN }}
   mac:
-    timeout-minutes: 60
+    timeout-minutes: 90
     runs-on: ${{ matrix.config.runner }}
     strategy:
       matrix:
@@ -64,7 +64,7 @@ jobs:
         - target: x86_64-apple-darwin
           runner: macos-13
         - target: aarch64-apple-darwin
-          runner: macos-14
+          runner: warp-macos-14-arm64-6x
     env:
       MACOSX_DEPLOYMENT_TARGET: 10.15
     steps:
@@ -173,3 +173,17 @@ jobs:
       generate_release_notes: false
       name: Python LanceDB v${{ steps.extract_version.outputs.version }}
       body: ${{ steps.python_release_notes.outputs.changelog }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [linux, mac, windows]
+    permissions:
+      contents: read
+      issues: write
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
```
**`.github/workflows/python.yml`** (vendored, 1 line changed)

```diff
@@ -6,6 +6,7 @@ on:
       - main
   pull_request:
     paths:
+      - Cargo.toml
       - python/**
       - .github/workflows/python.yml
 
```
**`.github/workflows/run_tests/action.yml`** (vendored, 4 lines changed)

```diff
@@ -24,8 +24,8 @@ runs:
     - name: pytest (with integration)
       shell: bash
       if: ${{ inputs.integration == 'true' }}
-      run: pytest -m "not slow" -x -v --durations=30 python/python/tests
+      run: pytest -m "not slow" -vv --durations=30 python/python/tests
     - name: pytest (no integration tests)
       shell: bash
       if: ${{ inputs.integration != 'true' }}
-      run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/python/tests
+      run: pytest -m "not slow and not s3_test" -vv --durations=30 python/python/tests
```
**`.github/workflows/rust.yml`** (vendored, 16 lines changed)

```diff
@@ -40,6 +40,9 @@ jobs:
         with:
           fetch-depth: 0
           lfs: true
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          components: rustfmt, clippy
       - uses: Swatinem/rust-cache@v2
         with:
           workspaces: rust
@@ -93,6 +96,7 @@ jobs:
       # Need up-to-date compilers for kernels
       CC: clang-18
       CXX: clang++-18
+      GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -114,15 +118,17 @@ jobs:
           sudo chmod 600 /swapfile
           sudo mkswap /swapfile
           sudo swapon /swapfile
-      - name: Start S3 integration test environment
-        working-directory: .
-        run: docker compose up --detach --wait
       - name: Build
         run: cargo build --all-features --tests --locked --examples
-      - name: Run tests
-        run: cargo test --all-features --locked
+      - name: Run feature tests
+        run: make -C ./lancedb feature-tests
       - name: Run examples
         run: cargo run --example simple --locked
+      - name: Run remote tests
+        # Running this requires access to secrets, so skip if this is
+        # a PR from a fork.
+        if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
+        run: make -C ./lancedb remote-tests
 
   macos:
     timeout-minutes: 30
```
**`.github/workflows/trigger-vectordb-recipes.yml`** (vendored, deleted, 26 lines)

```diff
@@ -1,26 +0,0 @@
-name: Trigger vectordb-recipers workflow
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    paths:
-      - .github/workflows/trigger-vectordb-recipes.yml
-  workflow_dispatch:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Trigger vectordb-recipes workflow
-        uses: actions/github-script@v6
-        with:
-          github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
-          script: |
-            const result = await github.rest.actions.createWorkflowDispatch({
-              owner: 'lancedb',
-              repo: 'vectordb-recipes',
-              workflow_id: 'examples-test.yml',
-              ref: 'main'
-            });
-            console.log(result);
```
**`.github/workflows/update_package_lock/action.yml`** (vendored, deleted, 33 lines)

```diff
@@ -1,33 +0,0 @@
-name: update_package_lock
-description: "Update node's package.lock"
-
-inputs:
-  github_token:
-    required: true
-    description: "github token for the repo"
-
-runs:
-  using: "composite"
-  steps:
-    - uses: actions/setup-node@v3
-      with:
-        node-version: 20
-    - name: Set git configs
-      shell: bash
-      run: |
-        git config user.name 'Lance Release'
-        git config user.email 'lance-dev@lancedb.com'
-    - name: Update package-lock.json file
-      working-directory: ./node
-      run: |
-        npm install
-        git add package-lock.json
-        git commit -m "Updating package-lock.json"
-      shell: bash
-    - name: Push changes
-      if: ${{ inputs.dry_run }} == "false"
-      uses: ad-m/github-push-action@master
-      with:
-        github_token: ${{ inputs.github_token }}
-        branch: main
-        tags: true
```
**`.github/workflows/update_package_lock_nodejs/action.yml`** (deleted, 33 lines)

```diff
@@ -1,33 +0,0 @@
-name: update_package_lock_nodejs
-description: "Update nodejs's package.lock"
-
-inputs:
-  github_token:
-    required: true
-    description: "github token for the repo"
-
-runs:
-  using: "composite"
-  steps:
-    - uses: actions/setup-node@v3
-      with:
-        node-version: 20
-    - name: Set git configs
-      shell: bash
-      run: |
-        git config user.name 'Lance Release'
-        git config user.email 'lance-dev@lancedb.com'
-    - name: Update package-lock.json file
-      working-directory: ./nodejs
-      run: |
-        npm install
-        git add package-lock.json
-        git commit -m "Updating package-lock.json"
-      shell: bash
-    - name: Push changes
-      if: ${{ inputs.dry_run }} == "false"
-      uses: ad-m/github-push-action@master
-      with:
-        github_token: ${{ inputs.github_token }}
-        branch: main
-        tags: true
```
**`.gitignore`** (vendored, 3 lines changed)

```diff
@@ -31,9 +31,6 @@ python/dist
 *.node
 **/node_modules
 **/.DS_Store
-node/dist
-node/examples/**/package-lock.json
-node/examples/**/dist
 nodejs/lancedb/native*
 dist
 
```
**`CLAUDE.md`** (new file, 80 lines)

````diff
@@ -0,0 +1,80 @@
+LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
+It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
+remote (against LanceDB Cloud).
+
+The core of LanceDB is written in Rust. There are bindings in Python, Typescript, and Java.
+
+Project layout:
+
+* `rust/lancedb`: The LanceDB core Rust implementation.
+* `python`: The Python bindings, using PyO3.
+* `nodejs`: The Typescript bindings, using napi-rs
+* `java`: The Java bindings
+
+Common commands:
+
+* Check for compiler errors: `cargo check --quiet --features remote --tests --examples`
+* Run tests: `cargo test --quiet --features remote --tests`
+* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
+* Lint: `cargo clippy --quiet --features remote --tests --examples`
+* Format: `cargo fmt --all`
+
+Before committing changes, run formatting.
+
+## Coding tips
+
+* When writing Rust doctests for things that require a connection or table reference,
+  write them as a function instead of a fully executable test. This allows type checking
+  to run but avoids needing a full test environment. For example:
+  ```rust
+  /// ```
+  /// use lance_index::scalar::FullTextSearchQuery;
+  /// use lancedb::query::{QueryBase, ExecutableQuery};
+  ///
+  /// # use lancedb::Table;
+  /// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
+  /// let results = table.query()
+  ///     .full_text_search(FullTextSearchQuery::new("hello world".into()))
+  ///     .execute()
+  ///     .await?;
+  /// # Ok(())
+  /// # }
+  /// ```
+  ```
+
+## Example plan: adding a new method on Table
+
+Adding a new method involves first adding it to the Rust core, then exposing it
+in the Python and TypeScript bindings. There are both local and remote tables.
+Remote tables are implemented via a HTTP API and require the `remote` cargo
+feature flag to be enabled. Python has both sync and async methods.
+
+Rust core changes:
+
+1. Add method on `Table` struct in `rust/lancedb/src/table.rs` (calls `BaseTable` trait).
+2. Add method to `BaseTable` trait in `rust/lancedb/src/table.rs`.
+3. Implement new trait method on `NativeTable` in `rust/lancedb/src/table.rs`.
+   * Test with unit test in `rust/lancedb/src/table.rs`.
+4. Implement new trait method on `RemoteTable` in `rust/lancedb/src/remote/table.rs`.
+   * Test with unit test in `rust/lancedb/src/remote/table.rs` against mocked endpoint.
+
+Python bindings changes:
+
+1. Add PyO3 method binding in `python/src/table.rs`. Run `make develop` to compile bindings.
+2. Add types for PyO3 method in `python/python/lancedb/_lancedb.pyi`.
+3. Add method to `AsyncTable` class in `python/python/lancedb/table.py`.
+4. Add abstract method to `Table` abstract base class in `python/python/lancedb/table.py`.
+5. Add concrete sync method to `LanceTable` class in `python/python/lancedb/table.py`.
+   * Should use `LOOP.run()` to call the corresponding `AsyncTable` method.
+6. Add concrete sync method to `RemoteTable` class in `python/python/lancedb/remote/table.py`.
+7. Add unit test in `python/tests/test_table.py`.
+
+TypeScript bindings changes:
+
+1. Add napi-rs method binding on `Table` in `nodejs/src/table.rs`.
+2. Run `npm run build` to generate TypeScript definitions.
+3. Add typescript method on abstract class `Table` in `nodejs/src/table.ts`.
+4. Add concrete method on `LocalTable` class in `nodejs/src/native_table.ts`.
+   * Note: despite the name, this class is also used for remote tables.
+5. Add test in `nodejs/__test__/table.test.ts`.
+6. Run `npm run docs` to generate TypeScript documentation.
````
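The `LOOP.run()` convention in step 5 of the Python plan is the usual sync-over-async facade. The sketch below illustrates the shape of that pattern under stated assumptions: `_Loop`, `AsyncTable.count_rows`, and the simplified `LanceTable` are stand-ins for illustration, not the actual lancedb classes.

```python
import asyncio
import threading

class _Loop:
    """Stand-in for lancedb's background event-loop helper (assumed shape)."""

    def __init__(self) -> None:
        self._loop = asyncio.new_event_loop()
        threading.Thread(target=self._loop.run_forever, daemon=True).start()

    def run(self, coro):
        # Submit the coroutine to the background loop and block for its result.
        return asyncio.run_coroutine_threadsafe(coro, self._loop).result()

LOOP = _Loop()

class AsyncTable:
    async def count_rows(self) -> int:
        return 42  # placeholder for the real native call

class LanceTable:
    def __init__(self) -> None:
        self._async_table = AsyncTable()

    def count_rows(self) -> int:
        # Step 5: the concrete sync method delegates via LOOP.run().
        return LOOP.run(self._async_table.count_rows())

print(LanceTable().count_rows())  # 42
```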
**`Cargo.lock`** (generated, 3833 lines changed) — file diff suppressed because it is too large.
**`Cargo.toml`** (79 lines changed)

```diff
@@ -1,11 +1,5 @@
 [workspace]
-members = [
-    "rust/ffi/node",
-    "rust/lancedb",
-    "nodejs",
-    "python",
-    "java/core/lancedb-jni",
-]
+members = ["rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
 # Python package needs to be built by maturin.
 exclude = ["python"]
 resolver = "2"
@@ -21,55 +15,60 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"
 
 [workspace.dependencies]
-lance = { "version" = "=0.27.0", "features" = ["dynamodb"], tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-io = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-index = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-linalg = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-table = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-testing = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-datafusion = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-encoding = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.38.2", default-features = false, "features" = ["dynamodb"] }
+lance-io = { "version" = "=0.38.2", default-features = false }
+lance-index = "=0.38.2"
+lance-linalg = "=0.38.2"
+lance-table = "=0.38.2"
+lance-testing = "=0.38.2"
+lance-datafusion = "=0.38.2"
+lance-encoding = "=0.38.2"
+lance-namespace = "0.0.18"
 # Note that this one does not include pyarrow
-arrow = { version = "54.1", optional = false }
-arrow-array = "54.1"
-arrow-data = "54.1"
-arrow-ipc = "54.1"
-arrow-ord = "54.1"
-arrow-schema = "54.1"
-arrow-arith = "54.1"
-arrow-cast = "54.1"
+arrow = { version = "56.2", optional = false }
+arrow-array = "56.2"
+arrow-data = "56.2"
+arrow-ipc = "56.2"
+arrow-ord = "56.2"
+arrow-schema = "56.2"
+arrow-cast = "56.2"
 async-trait = "0"
-datafusion = { version = "46.0", default-features = false }
-datafusion-catalog = "46.0"
-datafusion-common = { version = "46.0", default-features = false }
-datafusion-execution = "46.0"
-datafusion-expr = "46.0"
-datafusion-physical-plan = "46.0"
+datafusion = { version = "50.1", default-features = false }
+datafusion-catalog = "50.1"
+datafusion-common = { version = "50.1", default-features = false }
+datafusion-execution = "50.1"
+datafusion-expr = "50.1"
+datafusion-physical-plan = "50.1"
 env_logger = "0.11"
-half = { "version" = "=2.4.1", default-features = false, features = [
+half = { "version" = "2.6.0", default-features = false, features = [
     "num-traits",
 ] }
 futures = "0"
 log = "0.4"
 moka = { version = "0.12", features = ["future"] }
-object_store = "0.11.0"
+object_store = "0.12.0"
 pin-project = "1.0.7"
 snafu = "0.8"
 url = "2"
 num-traits = "0.2"
-rand = "0.8"
 regex = "1.10"
 lazy_static = "1"
 semver = "1.0.25"
+crunchy = "0.2.4"
 # Temporary pins to work around downstream issues
 # https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
-chrono = "=0.4.39"
-# https://github.com/RustCrypto/formats/issues/1684
-base64ct = "=1.6.0"
-
-# Workaround for: https://github.com/eira-fransham/crunchy/issues/13
-crunchy = "=0.2.2"
-
+chrono = "=0.4.41"
 # Workaround for: https://github.com/Lokathor/bytemuck/issues/306
 bytemuck_derive = ">=1.8.1, <1.9.0"
+
+# This is only needed when we reference preview releases of lance
+# [patch.crates-io]
+# # Force to use the same lance version as the rest of the project to avoid duplicate dependencies
+# lance = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
+# lance-io = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
+# lance-index = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
+# lance-linalg = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
+# lance-table = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
+# lance-testing = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
+# lance-datafusion = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
+# lance-encoding = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
```
129
README.md
129
README.md
@@ -1,94 +1,97 @@
|
|||||||
 <a href="https://cloud.lancedb.com" target="_blank">
 <img src="https://github.com/user-attachments/assets/92dad0a2-2a37-4ce1-b783-0d1b4f30a00c" alt="LanceDB Cloud Public Beta" width="100%" style="max-width: 100%;">
 </a>

 <div align="center">
-<p align="center">
-<picture>
-<source media="(prefers-color-scheme: dark)" srcset="https://github.com/user-attachments/assets/ac270358-333e-4bea-a132-acefaa94040e">
-<source media="(prefers-color-scheme: light)" srcset="https://github.com/user-attachments/assets/b864d814-0d29-4784-8fd9-807297c758c0">
-<img alt="LanceDB Logo" src="https://github.com/user-attachments/assets/b864d814-0d29-4784-8fd9-807297c758c0" width=300>
-</picture>
-
-**Search More, Manage Less**
-
-<a href='https://github.com/lancedb/vectordb-recipes/tree/main' target="_blank"><img alt='LanceDB' src='https://img.shields.io/badge/VectorDB_Recipes-100000?style=for-the-badge&logo=LanceDB&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
-<a href='https://lancedb.github.io/lancedb/' target="_blank"><img alt='lancdb' src='https://img.shields.io/badge/DOCS-100000?style=for-the-badge&logo=lancdb&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
-[](https://blog.lancedb.com/)
-[](https://discord.gg/zMM32dvNtd)
-[](https://twitter.com/lancedb)
-[](https://gurubase.io/g/lancedb)
-
-</p>
-<img max-width="750px" alt="LanceDB Multimodal Search" src="https://github.com/lancedb/lancedb/assets/917119/09c5afc5-7816-4687-bae4-f2ca194426ec">
-
-</p>
+[](https://lancedb.com)
+[](https://lancedb.com/)
+[](https://blog.lancedb.com/)
+[](https://discord.gg/zMM32dvNtd)
+[](https://twitter.com/lancedb)
+[](https://www.linkedin.com/company/lancedb/)
+
+<img src="docs/src/assets/lancedb.png" alt="LanceDB" width="50%">
+
+# **The Multimodal AI Lakehouse**
+
+[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.github.io/lancedb/) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
+
+**The ultimate multimodal data platform for AI/ML applications.**
+
+LanceDB is designed for fast, scalable, and production-ready vector search. It is built on top of the Lance columnar format. You can store, index, and search over petabytes of multimodal data and vectors with ease.
+LanceDB is a central location where developers can build, train and analyze their AI workloads.
 </div>

-<hr />
+<br>

-LanceDB is an open-source database for vector-search built with persistent storage, which greatly simplifies retrieval, filtering and management of embeddings.
+## **Demo: Multimodal Search by Keyword, Vector or with SQL**
+<img max-width="750px" alt="LanceDB Multimodal Search" src="https://github.com/lancedb/lancedb/assets/917119/09c5afc5-7816-4687-bae4-f2ca194426ec">

-The key features of LanceDB include:
+## **Star LanceDB to get updates!**

-* Production-scale vector search with no servers to manage.
-
-* Store, query and filter vectors, metadata and multi-modal data (text, images, videos, point clouds, and more).
-
-* Support for vector similarity search, full-text search and SQL.
-
-* Native Python and Javascript/Typescript support.
-
-* Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.
-
-* GPU support in building vector index(*).
-
-* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/docs/integrations/vectorstores/lancedb/), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
-
-LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/lancedb/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.
+<details>
+<summary>⭐ Click here ⭐ to see how fast we're growing!</summary>
+<picture>
+<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=lancedb/lancedb&theme=dark&type=Date">
+<img width="100%" src="https://api.star-history.com/svg?repos=lancedb/lancedb&theme=dark&type=Date">
+</picture>
+</details>
+
+## **Key Features**:
+
+- **Fast Vector Search**: Search billions of vectors in milliseconds with state-of-the-art indexing.
+- **Comprehensive Search**: Support for vector similarity search, full-text search and SQL.
+- **Multimodal Support**: Store, query and filter vectors, metadata and multimodal data (text, images, videos, point clouds, and more).
+- **Advanced Features**: Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure. GPU support in building vector index.
+
+### **Products**:
+- **Open Source & Local**: 100% open source, runs locally or in your cloud. No vendor lock-in.
+- **Cloud and Enterprise**: Production-scale vector search with no servers to manage. Complete data sovereignty and security.
+
+### **Ecosystem**:
+- **Columnar Storage**: Built on the Lance columnar format for efficient storage and analytics.
+- **Seamless Integration**: Python, Node.js, Rust, and REST APIs for easy integration. Native Python and Javascript/Typescript support.
+- **Rich Ecosystem**: Integrations with [**LangChain** 🦜️🔗](https://python.langchain.com/docs/integrations/vectorstores/lancedb/), [**LlamaIndex** 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.

-## Quick Start
-
-**Javascript**
-```shell
-npm install @lancedb/lancedb
-```
-
-```javascript
-import * as lancedb from "@lancedb/lancedb";
-
-const db = await lancedb.connect("data/sample-lancedb");
-const table = await db.createTable("vectors", [
-  { id: 1, vector: [0.1, 0.2], item: "foo", price: 10 },
-  { id: 2, vector: [1.1, 1.2], item: "bar", price: 50 },
-], {mode: 'overwrite'});
-
-const query = table.vectorSearch([0.1, 0.3]).limit(2);
-const results = await query.toArray();
-
-// You can also search for rows by specific criteria without involving a vector search.
-const rowsByCriteria = await table.query().where("price >= 10").toArray();
-```
-
-**Python**
-```shell
-pip install lancedb
-```
-
-```python
-import lancedb
-
-uri = "data/sample-lancedb"
-db = lancedb.connect(uri)
-table = db.create_table("my_table",
-                        data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
-                              {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
-result = table.search([100, 100]).limit(2).to_pandas()
-```
-
-## Blogs, Tutorials & Videos
-* 📈 <a href="https://blog.lancedb.com/benchmarking-random-access-in-lance/">2000x better performance with Lance over Parquet</a>
-* 🤖 <a href="https://github.com/lancedb/vectordb-recipes/tree/main/examples/Youtube-Search-QA-Bot">Build a question and answer bot with LanceDB</a>
+## **How to Install**:
+
+Follow the [Quickstart](https://lancedb.github.io/lancedb/basic/) doc to set up LanceDB locally.
+
+**API & SDK:** We also support Python, Typescript and Rust SDKs
+
+| Interface | Documentation |
+|-----------|---------------|
+| Python SDK | https://lancedb.github.io/lancedb/python/python/ |
+| Typescript SDK | https://lancedb.github.io/lancedb/js/globals/ |
+| Rust SDK | https://docs.rs/lancedb/latest/lancedb/index.html |
+| REST API | https://docs.lancedb.com/api-reference/introduction |
+
+## **Join Us and Contribute**
+
+We welcome contributions from everyone! Whether you're a developer, researcher, or just someone who wants to help out.
+
+If you have any suggestions or feature requests, please feel free to open an issue on GitHub or discuss it on our [**Discord**](https://discord.gg/G5DcmnZWKB) server.
+
+[**Check out the GitHub Issues**](https://github.com/lancedb/lancedb/issues) if you would like to work on the features that are planned for the future. If you have any suggestions or feature requests, please feel free to open an issue on GitHub.
+
+## **Contributors**
+
+<a href="https://github.com/lancedb/lancedb/graphs/contributors">
+<img src="https://contrib.rocks/image?repo=lancedb/lancedb" />
+</a>
+
+## **Stay in Touch With Us**
+<div align="center">
+
+</br>
+
+[](https://lancedb.com/)
+[](https://blog.lancedb.com/)
+[](https://discord.gg/zMM32dvNtd)
+[](https://twitter.com/lancedb)
+[](https://www.linkedin.com/company/lancedb/)
+
+</div>
(deleted file)
@@ -1,22 +0,0 @@
#!/bin/bash
set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}

# We pass down the current user so that when we later mount the local files
# into the container, the files are accessible by the current user.
pushd ci/manylinux_node
docker build \
  -t lancedb-node-manylinux \
  --build-arg="ARCH=$ARCH" \
  --build-arg="DOCKER_USER=$(id -u)" \
  --progress=plain \
  .
popd

# We turn on memory swap to avoid OOM killer
docker run \
  -v $(pwd):/io -w /io \
  --memory-swap=-1 \
  lancedb-node-manylinux \
  bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE
(deleted file)
@@ -1,34 +0,0 @@
# Builds the macOS artifacts (node binaries).
# Usage: ./ci/build_macos_artifacts.sh [target]
# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
set -e

prebuild_rust() {
  # Building here for the sake of easier debugging.
  pushd rust/ffi/node
  echo "Building rust library for $1"
  export RUST_BACKTRACE=1
  cargo build --release --target $1
  popd
}

build_node_binaries() {
  pushd node
  echo "Building node library for $1"
  npm run build-release -- --target $1
  npm run pack-build -- --target $1
  popd
}

if [ -n "$1" ]; then
  targets=$1
else
  targets="x86_64-apple-darwin aarch64-apple-darwin"
fi

echo "Building artifacts for targets: $targets"
for target in $targets
do
  prebuild_rust $target
  build_node_binaries $target
done
(deleted file)
@@ -1,42 +0,0 @@
# Builds the Windows artifacts (node binaries).
# Usage: .\ci\build_windows_artifacts.ps1 [target]
# Targets supported:
# - x86_64-pc-windows-msvc
# - i686-pc-windows-msvc
# - aarch64-pc-windows-msvc

function Prebuild-Rust {
    param (
        [string]$target
    )

    # Building here for the sake of easier debugging.
    Push-Location -Path "rust/ffi/node"
    Write-Host "Building rust library for $target"
    $env:RUST_BACKTRACE=1
    cargo build --release --target $target
    Pop-Location
}

function Build-NodeBinaries {
    param (
        [string]$target
    )

    Push-Location -Path "node"
    Write-Host "Building node library for $target"
    npm run build-release -- --target $target
    npm run pack-build -- --target $target
    Pop-Location
}

$targets = $args[0]
if (-not $targets) {
    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
}

Write-Host "Building artifacts for targets: $targets"
foreach ($target in $targets) {
    Prebuild-Rust $target
    Build-NodeBinaries $target
}
(deleted file)
@@ -1,42 +0,0 @@
# Builds the Windows artifacts (nodejs binaries).
# Usage: .\ci\build_windows_artifacts_nodejs.ps1 [target]
# Targets supported:
# - x86_64-pc-windows-msvc
# - i686-pc-windows-msvc
# - aarch64-pc-windows-msvc

function Prebuild-Rust {
    param (
        [string]$target
    )

    # Building here for the sake of easier debugging.
    Push-Location -Path "rust/lancedb"
    Write-Host "Building rust library for $target"
    $env:RUST_BACKTRACE=1
    cargo build --release --target $target
    Pop-Location
}

function Build-NodeBinaries {
    param (
        [string]$target
    )

    Push-Location -Path "nodejs"
    Write-Host "Building nodejs library for $target"
    $env:RUST_TARGET=$target
    npm run build-release
    Pop-Location
}

$targets = $args[0]
if (-not $targets) {
    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
}

Write-Host "Building artifacts for targets: $targets"
foreach ($target in $targets) {
    Prebuild-Rust $target
    Build-NodeBinaries $target
}
ci/create_lancedb_test_connection.sh (new executable file, 4 lines)
@@ -0,0 +1,4 @@
#!/usr/bin/env bash

export RUST_LOG=info
exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"
(deleted file)
@@ -1,27 +0,0 @@
# Manylinux dockerfile with Rust, Node, and Lance dependencies installed.
# This container allows building the node modules' native libraries in an
# environment with a very old glibc, so that we are compatible with a wide
# range of linux distributions.
ARG ARCH=x86_64

FROM quay.io/pypa/manylinux_2_28_${ARCH}

ARG ARCH=x86_64
ARG DOCKER_USER=default_user

# Protobuf is also installed as root.
COPY install_protobuf.sh install_protobuf.sh
RUN ./install_protobuf.sh ${ARCH}

ENV DOCKER_USER=${DOCKER_USER}
# Create a group and user, but only if it doesn't exist
RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user

# We switch to the user to install Rust and Node, since those like to be
# installed at the user level.
USER ${DOCKER_USER}

COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
RUN cp /prepare_manylinux_node.sh $HOME/ && \
    cd $HOME && \
    ./prepare_manylinux_node.sh ${ARCH}
(deleted file)
@@ -1,13 +0,0 @@
#!/bin/bash
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}

# Alpine doesn't have .bashrc
FILE=$HOME/.bashrc && test -f $FILE && source $FILE

cd node
npm ci
npm run build-release
npm run pack-build -- -t $TARGET_TRIPLE
(deleted file)
@@ -1,15 +0,0 @@
#!/bin/bash
# Installs protobuf compiler. Should be run as root.
set -e

if [[ $1 == x86_64* ]]; then
  ARCH=x86_64
else
  # gnu target
  ARCH=aarch_64
fi

PB_REL=https://github.com/protocolbuffers/protobuf/releases
PB_VERSION=23.1
curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local
(deleted file)
@@ -1,21 +0,0 @@
#!/bin/bash
set -e

install_node() {
  echo "Installing node..."

  curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash

  source "$HOME"/.bashrc

  nvm install --no-progress 18
}

install_rust() {
  echo "Installing rust..."
  curl https://sh.rustup.rs -sSf | bash -s -- -y
  export PATH="$PATH:/root/.cargo/bin"
}

install_node
install_rust
ci/run_with_docker_compose.sh (new executable file, 18 lines)
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# A script for running the given command together with a docker compose environment.
#

# Bring down the docker setup once the command is done running.
tear_down() {
  docker compose -p fixture down
}
trap tear_down EXIT

set +xe

# Clean up any existing docker setup and bring up a new one.
docker compose -p fixture up --detach --wait || exit 1

"${@}"
ci/run_with_test_connection.sh (new executable file, 68 lines)
@@ -0,0 +1,68 @@
#!/usr/bin/env bash
#
# A script for running the given command together with the lancedb cli.
#

die() {
  echo "${1}" >&2
  exit 1
}

check_command_exists() {
  command="${1}"
  which ${command} &> /dev/null || \
    die "Unable to locate command: ${command}. Did you install it?"
}

if [[ ! -e ./lancedb ]]; then
  if [[ -v SOPHON_READ_TOKEN ]]; then
    INPUT="lancedb-linux-x64"
    gh release \
      --repo lancedb/lancedb \
      download ci-support-binaries \
      --pattern "${INPUT}" \
      || die "failed to fetch cli."
    check_command_exists openssl
    openssl enc -aes-256-cbc \
      -d -pbkdf2 \
      -pass "env:SOPHON_READ_TOKEN" \
      -in "${INPUT}" \
      -out ./lancedb-linux-x64.tar.gz \
      || die "openssl failed"
    TARGET="${INPUT}.tar.gz"
  else
    ARCH="x64"
    UNAME=$(uname -m)
    if [[ $OSTYPE == 'darwin'* ]]; then
      if [[ $UNAME == 'arm64' ]]; then
        ARCH='arm64'
      fi
      OSTYPE="macos"
    elif [[ $OSTYPE == 'linux'* ]]; then
      if [[ $UNAME == 'aarch64' ]]; then
        ARCH='arm64'
      fi
      OSTYPE="linux"
    else
      die "unknown OSTYPE: $OSTYPE"
    fi

    check_command_exists gh
    TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
    gh release \
      --repo lancedb/sophon \
      download lancedb-cli-v0.0.3 \
      --pattern "${TARGET}" \
      || die "failed to fetch cli."
  fi

  check_command_exists tar
  tar xvf "${TARGET}" || die "tar failed."
  [[ -e ./lancedb ]] || die "failed to extract lancedb."
fi

SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"

"${@}"
ci/set_lance_version.py (new file, 270 lines)
@@ -0,0 +1,270 @@
import argparse
import re
import sys
import json


def run_command(command: str) -> str:
    """
    Run a shell command and return stdout as a string.
    If exit code is not 0, raise an exception with the stderr output.
    """
    import subprocess

    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        raise Exception(f"Command failed with error: {result.stderr.strip()}")
    return result.stdout.strip()


def get_latest_stable_version() -> str:
    version_line = run_command("cargo info lance | grep '^version:'")
    # Example output: "version: 0.35.0 (latest 0.37.0)"
    match = re.search(r'\(latest ([0-9.]+)\)', version_line)
    if match:
        return match.group(1)
    # Fallback: use the first version after 'version:'
    return version_line.split("version:")[1].split()[0].strip()


def get_latest_preview_version() -> str:
    lance_tags = run_command(
        "git ls-remote --tags https://github.com/lancedb/lance.git | grep 'refs/tags/v[0-9beta.-]\\+$'"
    ).splitlines()
    lance_tags = (
        tag.split("refs/tags/")[1]
        for tag in lance_tags
        if "refs/tags/" in tag and "beta" in tag
    )
    from packaging.version import Version

    latest = max(
        (tag[1:] for tag in lance_tags if tag.startswith("v")), key=lambda t: Version(t)
    )
    return str(latest)


def extract_features(line: str) -> list:
    """
    Extracts the features from a line in Cargo.toml.
    Example: 'lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }'
    Returns: ['dynamodb']
    """
    import re

    match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
    if match:
        features_str = match.group(1)
        return [f.strip('"') for f in features_str.split(",") if len(f) > 0]
    return []


def extract_default_features(line: str) -> bool:
    """
    Checks if default-features = false is present in a line in Cargo.toml.
    Example: 'lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }'
    Returns: True if default-features = false is present, False otherwise
    """
    import re

    match = re.search(r'default-features\s*=\s*false', line)
    return match is not None


def dict_to_toml_line(package_name: str, config: dict) -> str:
    """
    Converts a configuration dictionary to a TOML dependency line.
    Dictionary insertion order is preserved (Python 3.7+), so the caller
    controls the order of fields in the output.

    Args:
        package_name: The name of the package (e.g., "lance", "lance-io")
        config: Dictionary with keys like "version", "path", "git", "tag", "features", "default-features"
            The order of keys in this dict determines the order in the output.

    Returns:
        A properly formatted TOML line with a trailing newline
    """
    # If only version is specified, use simple format
    if len(config) == 1 and "version" in config:
        return f'{package_name} = "{config["version"]}"\n'

    # Otherwise, use inline table format
    parts = []
    for key, value in config.items():
        if key == "default-features" and not value:
            parts.append("default-features = false")
        elif key == "features":
            parts.append(f'"features" = {json.dumps(value)}')
        elif isinstance(value, str):
            parts.append(f'"{key}" = "{value}"')
        else:
            # This shouldn't happen with our current usage
            parts.append(f'"{key}" = {json.dumps(value)}')

    return f'{package_name} = {{ {", ".join(parts)} }}\n'


def update_cargo_toml(line_updater):
    """
    Updates the Cargo.toml file by applying the line_updater function to each line.
    The line_updater function should take a line as input and return the updated line.
    """
    with open("Cargo.toml", "r") as f:
        lines = f.readlines()

    new_lines = []
    lance_line = ""
    is_parsing_lance_line = False
    for line in lines:
        if line.startswith("lance") and not line.startswith("lance-namespace"):
            # Check if this is a single-line or multi-line entry
            # Single-line entries either:
            # 1. End with } (complete inline table)
            # 2. End with " (simple version string)
            # Multi-line entries start with { but don't end with }
            if line.strip().endswith("}") or line.strip().endswith('"'):
                # Single-line entry - process immediately
                new_lines.append(line_updater(line))
            elif "{" in line and not line.strip().endswith("}"):
                # Multi-line entry - start accumulating
                lance_line = line
                is_parsing_lance_line = True
            else:
                # Single-line entry without quotes or braces (shouldn't happen but handle it)
                new_lines.append(line_updater(line))
        elif is_parsing_lance_line:
            lance_line += line
            if line.strip().endswith("}"):
                new_lines.append(line_updater(lance_line))
                lance_line = ""
                is_parsing_lance_line = False
        else:
            # Keep the line unchanged
            new_lines.append(line)

    with open("Cargo.toml", "w") as f:
        f.writelines(new_lines)


def set_stable_version(version: str):
    """
    Sets lines to
    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }
    lance-io = { "version" = "=0.29.0", default-features = false }
    ...
    """

    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()

        # Build config in desired order: version, default-features, features
        config = {"version": f"={version}"}

        if extract_default_features(line):
            config["default-features"] = False

        features = extract_features(line)
        if features:
            config["features"] = features

        return dict_to_toml_line(package_name, config)

    update_cargo_toml(line_updater)


def set_preview_version(version: str):
    """
    Sets lines to
    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
    lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
    ...
    """

    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
        base_version = version.split("-")[0]  # Get the base version without beta suffix

        # Build config in desired order: version, default-features, features, tag, git
        config = {"version": f"={base_version}"}

        if extract_default_features(line):
            config["default-features"] = False

        features = extract_features(line)
        if features:
            config["features"] = features

        config["tag"] = f"v{version}"
        config["git"] = "https://github.com/lancedb/lance.git"

        return dict_to_toml_line(package_name, config)

    update_cargo_toml(line_updater)


def set_local_version():
    """
    Sets lines to
    lance = { "path" = "../lance/rust/lance", default-features = false, "features" = ["dynamodb"] }
    lance-io = { "path" = "../lance/rust/lance-io", default-features = false }
    ...
    """

    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()

        # Build config in desired order: path, default-features, features
        config = {"path": f"../lance/rust/{package_name}"}

        if extract_default_features(line):
            config["default-features"] = False

        features = extract_features(line)
        if features:
            config["features"] = features

        return dict_to_toml_line(package_name, config)

    update_cargo_toml(line_updater)


parser = argparse.ArgumentParser(description="Set the version of the Lance package.")
parser.add_argument(
    "version",
    type=str,
    help="The version to set for the Lance package. Use 'stable' for the latest stable version, 'preview' for latest preview version, or a specific version number (e.g., '0.1.0'). You can also specify 'local' to use a local path.",
)
args = parser.parse_args()

if args.version == "stable":
    latest_stable_version = get_latest_stable_version()
    print(
        f"Found latest stable version: \033[1mv{latest_stable_version}\033[0m",
        file=sys.stderr,
    )
    set_stable_version(latest_stable_version)
elif args.version == "preview":
    latest_preview_version = get_latest_preview_version()
    print(
        f"Found latest preview version: \033[1mv{latest_preview_version}\033[0m",
        file=sys.stderr,
    )
    set_preview_version(latest_preview_version)
elif args.version == "local":
    set_local_version()
else:
    # Parse the version number.
    version = args.version
    # Ignore initial v if present.
    if version.startswith("v"):
        version = version[1:]

    if "beta" in version:
        set_preview_version(version)
    else:
        set_stable_version(version)

print("Updating lockfiles...", file=sys.stderr, end="")
run_command("cargo metadata > /dev/null")
print(" done.", file=sys.stderr)
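To make the helper's output format concrete, here is the kind of line it produces — a standalone illustration reusing `dict_to_toml_line` from the script above (the inputs are made-up examples, not values from the diff):

```python
# Single "version" key -> simple string form:
print(dict_to_toml_line("lance", {"version": "=0.38.0"}), end="")
# lance = "=0.38.0"

# Multiple keys -> inline table, in dict insertion order:
line = dict_to_toml_line(
    "lance",
    {"version": "=0.38.0", "default-features": False, "features": ["dynamodb"]},
)
print(line, end="")
# lance = { "version" = "=0.38.0", default-features = false, "features" = ["dynamodb"] }
```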
ci/update_lockfiles.sh (new executable file, 27 lines)
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -euo pipefail

AMEND=false

for arg in "$@"; do
  if [[ "$arg" == "--amend" ]]; then
    AMEND=true
  fi
done

# This updates the lockfile without building
cargo metadata --quiet > /dev/null

pushd nodejs || exit 1
npm install --package-lock-only --silent
popd

if git diff --quiet --exit-code; then
  echo "No lockfile changes to commit; skipping amend."
elif $AMEND; then
  git add Cargo.lock nodejs/package-lock.json
  git commit --amend --no-edit
else
  git add Cargo.lock nodejs/package-lock.json
  git commit -m "Update lockfiles"
fi
@@ -70,6 +70,23 @@ plugins:
   - mkdocs-jupyter
   - render_swagger:
       allow_arbitrary_locations: true
+  - redirects:
+      redirect_maps:
+        # Redirect the home page and other top-level markdown files. This enables maximum SEO benefit;
+        # other sub-pages are handled by the injected js in overrides/partials/header.html
+        'index.md': 'https://lancedb.com/docs/'
+        'guides/tables.md': 'https://lancedb.com/docs/tables/'
+        'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
+        'basic.md': 'https://lancedb.com/docs/quickstart/'
+        'faq.md': 'https://lancedb.com/docs/faq/'
+        'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
+        'integrations.md': 'https://lancedb.com/docs/integrations/'
+        'examples.md': 'https://lancedb.com/docs/tutorials/'
+        'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
+        'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
+        'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'
+
 markdown_extensions:
   - admonition
@@ -105,8 +122,7 @@ markdown_extensions:
 nav:
   - Home:
       - LanceDB: index.md
-      - 👉 Quickstart: quickstart.md
-      - 🏃🏼‍♂️ Basic Usage: basic.md
+      - 🏃🏼‍♂️ Quick start: basic.md
   - 📚 Concepts:
       - Vector search: concepts/vector_search.md
       - Indexing:
@@ -194,6 +210,7 @@ nav:
       - Pandas and PyArrow: python/pandas_and_pyarrow.md
       - Polars: python/polars_arrow.md
       - DuckDB: python/duckdb.md
+      - Datafusion: python/datafusion.md
       - LangChain:
           - LangChain 🔗: integrations/langchain.md
           - LangChain demo: notebooks/langchain_demo.ipynb
@@ -206,6 +223,7 @@ nav:
       - PromptTools: integrations/prompttools.md
       - dlt: integrations/dlt.md
       - phidata: integrations/phidata.md
+      - Genkit: integrations/genkit.md
   - 🎯 Examples:
       - Overview: examples/index.md
       - 🐍 Python:
@@ -238,9 +256,7 @@ nav:
   - 👾 JavaScript (lancedb): js/globals.md
   - 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
-  - Getting Started:
-      - Quickstart: quickstart.md
-      - Basic Usage: basic.md
+  - Quick start: basic.md
   - Concepts:
       - Vector search: concepts/vector_search.md
       - Indexing:
@@ -250,6 +266,7 @@ nav:
       - Data management: concepts/data_management.md
   - Guides:
       - Working with tables: guides/tables.md
+      - Working with SQL: guides/sql_querying.md
       - Building an ANN index: ann_indexes.md
       - Vector Search: search.md
       - Full-text search (native): fts.md
@@ -326,6 +343,7 @@ nav:
       - Pandas and PyArrow: python/pandas_and_pyarrow.md
       - Polars: python/polars_arrow.md
       - DuckDB: python/duckdb.md
+      - Datafusion: python/datafusion.md
       - LangChain 🦜️🔗↗: integrations/langchain.md
       - LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
       - LlamaIndex 🦙↗: integrations/llamaIndex.md
@@ -334,6 +352,7 @@ nav:
       - PromptTools: integrations/prompttools.md
       - dlt: integrations/dlt.md
       - phidata: integrations/phidata.md
+      - Genkit: integrations/genkit.md
   - Examples:
       - examples/index.md
       - 🐍 Python:
docs/overrides/partials/header.html
@@ -19,7 +19,13 @@
 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 IN THE SOFTWARE.
 -->
+<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">
+  <p style="margin: 0; font-size: 1.1em;">
+    <strong>This documentation site is deprecated.</strong>
+    Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">lancedb.com/docs</a> for the latest information.
+  </p>
+</div>
 {% set class = "md-header" %}
 {% if "navigation.tabs.sticky" in features %}
 {% set class = class ~ " md-header--shadow md-header--lifted" %}
@@ -174,3 +180,76 @@
 {% endif %}
 {% endif %}
 </header>
+
+<script>
+(function() {
+  function checkPathAndRedirect() {
+    var banner = document.getElementById('deprecation-banner');
+
+    if (document.querySelector('meta[http-equiv="refresh"]')) {
+      return; // The redirects plugin is already handling this page.
+    }
+
+    var currentPath = window.location.pathname;
+
+    var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
+      ? currentPath.slice(0, -1)
+      : currentPath;
+
+    // These are the ONLY paths that should remain on the old site
+    var apiPaths = [
+      '/lancedb/python',
+      '/lancedb/javascript',
+      '/lancedb/js',
+      '/lancedb/api_reference'
+    ];
+
+    var isApiPage = apiPaths.some(function(apiPath) {
+      return cleanPath.startsWith(apiPath);
+    });
+
+    if (isApiPage) {
+      if (banner) {
+        banner.style.display = 'none';
+      }
+    } else {
+      if (banner) {
+        banner.style.display = 'block';
+      }
+
+      // Add a noindex meta tag to prevent indexing of the old docs, for SEO
+      var noindexMeta = document.createElement('meta');
+      noindexMeta.setAttribute('name', 'robots');
+      noindexMeta.setAttribute('content', 'noindex, follow');
+      document.head.appendChild(noindexMeta);
+
+      // Add a canonical link pointing to the new docs to reward the new site, for SEO
+      var canonicalLink = document.createElement('link');
+      canonicalLink.setAttribute('rel', 'canonical');
+      canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
+      document.head.appendChild(canonicalLink);
+
+      window.location.replace('https://lancedb.com/docs');
+    }
+  }
+
+  // Run the check only once the doc is ready. This makes sure we catch the initial load
+  // and redirect.
+  if (document.readyState === 'loading') {
+    document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
+  } else {
+    checkPathAndRedirect();
+  }
+
+  // Use an interval to handle subsequent navigation clicks.
+  var lastPath = window.location.pathname;
+  setInterval(function() {
+    if (window.location.pathname !== lastPath) {
+      lastPath = window.location.pathname;
+      checkPathAndRedirect();
+    }
+  }, 2000); // keeping it at 2 seconds to make it easy for the user to understand
+            // what's happening
+})();
+</script>
docs/overrides/partials/main.html (new file, 5 lines)
@@ -0,0 +1,5 @@
{% extends "base.html" %}

{% block announce %}
📚 Starting June 1st, 2025, please use <a href="https://lancedb.github.io/documentation" target="_blank" rel="noopener noreferrer">lancedb.github.io/documentation</a> for the latest docs.
{% endblock %}
docs/package-lock.json (generated, 12 lines changed)
@@ -19,7 +19,7 @@
     },
     "../node": {
       "name": "vectordb",
-      "version": "0.12.0",
+      "version": "0.21.2-beta.0",
       "cpu": [
         "x64",
         "arm64"
@@ -65,11 +65,11 @@
         "uuid": "^9.0.0"
       },
       "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.12.0",
-        "@lancedb/vectordb-darwin-x64": "0.12.0",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
-        "@lancedb/vectordb-linux-x64-gnu": "0.12.0",
-        "@lancedb/vectordb-win32-x64-msvc": "0.12.0"
+        "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
+        "@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
       },
       "peerDependencies": {
         "@apache-arrow/ts": "^14.0.2",
|||||||
@@ -5,3 +5,4 @@ mkdocstrings[python]==0.25.2
|
|||||||
griffe
|
griffe
|
||||||
mkdocs-render-swagger-plugin
|
mkdocs-render-swagger-plugin
|
||||||
pydantic
|
pydantic
|
||||||
|
mkdocs-redirects
|
||||||
|
|||||||
@@ -291,7 +291,7 @@ Product quantization can lead to approximately `16 * sizeof(float32) / 1 = 64` times

 `num_partitions` is used to decide how many partitions the first level `IVF` index uses.
 Higher number of partitions could lead to more efficient I/O during queries and better accuracy, but it takes much more time to train.
-On `SIFT-1M` dataset, our benchmark shows that keeping each partition 1K-4K rows lead to a good latency / recall.
+On `SIFT-1M` dataset, our benchmark shows that keeping each partition 4K-8K rows lead to a good latency / recall.

 `num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. The number should be a factor of the vector dimension. Because
 PQ is a lossy compression of the original vector, a higher `num_sub_vectors` usually results in
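To make the revised partition-sizing guidance concrete, a quick back-of-the-envelope in Python (a sketch; the row count and the 128-dimensional SIFT vectors are illustrative, not taken from the diff):

```python
import math

num_rows = 1_000_000            # e.g. the SIFT-1M benchmark mentioned above
rows_per_partition = 4096       # aim for 4K-8K rows per IVF partition
num_partitions = 2 ** round(math.log2(num_rows / rows_per_partition))
print(num_partitions)           # -> 256

dim = 128                       # SIFT vectors are 128-dimensional
num_sub_vectors = 16            # must evenly divide dim: 128 / 16 = 8 floats per PQ code
```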
BIN docs/src/assets/hero-header.png (new binary file, 1.7 MiB; not shown)
BIN docs/src/assets/lancedb.png (new binary file, 40 KiB; not shown)
docs/src/basic.md
@@ -1,4 +1,4 @@
-# Basic Usage
+# Quick start

 !!! info "LanceDB can be run in a number of ways:"
@@ -37,6 +37,10 @@ Depending on the use case and dataset, optimal compaction will have different requirements
 - It’s always better to use *batch* inserts rather than adding 1 row at a time (to avoid too small fragments). If single-row inserts are unavoidable, run compaction on a regular basis to merge them into larger fragments.
 - Keep the number of fragments under 100, which is suitable for most use cases (for *really* large datasets of >500M rows, more fragments might be needed)

+!!! note
+
+    LanceDB Cloud/Enterprise supports [auto-compaction](https://docs.lancedb.com/enterprise/architecture/architecture#write-path) which automatically optimizes fragments in the background as data changes.
+
 ## Deletion

 Although Lance allows you to delete rows from a dataset, it does not actually delete the data immediately. It simply marks the row as deleted in the `DataFile` that represents a fragment. For a given version of the dataset, each fragment can have up to one deletion file (if no rows were ever deleted from that fragment, it will not have a deletion file). This is important to keep in mind because it means that the data is still there, and can be recovered if needed, as long as that version still exists based on your backup policy.
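For LanceDB OSS, a minimal sketch of the batch-insert-then-compact pattern described in the hunk above (hedged: it assumes the synchronous Python API's `Table.optimize()` as the compaction entry point, and the table name is illustrative):

```python
import lancedb

db = lancedb.connect("data/sample-lancedb")
tbl = db.open_table("my_table")

# Batch inserts keep fragments large; avoid looping over single-row adds.
rows = [{"vector": [3.1, 4.1], "item": "foo", "price": 10.0} for _ in range(10_000)]
tbl.add(rows)

# Periodically merge the small fragments produced by incremental writes.
tbl.optimize()
```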
@@ -50,13 +54,9 @@ Reindexing is the process of updating the index to account for new data, keeping
 Both LanceDB OSS and Cloud support reindexing, but the process (at least for now) is different for each, depending on the type of index.

-When a reindex job is triggered in the background, the entire data is reindexed, but in the interim as new queries come in, LanceDB will combine results from the existing index with exhaustive kNN search on the new data. This is done to ensure that you're still searching on all your data, but it does come at a performance cost. The more data that you add without reindexing, the impact on latency (due to exhaustive search) can be noticeable.
+In LanceDB OSS, re-indexing happens synchronously when you call either `create_index` or `optimize` on a table. In LanceDB Cloud, re-indexing happens asynchronously as you add and update data in your table.

-### Vector reindex
+By default, queries will search new data even if it has yet to be indexed. This is done using brute-force methods, such as kNN for vector search, and combined with the fast index search results. This is done to ensure that you're always searching over all your data, but it does come at a performance cost. Without reindexing, adding more data to a table will make queries slower and more expensive. This behavior can be disabled by setting the [fast_search](https://lancedb.github.io/lancedb/python/python/#lancedb.query.AsyncQuery.fast_search) parameter which will instruct the query to ignore un-indexed data.

-* LanceDB Cloud supports incremental reindexing, where a background process will trigger a new index build for you automatically when new data is added to a dataset
+* LanceDB Cloud/Enterprise supports [automatic incremental reindexing](https://docs.lancedb.com/core#vector-index) for vector, scalar, and FTS indices, where a background process will trigger a new index build for you automatically when new data is added or modified in a dataset
 * LanceDB OSS requires you to manually trigger a reindex operation -- we are working on adding incremental reindexing to LanceDB OSS as well
-
-### FTS reindex
-
-FTS reindexing is supported in both LanceDB OSS and Cloud, but requires that it's manually rebuilt once you have a significant enough amount of new data added that needs to be reindexed. We [updated](https://github.com/lancedb/lancedb/pull/762) Tantivy's default heap size from 128MB to 1GB in LanceDB to make it much faster to reindex, by up to 10x from the default settings.
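A short sketch of the OSS-side workflow just described (hedged: the table and index parameters are hypothetical — they suit a large table of 128-dimensional vectors, not the tiny quickstart table; the `fast_search` flag mentioned in the comment belongs to the async query API linked in the hunk above):

```python
import lancedb

db = lancedb.connect("data/sample-lancedb")
tbl = db.open_table("embeddings")  # hypothetical table of 128-d vectors

# Newly added rows are searched by brute force until the index is rebuilt.
tbl.add([{"vector": [0.0] * 128, "item": "baz", "price": 5.0}])

# Manually trigger a reindex (synchronous in LanceDB OSS):
tbl.create_index(num_partitions=256, num_sub_vectors=16, replace=True)

# On the async query API, the fast_search option instead skips un-indexed rows entirely.
```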
docs/src/guides/sql_querying.md (new file, 60 lines)
@@ -0,0 +1,60 @@
# SQL Querying

You can use DuckDB and Apache Datafusion to query your LanceDB tables using SQL.
This guide will show how to query Lance tables using both.

We will re-use the dataset [created previously](./tables.md):

```python
import lancedb

db = lancedb.connect("data/sample-lancedb")
data = [
    {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
    {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}
]
table = db.create_table("pd_table", data=data)
```

## Querying a LanceDB Table with DuckDB

The `to_lance` method converts the LanceDB table to a `LanceDataset`, which is accessible to DuckDB through the Arrow compatibility layer.
To query the resulting Lance dataset in DuckDB, all you need to do is reference the dataset by the same name in your SQL query.

```python
import duckdb

arrow_table = table.to_lance()

duckdb.query("SELECT * FROM arrow_table")
```

| vector      | item | price |
| ----------- | ---- | ----- |
| [3.1, 4.1]  | foo  | 10.0  |
| [5.9, 26.5] | bar  | 20.0  |
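Any SQL that DuckDB supports can run against that name; for instance, a filtered projection over the same dataset (a small illustrative extension of the snippet above):

```python
# Continues from the snippet above: `arrow_table` is the LanceDataset.
duckdb.query("SELECT item, price FROM arrow_table WHERE price > 15").to_df()
#   item  price
# 0  bar   20.0
```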
## Querying a LanceDB Table with Apache Datafusion

Have the required imports in place before doing any querying.

=== "Python"

    ```python
    --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb"
    --8<-- "python/python/tests/docs/test_guide_tables.py:import-session-context"
    --8<-- "python/python/tests/docs/test_guide_tables.py:import-ffi-dataset"
    ```

Register the table created above with the Datafusion session context.

=== "Python"

    ```python
    --8<-- "python/python/tests/docs/test_guide_tables.py:lance_sql_basic"
    ```

| vector      | item | price |
| ----------- | ---- | ----- |
| [3.1, 4.1]  | foo  | 10.0  |
| [5.9, 26.5] | bar  | 20.0  |
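The canonical snippets live in the test file referenced above; for orientation only, one plausible shape of the registration (a sketch — it assumes `datafusion.SessionContext.register_dataset` accepts the PyArrow-compatible Lance dataset, whereas the guide's actual snippet goes through the FFI dataset import shown above):

```python
import lancedb
from datafusion import SessionContext

db = lancedb.connect("data/sample-lancedb")
lance_dataset = db.open_table("pd_table").to_lance()

ctx = SessionContext()
# Lance datasets expose the PyArrow dataset interface, which Datafusion can scan.
ctx.register_dataset("pd_table", lance_dataset)
ctx.sql("SELECT item, price FROM pd_table WHERE price > 15").show()
```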
183
docs/src/integrations/genkit.md
Normal file
183
docs/src/integrations/genkit.md
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
### genkitx-lancedb
|
||||||
|
This is a lancedb plugin for genkit framework. It allows you to use LanceDB for ingesting and rereiving data using genkit framework.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Installation
|
||||||
|
```bash
|
||||||
|
pnpm install genkitx-lancedb
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
Adding LanceDB plugin to your genkit instance.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
import { lancedbIndexerRef, lancedb, lancedbRetrieverRef, WriteMode } from 'genkitx-lancedb';
|
||||||
|
import { textEmbedding004, vertexAI } from '@genkit-ai/vertexai';
|
||||||
|
import { gemini } from '@genkit-ai/vertexai';
|
||||||
|
import { z, genkit } from 'genkit';
|
||||||
|
import { Document } from 'genkit/retriever';
|
||||||
|
import { chunk } from 'llm-chunk';
|
||||||
|
import { readFile } from 'fs/promises';
|
||||||
|
import path from 'path';
|
||||||
|
import pdf from 'pdf-parse/lib/pdf-parse';
|
||||||
|
|
||||||
|
const ai = genkit({
|
||||||
|
plugins: [
|
||||||
|
// vertexAI provides the textEmbedding004 embedder
|
||||||
|
vertexAI(),
|
||||||
|
|
||||||
|
// the local vector store requires an embedder to translate from text to vector
|
||||||
|
lancedb([
|
||||||
|
{
|
||||||
|
dbUri: '.db', // optional lancedb uri, default to .db
|
||||||
|
tableName: 'table', // optional table name, default to table
|
||||||
|
embedder: textEmbedding004,
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
You can run this app with the following command:
|
||||||
|
```bash
|
||||||
|
genkit start -- tsx --watch src/index.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
This'll add LanceDB as a retriever and indexer to the genkit instance. You can see it in the GUI view
|
||||||
|
<img width="1710" alt="Screenshot 2025-05-11 at 7 21 05 PM" src="https://github.com/user-attachments/assets/e752f7f4-785b-4797-a11e-72ab06a531b7" />
|
||||||
|
|
||||||
|
**Testing retrieval on a sample table**
|
||||||
|
Let's see the raw retrieval results
|
||||||
|
|
||||||
|
<img width="1710" alt="Screenshot 2025-05-11 at 7 21 05 PM" src="https://github.com/user-attachments/assets/b8d356ed-8421-4790-8fc0-d6af563b9657" />
|
||||||
|
On running this query, you'll 5 results fetched from the lancedb table, where each result looks something like this:
|
||||||
|
<img width="1417" alt="Screenshot 2025-05-11 at 7 21 18 PM" src="https://github.com/user-attachments/assets/77429525-36e2-4da6-a694-e58c1cf9eb83" />
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Creating a custom RAG flow
|
||||||
|
|
||||||
|
Now that we've seen how you can use LanceDB for in a genkit pipeline, let's refine the flow and create a RAG. A RAG flow will consist of an index and a retreiver with its outputs postprocessed an fed into an LLM for final response
|
||||||
|
|
||||||
|
### Creating custom indexer flows
|
||||||
|
You can also create custom indexer flows, utilizing more options and features provided by LanceDB.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
export const menuPdfIndexer = lancedbIndexerRef({
|
||||||
|
// Using all defaults, for dbUri, tableName, and embedder, etc
|
||||||
|
});
|
||||||
|
|
||||||
|
const chunkingConfig = {
|
||||||
|
minLength: 1000,
|
||||||
|
maxLength: 2000,
|
||||||
|
splitter: 'sentence',
|
||||||
|
overlap: 100,
|
||||||
|
delimiters: '',
|
||||||
|
} as any;
|
||||||
|
|
||||||
|
|
||||||
|
async function extractTextFromPdf(filePath: string) {
|
||||||
|
const pdfFile = path.resolve(filePath);
|
||||||
|
const dataBuffer = await readFile(pdfFile);
|
||||||
|
const data = await pdf(dataBuffer);
|
||||||
|
return data.text;
|
||||||
|
}
|
||||||
|
|
||||||
|
export const indexMenu = ai.defineFlow(
|
||||||
|
{
|
||||||
|
name: 'indexMenu',
|
||||||
|
inputSchema: z.string().describe('PDF file path'),
|
||||||
|
outputSchema: z.void(),
|
||||||
|
},
|
||||||
|
async (filePath: string) => {
|
||||||
|
filePath = path.resolve(filePath);
|
||||||
|
|
||||||
|
// Read the pdf.
|
||||||
|
const pdfTxt = await ai.run('extract-text', () =>
|
||||||
|
extractTextFromPdf(filePath)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Divide the pdf text into segments.
|
||||||
|
const chunks = await ai.run('chunk-it', async () =>
|
||||||
|
chunk(pdfTxt, chunkingConfig)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Convert chunks of text into documents to store in the index.
|
||||||
|
const documents = chunks.map((text) => {
|
||||||
|
return Document.fromText(text, { filePath });
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add documents to the index.
|
||||||
|
await ai.index({
|
||||||
|
indexer: menuPdfIndexer,
|
||||||
|
documents,
|
||||||
|
options: {
|
||||||
|
writeMode: WriteMode.Overwrite,
|
||||||
|
} as any
|
||||||
|
});
|
||||||
|
}
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
<img width="1316" alt="Screenshot 2025-05-11 at 8 35 56 PM" src="https://github.com/user-attachments/assets/e2a20ce4-d1d0-4fa2-9a84-f2cc26e3a29f" />
|
||||||
|
|
||||||
|
In your console, you can see the logs
|
||||||
|
|
||||||
|
<img width="511" alt="Screenshot 2025-05-11 at 7 19 14 PM" src="https://github.com/user-attachments/assets/243f26c5-ed38-40b6-b661-002f40f0423a" />
|
||||||
|
|
||||||
|
### Creating custom retriever flows

You can also create custom retriever flows, utilizing more options and features provided by LanceDB.

```ts
export const menuRetriever = lancedbRetrieverRef({
  tableName: "table", // Use the same table name as the indexer.
  displayName: "Menu", // Use a custom display name.
});

export const menuQAFlow = ai.defineFlow(
  { name: "Menu", inputSchema: z.string(), outputSchema: z.string() },
  async (input: string) => {
    // Retrieve relevant documents.
    const docs = await ai.retrieve({
      retriever: menuRetriever,
      query: input,
      options: {
        k: 3,
      },
    });

    // Pull the text payload out of each retrieved document.
    const extractedContent = docs.map(doc => {
      if (doc.content && Array.isArray(doc.content) && doc.content.length > 0) {
        if (doc.content[0].media && doc.content[0].media.url) {
          return doc.content[0].media.url;
        }
      }
      return "No content found";
    });

    console.log("Extracted content:", extractedContent);

    const { text } = await ai.generate({
      model: gemini('gemini-2.0-flash'),
      prompt: `
You are acting as a helpful AI assistant that can answer
questions about the food available on the menu at Genkit Grub Pub.

Use only the context provided to answer the question.
If you don't know, do not make up an answer.
Do not add or change items on the menu.

Context:
${extractedContent.join('\n\n')}

Question: ${input}`,
      docs,
    });

    return text;
  }
);
```

Now using our retrieval flow, we can ask questions about the ingested PDF:

<img width="1306" alt="Screenshot 2025-05-11 at 7 18 45 PM" src="https://github.com/user-attachments/assets/86c66b13-7c12-4d5f-9d81-ae36bfb1c346" />

docs/src/js/classes/BooleanQuery.md (new file, +53)
@@ -0,0 +1,53 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / BooleanQuery

# Class: BooleanQuery

Represents a full-text query interface.

This interface defines the structure and behavior for full-text queries,
including methods to retrieve the query type and convert the query to a dictionary format.

## Implements

- [`FullTextQuery`](../interfaces/FullTextQuery.md)

## Constructors

### new BooleanQuery()

```ts
new BooleanQuery(queries): BooleanQuery
```

Creates an instance of BooleanQuery.

#### Parameters

* **queries**: [[`Occur`](../enumerations/Occur.md), [`FullTextQuery`](../interfaces/FullTextQuery.md)][]
An array of (Occur, FullTextQuery) pairs to combine.
Occur specifies whether the query must match or should match.

#### Returns

[`BooleanQuery`](BooleanQuery.md)

## Methods

### queryType()

```ts
queryType(): FullTextQueryType
```

The type of the full-text query.

#### Returns

[`FullTextQueryType`](../enumerations/FullTextQueryType.md)

#### Implementation of

[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
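
For illustration, here is a minimal sketch of combining sub-queries with `BooleanQuery`. It assumes a table with a full-text-search index on a `text` column and an async context; the `Occur.Must`/`Occur.Should` member names are taken from the enum linked above.

```ts
import { BooleanQuery, MatchQuery, Occur } from "@lancedb/lancedb";

// Rows must mention "pasta"; rows that also mention "spicy" rank higher.
const query = new BooleanQuery([
  [Occur.Must, new MatchQuery("pasta", "text")],
  [Occur.Should, new MatchQuery("spicy", "text")],
]);

const hits = await table.query().fullTextSearch(query).limit(10).toArray();
```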
@@ -25,6 +25,51 @@ the underlying connection has been closed.

## Methods

### cloneTable()

```ts
abstract cloneTable(
  targetTableName,
  sourceUri,
  options?): Promise<Table>
```

Clone a table from a source table.

A shallow clone creates a new table that shares the underlying data files
with the source table but has its own independent manifest. This allows
both the source and cloned tables to evolve independently while initially
sharing the same data, deletion, and index files.

#### Parameters

* **targetTableName**: `string`
The name of the target table to create.

* **sourceUri**: `string`
The URI of the source table to clone from.

* **options?**
Clone options.

* **options.isShallow?**: `boolean`
Whether to perform a shallow clone (defaults to true).

* **options.sourceTag?**: `string`
The tag of the source table to clone.

* **options.sourceVersion?**: `number`
The version of the source table to clone.

* **options.targetNamespace?**: `string`[]
The namespace for the target table (defaults to root namespace).

#### Returns

`Promise`<[`Table`](Table.md)>
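
As a quick illustration, a shallow clone call might look like this (a minimal sketch; the connection `db`, the source URI, and the `dev` namespace are all illustrative):

```ts
// Shares data, deletion, and index files with the source table, but gets
// its own manifest, pinned here to source version 42.
const clone = await db.cloneTable("orders_dev", "s3://bucket/db/orders.lance", {
  isShallow: true,
  sourceVersion: 42,
  targetNamespace: ["dev"],
});
```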

***

### close()

```ts
@@ -45,6 +90,8 @@ Any attempt to use the connection after it is closed will result in an error.

### createEmptyTable()

#### createEmptyTable(name, schema, options)

```ts
abstract createEmptyTable(
  name,
@@ -54,7 +101,7 @@ abstract createEmptyTable(

Creates a new empty Table

##### Parameters

* **name**: `string`
The name of the table.
@@ -63,8 +110,39 @@ Creates a new empty Table

The schema of the table

* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
Additional options (backwards compatibility)

##### Returns

`Promise`<[`Table`](Table.md)>

#### createEmptyTable(name, schema, namespace, options)

```ts
abstract createEmptyTable(
  name,
  schema,
  namespace?,
  options?): Promise<Table>
```

Creates a new empty Table

##### Parameters

* **name**: `string`
The name of the table.

* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
The schema of the table

* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)

* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
Additional options

##### Returns

`Promise`<[`Table`](Table.md)>
@@ -72,10 +150,10 @@ Creates a new empty Table

### createTable()

#### createTable(options, namespace)

```ts
abstract createTable(options, namespace?): Promise<Table>
```

Creates a new Table and initializes it with new data.
@@ -85,6 +163,9 @@ Creates a new Table and initializes it with new data.

* **options**: `object` & `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
The options object.

* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)

##### Returns

`Promise`<[`Table`](Table.md)>
@@ -110,6 +191,38 @@ Creates a new Table and initializes it with new data.

to be inserted into the table

* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
Additional options (backwards compatibility)

##### Returns

`Promise`<[`Table`](Table.md)>

#### createTable(name, data, namespace, options)

```ts
abstract createTable(
  name,
  data,
  namespace?,
  options?): Promise<Table>
```

Creates a new Table and initializes it with new data.

##### Parameters

* **name**: `string`
The name of the table.

* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`<`string`, `unknown`>[]
Non-empty Array of Records to be inserted into the table

* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)

* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
Additional options

##### Returns
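
The namespace-aware overloads compose naturally. A minimal sketch (the connection `db` and the `["prod", "search"]` namespace are illustrative; `tableNames` and `openTable` are covered below):

```ts
// Create a table inside a namespace, then list and open it from there.
const table = await db.createTable(
  "articles",
  [{ id: 1, text: "hello world" }],
  ["prod", "search"],
);
const names = await db.tableNames(["prod", "search"]);
const reopened = await db.openTable("articles", ["prod", "search"]);
```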
@@ -134,11 +247,16 @@ Return a brief description of the connection

### dropAllTables()

```ts
abstract dropAllTables(namespace?): Promise<void>
```

Drop all tables in the database.

#### Parameters

* **namespace?**: `string`[]
The namespace to drop tables from (defaults to root namespace).

#### Returns

`Promise`<`void`>
@@ -148,7 +266,7 @@ Drop all tables in the database.

### dropTable()

```ts
abstract dropTable(name, namespace?): Promise<void>
```

Drop an existing table.

@@ -158,6 +276,9 @@ Drop an existing table.

* **name**: `string`
The name of the table to drop.

* **namespace?**: `string`[]
The namespace of the table (defaults to root namespace).

#### Returns

`Promise`<`void`>
@@ -181,7 +302,10 @@ Return true if the connection has not been closed

### openTable()

```ts
abstract openTable(
  name,
  namespace?,
  options?): Promise<Table>
```

Open a table in the database.

@@ -191,7 +315,11 @@ Open a table in the database.

* **name**: `string`
The name of the table

* **namespace?**: `string`[]
The namespace of the table (defaults to root namespace)

* **options?**: `Partial`<[`OpenTableOptions`](../interfaces/OpenTableOptions.md)>
Additional options

#### Returns
@@ -201,6 +329,8 @@ Open a table in the database.

### tableNames()

#### tableNames(options)

```ts
abstract tableNames(options?): Promise<string[]>
```
@@ -209,12 +339,35 @@ List all the table names in this database.

Tables will be returned in lexicographical order.

##### Parameters

* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
options to control the paging / start point (backwards compatibility)

##### Returns

`Promise`<`string`[]>

#### tableNames(namespace, options)

```ts
abstract tableNames(namespace?, options?): Promise<string[]>
```

List all the table names in this database.

Tables will be returned in lexicographical order.

##### Parameters

* **namespace?**: `string`[]
The namespace to list tables from (defaults to root namespace)

* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
options to control the paging / start point

##### Returns

`Promise`<`string`[]>

docs/src/js/classes/HeaderProvider.md (new file, +85)
@@ -0,0 +1,85 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / HeaderProvider

# Class: `abstract` HeaderProvider

Abstract base class for providing custom headers for each request.

Users can implement this interface to provide dynamic headers for various purposes
such as authentication (OAuth tokens, API keys), request tracking (correlation IDs),
custom metadata, or any other header-based requirements. The provider is called
before each request to ensure fresh header values are always used.

## Examples

Simple JWT token provider:
```typescript
class JWTProvider extends HeaderProvider {
  constructor(private token: string) {
    super();
  }

  getHeaders(): Record<string, string> {
    return { authorization: `Bearer ${this.token}` };
  }
}
```

Provider with request tracking:
```typescript
class RequestTrackingProvider extends HeaderProvider {
  constructor(private sessionId: string) {
    super();
  }

  getHeaders(): Record<string, string> {
    return {
      "X-Session-Id": this.sessionId,
      "X-Request-Id": `req-${Date.now()}`
    };
  }
}
```

## Extended by

- [`StaticHeaderProvider`](StaticHeaderProvider.md)
- [`OAuthHeaderProvider`](OAuthHeaderProvider.md)

## Constructors

### new HeaderProvider()

```ts
new HeaderProvider(): HeaderProvider
```

#### Returns

[`HeaderProvider`](HeaderProvider.md)

## Methods

### getHeaders()

```ts
abstract getHeaders(): Record<string, string>
```

Get the latest headers to be added to requests.

This method is called before each request to the remote LanceDB server.
Implementations should return headers that will be merged with existing headers.

#### Returns

`Record`<`string`, `string`>

Dictionary of header names to values to add to the request.

#### Throws

If unable to fetch headers, the exception will be propagated and the request will fail.
@@ -40,6 +40,8 @@ Creates an instance of MatchQuery.

- `boost`: The boost factor for the query (default is 1.0).
- `fuzziness`: The fuzziness level for the query (default is 0).
- `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
- `operator`: The logical operator to use for combining terms in the query (default is "OR").
- `prefixLength`: The number of beginning characters left unchanged for fuzzy matching.

* **options.boost?**: `number`
@@ -47,6 +49,10 @@ Creates an instance of MatchQuery.

* **options.maxExpansions?**: `number`

* **options.operator?**: [`Operator`](../enumerations/Operator.md)

* **options.prefixLength?**: `number`

#### Returns

[`MatchQuery`](MatchQuery.md)
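
To make the new knobs concrete, a minimal sketch (assumes an FTS index on a `text` column; the `Operator.And` member name is an assumption based on the "AND"/"OR" values described above):

```ts
import { MatchQuery, Operator } from "@lancedb/lancedb";

// Fuzzy match that tolerates one edit per term, keeps the first two
// characters of each term fixed, and requires every term to match.
const q = new MatchQuery("grilld chese", "text", {
  fuzziness: 1,
  prefixLength: 2,
  operator: Operator.And,
});
const hits = await table.query().fullTextSearch(q).limit(5).toArray();
```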
@@ -33,7 +33,7 @@ Construct a MergeInsertBuilder. __Internal use only.__

### execute()

```ts
execute(data, execOptions?): Promise<MergeResult>
```

Executes the merge insert operation
@@ -42,11 +42,37 @@ Executes the merge insert operation

* **data**: [`Data`](../type-aliases/Data.md)

* **execOptions?**: `Partial`<[`WriteExecutionOptions`](../interfaces/WriteExecutionOptions.md)>

#### Returns

`Promise`<[`MergeResult`](../interfaces/MergeResult.md)>

the merge result

***

### useIndex()

```ts
useIndex(useIndex): MergeInsertBuilder
```

Controls whether to use indexes for the merge operation.

When set to `true` (the default), the operation will use an index if available
on the join key for improved performance. When set to `false`, it forces a full
table scan even if an index exists. This can be useful for benchmarking or when
the query optimizer chooses a suboptimal path.

#### Parameters

* **useIndex**: `boolean`
Whether to use indices for the merge operation. Defaults to `true`.

#### Returns

[`MergeInsertBuilder`](MergeInsertBuilder.md)

***
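
For context, `useIndex` slots into the usual merge-insert chain. A minimal sketch (assumes a table with an `id` join key; the `whenMatched*` builder calls are part of the existing API):

```ts
const result = await table
  .mergeInsert("id")
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .useIndex(false) // force a full table scan, e.g. for benchmarking
  .execute([{ id: 1, text: "updated row" }]);
// result is a MergeResult describing the merge
```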
@@ -38,9 +38,12 @@ Creates an instance of MultiMatchQuery.

* **options?**
Optional parameters for the multi-match query.
- `boosts`: An array of boost factors for each column (default is 1.0 for all).
- `operator`: The logical operator to use for combining terms in the query (default is "OR").

* **options.boosts?**: `number`[]

* **options.operator?**: [`Operator`](../enumerations/Operator.md)

#### Returns

[`MultiMatchQuery`](MultiMatchQuery.md)
docs/src/js/classes/NativeJsHeaderProvider.md (new file, +29)
@@ -0,0 +1,29 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / NativeJsHeaderProvider

# Class: NativeJsHeaderProvider

JavaScript HeaderProvider implementation that wraps a JavaScript callback.
This is the only native header provider - all header provider implementations
should provide a JavaScript function that returns headers.

## Constructors

### new NativeJsHeaderProvider()

```ts
new NativeJsHeaderProvider(getHeadersCallback): NativeJsHeaderProvider
```

Create a new JsHeaderProvider from a JavaScript callback

#### Parameters

* **getHeadersCallback**

#### Returns

[`NativeJsHeaderProvider`](NativeJsHeaderProvider.md)
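
A minimal sketch of wrapping a callback (the callback shape, a zero-argument function returning `Record<string, string>`, is an assumption based on the class description above):

```ts
// Each request re-invokes the callback, so the headers stay fresh.
const provider = new NativeJsHeaderProvider(() => ({
  "x-api-key": process.env.LANCEDB_API_KEY ?? "",
}));
```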
docs/src/js/classes/OAuthHeaderProvider.md (new file, +108)
@@ -0,0 +1,108 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / OAuthHeaderProvider

# Class: OAuthHeaderProvider

Example implementation: OAuth token provider with automatic refresh.

This is an example implementation showing how to manage OAuth tokens
with automatic refresh when they expire.

## Example

```typescript
async function fetchToken(): Promise<TokenResponse> {
  const response = await fetch("https://oauth.example.com/token", {
    method: "POST",
    body: JSON.stringify({
      grant_type: "client_credentials",
      client_id: "your-client-id",
      client_secret: "your-client-secret"
    }),
    headers: { "Content-Type": "application/json" }
  });
  const data = await response.json();
  return {
    accessToken: data.access_token,
    expiresIn: data.expires_in
  };
}

const provider = new OAuthHeaderProvider(fetchToken);
const headers = provider.getHeaders();
// Returns: {"authorization": "Bearer <your-token>"}
```

## Extends

- [`HeaderProvider`](HeaderProvider.md)

## Constructors

### new OAuthHeaderProvider()

```ts
new OAuthHeaderProvider(tokenFetcher, refreshBufferSeconds): OAuthHeaderProvider
```

Initialize the OAuth provider.

#### Parameters

* **tokenFetcher**
Function to fetch new tokens. Should return an object with 'accessToken' and optionally 'expiresIn'.

* **refreshBufferSeconds**: `number` = `300`
Seconds before expiry to refresh the token. Default 300 (5 minutes).

#### Returns

[`OAuthHeaderProvider`](OAuthHeaderProvider.md)

#### Overrides

[`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)

## Methods

### getHeaders()

```ts
getHeaders(): Record<string, string>
```

Get OAuth headers, refreshing the token if needed.
Note: This is synchronous for now as the Rust implementation expects sync.
In a real implementation, this would need to handle async properly.

#### Returns

`Record`<`string`, `string`>

Headers with Bearer token authorization.

#### Throws

If unable to fetch or refresh the token.

#### Overrides

[`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)

***

### refreshToken()

```ts
refreshToken(): Promise<void>
```

Manually refresh the token.
Call this before using getHeaders() to ensure a token is available.

#### Returns

`Promise`<`void`>
@@ -19,7 +19,10 @@ including methods to retrieve the query type and convert the query to a dictionary format.

### new PhraseQuery()

```ts
new PhraseQuery(
  query,
  column,
  options?): PhraseQuery
```

Creates an instance of `PhraseQuery`.
@@ -32,6 +35,12 @@ Creates an instance of `PhraseQuery`.

* **column**: `string`
The name of the column to search within.

* **options?**
Optional parameters for the phrase query.
- `slop`: The maximum number of intervening unmatched positions allowed between words in the phrase (default is 0).

* **options.slop?**: `number`

#### Returns

[`PhraseQuery`](PhraseQuery.md)
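
As an illustration of `slop`, a minimal sketch (assumes an FTS index on a `text` column):

```ts
import { PhraseQuery } from "@lancedb/lancedb";

// Matches "sweet potato fries" exactly, and with slop: 1 also phrases with
// one intervening word, e.g. "sweet crispy potato fries".
const q = new PhraseQuery("sweet potato fries", "text", { slop: 1 });
const hits = await table.query().fullTextSearch(q).limit(5).toArray();
```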
@@ -14,7 +14,7 @@ A builder for LanceDB queries.

## Extends

- `StandardQueryBase`<`NativeQuery`>

## Properties

@@ -26,7 +26,7 @@ protected inner: Query | Promise<Query>;

#### Inherited from

`StandardQueryBase.inner`

## Methods

@@ -73,7 +73,7 @@ AnalyzeExec verbose=true, metrics=[]

#### Inherited from

`StandardQueryBase.analyzePlan`

***

@@ -107,7 +107,7 @@ single query)

#### Inherited from

`StandardQueryBase.execute`

***

@@ -143,7 +143,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

#### Inherited from

`StandardQueryBase.explainPlan`

***

@@ -164,7 +164,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

#### Inherited from

`StandardQueryBase.fastSearch`

***

@@ -194,7 +194,7 @@ Use `where` instead

#### Inherited from

`StandardQueryBase.filter`

***

@@ -216,7 +216,7 @@ fullTextSearch(query, options?): this

#### Inherited from

`StandardQueryBase.fullTextSearch`

***

@@ -241,7 +241,7 @@ called then every valid row from the table will be returned.

#### Inherited from

`StandardQueryBase.limit`

***

@@ -325,6 +325,10 @@ nearestToText(query, columns?): Query

offset(offset): this
```

Set the number of rows to skip before returning results.

This is useful for pagination.

#### Parameters

* **offset**: `number`
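
A minimal pagination sketch using `limit` together with the newly documented `offset` (assumes an existing table):

```ts
const pageSize = 20;
const page = 2; // zero-based page index
const rows = await table
  .query()
  .limit(pageSize)
  .offset(page * pageSize)
  .toArray();
```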
@@ -335,7 +339,7 @@ offset(offset): this

#### Inherited from

`StandardQueryBase.offset`

***

@@ -388,7 +392,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.

#### Inherited from

`StandardQueryBase.select`

***

@@ -410,7 +414,7 @@ Collect the results as an array of objects.

#### Inherited from

`StandardQueryBase.toArray`

***

@@ -436,7 +440,7 @@ ArrowTable.

#### Inherited from

`StandardQueryBase.toArrow`

***

@@ -471,7 +475,7 @@ on the filter column(s).

#### Inherited from

`StandardQueryBase.where`

***

@@ -493,4 +497,4 @@ order to perform hybrid search.

#### Inherited from

`StandardQueryBase.withRowId`

@@ -15,12 +15,11 @@ Common methods supported by all query types

## Extended by

- [`TakeQuery`](TakeQuery.md)

## Type Parameters

• **NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery` \| `NativeTakeQuery`

## Implements

@@ -141,104 +140,6 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

***

### select()

```ts

@@ -328,37 +229,6 @@ ArrowTable.

***

### withRowId()

```ts

docs/src/js/classes/Session.md (new file, +88)
@@ -0,0 +1,88 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / Session

# Class: Session

A session for managing caches and object stores across LanceDB operations.

Sessions allow you to configure cache sizes for index and metadata caches,
which can significantly impact memory use and performance. They can
also be re-used across multiple connections to share the same cache state.

## Constructors

### new Session()

```ts
new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
```

Create a new session with custom cache sizes.

# Parameters

- `index_cache_size_bytes`: The size of the index cache in bytes.
  Index data is stored in memory in this cache to speed up queries.
  Defaults to 6GB if not specified.
- `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
  The metadata cache stores file metadata and schema information in memory.
  This cache improves scan and write performance.
  Defaults to 1GB if not specified.

#### Parameters

* **indexCacheSizeBytes?**: `null` \| `bigint`

* **metadataCacheSizeBytes?**: `null` \| `bigint`

#### Returns

[`Session`](Session.md)

## Methods

### approxNumItems()

```ts
approxNumItems(): number
```

Get the approximate number of items cached in the session.

#### Returns

`number`

***

### sizeBytes()

```ts
sizeBytes(): bigint
```

Get the current size of the session caches in bytes.

#### Returns

`bigint`

***

### default()

```ts
static default(): Session
```

Create a session with default cache sizes.

This is equivalent to creating a session with 6GB index cache
and 1GB metadata cache.

#### Returns

[`Session`](Session.md)
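
A minimal usage sketch. Note the assumptions: it presumes a `Session` can be passed to `connect` via a `session` option, which may differ by release, and the cache sizes are illustrative.

```ts
import * as lancedb from "@lancedb/lancedb";
import { Session } from "@lancedb/lancedb";

// 2 GiB index cache, 512 MiB metadata cache, shared across connections.
const session = new Session(2n * 1024n ** 3n, 512n * 1024n ** 2n);
const db = await lancedb.connect("./.lancedb", { session }); // `session` option assumed

console.log(session.sizeBytes(), session.approxNumItems());
```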
docs/src/js/classes/StaticHeaderProvider.md (new file, +70)
@@ -0,0 +1,70 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / StaticHeaderProvider

# Class: StaticHeaderProvider

Example implementation: A simple header provider that returns static headers.

This is an example implementation showing how to create a HeaderProvider
for cases where headers don't change during the session.

## Example

```typescript
const provider = new StaticHeaderProvider({
  authorization: "Bearer my-token",
  "X-Custom-Header": "custom-value"
});
const headers = provider.getHeaders();
// Returns: {authorization: 'Bearer my-token', 'X-Custom-Header': 'custom-value'}
```

## Extends

- [`HeaderProvider`](HeaderProvider.md)

## Constructors

### new StaticHeaderProvider()

```ts
new StaticHeaderProvider(headers): StaticHeaderProvider
```

Initialize with static headers.

#### Parameters

* **headers**: `Record`<`string`, `string`>
Headers to return for every request.

#### Returns

[`StaticHeaderProvider`](StaticHeaderProvider.md)

#### Overrides

[`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)

## Methods

### getHeaders()

```ts
getHeaders(): Record<string, string>
```

Return the static headers.

#### Returns

`Record`<`string`, `string`>

Copy of the static headers.

#### Overrides

[`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)
@@ -40,7 +40,7 @@ Returns the name of the table

### add()

```ts
abstract add(data, options?): Promise<AddResult>
```

Insert records into this Table.

@@ -54,14 +54,17 @@ Insert records into this Table.

#### Returns

`Promise`<[`AddResult`](../interfaces/AddResult.md)>

A promise that resolves to an object containing the new version number of the table

***

### addColumns()

```ts
abstract addColumns(newColumnTransforms): Promise<AddColumnsResult>
```

Add new columns with defined values.

@@ -76,14 +79,17 @@ Add new columns with defined values.

#### Returns

`Promise`<[`AddColumnsResult`](../interfaces/AddColumnsResult.md)>

A promise that resolves to an object containing the new version number of the table after adding the columns.

***

### alterColumns()

```ts
abstract alterColumns(columnAlterations): Promise<AlterColumnsResult>
```

Alter the name or nullability of columns.

@@ -96,7 +102,10 @@ Alter the name or nullability of columns.

#### Returns

`Promise`<[`AlterColumnsResult`](../interfaces/AlterColumnsResult.md)>

A promise that resolves to an object containing the new version number of the table after altering the columns.

***
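
A hypothetical illustration of the new result objects (the `version` field name is an assumption based on the descriptions above; check the `AddResult`/`DeleteResult` interfaces for the exact shape):

```ts
const addRes = await table.add([{ id: 1, text: "hello" }]);
console.log(addRes.version); // new table version after the append

const delRes = await table.delete("id = 1");
console.log(delRes.version); // new table version after the delete
```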
@@ -252,7 +261,7 @@ await table.createIndex("my_float_col");

### delete()

```ts
abstract delete(predicate): Promise<DeleteResult>
```

Delete the rows that satisfy the predicate.

@@ -263,7 +272,10 @@ Delete the rows that satisfy the predicate.

#### Returns

`Promise`<[`DeleteResult`](../interfaces/DeleteResult.md)>

A promise that resolves to an object containing the new version number of the table

***

@@ -284,7 +296,7 @@ Return a brief description of the table

### dropColumns()

```ts
abstract dropColumns(columnNames): Promise<DropColumnsResult>
```

Drop one or more columns from the dataset

@@ -303,7 +315,10 @@ then call ``cleanup_files`` to remove the old files.

#### Returns

`Promise`<[`DropColumnsResult`](../interfaces/DropColumnsResult.md)>

A promise that resolves to an object containing the new version number of the table after dropping the columns.

***
@@ -597,7 +612,7 @@ of the given query

#### Parameters

* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
the query, a vector or string

* **queryType?**: `string`
@@ -659,6 +674,48 @@ console.log(tags); // { "v1": { version: 1, manifestSize: ... } }

***

### takeOffsets()

```ts
abstract takeOffsets(offsets): TakeQuery
```

Create a query that returns a subset of the rows in the table.

#### Parameters

* **offsets**: `number`[]
The offsets of the rows to return.

#### Returns

[`TakeQuery`](TakeQuery.md)

A builder that can be used to parameterize the query.

***

### takeRowIds()

```ts
abstract takeRowIds(rowIds): TakeQuery
```

Create a query that returns a subset of the rows in the table.

#### Parameters

* **rowIds**: `number`[]
The row ids of the rows to return.

#### Returns

[`TakeQuery`](TakeQuery.md)

A builder that can be used to parameterize the query.

***
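
A minimal sketch of the take APIs (assumes an existing table; the row ids in the second call would normally come from an earlier query run with `.withRowId()`):

```ts
// Fetch specific rows by position, projecting two columns.
const byOffset = await table
  .takeOffsets([0, 2, 5])
  .select(["id", "text"])
  .toArray();

// Fetch specific rows by row id.
const byRowId = await table.takeRowIds([42, 99]).toArray();
```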

### toArrow()

```ts
@@ -678,7 +735,7 @@ Return the table as an arrow table

#### update(opts)

```ts
abstract update(opts): Promise<UpdateResult>
```

Update existing records in the Table

@@ -689,7 +746,10 @@ Update existing records in the Table

##### Returns

`Promise`<[`UpdateResult`](../interfaces/UpdateResult.md)>

A promise that resolves to an object containing the number of rows updated and the new version number

##### Example

@@ -700,7 +760,7 @@ table.update({where:"x = 2", values:{"vector": [10, 10]}})

#### update(opts)

```ts
abstract update(opts): Promise<UpdateResult>
```

Update existing records in the Table

@@ -711,7 +771,10 @@ Update existing records in the Table

##### Returns

`Promise`<[`UpdateResult`](../interfaces/UpdateResult.md)>

A promise that resolves to an object containing the number of rows updated and the new version number

##### Example

@@ -722,7 +785,7 @@ table.update({where:"x = 2", valuesSql:{"x": "x + 1"}})

#### update(updates, options)

```ts
abstract update(updates, options?): Promise<UpdateResult>
```

Update existing records in the Table

@@ -745,10 +808,6 @@ repeatedly calling this method.

* **updates**: `Record`<`string`, `string`> \| `Map`<`string`, `string`>
the columns to update

* **options?**: `Partial`<[`UpdateOptions`](../interfaces/UpdateOptions.md)>
additional options to control

@@ -756,7 +815,15 @@ repeatedly calling this method.

##### Returns

`Promise`<[`UpdateResult`](../interfaces/UpdateResult.md)>

A promise that resolves to an object containing the number of rows updated and the new version number

Keys in the map should specify the name of the column to update.
Values in the map provide the new value of the column. These can
be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
based on the row being updated (e.g. "my_col + 1")

***
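
For context, the updated signature in use (a minimal sketch; the `rowsUpdated`/`version` field names are assumptions based on the description above, see `UpdateResult` for the exact shape):

```ts
const res = await table.update({
  where: "x = 2",
  values: { vector: [10, 10] },
});
console.log(res.rowsUpdated, res.version);
```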
@@ -774,7 +841,7 @@ by `query`.

#### Parameters

* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md)

#### Returns

265
docs/src/js/classes/TakeQuery.md
Normal file
265
docs/src/js/classes/TakeQuery.md
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / TakeQuery
|
||||||
|
|
||||||
|
# Class: TakeQuery
|
||||||
|
|
||||||
|
A query that returns a subset of the rows in the table.
|
||||||
|
|
||||||
|
## Extends
|
||||||
|
|
||||||
|
- [`QueryBase`](QueryBase.md)<`NativeTakeQuery`>
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### inner
|
||||||
|
|
||||||
|
```ts
|
||||||
|
protected inner: TakeQuery | Promise<TakeQuery>;
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
|
||||||
|
|
||||||
|
## Methods
|
||||||
|
|
||||||
|
### analyzePlan()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
analyzePlan(): Promise<string>
|
||||||
|
```
|
||||||
|
|
||||||
|
Executes the query and returns the physical query plan annotated with runtime metrics.
|
||||||
|
|
||||||
|
This is useful for debugging and performance analysis, as it shows how the query was executed
|
||||||
|
and includes metrics such as elapsed time, rows processed, and I/O statistics.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`string`>
|
||||||
|
|
||||||
|
A query execution plan with runtime metrics for each step.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```ts
|
||||||
|
import * as lancedb from "@lancedb/lancedb"
|
||||||
|
|
||||||
|
const db = await lancedb.connect("./.lancedb");
|
||||||
|
const table = await db.createTable("my_table", [
|
||||||
|
{ vector: [1.1, 0.9], id: "1" },
|
||||||
|
]);
|
||||||
|
|
||||||
|
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
|
||||||
|
|
||||||
|
Example output (with runtime metrics inlined):
|
||||||
|
AnalyzeExec verbose=true, metrics=[]
|
||||||
|
ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
|
||||||
|
Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
|
||||||
|
CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
|
||||||
|
GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
|
||||||
|
FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
|
||||||
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
|
||||||
|
KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
|
||||||
|
LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### execute()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
protected execute(options?): RecordBatchIterator
|
||||||
|
```
|
||||||
|
|
||||||
|
Execute the query and return the results as an
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`RecordBatchIterator`](RecordBatchIterator.md)
|
||||||
|
|
||||||
|
#### See
|
||||||
|
|
||||||
|
- AsyncIterator
|
||||||
|
of
|
||||||
|
- RecordBatch.
|
||||||
|
|
||||||
|
By default, LanceDb will use many threads to calculate results and, when
|
||||||
|
the result set is large, multiple batches will be processed at one time.
|
||||||
|
This readahead is limited however and backpressure will be applied if this
|
||||||
|
stream is consumed slowly (this constrains the maximum memory used by a
|
||||||
|
single query)
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### explainPlan()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
explainPlan(verbose): Promise<string>
|
||||||
|
```
|
||||||
|
|
||||||
|
Generates an explanation of the query execution plan.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **verbose**: `boolean` = `false`
|
||||||
|
If true, provides a more detailed explanation. Defaults to false.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`string`>
|
||||||
|
|
||||||
|
A Promise that resolves to a string containing the query execution plan explanation.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```ts
|
||||||
|
import * as lancedb from "@lancedb/lancedb"
|
||||||
|
const db = await lancedb.connect("./.lancedb");
|
||||||
|
const table = await db.createTable("my_table", [
|
||||||
|
{ vector: [1.1, 0.9], id: "1" },
|
||||||
|
]);
|
||||||
|
const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### select()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
select(columns): this
|
||||||
|
```
|
||||||
|
|
||||||
|
Return only the specified columns.
|
||||||
|
|
||||||
|
By default a query will return all columns from the table. However, this can have
|
||||||
|
a very significant impact on latency. LanceDb stores data in a columnar fashion. This
|
||||||
|
means we can finely tune our I/O to select exactly the columns we need.
|
||||||
|
|
||||||
|
As a best practice you should always limit queries to the columns that you need. If you
|
||||||
|
pass in an array of column names then only those columns will be returned.
|
||||||
|
|
||||||
|
You can also use this method to create new "dynamic" columns based on your existing columns.
|
||||||
|
For example, you may not care about "a" or "b" but instead simply want "a + b". This is often
|
||||||
|
seen in the SELECT clause of an SQL query (e.g. `SELECT a+b FROM my_table`).
|
||||||
|
|
||||||
|
To create dynamic columns you can pass in a Map<string, string>. A column will be returned
|
||||||
|
for each entry in the map. The key provides the name of the column. The value is
|
||||||
|
an SQL string used to specify how the column is calculated.
|
||||||
|
|
||||||
|
For example, an SQL query might state `SELECT a + b AS combined, c`. The equivalent
|
||||||
|
input to this method would be:
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **columns**: `string` \| `string`[] \| `Record`<`string`, `string`> \| `Map`<`string`, `string`>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`this`
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|

```ts
new Map([["combined", "a + b"], ["c", "c"]])
```

Columns will always be returned in the order given, even if that order is different than
the order used when adding the data.

Note that you can pass in a `Record<string, string>` (e.g. an object literal). This method
uses `Object.entries` which should preserve the insertion order of the object. However,
object insertion order is easy to get wrong and `Map` is more foolproof.
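
A short usage sketch of both forms (table and column names are illustrative,
not part of the documented API):

```ts
// Fetch only the columns you need.
const rows = await table.query().select(["id", "vector"]).toArray();

// Compute a dynamic column with SQL while keeping "c" unchanged.
const combined = await table
  .query()
  .select(new Map([["combined", "a + b"], ["c", "c"]]))
  .toArray();
```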
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### toArray()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
toArray(options?): Promise<any[]>
|
||||||
|
```
|
||||||
|
|
||||||
|
Collect the results as an array of objects.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`any`[]>
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### toArrow()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
toArrow(options?): Promise<Table<any>>
|
||||||
|
```
|
||||||
|
|
||||||
|
Collect the results as an Arrow
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<`Table`<`any`>>
|
||||||
|
|
||||||
|
#### See
|
||||||
|
|
||||||
|
ArrowTable.
|
||||||
|
|
||||||
|
#### Inherited from
|
||||||
|
|
||||||
|
[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### withRowId()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
withRowId(): this
|
||||||
|
```
|
||||||
|
|
||||||
|
Whether to return the row id in the results.
|
||||||
|
|
||||||
|
This column can be used to match results between different queries. For
|
||||||
|
example, to match results from a full text search and a vector search in
|
||||||
|
order to perform hybrid search.
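
A hedged sketch of that matching pattern, assuming a full-text index on a
`text` column and that row ids surface as a `_rowid` field:

```ts
// Run both searches with row ids enabled so results can be matched up.
const vectorHits = await table.query().nearestTo([0.5, 0.2]).withRowId().toArray();
const textHits = await table.query().fullTextSearch("knight").withRowId().toArray();

// Keep the rows that appear in both result sets.
const textIds = new Set(textHits.map((r) => r._rowid));
const both = vectorHits.filter((r) => textIds.has(r._rowid));
```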

#### Returns

`this`

#### Inherited from

[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
@@ -16,7 +16,7 @@ This builder can be reused to execute the query many times.
 
 ## Extends
 
-- [`QueryBase`](QueryBase.md)<`NativeVectorQuery`>
+- `StandardQueryBase`<`NativeVectorQuery`>
 
 ## Properties
 
@@ -28,7 +28,7 @@ protected inner: VectorQuery | Promise<VectorQuery>;
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
+`StandardQueryBase.inner`
 
 ## Methods
 
@@ -91,7 +91,7 @@ AnalyzeExec verbose=true, metrics=[]
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
+`StandardQueryBase.analyzePlan`
 
 ***
 
@@ -248,7 +248,7 @@ single query)
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
+`StandardQueryBase.execute`
 
 ***
 
@@ -284,7 +284,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
+`StandardQueryBase.explainPlan`
 
 ***
 
@@ -305,7 +305,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
+`StandardQueryBase.fastSearch`
 
 ***
 
@@ -335,7 +335,7 @@ Use `where` instead
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
+`StandardQueryBase.filter`
 
 ***
 
@@ -357,7 +357,7 @@ fullTextSearch(query, options?): this
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
+`StandardQueryBase.fullTextSearch`
 
 ***
 
@@ -382,7 +382,54 @@ called then every valid row from the table will be returned.
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
+`StandardQueryBase.limit`
+
+***
+
+### maximumNprobes()
+
+```ts
+maximumNprobes(maximumNprobes): VectorQuery
+```
+
+Set the maximum number of probes used.
+
+This controls the maximum number of partitions that will be searched. If this
+number is greater than minimumNprobes then the excess partitions will _only_ be
+searched if we have not found enough results. This can be useful when there is
+a narrow filter to allow these queries to spend more time searching and avoid
+potential false negatives.
+
+#### Parameters
+
+* **maximumNprobes**: `number`
+
+#### Returns
+
+[`VectorQuery`](VectorQuery.md)
+
+***
+
+### minimumNprobes()
+
+```ts
+minimumNprobes(minimumNprobes): VectorQuery
+```
+
+Set the minimum number of probes used.
+
+This controls the minimum number of partitions that will be searched. This
+parameter will impact every query against a vector index, regardless of the
+filter. See `nprobes` for more details. Higher values will increase recall
+but will also increase latency.
+
+#### Parameters
+
+* **minimumNprobes**: `number`
+
+#### Returns
+
+[`VectorQuery`](VectorQuery.md)
 
 ***
 
@@ -413,6 +460,10 @@ For best results we recommend tuning this parameter with a benchmark against
 your actual data to find the smallest possible value that will still give
 you the desired recall.
+
+For more fine grained control over behavior when you have a very narrow filter
+you can use `minimumNprobes` and `maximumNprobes`. This method sets both
+the minimum and maximum to the same value.
 
 #### Parameters
 
 * **nprobes**: `number`
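
Taken together, an illustrative sketch of the two tuning styles added above
(values are placeholders, not recommendations):

```ts
// Fixed probe count: minimum and maximum set together.
const fixed = await table.query().nearestTo([0.5, 0.2]).nprobes(20).toArray();

// Adaptive: always probe 20 partitions, and up to 50 when a narrow
// filter leaves too few candidates.
const adaptive = await table
  .query()
  .nearestTo([0.5, 0.2])
  .where("category = 'rare'")
  .minimumNprobes(20)
  .maximumNprobes(50)
  .toArray();
```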
@@ -429,6 +480,10 @@ you the desired recall.
 offset(offset): this
 ```
+
+Set the number of rows to skip before returning results.
+
+This is useful for pagination.
 
 #### Parameters
 
 * **offset**: `number`
 
@@ -439,7 +494,7 @@ offset(offset): this
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
+`StandardQueryBase.offset`
 
 ***
 
@@ -586,7 +641,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
+`StandardQueryBase.select`
 
 ***
 
@@ -608,7 +663,7 @@ Collect the results as an array of objects.
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
+`StandardQueryBase.toArray`
 
 ***
 
@@ -634,7 +689,7 @@ ArrowTable.
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
+`StandardQueryBase.toArrow`
 
 ***
 
@@ -669,7 +724,7 @@ on the filter column(s).
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
+`StandardQueryBase.where`
 
 ***
 
@@ -691,4 +746,4 @@ order to perform hybrid search.
 
 #### Inherited from
 
-[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
+`StandardQueryBase.withRowId`

@@ -15,6 +15,14 @@ Enum representing the types of full-text queries supported.
 
 ## Enumeration Members
 
+### Boolean
+
+```ts
+Boolean: "boolean";
+```
+
+***
+
 ### Boost
 
 ```ts

docs/src/js/enumerations/Occur.md (new file, 37 lines)
@@ -0,0 +1,37 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / Occur
+
+# Enumeration: Occur
+
+Enum representing the occurrence of terms in full-text queries.
+
+- `Must`: The term must be present in the document.
+- `Should`: The term should contribute to the document score, but is not required.
+- `MustNot`: The term must not be present in the document.
+
+## Enumeration Members
+
+### Must
+
+```ts
+Must: "MUST";
+```
+
+***
+
+### MustNot
+
+```ts
+MustNot: "MUST_NOT";
+```
+
+***
+
+### Should
+
+```ts
+Should: "SHOULD";
+```
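
An editorial sketch of how these variants might combine in a boolean
full-text query — assuming `BooleanQuery` accepts `[Occur, query]` pairs and
`MatchQuery` takes the query text followed by the column name:

```ts
import { BooleanQuery, MatchQuery, Occur } from "@lancedb/lancedb";

// "knight" must match, "dragon" must not, and "castle" only boosts the score.
const query = new BooleanQuery([
  [Occur.Must, new MatchQuery("knight", "text")],
  [Occur.MustNot, new MatchQuery("dragon", "text")],
  [Occur.Should, new MatchQuery("castle", "text")],
]);
```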

docs/src/js/enumerations/Operator.md (new file, 28 lines)
@@ -0,0 +1,28 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / Operator
+
+# Enumeration: Operator
+
+Enum representing the logical operators used in full-text queries.
+
+- `And`: All terms must match.
+- `Or`: At least one term must match.
+
+## Enumeration Members
+
+### And
+
+```ts
+And: "AND";
+```
+
+***
+
+### Or
+
+```ts
+Or: "OR";
+```
@@ -6,10 +6,14 @@
 
 # Function: connect()
 
-## connect(uri, options)
+## connect(uri, options, session, headerProvider)
 
 ```ts
-function connect(uri, options?): Promise<Connection>
+function connect(
+   uri,
+   options?,
+   session?,
+   headerProvider?): Promise<Connection>
 ```
 
 Connect to a LanceDB instance at the given URI.
 
@@ -29,6 +33,10 @@ Accepted formats:
 
 * **options?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)>
   The options to use when connecting to the database
 
+* **session?**: [`Session`](../classes/Session.md)
+
+* **headerProvider?**: [`HeaderProvider`](../classes/HeaderProvider.md) \| () => `Record`<`string`, `string`> \| () => `Promise`<`Record`<`string`, `string`>>
+
 ### Returns
 
 `Promise`<[`Connection`](../classes/Connection.md)>
 
@@ -50,6 +58,18 @@ const conn = await connect(
 });
 ```
+
+Using with a header provider for per-request authentication:
+
+```ts
+const provider = new StaticHeaderProvider({
+  "X-API-Key": "my-key"
+});
+const conn = await connectWithHeaderProvider(
+  "db://host:port",
+  options,
+  provider
+);
+```
 
 ## connect(options)
 
 ```ts
 
@@ -77,7 +97,7 @@ Accepted formats:
 
 [ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
 
-### Example
+### Examples
 
 ```ts
 const conn = await connect({
 
@@ -85,3 +105,11 @@ const conn = await connect({
 storageOptions: {timeout: "60s"}
 });
 ```
+
+```ts
+const session = Session.default();
+const conn = await connect({
+  uri: "/path/to/database",
+  session: session
+});
+```
@@ -13,7 +13,7 @@ function makeArrowTable(
 metadata?): ArrowTable
 ```
 
-An enhanced version of the makeTable function from Apache Arrow
+An enhanced version of the apache-arrow makeTable function
 that supports nested fields and embeddings columns.
 
 (typically you do not need to call this function. It will be called automatically
@@ -12,35 +12,49 @@
 
 ## Enumerations
 
 - [FullTextQueryType](enumerations/FullTextQueryType.md)
+- [Occur](enumerations/Occur.md)
+- [Operator](enumerations/Operator.md)
 
 ## Classes
 
+- [BooleanQuery](classes/BooleanQuery.md)
 - [BoostQuery](classes/BoostQuery.md)
 - [Connection](classes/Connection.md)
+- [HeaderProvider](classes/HeaderProvider.md)
 - [Index](classes/Index.md)
 - [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
 - [MatchQuery](classes/MatchQuery.md)
 - [MergeInsertBuilder](classes/MergeInsertBuilder.md)
 - [MultiMatchQuery](classes/MultiMatchQuery.md)
+- [NativeJsHeaderProvider](classes/NativeJsHeaderProvider.md)
+- [OAuthHeaderProvider](classes/OAuthHeaderProvider.md)
 - [PhraseQuery](classes/PhraseQuery.md)
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
 - [RecordBatchIterator](classes/RecordBatchIterator.md)
+- [Session](classes/Session.md)
+- [StaticHeaderProvider](classes/StaticHeaderProvider.md)
 - [Table](classes/Table.md)
 - [TagContents](classes/TagContents.md)
 - [Tags](classes/Tags.md)
+- [TakeQuery](classes/TakeQuery.md)
 - [VectorColumnOptions](classes/VectorColumnOptions.md)
 - [VectorQuery](classes/VectorQuery.md)
 
 ## Interfaces
 
+- [AddColumnsResult](interfaces/AddColumnsResult.md)
 - [AddColumnsSql](interfaces/AddColumnsSql.md)
 - [AddDataOptions](interfaces/AddDataOptions.md)
+- [AddResult](interfaces/AddResult.md)
+- [AlterColumnsResult](interfaces/AlterColumnsResult.md)
 - [ClientConfig](interfaces/ClientConfig.md)
 - [ColumnAlteration](interfaces/ColumnAlteration.md)
 - [CompactionStats](interfaces/CompactionStats.md)
 - [ConnectionOptions](interfaces/ConnectionOptions.md)
 - [CreateTableOptions](interfaces/CreateTableOptions.md)
+- [DeleteResult](interfaces/DeleteResult.md)
+- [DropColumnsResult](interfaces/DropColumnsResult.md)
 - [ExecutableQuery](interfaces/ExecutableQuery.md)
 - [FragmentStatistics](interfaces/FragmentStatistics.md)
 - [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
@@ -54,7 +68,7 @@
 - [IndexStatistics](interfaces/IndexStatistics.md)
 - [IvfFlatOptions](interfaces/IvfFlatOptions.md)
 - [IvfPqOptions](interfaces/IvfPqOptions.md)
-- [MergeStats](interfaces/MergeStats.md)
+- [MergeResult](interfaces/MergeResult.md)
 - [OpenTableOptions](interfaces/OpenTableOptions.md)
 - [OptimizeOptions](interfaces/OptimizeOptions.md)
 - [OptimizeStats](interfaces/OptimizeStats.md)
@@ -64,8 +78,12 @@
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
 - [TableStatistics](interfaces/TableStatistics.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
+- [TlsConfig](interfaces/TlsConfig.md)
+- [TokenResponse](interfaces/TokenResponse.md)
 - [UpdateOptions](interfaces/UpdateOptions.md)
+- [UpdateResult](interfaces/UpdateResult.md)
 - [Version](interfaces/Version.md)
+- [WriteExecutionOptions](interfaces/WriteExecutionOptions.md)
 
 ## Type Aliases
 
@@ -74,6 +92,7 @@
 - [FieldLike](type-aliases/FieldLike.md)
 - [IntoSql](type-aliases/IntoSql.md)
 - [IntoVector](type-aliases/IntoVector.md)
+- [MultiVector](type-aliases/MultiVector.md)
 - [RecordBatchLike](type-aliases/RecordBatchLike.md)
 - [SchemaLike](type-aliases/SchemaLike.md)
 - [TableLike](type-aliases/TableLike.md)
docs/src/js/interfaces/AddColumnsResult.md (new file, 15 lines)
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / AddColumnsResult
+
+# Interface: AddColumnsResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```

docs/src/js/interfaces/AddResult.md (new file, 15 lines)
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / AddResult
+
+# Interface: AddResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```

docs/src/js/interfaces/AlterColumnsResult.md (new file, 15 lines)
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / AlterColumnsResult
+
+# Interface: AlterColumnsResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```
@@ -16,6 +16,14 @@ optional extraHeaders: Record<string, string>;
 
 ***
 
+### idDelimiter?
+
+```ts
+optional idDelimiter: string;
+```
+
+***
+
 ### retryConfig?
 
 ```ts
 
@@ -32,6 +40,14 @@ optional timeoutConfig: TimeoutConfig;
 
 ***
 
+### tlsConfig?
+
+```ts
+optional tlsConfig: TlsConfig;
+```
+
+***
+
 ### userAgent?
 
 ```ts
@@ -70,6 +70,17 @@ Defaults to 'us-east-1'.
 
 ***
 
+### session?
+
+```ts
+optional session: Session;
+```
+
+(For LanceDB OSS only): the session to use for this connection. Holds
+shared caches and other session-specific state.
+
+***
+
 ### storageOptions?
 
 ```ts
docs/src/js/interfaces/DeleteResult.md (new file, 15 lines)
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DeleteResult
+
+# Interface: DeleteResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```

docs/src/js/interfaces/DropColumnsResult.md (new file, 15 lines)
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DropColumnsResult
+
+# Interface: DropColumnsResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```
@@ -23,7 +23,7 @@ whether to remove punctuation
 
 ### baseTokenizer?
 
 ```ts
-optional baseTokenizer: "raw" | "simple" | "whitespace";
+optional baseTokenizer: "raw" | "simple" | "whitespace" | "ngram";
 ```
 
 The tokenizer to use when building the index.
 
@@ -71,6 +71,36 @@ tokens longer than this length will be ignored
 
 ***
 
+### ngramMaxLength?
+
+```ts
+optional ngramMaxLength: number;
+```
+
+ngram max length
+
+***
+
+### ngramMinLength?
+
+```ts
+optional ngramMinLength: number;
+```
+
+ngram min length
+
+***
+
+### prefixOnly?
+
+```ts
+optional prefixOnly: boolean;
+```
+
+whether to only index the prefix of the token for ngram tokenizer
+
+***
+
 ### removeStopWords?
 
 ```ts
@@ -26,6 +26,18 @@ will be used to determine the most useful kind of index to create.
 
 ***
 
+### name?
+
+```ts
+optional name: string;
+```
+
+Optional custom name for the index.
+
+If not provided, a default name will be generated based on the column name.
+
+***
+
 ### replace?
 
 ```ts
 
@@ -42,8 +54,27 @@ The default is true
 
 ***
 
+### train?
+
+```ts
+optional train: boolean;
+```
+
+Whether to train the index with existing data.
+
+If true (default), the index will be trained with existing data in the table.
+If false, the index will be created empty and populated as new data is added.
+
+Note: This option is only supported for scalar indices. Vector indices always train.
+
+***
+
 ### waitTimeoutSeconds?
 
 ```ts
 optional waitTimeoutSeconds: number;
 ```
 
+Timeout in seconds to wait for index creation to complete.
+
+If not specified, the method will return immediately after starting the index creation.
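
An editorial sketch of the new options together, assuming they are passed to
`Table.createIndex` alongside the index `config`:

```ts
import { Index } from "@lancedb/lancedb";

// Create a BTree index named "id_idx" without training it on existing
// rows; it will be populated as new data is added.
await table.createIndex("id", {
  config: Index.btree(),
  name: "id_idx",
  train: false,
});
```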

docs/src/js/interfaces/MergeResult.md (new file, 39 lines)
@@ -0,0 +1,39 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / MergeResult
+
+# Interface: MergeResult
+
+## Properties
+
+### numDeletedRows
+
+```ts
+numDeletedRows: number;
+```
+
+***
+
+### numInsertedRows
+
+```ts
+numInsertedRows: number;
+```
+
+***
+
+### numUpdatedRows
+
+```ts
+numUpdatedRows: number;
+```
+
+***
+
+### version
+
+```ts
+version: number;
+```
docs/src/js/interfaces/MergeStats.md (deleted file)
@@ -1,31 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / MergeStats
-
-# Interface: MergeStats
-
-## Properties
-
-### numDeletedRows
-
-```ts
-numDeletedRows: bigint;
-```
-
-***
-
-### numInsertedRows
-
-```ts
-numInsertedRows: bigint;
-```
-
-***
-
-### numUpdatedRows
-
-```ts
-numUpdatedRows: bigint;
-```
@@ -8,7 +8,7 @@
 
 ## Properties
 
-### indexCacheSize?
+### ~~indexCacheSize?~~
 
 ```ts
 optional indexCacheSize: number;
 
@@ -16,6 +16,11 @@ optional indexCacheSize: number;
 
 Set the size of the index cache, specified as a number of entries
 
+#### Deprecated
+
+Use session-level cache configuration instead.
+Create a Session with custom cache sizes and pass it to the connect() function.
+
 The exact meaning of an "entry" will depend on the type of index:
 - IVF: there is one entry for each IVF partition
 - BTREE: there is one entry for the entire index
@@ -24,10 +24,10 @@ The default is 7 days
 // Delete all versions older than 1 day
 const olderThan = new Date();
 olderThan.setDate(olderThan.getDate() - 1);
-tbl.cleanupOlderVersions(olderThan);
+tbl.optimize({cleanupOlderThan: olderThan});
 
 // Delete all versions except the current version
-tbl.cleanupOlderVersions(new Date());
+tbl.optimize({cleanupOlderThan: new Date()});
 ```
 
 ***
@@ -44,3 +44,17 @@ optional readTimeout: number;
 
 The timeout for reading data from the server in seconds. Default is 300
 seconds (5 minutes). This can also be set via the environment variable
 `LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.
+
+***
+
+### timeout?
+
+```ts
+optional timeout: number;
+```
+
+The overall timeout for the entire request in seconds. This includes
+connection, send, and read time. If the entire request doesn't complete
+within this time, it will fail. Default is None (no overall timeout).
+This can also be set via the environment variable `LANCE_CLIENT_TIMEOUT`,
+as an integer number of seconds.
docs/src/js/interfaces/TlsConfig.md (new file, 49 lines)
@@ -0,0 +1,49 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TlsConfig
+
+# Interface: TlsConfig
+
+TLS/mTLS configuration for the remote HTTP client.
+
+## Properties
+
+### assertHostname?
+
+```ts
+optional assertHostname: boolean;
+```
+
+Whether to verify the hostname in the server's certificate.
+
+***
+
+### certFile?
+
+```ts
+optional certFile: string;
+```
+
+Path to the client certificate file (PEM format) for mTLS authentication.
+
+***
+
+### keyFile?
+
+```ts
+optional keyFile: string;
+```
+
+Path to the client private key file (PEM format) for mTLS authentication.
+
+***
+
+### sslCaCert?
+
+```ts
+optional sslCaCert: string;
+```
+
+Path to the CA certificate file (PEM format) for server verification.
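
A hedged connection sketch using these fields, assuming the `tlsConfig` is
supplied through the connection's `clientConfig` (paths and host are
placeholders):

```ts
const conn = await lancedb.connect("db://host:port", {
  clientConfig: {
    tlsConfig: {
      // Server verification plus client-side mTLS credentials.
      sslCaCert: "/etc/certs/ca.pem",
      certFile: "/etc/certs/client.pem",
      keyFile: "/etc/certs/client.key",
      assertHostname: true,
    },
  },
});
```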

docs/src/js/interfaces/TokenResponse.md (new file, 25 lines)
@@ -0,0 +1,25 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TokenResponse
+
+# Interface: TokenResponse
+
+Token response from OAuth provider.
+
+## Properties
+
+### accessToken
+
+```ts
+accessToken: string;
+```
+
+***
+
+### expiresIn?
+
+```ts
+optional expiresIn: number;
+```

docs/src/js/interfaces/UpdateResult.md (new file, 23 lines)
@@ -0,0 +1,23 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / UpdateResult
+
+# Interface: UpdateResult
+
+## Properties
+
+### rowsUpdated
+
+```ts
+rowsUpdated: number;
+```
+
+***
+
+### version
+
+```ts
+version: number;
+```

docs/src/js/interfaces/WriteExecutionOptions.md (new file, 26 lines)
@@ -0,0 +1,26 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / WriteExecutionOptions
+
+# Interface: WriteExecutionOptions
+
+## Properties
+
+### timeoutMs?
+
+```ts
+optional timeoutMs: number;
+```
+
+Maximum time to run the operation before cancelling it.
+
+By default, there is a 30-second timeout that is only enforced after the
+first attempt. This is to prevent spending too long retrying to resolve
+conflicts. For example, if a write attempt takes 20 seconds and fails,
+the second attempt will be cancelled after 10 seconds, hitting the
+30-second timeout. However, a write that takes one hour and succeeds on the
+first attempt will not be cancelled.
+
+When this is set, the timeout is enforced on all attempts, including the first.
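
A hedged usage sketch, assuming write methods such as `Table.add` accept
these options as a trailing argument:

```ts
// Enforce a 60-second cap on every attempt, including the first.
await table.add(rows, { timeoutMs: 60_000 });
```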

docs/src/js/type-aliases/MultiVector.md (new file, 11 lines)
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / MultiVector
+
+# Type Alias: MultiVector
+
+```ts
+type MultiVector: IntoVector[];
+```

@@ -428,7 +428,7 @@
 "\n",
 "**Why?** \n",
 "Embedding the UFO dataset and ingesting it into LanceDB takes **~2 hours on a T4 GPU**. To save time: \n",
-"- **Use the pre-prepared table with index created ** (provided below) to proceed directly to step7: search. \n",
+"- **Use the pre-prepared table with index created** (provided below) to proceed directly to **Step 7**: search. \n",
 "- **Step 5a** contains the full ingestion code for reference (run it only if necessary). \n",
 "- **Step 6** contains the details on creating the index on the multivector column"
 ]
docs/src/python/datafusion.md (new file, 53 lines)
@@ -0,0 +1,53 @@
+# Apache DataFusion
+
+In Python, LanceDB tables can also be queried with [Apache DataFusion](https://datafusion.apache.org/), an extensible query engine written in Rust that uses Apache Arrow as its in-memory format. This means you can write complex SQL queries to analyze your data in LanceDB.
+
+This integration is done via [Datafusion FFI](https://docs.rs/datafusion-ffi/latest/datafusion_ffi/), which provides a native integration between LanceDB and DataFusion.
+The DataFusion FFI allows column selections and basic filters to be pushed down to LanceDB, reducing the amount of data scanned when executing your query. Additionally, the integration streams data from LanceDB tables, which makes larger-than-memory aggregations possible.
+
+We can demonstrate this by first installing `datafusion` and `lancedb`.
+
+```shell
+pip install datafusion lancedb
+```
+
+We will re-use the dataset [created previously](./pandas_and_pyarrow.md):
+
+```python
+import lancedb
+
+from datafusion import SessionContext
+from lance import FFILanceTableProvider
+
+db = lancedb.connect("data/sample-lancedb")
+data = [
+    {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+    {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}
+]
+lance_table = db.create_table("lance_table", data)
+
+ctx = SessionContext()
+
+ffi_lance_table = FFILanceTableProvider(
+    lance_table.to_lance(), with_row_id=True, with_row_addr=True
+)
+ctx.register_table_provider("ffi_lance_table", ffi_lance_table)
+```
+
+The `to_lance` method converts the LanceDB table to a `LanceDataset`, which is accessible to DataFusion through the DataFusion FFI integration layer.
+To query the resulting Lance dataset in DataFusion, you first need to register the dataset with DataFusion and then just reference it by the same name in your SQL query.
+
+```python
+ctx.table("ffi_lance_table")
+ctx.sql("SELECT * FROM ffi_lance_table")
+```
+
+```
+┌─────────────┬─────────┬────────┬─────────────────┬─────────────────┐
+│ vector      │ item    │ price  │ _rowid          │ _rowaddr        │
+│ float[]     │ varchar │ double │ bigint unsigned │ bigint unsigned │
+├─────────────┼─────────┼────────┼─────────────────┼─────────────────┤
+│ [3.1, 4.1]  │ foo     │ 10.0   │ 0               │ 0               │
+│ [5.9, 26.5] │ bar     │ 20.0   │ 1               │ 1               │
+└─────────────┴─────────┴────────┴─────────────────┴─────────────────┘
+```
@@ -1,101 +0,0 @@
-
-# Getting Started with LanceDB: A Minimal Vector Search Tutorial
-
-Let's set up a LanceDB database, insert vector data, and perform a simple vector search. We'll use simple character classes like "knight" and "rogue" to illustrate semantic relevance.
-
-## 1. Install Dependencies
-
-Before starting, make sure you have the necessary packages:
-
-```bash
-pip install lancedb pandas numpy
-```
-
-## 2. Import Required Libraries
-
-```python
-import lancedb
-import pandas as pd
-import numpy as np
-```
-
-## 3. Connect to LanceDB
-
-You can use a local directory to store your database:
-
-```python
-db = lancedb.connect("./lancedb")
-```
-
-## 4. Create Sample Data
-
-Add sample text data and corresponding 4D vectors:
-
-```python
-data = pd.DataFrame([
-    {"id": "1", "vector": [1.0, 0.0, 0.0, 0.0], "text": "knight"},
-    {"id": "2", "vector": [0.9, 0.1, 0.0, 0.0], "text": "warrior"},
-    {"id": "3", "vector": [0.0, 1.0, 0.0, 0.0], "text": "rogue"},
-    {"id": "4", "vector": [0.0, 0.9, 0.1, 0.0], "text": "thief"},
-    {"id": "5", "vector": [0.5, 0.5, 0.0, 0.0], "text": "ranger"},
-])
-```
-
-## 5. Create a Table in LanceDB
-
-```python
-table = db.create_table("rpg_classes", data=data, mode="overwrite")
-```
-
-Let's see how the table looks:
-
-```python
-print(data)
-```
-
-| id | vector | text |
-|----|--------|------|
-| 1  | [1.0, 0.0, 0.0, 0.0] | knight |
-| 2  | [0.9, 0.1, 0.0, 0.0] | warrior |
-| 3  | [0.0, 1.0, 0.0, 0.0] | rogue |
-| 4  | [0.0, 0.9, 0.1, 0.0] | thief |
-| 5  | [0.5, 0.5, 0.0, 0.0] | ranger |
-
-## 6. Perform a Vector Search
-
-Search for the most similar character classes to our query vector:
-
-```python
-# Query as if we are searching for "rogue"
-results = table.search([0.95, 0.05, 0.0, 0.0]).limit(3).to_df()
-print(results)
-```
-
-This will return the top 3 closest classes to the vector, effectively showing how LanceDB can be used for semantic search.
-
-| id | vector | text | _distance |
-|----|--------|------|-----------|
-| 3  | [0.0, 1.0, 0.0, 0.0] | rogue  | 0.00 |
-| 4  | [0.0, 0.9, 0.1, 0.0] | thief  | 0.02 |
-| 5  | [0.5, 0.5, 0.0, 0.0] | ranger | 0.50 |
-
-Let's try searching for "knight"
-
-```python
-query_vector = [1.0, 0.0, 0.0, 0.0]
-results = table.search(query_vector).limit(3).to_pandas()
-print(results)
-```
-
-| id | vector | text | _distance |
-|----|--------|------|-----------|
-| 1  | [1.0, 0.0, 0.0, 0.0] | knight  | 0.00 |
-| 2  | [0.9, 0.1, 0.0, 0.0] | warrior | 0.02 |
-| 5  | [0.5, 0.5, 0.0, 0.0] | ranger  | 0.50 |
-
-## Next Steps
-
-That's it - you just conducted vector search!
-
-For more beginner tips, check out the [Basic Usage](basic.md) guide.
@@ -30,7 +30,8 @@ excluded_globs = [
 "../src/rag/advanced_techniques/*.md",
 "../src/guides/scalar_index.md",
 "../src/guides/storage.md",
-"../src/search.md"
+"../src/search.md",
+"../src/guides/sql_querying.md",
 ]
 
 python_prefix = "py"

@@ -7,3 +7,4 @@ tantivy==0.20.1
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch
 polars>=0.19, <=1.3.0
+datafusion
java/.mvn/wrapper/maven-wrapper.properties (new file, 19 lines)
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+wrapperVersion=3.3.2
+distributionType=only-script
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip

java/README.md (new file, 37 lines)
@@ -0,0 +1,37 @@
+# LanceDB Java SDK
+
+## Configuration and Initialization
+
+### LanceDB Cloud
+
+For LanceDB Cloud, use the simplified builder API:
+
+```java
+import com.lancedb.lance.namespace.LanceRestNamespace;
+
+// If your DB url is db://example-db, then your database here is example-db
+LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
+    .apiKey("your_lancedb_cloud_api_key")
+    .database("your_database_name")
+    .build();
+```
+
+### LanceDB Enterprise
+
+For Enterprise deployments, use your VPC endpoint:
+
+```java
+LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
+    .apiKey("your_lancedb_enterprise_api_key")
+    .database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
+    .hostOverride("http://<vpc_endpoint_dns_name>:80")
+    .build();
+```
+
+## Development
+
+Build:
+
+```shell
+./mvnw install
+```
|
|||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
lancedb = { path = "../../../rust/lancedb" }
|
lancedb = { path = "../../../rust/lancedb", default-features = false }
|
||||||
lance = { workspace = true }
|
lance = { workspace = true }
|
||||||
arrow = { workspace = true, features = ["ffi"] }
|
arrow = { workspace = true, features = ["ffi"] }
|
||||||
arrow-schema.workspace = true
|
arrow-schema.workspace = true
|
||||||
tokio = "1.23"
|
tokio = "1.46"
|
||||||
jni = "0.21.1"
|
jni = "0.21.1"
|
||||||
snafu.workspace = true
|
snafu.workspace = true
|
||||||
lazy_static.workspace = true
|
lazy_static.workspace = true
|
||||||
serde = { version = "^1" }
|
serde = { version = "^1" }
|
||||||
serde_json = { version = "1" }
|
serde_json = { version = "1" }
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["lancedb/default"]
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ pub trait JNIEnvExt {
 fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;
 
 /// Get strings from Java List<String> object.
+#[allow(dead_code)]
 fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;
 
 /// Get strings from Java String[] object.

@@ -6,6 +6,7 @@ use jni::JNIEnv;
 
 use crate::Result;
 
+#[allow(dead_code)]
 pub trait FromJObject<T> {
 fn extract(&self) -> Result<T>;
 }
@@ -39,6 +40,7 @@ impl FromJObject<f64> for JObject<'_> {
 }
 }
 
+#[allow(dead_code)]
 pub trait FromJString {
 fn extract(&self, env: &mut JNIEnv) -> Result<String>;
 }
@@ -66,6 +68,7 @@ pub trait JMapExt {
 fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
 }
 
+#[allow(dead_code)]
 fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
 where
 for<'a> JObject<'a>: FromJObject<T>,
@@ -8,18 +8,24 @@
 <parent>
 <groupId>com.lancedb</groupId>
 <artifactId>lancedb-parent</artifactId>
-<version>0.19.1-beta.1</version>
+<version>0.22.2-beta.2</version>
 <relativePath>../pom.xml</relativePath>
 </parent>
 
 <artifactId>lancedb-core</artifactId>
-<name>LanceDB Core</name>
+<name>${project.artifactId}</name>
+<description>LanceDB Core</description>
 <packaging>jar</packaging>
 <properties>
 <rust.release.build>false</rust.release.build>
 </properties>
 
 <dependencies>
+<dependency>
+<groupId>com.lancedb</groupId>
+<artifactId>lance-namespace-core</artifactId>
+<version>0.0.1</version>
+</dependency>
 <dependency>
 <groupId>org.apache.arrow</groupId>
 <artifactId>arrow-vector</artifactId>
Some files were not shown because too many files have changed in this diff.