mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 22:09:58 +00:00
Compare commits
17 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
70958f6366 | ||
|
|
1ac745eb18 | ||
|
|
1357fe8aa1 | ||
|
|
0d78929893 | ||
|
|
9e2a68541e | ||
|
|
1aa0fd16e7 | ||
|
|
fec2a05629 | ||
|
|
79a1cd60ee | ||
|
|
88807a59a4 | ||
|
|
e0e7e01ea8 | ||
|
|
a416ebc11d | ||
|
|
f941054baf | ||
|
|
1a81c46505 | ||
|
|
82b25a71e9 | ||
|
|
13c613d45f | ||
|
|
e07389a36c | ||
|
|
e7e9e80b1d |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.22.1"
|
||||
current_version = "0.22.2-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
45
.github/actions/create-failure-issue/action.yml
vendored
Normal file
45
.github/actions/create-failure-issue/action.yml
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
name: Create Failure Issue
|
||||
description: Creates a GitHub issue if any jobs in the workflow failed
|
||||
|
||||
inputs:
|
||||
job-results:
|
||||
description: 'JSON string of job results from needs context'
|
||||
required: true
|
||||
workflow-name:
|
||||
description: 'Name of the workflow'
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Check for failures and create issue
|
||||
shell: bash
|
||||
env:
|
||||
JOB_RESULTS: ${{ inputs.job-results }}
|
||||
WORKFLOW_NAME: ${{ inputs.workflow-name }}
|
||||
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
# Check if any job failed
|
||||
if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
|
||||
echo "Detected job failures, creating issue..."
|
||||
|
||||
# Extract failed job names
|
||||
FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')
|
||||
|
||||
# Create issue with workflow name, failed jobs, and run URL
|
||||
gh issue create \
|
||||
--title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
|
||||
--body "The workflow **$WORKFLOW_NAME** failed during execution.
|
||||
|
||||
**Failed jobs:** $FAILED_JOBS
|
||||
|
||||
**Run URL:** $RUN_URL
|
||||
|
||||
Please investigate the failed jobs and address any issues." \
|
||||
--label "ci"
|
||||
|
||||
echo "Issue created successfully"
|
||||
else
|
||||
echo "No job failures detected, skipping issue creation"
|
||||
fi
|
||||
14
.github/workflows/cargo-publish.yml
vendored
14
.github/workflows/cargo-publish.yml
vendored
@@ -38,3 +38,17 @@ jobs:
|
||||
- name: Publish the package
|
||||
run: |
|
||||
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
|
||||
report-failure:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build]
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/create-failure-issue
|
||||
with:
|
||||
job-results: ${{ toJSON(needs) }}
|
||||
workflow-name: ${{ github.workflow }}
|
||||
|
||||
3
.github/workflows/docs.yml
vendored
3
.github/workflows/docs.yml
vendored
@@ -56,8 +56,9 @@ jobs:
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: docs/package-lock.json
|
||||
- name: Install node dependencies
|
||||
working-directory: node
|
||||
working-directory: nodejs
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
|
||||
15
.github/workflows/java-publish.yml
vendored
15
.github/workflows/java-publish.yml
vendored
@@ -43,7 +43,6 @@ jobs:
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
toolchain: "1.81.0"
|
||||
cache-workspaces: "./java/core/lancedb-jni"
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
@@ -112,3 +111,17 @@ jobs:
|
||||
env:
|
||||
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
|
||||
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
|
||||
report-failure:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [linux-arm64, linux-x86, macos-arm64]
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/create-failure-issue
|
||||
with:
|
||||
job-results: ${{ toJSON(needs) }}
|
||||
workflow-name: ${{ github.workflow }}
|
||||
|
||||
1
.github/workflows/nodejs.yml
vendored
1
.github/workflows/nodejs.yml
vendored
@@ -6,6 +6,7 @@ on:
|
||||
- main
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- nodejs/**
|
||||
- .github/workflows/nodejs.yml
|
||||
- docker-compose.yml
|
||||
|
||||
14
.github/workflows/npm-publish.yml
vendored
14
.github/workflows/npm-publish.yml
vendored
@@ -365,3 +365,17 @@ jobs:
|
||||
ARGS="$ARGS --tag preview"
|
||||
fi
|
||||
npm publish $ARGS
|
||||
report-failure:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-lancedb, test-lancedb, publish]
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/create-failure-issue
|
||||
with:
|
||||
job-results: ${{ toJSON(needs) }}
|
||||
workflow-name: ${{ github.workflow }}
|
||||
|
||||
14
.github/workflows/pypi-publish.yml
vendored
14
.github/workflows/pypi-publish.yml
vendored
@@ -173,3 +173,17 @@ jobs:
|
||||
generate_release_notes: false
|
||||
name: Python LanceDB v${{ steps.extract_version.outputs.version }}
|
||||
body: ${{ steps.python_release_notes.outputs.changelog }}
|
||||
report-failure:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [linux, mac, windows]
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/create-failure-issue
|
||||
with:
|
||||
job-results: ${{ toJSON(needs) }}
|
||||
workflow-name: ${{ github.workflow }}
|
||||
|
||||
1
.github/workflows/python.yml
vendored
1
.github/workflows/python.yml
vendored
@@ -6,6 +6,7 @@ on:
|
||||
- main
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- python/**
|
||||
- .github/workflows/python.yml
|
||||
|
||||
|
||||
13
.github/workflows/rust.yml
vendored
13
.github/workflows/rust.yml
vendored
@@ -96,6 +96,7 @@ jobs:
|
||||
# Need up-to-date compilers for kernels
|
||||
CC: clang-18
|
||||
CXX: clang++-18
|
||||
GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
@@ -117,15 +118,17 @@ jobs:
|
||||
sudo chmod 600 /swapfile
|
||||
sudo mkswap /swapfile
|
||||
sudo swapon /swapfile
|
||||
- name: Start S3 integration test environment
|
||||
working-directory: .
|
||||
run: docker compose up --detach --wait
|
||||
- name: Build
|
||||
run: cargo build --all-features --tests --locked --examples
|
||||
- name: Run tests
|
||||
run: cargo test --all-features --locked
|
||||
- name: Run feature tests
|
||||
run: make -C ./lancedb feature-tests
|
||||
- name: Run examples
|
||||
run: cargo run --example simple --locked
|
||||
- name: Run remote tests
|
||||
# Running this requires access to secrets, so skip if this is
|
||||
# a PR from a fork.
|
||||
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
|
||||
run: make -C ./lancedb remote-tests
|
||||
|
||||
macos:
|
||||
timeout-minutes: 30
|
||||
|
||||
26
.github/workflows/trigger-vectordb-recipes.yml
vendored
26
.github/workflows/trigger-vectordb-recipes.yml
vendored
@@ -1,26 +0,0 @@
|
||||
name: Trigger vectordb-recipers workflow
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/trigger-vectordb-recipes.yml
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Trigger vectordb-recipes workflow
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
|
||||
script: |
|
||||
const result = await github.rest.actions.createWorkflowDispatch({
|
||||
owner: 'lancedb',
|
||||
repo: 'vectordb-recipes',
|
||||
workflow_id: 'examples-test.yml',
|
||||
ref: 'main'
|
||||
});
|
||||
console.log(result);
|
||||
2203
Cargo.lock
generated
2203
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
34
Cargo.toml
34
Cargo.toml
@@ -15,15 +15,15 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"] }
|
||||
lance-io = { "version" = "=0.37.0", default-features = false }
|
||||
lance-index = "=0.37.0"
|
||||
lance-linalg = "=0.37.0"
|
||||
lance-table = "=0.37.0"
|
||||
lance-testing = "=0.37.0"
|
||||
lance-datafusion = "=0.37.0"
|
||||
lance-encoding = "=0.37.0"
|
||||
lance-namespace = "0.0.15"
|
||||
lance = { "version" = "=0.38.0", default-features = false, "features" = ["dynamodb"] }
|
||||
lance-io = { "version" = "=0.38.0", default-features = false }
|
||||
lance-index = "=0.38.0"
|
||||
lance-linalg = "=0.38.0"
|
||||
lance-table = "=0.38.0"
|
||||
lance-testing = "=0.38.0"
|
||||
lance-datafusion = "=0.38.0"
|
||||
lance-encoding = "=0.38.0"
|
||||
lance-namespace = "0.0.16"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "55.1", optional = false }
|
||||
arrow-array = "55.1"
|
||||
@@ -31,7 +31,6 @@ arrow-data = "55.1"
|
||||
arrow-ipc = "55.1"
|
||||
arrow-ord = "55.1"
|
||||
arrow-schema = "55.1"
|
||||
arrow-arith = "55.1"
|
||||
arrow-cast = "55.1"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "49.0", default-features = false }
|
||||
@@ -52,7 +51,6 @@ pin-project = "1.0.7"
|
||||
snafu = "0.8"
|
||||
url = "2"
|
||||
num-traits = "0.2"
|
||||
rand = "0.9"
|
||||
regex = "1.10"
|
||||
lazy_static = "1"
|
||||
semver = "1.0.25"
|
||||
@@ -60,7 +58,17 @@ crunchy = "0.2.4"
|
||||
# Temporary pins to work around downstream issues
|
||||
# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
|
||||
chrono = "=0.4.41"
|
||||
# https://github.com/RustCrypto/formats/issues/1684
|
||||
base64ct = "=1.6.0"
|
||||
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
|
||||
bytemuck_derive = ">=1.8.1, <1.9.0"
|
||||
|
||||
# This is only needed when we reference preview releases of lance
|
||||
# [patch.crates-io]
|
||||
# # Force to use the same lance version as the rest of the project to avoid duplicate dependencies
|
||||
# lance = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
|
||||
# lance-io = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
|
||||
# lance-index = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
|
||||
# lance-linalg = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
|
||||
# lance-table = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
|
||||
# lance-testing = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
|
||||
# lance-datafusion = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
|
||||
# lance-encoding = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
|
||||
|
||||
4
ci/create_lancedb_test_connection.sh
Executable file
4
ci/create_lancedb_test_connection.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
export RUST_LOG=info
|
||||
exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"
|
||||
18
ci/run_with_docker_compose.sh
Executable file
18
ci/run_with_docker_compose.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
#
|
||||
# A script for running the given command together with a docker compose environment.
|
||||
#
|
||||
|
||||
# Bring down the docker setup once the command is done running.
|
||||
tear_down() {
|
||||
docker compose -p fixture down
|
||||
}
|
||||
trap tear_down EXIT
|
||||
|
||||
set +xe
|
||||
|
||||
# Bring up the docker compose environment; abort if it fails to start.
|
||||
docker compose -p fixture up --detach --wait || exit 1
|
||||
|
||||
"${@}"
|
||||
68
ci/run_with_test_connection.sh
Executable file
68
ci/run_with_test_connection.sh
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
#
|
||||
# A script for running the given command together with the lancedb cli.
|
||||
#
|
||||
|
||||
# Print an error message to stderr and abort the script with exit status 1.
#
# Arguments:
#   $@ - message describing what went wrong.
die() {
  # Emit the caller's message; the previous `echo $?` printed only the status
  # of the last command and silently dropped the explanation.
  echo "$*" >&2
  exit 1
}
|
||||
|
||||
# Abort (via die) unless the named command can be resolved by the shell.
#
# Arguments:
#   $1 - name of the command to look for.
check_command_exists() {
  # Keep the name local so it does not leak a global variable called `command`.
  local cmd="${1}"
  # `command -v` is a shell builtin, so the lookup itself cannot be missing
  # from the machine the way the external `which` can.
  command -v "${cmd}" &> /dev/null || \
    die "Unable to locate command: ${cmd}. Did you install it?"
}
|
||||
|
||||
# Fetch and unpack the lancedb CLI into the current directory, unless a
# ./lancedb entry is already present.
if [[ ! -e ./lancedb ]]; then
  # Both download paths below go through the GitHub CLI, so verify it first.
  check_command_exists gh

  if [[ -v SOPHON_READ_TOKEN ]]; then
    # Token path: download the encrypted linux-x64 asset from the
    # ci-support-binaries release and decrypt it using the token as password.
    INPUT="lancedb-linux-x64"
    TARGET="${INPUT}.tar.gz"
    gh release \
      --repo lancedb/lancedb \
      download ci-support-binaries \
      --pattern "${INPUT}" \
      || die "failed to fetch cli."
    check_command_exists openssl
    openssl enc -aes-256-cbc \
      -d -pbkdf2 \
      -pass "env:SOPHON_READ_TOKEN" \
      -in "${INPUT}" \
      -out "./${TARGET}" \
      || die "openssl failed"
  else
    # No token: pick the release asset matching this machine.
    # Query the machine type once, before branching, so the linux branch sees
    # it too (it was previously only assigned inside the darwin branch).
    UNAME=$(uname -m)
    ARCH="x64"
    # Use a dedicated variable for the asset's OS name instead of overwriting
    # the OSTYPE variable that bash itself provides.
    if [[ $OSTYPE == 'darwin'* ]]; then
      if [[ $UNAME == 'arm64' ]]; then
        ARCH='arm64'
      fi
      PLATFORM="macos"
    elif [[ $OSTYPE == 'linux'* ]]; then
      if [[ $UNAME == 'aarch64' ]]; then
        ARCH='arm64'
      fi
      PLATFORM="linux"
    else
      die "unknown OSTYPE: $OSTYPE"
    fi

    TARGET="lancedb-${PLATFORM}-${ARCH}.tar.gz"
    gh release \
      --repo lancedb/sophon \
      download lancedb-cli-v0.0.3 \
      --pattern "${TARGET}" \
      || die "failed to fetch cli."
  fi

  check_command_exists tar
  tar xvf "${TARGET}" || die "tar failed."
  # The archive is expected to contain the ./lancedb entry checked for above.
  [[ -e ./lancedb ]] || die "failed to extract lancedb."
fi
|
||||
|
||||
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
|
||||
export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"
|
||||
|
||||
"${@}"
|
||||
@@ -117,7 +117,7 @@ def update_cargo_toml(line_updater):
|
||||
lance_line = ""
|
||||
is_parsing_lance_line = False
|
||||
for line in lines:
|
||||
if line.startswith("lance"):
|
||||
if line.startswith("lance") and not line.startswith("lance-namespace"):
|
||||
# Check if this is a single-line or multi-line entry
|
||||
# Single-line entries either:
|
||||
# 1. End with } (complete inline table)
|
||||
|
||||
@@ -70,6 +70,22 @@ plugins:
|
||||
- mkdocs-jupyter
|
||||
- render_swagger:
|
||||
allow_arbitrary_locations: true
|
||||
- redirects:
|
||||
redirect_maps:
|
||||
# Redirect the home page and other top-level markdown files. This enables maximum SEO benefit
|
||||
# other sub-pages are handled by the injected js in overrides/partials/header.html
|
||||
'index.md': 'https://lancedb.com/docs/'
|
||||
'guides/tables.md': 'https://lancedb.com/docs/tables/'
|
||||
'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
|
||||
'basic.md': 'https://lancedb.com/docs/quickstart/'
|
||||
'faq.md': 'https://lancedb.com/docs/faq/'
|
||||
'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
|
||||
'integrations.md': 'https://lancedb.com/docs/integrations/'
|
||||
'examples.md': 'https://lancedb.com/docs/tutorials/'
|
||||
'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
|
||||
'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
|
||||
|
||||
|
||||
|
||||
markdown_extensions:
|
||||
- admonition
|
||||
@@ -386,4 +402,4 @@ extra:
|
||||
- icon: fontawesome/brands/x-twitter
|
||||
link: https://twitter.com/lancedb
|
||||
- icon: fontawesome/brands/linkedin
|
||||
link: https://www.linkedin.com/company/lancedb
|
||||
link: https://www.linkedin.com/company/lancedb
|
||||
@@ -19,7 +19,13 @@
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
IN THE SOFTWARE.
|
||||
-->
|
||||
|
||||
<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">
|
||||
<p style="margin: 0; font-size: 1.1em;">
|
||||
<strong>This documentation site is deprecated.</strong>
|
||||
Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">
|
||||
lancedb.com/docs</a> for the latest information.
|
||||
</p>
|
||||
</div>
|
||||
{% set class = "md-header" %}
|
||||
{% if "navigation.tabs.sticky" in features %}
|
||||
{% set class = class ~ " md-header--shadow md-header--lifted" %}
|
||||
@@ -150,9 +156,9 @@
|
||||
|
||||
<div style="margin-left: 10px; margin-right: 5px;">
|
||||
<a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
|
||||
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 
28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
|
||||
</a>
|
||||
</div>
|
||||
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 
28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
|
||||
</a>
|
||||
</div>
|
||||
<div style="margin-left: 5px; margin-right: 5px;">
|
||||
<a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>
|
||||
@@ -173,4 +179,77 @@
|
||||
{% include "partials/tabs.html" %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</header>
|
||||
</header>
|
||||
|
||||
<script>
|
||||
(function() {
|
||||
function checkPathAndRedirect() {
|
||||
var banner = document.getElementById('deprecation-banner');
|
||||
|
||||
if (document.querySelector('meta[http-equiv="refresh"]')) {
|
||||
return; // The redirects plugin is already handling this page.
|
||||
}
|
||||
|
||||
var currentPath = window.location.pathname;
|
||||
|
||||
var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
|
||||
? currentPath.slice(0, -1)
|
||||
: currentPath;
|
||||
|
||||
// These are the ONLY paths that should remain on the old site
|
||||
var apiPaths = [
|
||||
'/lancedb/python',
|
||||
'/lancedb/javascript',
|
||||
'/lancedb/js',
|
||||
'/lancedb/api_reference'
|
||||
];
|
||||
|
||||
var isApiPage = apiPaths.some(function(apiPath) {
|
||||
return cleanPath.startsWith(apiPath);
|
||||
});
|
||||
|
||||
if (isApiPage) {
|
||||
if (banner) {
|
||||
banner.style.display = 'none';
|
||||
}
|
||||
} else {
|
||||
if (banner) {
|
||||
banner.style.display = 'block';
|
||||
}
|
||||
|
||||
// Add noindex meta tag to prevent indexing of old docs for seo
|
||||
var noindexMeta = document.createElement('meta');
|
||||
noindexMeta.setAttribute('name', 'robots');
|
||||
noindexMeta.setAttribute('content', 'noindex, follow');
|
||||
document.head.appendChild(noindexMeta);
|
||||
|
||||
// Add canonical link to point to the new docs to reward new site for seo
|
||||
var canonicalLink = document.createElement('link');
|
||||
canonicalLink.setAttribute('rel', 'canonical');
|
||||
canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
|
||||
document.head.appendChild(canonicalLink);
|
||||
|
||||
window.location.replace('https://lancedb.com/docs');
|
||||
}
|
||||
}
|
||||
|
||||
// Run the check only if doc is ready. This makes sure we catch the initial load
|
||||
// and redirect.
|
||||
if (document.readyState === 'loading') {
|
||||
document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
|
||||
} else {
|
||||
checkPathAndRedirect();
|
||||
}
|
||||
|
||||
// Use an interval to handle subsequent navigation clicks.
|
||||
var lastPath = window.location.pathname;
|
||||
setInterval(function() {
|
||||
if (window.location.pathname !== lastPath) {
|
||||
lastPath = window.location.pathname;
|
||||
checkPathAndRedirect();
|
||||
}
|
||||
}, 2000); // keeping it 2 second to make it easy for user to understand
|
||||
// what's happening
|
||||
|
||||
})();
|
||||
</script>
|
||||
@@ -5,3 +5,4 @@ mkdocstrings[python]==0.25.2
|
||||
griffe
|
||||
mkdocs-render-swagger-plugin
|
||||
pydantic
|
||||
mkdocs-redirects
|
||||
|
||||
@@ -52,6 +52,30 @@ the merge result
|
||||
|
||||
***
|
||||
|
||||
### useIndex()
|
||||
|
||||
```ts
|
||||
useIndex(useIndex): MergeInsertBuilder
|
||||
```
|
||||
|
||||
Controls whether to use indexes for the merge operation.
|
||||
|
||||
When set to `true` (the default), the operation will use an index if available
|
||||
on the join key for improved performance. When set to `false`, it forces a full
|
||||
table scan even if an index exists. This can be useful for benchmarking or when
|
||||
the query optimizer chooses a suboptimal path.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **useIndex**: `boolean`
|
||||
Whether to use indices for the merge operation. Defaults to `true`.
|
||||
|
||||
#### Returns
|
||||
|
||||
[`MergeInsertBuilder`](MergeInsertBuilder.md)
|
||||
|
||||
***
|
||||
|
||||
### whenMatchedUpdateAll()
|
||||
|
||||
```ts
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.1-final.0</version>
|
||||
<version>0.22.2-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.1-final.0</version>
|
||||
<version>0.22.2-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.1-final.0</version>
|
||||
<version>0.22.2-beta.1</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.22.1"
|
||||
version = "0.22.2-beta.1"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -211,8 +211,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
},
|
||||
);
|
||||
|
||||
// TODO: https://github.com/lancedb/lancedb/issues/1832
|
||||
it.skip("should be able to omit nullable fields", async () => {
|
||||
it("should be able to omit nullable fields", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
@@ -236,23 +235,36 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
await table.add([data3]);
|
||||
|
||||
let res = await table.query().limit(10).toArray();
|
||||
const resVector = res.map((r) => r.get("vector").toArray());
|
||||
const resVector = res.map((r) =>
|
||||
r.vector ? Array.from(r.vector) : null,
|
||||
);
|
||||
expect(resVector).toEqual([null, data2.vector, data3.vector]);
|
||||
const resItem = res.map((r) => r.get("item").toArray());
|
||||
const resItem = res.map((r) => r.item);
|
||||
expect(resItem).toEqual(["foo", null, "bar"]);
|
||||
const resPrice = res.map((r) => r.get("price").toArray());
|
||||
const resPrice = res.map((r) => r.price);
|
||||
expect(resPrice).toEqual([10.0, 2.0, 3.0]);
|
||||
|
||||
const data4 = { item: "foo" };
|
||||
// We can't omit a column if it's not nullable
|
||||
await expect(table.add([data4])).rejects.toThrow("Invalid user input");
|
||||
await expect(table.add([data4])).rejects.toThrow(
|
||||
"Append with different schema",
|
||||
);
|
||||
|
||||
// But we can alter columns to make them nullable
|
||||
await table.alterColumns([{ path: "price", nullable: true }]);
|
||||
await table.add([data4]);
|
||||
|
||||
res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
|
||||
expect(res).toEqual([data1, data2, data3, data4]);
|
||||
res = (await table.query().limit(10).toArray()).map((r) => ({
|
||||
...r.toJSON(),
|
||||
vector: r.vector ? Array.from(r.vector) : null,
|
||||
}));
|
||||
// Rust fills missing nullable fields with null
|
||||
expect(res).toEqual([
|
||||
{ ...data1, vector: null },
|
||||
{ ...data2, item: null },
|
||||
data3,
|
||||
{ ...data4, price: null, vector: null },
|
||||
]);
|
||||
});
|
||||
|
||||
it("should be able to insert nullable data for non-nullable fields", async () => {
|
||||
@@ -330,6 +342,43 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
const table = await db.createTable("my_table", data);
|
||||
expect(await table.countRows()).toEqual(2);
|
||||
});
|
||||
|
||||
it("should allow undefined and omitted nullable vector fields", async () => {
  // Regression test: a nullable vector column must accept rows whose vector
  // is `undefined`, left out entirely, `null`, or an actual vector.
  const dim = 32;
  const idField = new arrow.Field("id", new arrow.Int32(), true);
  const itemField = new arrow.Field("item", new arrow.Float32(), true);
  const vectorField = new arrow.Field(
    "vector",
    new arrow.FixedSizeList(dim, itemField),
    true, // the vector column itself is nullable
  );
  const schema = new arrow.Schema([idField, vectorField]);

  const db = await connect("memory://");
  const table = await db.createEmptyTable("test_table", schema);

  // Explicit `undefined` must be accepted.
  await table.add([{ id: 0, vector: undefined }]);

  // Leaving the field out must be accepted as well.
  await table.add([{ id: 1 }]);

  // `null` keeps working.
  await table.add([{ id: 2, vector: null }]);

  // A real vector keeps working.
  const expectedVector = Array.from({ length: dim }, () => 0.5);
  await table.add([{ id: 3, vector: expectedVector }]);
  expect(await table.countRows()).toEqual(4);

  // The three rows without a vector read back as null; the last one
  // round-trips its values.
  const rows = await table.query().limit(10).toArray();
  const vectors = rows.map((row) => {
    if (row.vector) {
      return Array.from(row.vector);
    }
    return null;
  });
  expect(vectors).toEqual([null, null, null, expectedVector]);
});
|
||||
},
|
||||
);
|
||||
|
||||
@@ -1454,7 +1503,9 @@ describe("when optimizing a dataset", () => {
|
||||
|
||||
it("delete unverified", async () => {
|
||||
const version = await table.version();
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`;
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
|
||||
version - 1
|
||||
}.manifest`;
|
||||
fs.rmSync(versionFile);
|
||||
|
||||
let stats = await table.optimize({ deleteUnverified: false });
|
||||
|
||||
@@ -1285,19 +1285,36 @@ function validateSchemaEmbeddings(
|
||||
if (isFixedSizeList(field.type)) {
|
||||
field = sanitizeField(field);
|
||||
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
|
||||
// Check if there's an embedding function registered for this field
|
||||
let hasEmbeddingFunction = false;
|
||||
|
||||
// Check schema metadata for embedding functions
|
||||
if (schema.metadata.has("embedding_functions")) {
|
||||
const embeddings = JSON.parse(
|
||||
schema.metadata.get("embedding_functions")!,
|
||||
);
|
||||
if (
|
||||
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
|
||||
embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
|
||||
undefined
|
||||
) {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
|
||||
if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) {
|
||||
hasEmbeddingFunction = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check passed embedding function parameter
|
||||
if (embeddings && embeddings.vectorColumn === field.name) {
|
||||
hasEmbeddingFunction = true;
|
||||
}
|
||||
|
||||
// If the field is nullable AND there's no embedding function, allow undefined/omitted values
|
||||
if (field.nullable && !hasEmbeddingFunction) {
|
||||
fields.push(field);
|
||||
} else {
|
||||
// Either not nullable OR has embedding function - require explicit values
|
||||
if (hasEmbeddingFunction) {
|
||||
// Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
|
||||
fields.push(field);
|
||||
} else {
|
||||
missingEmbeddingFields.push(field);
|
||||
}
|
||||
} else {
|
||||
missingEmbeddingFields.push(field);
|
||||
}
|
||||
} else {
|
||||
fields.push(field);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.22.1",
|
||||
"version": "0.22.2-beta.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.25.2-beta.0"
|
||||
current_version = "0.25.2-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.25.2-beta.0"
|
||||
version = "0.25.2-beta.2"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -10,7 +10,7 @@ dependencies = [
|
||||
"pyarrow>=16",
|
||||
"pydantic>=1.10",
|
||||
"tqdm>=4.27.0",
|
||||
"lance-namespace==0.0.6"
|
||||
"lance-namespace>=0.0.16"
|
||||
]
|
||||
description = "lancedb"
|
||||
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
||||
|
||||
@@ -35,6 +35,8 @@ async def some_table(db_async):
|
||||
"tags": [
|
||||
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
|
||||
],
|
||||
"is_active": [random.choice([True, False]) for _ in range(NROWS)],
|
||||
"data": [random.randbytes(random.randint(0, 128)) for _ in range(NROWS)],
|
||||
}
|
||||
)
|
||||
return await db_async.create_table(
|
||||
@@ -99,10 +101,17 @@ async def test_create_fixed_size_binary_index(some_table: AsyncTable):
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_bitmap_index(some_table: AsyncTable):
|
||||
await some_table.create_index("id", config=Bitmap())
|
||||
await some_table.create_index("is_active", config=Bitmap())
|
||||
await some_table.create_index("data", config=Bitmap())
|
||||
indices = await some_table.list_indices()
|
||||
assert str(indices) == '[Index(Bitmap, columns=["id"], name="id_idx")]'
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert len(indices) == 3
|
||||
assert indices[0].index_type == "Bitmap"
|
||||
assert indices[0].columns == ["id"]
|
||||
assert indices[1].index_type == "Bitmap"
|
||||
assert indices[1].columns == ["is_active"]
|
||||
assert indices[2].index_type == "Bitmap"
|
||||
assert indices[2].columns == ["data"]
|
||||
|
||||
index_name = indices[0].name
|
||||
stats = await some_table.index_stats(index_name)
|
||||
assert stats.index_type == "BITMAP"
|
||||
@@ -111,6 +120,11 @@ async def test_create_bitmap_index(some_table: AsyncTable):
|
||||
assert stats.num_unindexed_rows == 0
|
||||
assert stats.num_indices == 1
|
||||
|
||||
assert (
|
||||
"ScalarIndexQuery"
|
||||
in await some_table.query().where("is_active = TRUE").explain_plan()
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_label_list_index(some_table: AsyncTable):
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.22.1"
|
||||
version = "0.22.2-beta.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -82,6 +82,7 @@ crunchy.workspace = true
|
||||
bytemuck_derive.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = "1"
|
||||
tempfile = "3.5.0"
|
||||
rand = { version = "0.9", features = ["small_rng"] }
|
||||
random_word = { version = "0.4.3", features = ["en"] }
|
||||
|
||||
19
rust/lancedb/Makefile
Normal file
19
rust/lancedb/Makefile
Normal file
@@ -0,0 +1,19 @@
|
||||
#
# Makefile for running tests.
#

# Run all tests. Declared phony (like the targets it depends on) so that a
# file or directory named `all-tests` can never make this target look
# up to date.
all-tests: feature-tests remote-tests
.PHONY: all-tests

# Run tests for every feature. This requires using docker compose to set up
# the environment.
feature-tests:
	../../ci/run_with_docker_compose.sh \
		cargo test --all-features --tests --locked --examples
.PHONY: feature-tests

# Run tests against remote endpoints.
remote-tests:
	../../ci/run_with_test_connection.sh \
		cargo test --features remote --locked
.PHONY: remote-tests
|
||||
@@ -1170,6 +1170,7 @@ mod tests {
|
||||
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
|
||||
use crate::query::QueryBase;
|
||||
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
||||
use crate::test_connection::test_utils::new_test_connection;
|
||||
use arrow::compute::concat_batches;
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
@@ -1185,11 +1186,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
|
||||
assert_eq!(db.uri, uri);
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
assert_eq!(tc.connection.uri, tc.uri);
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
@@ -1255,16 +1253,10 @@ mod tests {
|
||||
assert_eq!(tables, names[..7]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect_s3() {
|
||||
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_open_table() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
let db = tc.connection;
|
||||
|
||||
assert_eq!(db.table_names().execute().await.unwrap().len(), 0);
|
||||
// open non-exist table
|
||||
|
||||
@@ -52,13 +52,13 @@ pub fn infer_vector_columns(
|
||||
for field in reader.schema().fields() {
|
||||
match field.data_type() {
|
||||
DataType::FixedSizeList(sub_field, _) if sub_field.data_type().is_floating() => {
|
||||
columns.push(field.name().to_string());
|
||||
columns.push(field.name().clone());
|
||||
}
|
||||
DataType::List(sub_field) if sub_field.data_type().is_floating() && !strict => {
|
||||
columns_to_infer.insert(field.name().to_string(), None);
|
||||
columns_to_infer.insert(field.name().clone(), None);
|
||||
}
|
||||
DataType::LargeList(sub_field) if sub_field.data_type().is_floating() && !strict => {
|
||||
columns_to_infer.insert(field.name().to_string(), None);
|
||||
columns_to_infer.insert(field.name().clone(), None);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -718,9 +718,9 @@ impl Database for ListingDatabase {
|
||||
.map_err(|e| Error::Lance { source: e })?;
|
||||
|
||||
let version_ref = match (request.source_version, request.source_tag) {
|
||||
(Some(v), None) => Ok(Ref::Version(v)),
|
||||
(Some(v), None) => Ok(Ref::Version(None, Some(v))),
|
||||
(None, Some(tag)) => Ok(Ref::Tag(tag)),
|
||||
(None, None) => Ok(Ref::Version(source_dataset.version().version)),
|
||||
(None, None) => Ok(Ref::Version(None, Some(source_dataset.version().version))),
|
||||
_ => Err(Error::InvalidInput {
|
||||
message: "Cannot specify both source_version and source_tag".to_string(),
|
||||
}),
|
||||
@@ -728,7 +728,7 @@ impl Database for ListingDatabase {
|
||||
|
||||
let target_uri = self.table_uri(&request.target_table_name)?;
|
||||
source_dataset
|
||||
.shallow_clone(&target_uri, version_ref, storage_params)
|
||||
.shallow_clone(&target_uri, version_ref, Some(storage_params))
|
||||
.await
|
||||
.map_err(|e| Error::Lance { source: e })?;
|
||||
|
||||
|
||||
@@ -206,6 +206,7 @@ pub mod query;
|
||||
pub mod remote;
|
||||
pub mod rerankers;
|
||||
pub mod table;
|
||||
pub mod test_connection;
|
||||
pub mod utils;
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
@@ -647,7 +647,7 @@ impl From<StorageOptions> for RemoteOptions {
|
||||
let mut filtered = HashMap::new();
|
||||
for opt in supported_opts {
|
||||
if let Some(v) = options.0.get(opt) {
|
||||
filtered.insert(opt.to_string(), v.to_string());
|
||||
filtered.insert(opt.to_string(), v.clone());
|
||||
}
|
||||
}
|
||||
Self::new(filtered)
|
||||
|
||||
@@ -1383,30 +1383,35 @@ impl Table {
|
||||
}
|
||||
|
||||
pub struct NativeTags {
|
||||
inner: LanceTags,
|
||||
dataset: dataset::DatasetConsistencyWrapper,
|
||||
}
|
||||
#[async_trait]
|
||||
impl Tags for NativeTags {
|
||||
async fn list(&self) -> Result<HashMap<String, TagContents>> {
|
||||
Ok(self.inner.list().await?)
|
||||
let dataset = self.dataset.get().await?;
|
||||
Ok(dataset.tags().list().await?)
|
||||
}
|
||||
|
||||
async fn get_version(&self, tag: &str) -> Result<u64> {
|
||||
Ok(self.inner.get_version(tag).await?)
|
||||
let dataset = self.dataset.get().await?;
|
||||
Ok(dataset.tags().get_version(tag).await?)
|
||||
}
|
||||
|
||||
async fn create(&mut self, tag: &str, version: u64) -> Result<()> {
|
||||
self.inner.create(tag, version).await?;
|
||||
let dataset = self.dataset.get().await?;
|
||||
dataset.tags().create(tag, version).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete(&mut self, tag: &str) -> Result<()> {
|
||||
self.inner.delete(tag).await?;
|
||||
let dataset = self.dataset.get().await?;
|
||||
dataset.tags().delete(tag).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn update(&mut self, tag: &str, version: u64) -> Result<()> {
|
||||
self.inner.update(tag, version).await?;
|
||||
let dataset = self.dataset.get().await?;
|
||||
dataset.tags().update(tag, version).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1780,13 +1785,13 @@ impl NativeTable {
|
||||
BuiltinIndexType::BTree,
|
||||
)))
|
||||
} else {
|
||||
return Err(Error::InvalidInput {
|
||||
Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"there are no indices supported for the field `{}` with the data type {}",
|
||||
field.name(),
|
||||
field.data_type()
|
||||
),
|
||||
});
|
||||
})?
|
||||
}
|
||||
}
|
||||
Index::BTree(_) => {
|
||||
@@ -2440,10 +2445,8 @@ impl BaseTable for NativeTable {
|
||||
}
|
||||
|
||||
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
|
||||
let dataset = self.dataset.get().await?;
|
||||
|
||||
Ok(Box::new(NativeTags {
|
||||
inner: dataset.tags.clone(),
|
||||
dataset: self.dataset.clone(),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -2760,6 +2763,7 @@ mod tests {
|
||||
RecordBatchReader, StringArray, TimestampMillisecondArray, TimestampNanosecondArray,
|
||||
UInt32Array,
|
||||
};
|
||||
use arrow_array::{BinaryArray, LargeBinaryArray};
|
||||
use arrow_data::ArrayDataBuilder;
|
||||
use arrow_schema::{DataType, Field, Schema, TimeUnit};
|
||||
use futures::TryStreamExt;
|
||||
@@ -3725,6 +3729,10 @@ mod tests {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("category", DataType::Utf8, true),
|
||||
Field::new("large_category", DataType::LargeUtf8, true),
|
||||
Field::new("is_active", DataType::Boolean, true),
|
||||
Field::new("data", DataType::Binary, true),
|
||||
Field::new("large_data", DataType::LargeBinary, true),
|
||||
]));
|
||||
|
||||
let batch = RecordBatch::try_new(
|
||||
@@ -3734,6 +3742,16 @@ mod tests {
|
||||
Arc::new(StringArray::from_iter_values(
|
||||
(0..100).map(|i| format!("category_{}", i % 5)),
|
||||
)),
|
||||
Arc::new(LargeStringArray::from_iter_values(
|
||||
(0..100).map(|i| format!("large_category_{}", i % 5)),
|
||||
)),
|
||||
Arc::new(BooleanArray::from_iter((0..100).map(|i| Some(i % 2 == 0)))),
|
||||
Arc::new(BinaryArray::from_iter_values(
|
||||
(0_u32..100).map(|i| i.to_le_bytes()),
|
||||
)),
|
||||
Arc::new(LargeBinaryArray::from_iter_values(
|
||||
(0_u32..100).map(|i| i.to_le_bytes()),
|
||||
)),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
@@ -3754,12 +3772,58 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Create bitmap index on the "is_active" column
|
||||
table
|
||||
.create_index(&["is_active"], Index::Bitmap(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Create bitmap index on the "data" column
|
||||
table
|
||||
.create_index(&["data"], Index::Bitmap(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Create bitmap index on the "large_data" column
|
||||
table
|
||||
.create_index(&["large_data"], Index::Bitmap(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Create bitmap index on the "large_category" column
|
||||
table
|
||||
.create_index(&["large_category"], Index::Bitmap(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify the index was created
|
||||
let index_configs = table.list_indices().await.unwrap();
|
||||
assert_eq!(index_configs.len(), 1);
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
assert_eq!(index_configs.len(), 5);
|
||||
|
||||
let mut configs_iter = index_configs.into_iter();
|
||||
let index = configs_iter.next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.columns, vec!["category".to_string()]);
|
||||
|
||||
let index = configs_iter.next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.columns, vec!["is_active".to_string()]);
|
||||
|
||||
let index = configs_iter.next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.columns, vec!["data".to_string()]);
|
||||
|
||||
let index = configs_iter.next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.columns, vec!["large_data".to_string()]);
|
||||
|
||||
let index = configs_iter.next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.columns, vec!["large_category".to_string()]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -172,7 +172,7 @@ impl TableProvider for BaseTableAdapter {
|
||||
if let Some(projection) = projection {
|
||||
let field_names = projection
|
||||
.iter()
|
||||
.map(|i| self.schema.field(*i).name().to_string())
|
||||
.map(|i| self.schema.field(*i).name().clone())
|
||||
.collect();
|
||||
query.select = Select::Columns(field_names);
|
||||
}
|
||||
|
||||
@@ -98,8 +98,9 @@ impl DatasetRef {
|
||||
}
|
||||
Self::TimeTravel { dataset, version } => {
|
||||
let should_checkout = match &target_ref {
|
||||
refs::Ref::Version(target_ver) => version != target_ver,
|
||||
refs::Ref::Tag(_) => true, // Always checkout for tags
|
||||
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
|
||||
refs::Ref::Version(_, None) => true, // No specific version, always checkout
|
||||
refs::Ref::Tag(_) => true, // Always checkout for tags
|
||||
};
|
||||
|
||||
if should_checkout {
|
||||
|
||||
126
rust/lancedb/src/test_connection.rs
Normal file
126
rust/lancedb/src/test_connection.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
//! Functions for testing connections.
|
||||
|
||||
#[cfg(test)]
pub mod test_utils {
    use regex::Regex;
    use std::env;
    use std::io::{BufRead, BufReader};
    use std::process::{Child, ChildStdout, Command, Stdio};

    use crate::{connect, Connection};
    use anyhow::{bail, Context, Result};
    use tempfile::{tempdir, TempDir};

    /// A connection for use in tests, bundled with the resources that back it.
    ///
    /// Dropping the value tears those resources down. Fields are dropped in
    /// declaration order, so the helper process (if any) is stopped before the
    /// temporary directory handed to it is removed.
    pub struct TestConnection {
        /// The URI the connection was opened with.
        pub uri: String,
        /// The connection under test.
        pub connection: Connection,
        /// Helper process started by the connection script, if one was used.
        _process: Option<TestProcess>,
        /// Temporary directory backing the connection.
        _temp_dir: Option<TempDir>,
    }

    /// Owns a spawned helper process and stops it when dropped.
    struct TestProcess {
        child: Child,
    }

    impl Drop for TestProcess {
        fn drop(&mut self) {
            // Best effort: `kill` fails when the child has already exited, which
            // is fine during teardown.
            let _ = self.child.kill();
            // Reap the child so it does not linger as a zombie for the rest of
            // the test run.
            let _ = self.child.wait();
        }
    }

    /// Creates a connection for tests.
    ///
    /// When the `CREATE_LANCEDB_TEST_CONNECTION_SCRIPT` environment variable is
    /// set, its value is run as a script and a remote connection is made to the
    /// port that script reports. Otherwise a local connection backed by a
    /// temporary directory is returned.
    pub async fn new_test_connection() -> Result<TestConnection> {
        match env::var("CREATE_LANCEDB_TEST_CONNECTION_SCRIPT") {
            Ok(script_path) => new_remote_connection(&script_path).await,
            Err(_) => new_local_connection().await,
        }
    }

    /// Runs `script_path` with a fresh temporary directory as its only
    /// argument, waits for it to report a port on stdout, and connects to it.
    async fn new_remote_connection(script_path: &str) -> Result<TestConnection> {
        let temp_dir = tempdir()?;
        let child = Command::new(script_path)
            .stdin(Stdio::null())
            .stdout(Stdio::piped())
            // Nothing reads the child's stderr, so let it share ours: a piped
            // but undrained stderr would stall the child once the pipe buffer
            // fills, and would hide its diagnostics from the test output.
            .stderr(Stdio::inherit())
            .arg(temp_dir.path())
            .spawn()
            .with_context(|| format!("Unable to run {}", script_path))?;
        // Wrap the child right away so it is stopped on every early return below.
        let mut process = TestProcess { child };

        let stdout = process
            .child
            .stdout
            .take()
            .expect("stdout is configured as piped above");
        // NOTE(review): the reader is dropped once the port has been read, which
        // closes our end of the child's stdout pipe -- confirm the helper
        // tolerates writing to a closed stdout.
        let port = read_process_port(BufReader::new(stdout))?;

        let uri = "db://test";
        let host_override = format!("http://localhost:{}", port);
        let connection = create_new_connection(uri, &host_override).await?;
        Ok(TestConnection {
            uri: uri.to_string(),
            connection,
            _process: Some(process),
            _temp_dir: Some(temp_dir),
        })
    }

    /// Reads the child's output line by line until the "now listening"
    /// announcement appears, and returns the announced port.
    ///
    /// Fails if reading fails or the output ends before the announcement.
    fn read_process_port(mut stdout: BufReader<ChildStdout>) -> Result<String> {
        // Dots are escaped so only the literal address matches, and only digits
        // are captured so trailing characters never end up in the port.
        let re = Regex::new(r"Query node now listening on 0\.0\.0\.0:(\d+)").unwrap();
        let mut line = String::new();
        loop {
            // `read_line` appends, so reset the buffer to inspect one line at a
            // time instead of re-scanning everything read so far.
            line.clear();
            match stdout.read_line(&mut line) {
                Ok(0) => {
                    bail!("read_process_port: hit EOF before reading port from process output.")
                }
                Ok(_) => {}
                Err(err) => bail!(
                    "read_process_port: error while reading from process output: {}",
                    err
                ),
            }
            if let Some(caps) = re.captures(&line) {
                return Ok(caps[1].to_string());
            }
        }
    }

    /// Opens a remote connection to `uri`, routed to `host_override`.
    #[cfg(feature = "remote")]
    async fn create_new_connection(
        uri: &str,
        host_override: &str,
    ) -> crate::error::Result<Connection> {
        connect(uri)
            .region("us-east-1")
            .api_key("sk_localtest")
            .host_override(host_override)
            .execute()
            .await
    }

    /// Stand-in used when the crate is built without the `remote` feature;
    /// always panics.
    #[cfg(not(feature = "remote"))]
    async fn create_new_connection(
        _uri: &str,
        _host_override: &str,
    ) -> crate::error::Result<Connection> {
        panic!("remote feature not supported");
    }

    /// Opens a local connection backed by a fresh temporary directory.
    async fn new_local_connection() -> Result<TestConnection> {
        let temp_dir = tempdir()?;
        let uri = temp_dir
            .path()
            .to_str()
            .context("temporary directory path is not valid UTF-8")?
            .to_string();
        let connection = connect(&uri).execute().await?;
        Ok(TestConnection {
            uri,
            connection,
            _process: None,
            _temp_dir: Some(temp_dir),
        })
    }
}
|
||||
@@ -174,7 +174,7 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
|
||||
),
|
||||
})
|
||||
} else {
|
||||
Ok(candidates[0].to_string())
|
||||
Ok(candidates[0].clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -195,7 +195,15 @@ pub fn supported_btree_data_type(dtype: &DataType) -> bool {
|
||||
}
|
||||
|
||||
pub fn supported_bitmap_data_type(dtype: &DataType) -> bool {
|
||||
dtype.is_integer() || matches!(dtype, DataType::Utf8)
|
||||
dtype.is_integer()
|
||||
|| matches!(
|
||||
dtype,
|
||||
DataType::Utf8
|
||||
| DataType::LargeUtf8
|
||||
| DataType::Binary
|
||||
| DataType::LargeBinary
|
||||
| DataType::Boolean
|
||||
)
|
||||
}
|
||||
|
||||
pub fn supported_label_list_data_type(dtype: &DataType) -> bool {
|
||||
|
||||
Reference in New Issue
Block a user