mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-26 02:20:40 +00:00
Compare commits
35 Commits
python-v0.
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cca6a7c989 | ||
|
|
ad96489114 | ||
|
|
76429730c0 | ||
|
|
874b74dd3c | ||
|
|
61de47f3a5 | ||
|
|
f4d613565e | ||
|
|
410ab9b6fe | ||
|
|
1d6e00b902 | ||
|
|
a0228036ae | ||
|
|
d8fc071a7d | ||
|
|
e6fd8d071e | ||
|
|
670dcca551 | ||
|
|
ed7e01a58b | ||
|
|
3450ccaf7f | ||
|
|
9b229f1e7c | ||
|
|
f5b21c0aa4 | ||
|
|
e927924d26 | ||
|
|
11a4966bfc | ||
|
|
dd5aaa72dc | ||
|
|
3a200d77ef | ||
|
|
bd09c53938 | ||
|
|
0b18e33180 | ||
|
|
c89240b16c | ||
|
|
099ff355a4 | ||
|
|
c5995fda67 | ||
|
|
25eb1fbfa4 | ||
|
|
4ac41c5c3f | ||
|
|
9a5b0398ec | ||
|
|
d1d720d08a | ||
|
|
c2e543f1b7 | ||
|
|
216c1b5f77 | ||
|
|
fc1867da83 | ||
|
|
f951da2b00 | ||
|
|
6530d82690 | ||
|
|
b3fc9c444f |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.27.0-beta.4"
|
||||
current_version = "0.27.2-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
6
.github/workflows/dev.yml
vendored
6
.github/workflows/dev.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
name: Label PR
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: srvaroa/labeler@master
|
||||
- uses: srvaroa/labeler@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
commitlint:
|
||||
@@ -24,7 +24,7 @@ jobs:
|
||||
name: Verify PR title / description conforms to semantic-release
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/setup-node@v3
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: "18"
|
||||
# These rules are disabled because Github will always ensure there
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
${{ github.event.pull_request.body }}
|
||||
- if: failure()
|
||||
uses: actions/github-script@v6
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const message = `**ACTION NEEDED**
|
||||
|
||||
4
.github/workflows/docs.yml
vendored
4
.github/workflows/docs.yml
vendored
@@ -53,7 +53,7 @@ jobs:
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
- name: Set up node
|
||||
uses: actions/setup-node@v3
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
run: |
|
||||
PYTHONPATH=. mkdocs build
|
||||
- name: Setup Pages
|
||||
uses: actions/configure-pages@v2
|
||||
uses: actions/configure-pages@v5
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
|
||||
9
.github/workflows/nodejs.yml
vendored
9
.github/workflows/nodejs.yml
vendored
@@ -7,6 +7,7 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- nodejs/**
|
||||
- rust/**
|
||||
- docs/src/js/**
|
||||
@@ -37,7 +38,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v3
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
@@ -77,7 +78,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v3
|
||||
- uses: actions/setup-node@v4
|
||||
name: Setup Node.js 20 for build
|
||||
with:
|
||||
# @napi-rs/cli v3 requires Node >= 20.12 (via @inquirer/prompts@8).
|
||||
@@ -94,7 +95,7 @@ jobs:
|
||||
run: |
|
||||
npm ci --include=optional
|
||||
npm run build:debug -- --profile ci
|
||||
- uses: actions/setup-node@v3
|
||||
- uses: actions/setup-node@v4
|
||||
name: Setup Node.js ${{ matrix.node-version }} for test
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
@@ -143,7 +144,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v3
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
|
||||
17
.github/workflows/npm-publish.yml
vendored
17
.github/workflows/npm-publish.yml
vendored
@@ -19,6 +19,7 @@ on:
|
||||
paths:
|
||||
- .github/workflows/npm-publish.yml
|
||||
- Cargo.toml # Change in dependency frequently breaks builds
|
||||
- Cargo.lock
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -124,7 +125,12 @@ jobs:
|
||||
pre_build: |-
|
||||
set -e &&
|
||||
apt-get update &&
|
||||
apt-get install -y protobuf-compiler pkg-config
|
||||
apt-get install -y protobuf-compiler pkg-config &&
|
||||
# The base image (manylinux2014-cross) sets TARGET_CC to the old
|
||||
# GCC 4.8 cross-compiler. aws-lc-sys checks TARGET_CC before CC,
|
||||
# so it picks up GCC even though the napi-rs image sets CC=clang.
|
||||
# Override to use the image's clang-18 which supports -fuse-ld=lld.
|
||||
export TARGET_CC=clang TARGET_CXX=clang++
|
||||
- target: x86_64-unknown-linux-musl
|
||||
# This one seems to need some extra memory
|
||||
host: ubuntu-2404-8x-x64
|
||||
@@ -144,9 +150,10 @@ jobs:
|
||||
set -e &&
|
||||
apt-get update &&
|
||||
apt-get install -y protobuf-compiler pkg-config &&
|
||||
# https://github.com/aws/aws-lc-rs/issues/737#issuecomment-2725918627
|
||||
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc/aarch64-unknown-linux-gnu/4.8.5/crtbeginS.o /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/crtbeginS.o &&
|
||||
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/gcc &&
|
||||
export TARGET_CC=clang TARGET_CXX=clang++ &&
|
||||
# The manylinux2014 sysroot has glibc 2.17 headers which lack
|
||||
# AT_HWCAP2 (added in Linux 3.17). Define it for aws-lc-sys.
|
||||
export CFLAGS="$CFLAGS -DAT_HWCAP2=26" &&
|
||||
rustup target add aarch64-unknown-linux-gnu
|
||||
- target: aarch64-unknown-linux-musl
|
||||
host: ubuntu-2404-8x-x64
|
||||
@@ -266,7 +273,7 @@ jobs:
|
||||
- target: x86_64-unknown-linux-gnu
|
||||
host: ubuntu-latest
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
host: buildjet-16vcpu-ubuntu-2204-arm
|
||||
host: ubuntu-2404-8x-arm64
|
||||
node:
|
||||
- '20'
|
||||
runs-on: ${{ matrix.settings.host }}
|
||||
|
||||
1
.github/workflows/pypi-publish.yml
vendored
1
.github/workflows/pypi-publish.yml
vendored
@@ -9,6 +9,7 @@ on:
|
||||
paths:
|
||||
- .github/workflows/pypi-publish.yml
|
||||
- Cargo.toml # Change in dependency frequently breaks builds
|
||||
- Cargo.lock
|
||||
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
|
||||
1
.github/workflows/python.yml
vendored
1
.github/workflows/python.yml
vendored
@@ -7,6 +7,7 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- python/**
|
||||
- rust/**
|
||||
- .github/workflows/python.yml
|
||||
|
||||
17
.github/workflows/rust.yml
vendored
17
.github/workflows/rust.yml
vendored
@@ -7,6 +7,7 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust/**
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
@@ -206,14 +207,14 @@ jobs:
|
||||
- name: Downgrade dependencies
|
||||
# These packages have newer requirements for MSRV
|
||||
run: |
|
||||
cargo update -p aws-sdk-bedrockruntime --precise 1.64.0
|
||||
cargo update -p aws-sdk-dynamodb --precise 1.55.0
|
||||
cargo update -p aws-config --precise 1.5.10
|
||||
cargo update -p aws-sdk-kms --precise 1.51.0
|
||||
cargo update -p aws-sdk-s3 --precise 1.65.0
|
||||
cargo update -p aws-sdk-sso --precise 1.50.0
|
||||
cargo update -p aws-sdk-ssooidc --precise 1.51.0
|
||||
cargo update -p aws-sdk-sts --precise 1.51.0
|
||||
cargo update -p aws-sdk-bedrockruntime --precise 1.77.0
|
||||
cargo update -p aws-sdk-dynamodb --precise 1.68.0
|
||||
cargo update -p aws-config --precise 1.6.0
|
||||
cargo update -p aws-sdk-kms --precise 1.63.0
|
||||
cargo update -p aws-sdk-s3 --precise 1.79.0
|
||||
cargo update -p aws-sdk-sso --precise 1.62.0
|
||||
cargo update -p aws-sdk-ssooidc --precise 1.63.0
|
||||
cargo update -p aws-sdk-sts --precise 1.63.0
|
||||
cargo update -p home --precise 0.5.9
|
||||
- name: cargo +${{ matrix.msrv }} check
|
||||
env:
|
||||
|
||||
2379
Cargo.lock
generated
2379
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
28
Cargo.toml
28
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=4.0.0-rc.3", default-features = false, "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=4.0.0-rc.3", default-features = false, "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=4.0.0-rc.3", default-features = false, "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=4.0.0-rc.3", "tag" = "v4.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -26,6 +27,7 @@ SEMVER_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
@functools.total_ordering
|
||||
@dataclass(frozen=True)
|
||||
class SemVer:
|
||||
major: int
|
||||
@@ -156,7 +158,9 @@ def read_current_version(repo_root: Path) -> str:
|
||||
|
||||
|
||||
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
|
||||
return max(tags, key=lambda tag: tag.semver)
|
||||
# Stable releases (no prerelease) are always preferred over pre-releases.
|
||||
# Within each group, standard semver ordering applies.
|
||||
return max(tags, key=lambda tag: (not tag.semver.prerelease, tag.semver))
|
||||
|
||||
|
||||
def write_outputs(args: argparse.Namespace, payload: dict) -> None:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
localstack:
|
||||
image: localstack/localstack:3.3
|
||||
image: localstack/localstack:4.0
|
||||
ports:
|
||||
- 4566:4566
|
||||
environment:
|
||||
|
||||
@@ -1,27 +1,27 @@
|
||||
#Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
|
||||
#Usage docker build -t lancedb:latest -f Dockerfile .
|
||||
FROM python:3.10-slim-buster
|
||||
# Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
|
||||
# Usage: docker build -t lancedb:latest -f Dockerfile .
|
||||
FROM python:3.12-slim-bookworm
|
||||
|
||||
# Install Rust
|
||||
RUN apt-get update && apt-get install -y curl build-essential && \
|
||||
curl https://sh.rustup.rs -sSf | sh -s -- -y
|
||||
|
||||
# Set the environment variable for Rust
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
# Install protobuf compiler
|
||||
RUN apt-get install -y protobuf-compiler && \
|
||||
# Install build dependencies in a single layer
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
build-essential \
|
||||
protobuf-compiler \
|
||||
git \
|
||||
ca-certificates && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN apt-get -y update &&\
|
||||
apt-get -y upgrade && \
|
||||
apt-get -y install git
|
||||
# Install Rust (pinned installer, non-interactive)
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
|
||||
|
||||
# Set the environment variable for Rust
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
# Verify installations
|
||||
RUN python --version && \
|
||||
rustc --version && \
|
||||
protoc --version
|
||||
|
||||
RUN pip install tantivy lancedb
|
||||
RUN pip install --no-cache-dir tantivy lancedb
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
mkdocs==1.5.3
|
||||
mkdocs==1.6.1
|
||||
mkdocs-jupyter==0.24.1
|
||||
mkdocs-material==9.5.3
|
||||
mkdocs-autorefs<=1.0
|
||||
mkdocstrings[python]==0.25.2
|
||||
griffe
|
||||
mkdocs-render-swagger-plugin
|
||||
pydantic
|
||||
mkdocs-redirects
|
||||
mkdocs-material==9.6.23
|
||||
mkdocs-autorefs>=0.5,<=1.0
|
||||
mkdocstrings[python]>=0.24,<1.0
|
||||
griffe>=0.40,<1.0
|
||||
mkdocs-render-swagger-plugin>=0.1.0
|
||||
pydantic>=2.0,<3.0
|
||||
mkdocs-redirects>=1.2.0
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.27.0-beta.4</version>
|
||||
<version>0.27.2-beta.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -71,11 +71,12 @@ Add new columns with defined values.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
||||
pairs of column names and
|
||||
the SQL expression to use to calculate the value of the new column. These
|
||||
expressions will be evaluated for each row in the table, and can
|
||||
reference existing columns in the table.
|
||||
* **newColumnTransforms**: `Field`<`any`> \| `Field`<`any`>[] \| `Schema`<`any`> \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
||||
Either:
|
||||
- An array of objects with column names and SQL expressions to calculate values
|
||||
- A single Arrow Field defining one column with its data type (column will be initialized with null values)
|
||||
- An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
|
||||
- An Arrow Schema defining columns with their data types (columns will be initialized with null values)
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -484,19 +485,7 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
||||
- Prune: Removes old versions of the dataset
|
||||
- Index: Optimizes the indices, adding new data to existing indices
|
||||
|
||||
Experimental API
|
||||
----------------
|
||||
|
||||
The optimization process is undergoing active development and may change.
|
||||
Our goal with these changes is to improve the performance of optimization and
|
||||
reduce the complexity.
|
||||
|
||||
That being said, it is essential today to run optimize if you want the best
|
||||
performance. It should be stable and safe to use in production, but it our
|
||||
hope that the API may be simplified (or not even need to be called) in the
|
||||
future.
|
||||
|
||||
The frequency an application shoudl call optimize is based on the frequency of
|
||||
The frequency an application should call optimize is based on the frequency of
|
||||
data modifications. If data is frequently added, deleted, or updated then
|
||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
you have added or modified 100,000 or more records or run more than 20 data
|
||||
|
||||
@@ -37,3 +37,12 @@ tbl.optimize({cleanupOlderThan: new Date()});
|
||||
```ts
|
||||
deleteUnverified: boolean;
|
||||
```
|
||||
|
||||
Because they may be part of an in-progress transaction, files newer than
|
||||
7 days old are not deleted by default. If you are sure that there are no
|
||||
in-progress transactions, then you can set this to true to delete all
|
||||
files older than `cleanupOlderThan`.
|
||||
|
||||
**WARNING**: This should only be set to true if you can guarantee that
|
||||
no other process is currently working on this dataset. Otherwise the
|
||||
dataset could be put into a corrupted state.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.0-beta.4</version>
|
||||
<version>0.27.2-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
@@ -56,21 +56,21 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-slf4j2-impl</artifactId>
|
||||
<version>2.24.3</version>
|
||||
<version>2.25.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-core</artifactId>
|
||||
<version>2.24.3</version>
|
||||
<version>2.25.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-api</artifactId>
|
||||
<version>2.24.3</version>
|
||||
<version>2.25.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
20
java/pom.xml
20
java/pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.0-beta.4</version>
|
||||
<version>0.27.2-beta.1</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>3.1.0-beta.2</lance-core.version>
|
||||
<lance-core.version>3.0.1</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
@@ -111,7 +111,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>2.2.1</version>
|
||||
<version>3.3.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
@@ -124,7 +124,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
<version>2.9.1</version>
|
||||
<version>3.11.2</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-javadocs</id>
|
||||
@@ -178,15 +178,15 @@
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
<version>3.1.0</version>
|
||||
<version>3.4.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<version>3.0.2</version>
|
||||
<version>3.3.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<version>3.14.0</version>
|
||||
<configuration>
|
||||
<compilerArgs>
|
||||
<arg>-h</arg>
|
||||
@@ -205,11 +205,11 @@
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>3.0.2</version>
|
||||
<version>3.4.2</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-install-plugin</artifactId>
|
||||
<version>2.5.2</version>
|
||||
<version>3.1.3</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>com.diffplug.spotless</groupId>
|
||||
@@ -327,7 +327,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-gpg-plugin</artifactId>
|
||||
<version>1.5</version>
|
||||
<version>3.2.7</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>sign-artifacts</id>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.27.0-beta.4"
|
||||
version = "0.27.2-beta.1"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
@@ -25,12 +25,12 @@ napi = { version = "3.8.3", default-features = false, features = [
|
||||
] }
|
||||
napi-derive = "3.5.2"
|
||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||
lzma-sys = { version = "*", features = ["static"] }
|
||||
lzma-sys = { version = "0.1", features = ["static"] }
|
||||
log.workspace = true
|
||||
|
||||
# Workaround for build failure until we can fix it.
|
||||
aws-lc-sys = "=0.28.0"
|
||||
aws-lc-rs = "=1.13.0"
|
||||
# Pin to resolve build failures; update periodically for security patches.
|
||||
aws-lc-sys = "=0.38.0"
|
||||
aws-lc-rs = "=1.16.1"
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2.3.1"
|
||||
|
||||
@@ -1259,6 +1259,98 @@ describe("schema evolution", function () {
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
});
|
||||
|
||||
it("can add columns with schema for explicit data types", async function () {
|
||||
const con = await connect(tmpDir.name);
|
||||
const table = await con.createTable("vectors", [
|
||||
{ id: 1n, vector: [0.1, 0.2] },
|
||||
]);
|
||||
|
||||
// Define schema for new columns with explicit data types
|
||||
// Note: All columns must be nullable when using addColumns with Schema
|
||||
// because they are initially populated with null values
|
||||
const newColumnsSchema = new Schema([
|
||||
new Field("price", new Float64(), true),
|
||||
new Field("category", new Utf8(), true),
|
||||
new Field("rating", new Int32(), true),
|
||||
]);
|
||||
|
||||
const result = await table.addColumns(newColumnsSchema);
|
||||
expect(result).toHaveProperty("version");
|
||||
expect(result.version).toBe(2);
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field("id", new Int64(), true),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||
true,
|
||||
),
|
||||
new Field("price", new Float64(), true),
|
||||
new Field("category", new Utf8(), true),
|
||||
new Field("rating", new Int32(), true),
|
||||
]);
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
|
||||
// Verify that new columns are populated with null values
|
||||
const results = await table.query().toArray();
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].price).toBeNull();
|
||||
expect(results[0].category).toBeNull();
|
||||
expect(results[0].rating).toBeNull();
|
||||
});
|
||||
|
||||
it("can add a single column using Field", async function () {
|
||||
const con = await connect(tmpDir.name);
|
||||
const table = await con.createTable("vectors", [
|
||||
{ id: 1n, vector: [0.1, 0.2] },
|
||||
]);
|
||||
|
||||
// Add a single field
|
||||
const priceField = new Field("price", new Float64(), true);
|
||||
const result = await table.addColumns(priceField);
|
||||
expect(result).toHaveProperty("version");
|
||||
expect(result.version).toBe(2);
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field("id", new Int64(), true),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||
true,
|
||||
),
|
||||
new Field("price", new Float64(), true),
|
||||
]);
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
});
|
||||
|
||||
it("can add multiple columns using array of Fields", async function () {
|
||||
const con = await connect(tmpDir.name);
|
||||
const table = await con.createTable("vectors", [
|
||||
{ id: 1n, vector: [0.1, 0.2] },
|
||||
]);
|
||||
|
||||
// Add multiple fields as array
|
||||
const fields = [
|
||||
new Field("price", new Float64(), true),
|
||||
new Field("category", new Utf8(), true),
|
||||
];
|
||||
const result = await table.addColumns(fields);
|
||||
expect(result).toHaveProperty("version");
|
||||
expect(result.version).toBe(2);
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field("id", new Int64(), true),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||
true,
|
||||
),
|
||||
new Field("price", new Float64(), true),
|
||||
new Field("category", new Utf8(), true),
|
||||
]);
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
});
|
||||
|
||||
it("can alter the columns in the schema", async function () {
|
||||
const con = await connect(tmpDir.name);
|
||||
const schema = new Schema([
|
||||
|
||||
599
nodejs/examples/package-lock.json
generated
599
nodejs/examples/package-lock.json
generated
@@ -30,12 +30,15 @@
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
@@ -91,14 +94,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
"version": "7.26.2",
|
||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
|
||||
"integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==",
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
|
||||
"integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/helper-validator-identifier": "^7.25.9",
|
||||
"@babel/helper-validator-identifier": "^7.28.5",
|
||||
"js-tokens": "^4.0.0",
|
||||
"picocolors": "^1.0.0"
|
||||
"picocolors": "^1.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
@@ -233,19 +237,21 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/helper-string-parser": {
|
||||
"version": "7.25.9",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz",
|
||||
"integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==",
|
||||
"version": "7.27.1",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz",
|
||||
"integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/helper-validator-identifier": {
|
||||
"version": "7.25.9",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz",
|
||||
"integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==",
|
||||
"version": "7.28.5",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz",
|
||||
"integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
@@ -260,25 +266,27 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/helpers": {
|
||||
"version": "7.26.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.0.tgz",
|
||||
"integrity": "sha512-tbhNuIxNcVb21pInl3ZSjksLCvgdZy9KwJ8brv993QtIVKJBBkYXz4q4ZbAv31GdnC+R90np23L5FbEBlthAEw==",
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.6.tgz",
|
||||
"integrity": "sha512-xOBvwq86HHdB7WUDTfKfT/Vuxh7gElQ+Sfti2Cy6yIWNW05P8iUslOVcZ4/sKbE+/jQaukQAdz/gf3724kYdqw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/template": "^7.25.9",
|
||||
"@babel/types": "^7.26.0"
|
||||
"@babel/template": "^7.28.6",
|
||||
"@babel/types": "^7.28.6"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/parser": {
|
||||
"version": "7.26.2",
|
||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.2.tgz",
|
||||
"integrity": "sha512-DWMCZH9WA4Maitz2q21SRKHo9QXZxkDsbNZoVD62gusNtNBBqDg9i7uOhASfTfIGNzW+O+r7+jAlM8dwphcJKQ==",
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.0.tgz",
|
||||
"integrity": "sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/types": "^7.26.0"
|
||||
"@babel/types": "^7.29.0"
|
||||
},
|
||||
"bin": {
|
||||
"parser": "bin/babel-parser.js"
|
||||
@@ -510,14 +518,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/template": {
|
||||
"version": "7.25.9",
|
||||
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.25.9.tgz",
|
||||
"integrity": "sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==",
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz",
|
||||
"integrity": "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/code-frame": "^7.25.9",
|
||||
"@babel/parser": "^7.25.9",
|
||||
"@babel/types": "^7.25.9"
|
||||
"@babel/code-frame": "^7.28.6",
|
||||
"@babel/parser": "^7.28.6",
|
||||
"@babel/types": "^7.28.6"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
@@ -542,13 +551,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/types": {
|
||||
"version": "7.26.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.0.tgz",
|
||||
"integrity": "sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA==",
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz",
|
||||
"integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/helper-string-parser": "^7.25.9",
|
||||
"@babel/helper-validator-identifier": "^7.25.9"
|
||||
"@babel/helper-string-parser": "^7.27.1",
|
||||
"@babel/helper-validator-identifier": "^7.28.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
@@ -1151,95 +1161,6 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui": {
|
||||
"version": "8.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
|
||||
"integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
|
||||
"dependencies": {
|
||||
"string-width": "^5.1.2",
|
||||
"string-width-cjs": "npm:string-width@^4.2.0",
|
||||
"strip-ansi": "^7.0.1",
|
||||
"strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
|
||||
"wrap-ansi": "^8.1.0",
|
||||
"wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/ansi-regex": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz",
|
||||
"integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/ansi-regex?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/ansi-styles": {
|
||||
"version": "6.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
|
||||
"integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/emoji-regex": {
|
||||
"version": "9.2.2",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
|
||||
"integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/string-width": {
|
||||
"version": "5.1.2",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
|
||||
"integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
|
||||
"dependencies": {
|
||||
"eastasianwidth": "^0.2.0",
|
||||
"emoji-regex": "^9.2.2",
|
||||
"strip-ansi": "^7.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/strip-ansi": {
|
||||
"version": "7.1.0",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
|
||||
"integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
|
||||
"dependencies": {
|
||||
"ansi-regex": "^6.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/wrap-ansi": {
|
||||
"version": "8.1.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
|
||||
"integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^6.1.0",
|
||||
"string-width": "^5.0.1",
|
||||
"strip-ansi": "^7.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/fs-minipass": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
|
||||
@@ -1606,15 +1527,6 @@
|
||||
"resolved": "../dist",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@pkgjs/parseargs": {
|
||||
"version": "0.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
|
||||
"integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/aspromise": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||
@@ -1846,6 +1758,7 @@
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
||||
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
@@ -1854,6 +1767,7 @@
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
|
||||
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"color-convert": "^2.0.1"
|
||||
},
|
||||
@@ -2019,13 +1933,15 @@
|
||||
"node_modules/balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -2102,6 +2018,19 @@
|
||||
"integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/call-bind-apply-helpers": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
|
||||
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0",
|
||||
"function-bind": "^1.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/callsites": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
|
||||
@@ -2298,9 +2227,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/cross-spawn": {
|
||||
"version": "7.0.3",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
|
||||
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
|
||||
"version": "7.0.6",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
|
||||
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"path-key": "^3.1.0",
|
||||
"shebang-command": "^2.0.0",
|
||||
@@ -2384,10 +2315,19 @@
|
||||
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eastasianwidth": {
|
||||
"version": "0.2.0",
|
||||
"resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
|
||||
"integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
|
||||
"node_modules/dunder-proto": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
||||
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"call-bind-apply-helpers": "^1.0.1",
|
||||
"es-errors": "^1.3.0",
|
||||
"gopd": "^1.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/ejs": {
|
||||
"version": "3.1.10",
|
||||
@@ -2425,7 +2365,8 @@
|
||||
"node_modules/emoji-regex": {
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/error-ex": {
|
||||
"version": "1.3.2",
|
||||
@@ -2442,6 +2383,51 @@
|
||||
"integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/es-define-property": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
|
||||
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/es-errors": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
|
||||
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/es-object-atoms": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
|
||||
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/es-set-tostringtag": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
|
||||
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0",
|
||||
"get-intrinsic": "^1.2.6",
|
||||
"has-tostringtag": "^1.0.2",
|
||||
"hasown": "^2.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/escalade": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
|
||||
@@ -2554,19 +2540,21 @@
|
||||
}
|
||||
},
|
||||
"node_modules/filelist/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/filelist/node_modules/minimatch": {
|
||||
"version": "5.1.6",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz",
|
||||
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
|
||||
"version": "5.1.9",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz",
|
||||
"integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
},
|
||||
@@ -2604,39 +2592,16 @@
|
||||
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
||||
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
|
||||
},
|
||||
"node_modules/foreground-child": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz",
|
||||
"integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==",
|
||||
"dependencies": {
|
||||
"cross-spawn": "^7.0.0",
|
||||
"signal-exit": "^4.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/foreground-child/node_modules/signal-exit": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
|
||||
"integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz",
|
||||
"integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==",
|
||||
"version": "4.0.5",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
|
||||
"integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"es-set-tostringtag": "^2.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
@@ -2684,7 +2649,6 @@
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
|
||||
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
|
||||
"dev": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
@@ -2707,6 +2671,30 @@
|
||||
"node": "6.* || 8.* || >= 10.*"
|
||||
}
|
||||
},
|
||||
"node_modules/get-intrinsic": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
|
||||
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"call-bind-apply-helpers": "^1.0.2",
|
||||
"es-define-property": "^1.0.1",
|
||||
"es-errors": "^1.3.0",
|
||||
"es-object-atoms": "^1.1.1",
|
||||
"function-bind": "^1.1.2",
|
||||
"get-proto": "^1.0.1",
|
||||
"gopd": "^1.2.0",
|
||||
"has-symbols": "^1.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"math-intrinsics": "^1.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/get-package-type": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz",
|
||||
@@ -2716,6 +2704,19 @@
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-proto": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
|
||||
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dunder-proto": "^1.0.1",
|
||||
"es-object-atoms": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/get-stream": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
|
||||
@@ -2758,6 +2759,18 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/gopd": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
|
||||
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/graceful-fs": {
|
||||
"version": "4.2.11",
|
||||
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
|
||||
@@ -2778,11 +2791,37 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/has-symbols": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
|
||||
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/has-tostringtag": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
|
||||
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"has-symbols": "^1.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/hasown": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
|
||||
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"function-bind": "^1.1.2"
|
||||
},
|
||||
@@ -2882,6 +2921,7 @@
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
|
||||
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
@@ -2919,7 +2959,8 @@
|
||||
"node_modules/isexe": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
|
||||
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
|
||||
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/istanbul-lib-coverage": {
|
||||
"version": "3.2.2",
|
||||
@@ -2987,20 +3028,6 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/jackspeak": {
|
||||
"version": "3.4.3",
|
||||
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz",
|
||||
"integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==",
|
||||
"dependencies": {
|
||||
"@isaacs/cliui": "^8.0.2"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@pkgjs/parseargs": "^0.11.0"
|
||||
}
|
||||
},
|
||||
"node_modules/jake": {
|
||||
"version": "10.9.2",
|
||||
"resolved": "https://registry.npmjs.org/jake/-/jake-10.9.2.tgz",
|
||||
@@ -3605,10 +3632,11 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/js-yaml": {
|
||||
"version": "3.14.1",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz",
|
||||
"integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==",
|
||||
"version": "3.14.2",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz",
|
||||
"integrity": "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"argparse": "^1.0.7",
|
||||
"esprima": "^4.0.0"
|
||||
@@ -3728,6 +3756,15 @@
|
||||
"tmpl": "1.0.5"
|
||||
}
|
||||
},
|
||||
"node_modules/math-intrinsics": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
|
||||
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/merge-stream": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
|
||||
@@ -3776,10 +3813,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
|
||||
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
|
||||
"version": "3.1.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
|
||||
"integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
},
|
||||
@@ -3796,31 +3834,17 @@
|
||||
}
|
||||
},
|
||||
"node_modules/minizlib": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
|
||||
"integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz",
|
||||
"integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"minipass": "^7.0.4",
|
||||
"rimraf": "^5.0.5"
|
||||
"minipass": "^7.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/mkdirp": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
|
||||
"integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
|
||||
"bin": {
|
||||
"mkdirp": "dist/cjs/src/bin.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/ms": {
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||
@@ -4010,11 +4034,6 @@
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/package-json-from-dist": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz",
|
||||
"integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw=="
|
||||
},
|
||||
"node_modules/parse-json": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz",
|
||||
@@ -4055,6 +4074,7 @@
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
|
||||
"integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
@@ -4065,26 +4085,6 @@
|
||||
"integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/path-scurry": {
|
||||
"version": "1.11.1",
|
||||
"resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz",
|
||||
"integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==",
|
||||
"dependencies": {
|
||||
"lru-cache": "^10.2.0",
|
||||
"minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16 || 14 >=14.18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/path-scurry/node_modules/lru-cache": {
|
||||
"version": "10.4.3",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz",
|
||||
"integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="
|
||||
},
|
||||
"node_modules/picocolors": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
|
||||
@@ -4246,61 +4246,6 @@
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/rimraf": {
|
||||
"version": "5.0.10",
|
||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz",
|
||||
"integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==",
|
||||
"dependencies": {
|
||||
"glob": "^10.3.7"
|
||||
},
|
||||
"bin": {
|
||||
"rimraf": "dist/esm/bin.mjs"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/rimraf/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/rimraf/node_modules/glob": {
|
||||
"version": "10.4.5",
|
||||
"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
|
||||
"integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
|
||||
"dependencies": {
|
||||
"foreground-child": "^3.1.0",
|
||||
"jackspeak": "^3.1.2",
|
||||
"minimatch": "^9.0.4",
|
||||
"minipass": "^7.1.2",
|
||||
"package-json-from-dist": "^1.0.0",
|
||||
"path-scurry": "^1.11.1"
|
||||
},
|
||||
"bin": {
|
||||
"glob": "dist/esm/bin.mjs"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/rimraf/node_modules/minimatch": {
|
||||
"version": "9.0.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
|
||||
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16 || 14 >=14.17"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.6.3",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
|
||||
@@ -4354,6 +4299,7 @@
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
|
||||
"integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"shebang-regex": "^3.0.0"
|
||||
},
|
||||
@@ -4365,6 +4311,7 @@
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
|
||||
"integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
@@ -4452,20 +4399,7 @@
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||
"dependencies": {
|
||||
"emoji-regex": "^8.0.0",
|
||||
"is-fullwidth-code-point": "^3.0.0",
|
||||
"strip-ansi": "^6.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/string-width-cjs": {
|
||||
"name": "string-width",
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"emoji-regex": "^8.0.0",
|
||||
"is-fullwidth-code-point": "^3.0.0",
|
||||
@@ -4479,18 +4413,7 @@
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||
"dependencies": {
|
||||
"ansi-regex": "^5.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/strip-ansi-cjs": {
|
||||
"name": "strip-ansi",
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"ansi-regex": "^5.0.1"
|
||||
},
|
||||
@@ -4541,15 +4464,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/tar": {
|
||||
"version": "7.4.3",
|
||||
"resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
|
||||
"integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
|
||||
"version": "7.5.10",
|
||||
"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.10.tgz",
|
||||
"integrity": "sha512-8mOPs1//5q/rlkNSPcCegA6hiHJYDmSLEI8aMH/CdSQJNWztHC9WHNam5zdQlfpTwB9Xp7IBEsHfV5LKMJGVAw==",
|
||||
"license": "BlueOak-1.0.0",
|
||||
"dependencies": {
|
||||
"@isaacs/fs-minipass": "^4.0.0",
|
||||
"chownr": "^3.0.0",
|
||||
"minipass": "^7.1.2",
|
||||
"minizlib": "^3.0.1",
|
||||
"mkdirp": "^3.0.1",
|
||||
"minizlib": "^3.1.0",
|
||||
"yallist": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
@@ -4782,6 +4705,7 @@
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
||||
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"isexe": "^2.0.0"
|
||||
},
|
||||
@@ -4809,23 +4733,6 @@
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/wrap-ansi-cjs": {
|
||||
"name": "wrap-ansi",
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
||||
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^4.0.0",
|
||||
"string-width": "^4.1.0",
|
||||
"strip-ansi": "^6.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||
|
||||
@@ -5,12 +5,15 @@ import {
|
||||
Table as ArrowTable,
|
||||
Data,
|
||||
DataType,
|
||||
Field,
|
||||
IntoVector,
|
||||
MultiVector,
|
||||
Schema,
|
||||
dataTypeToJson,
|
||||
fromDataToBuffer,
|
||||
fromTableToBuffer,
|
||||
isMultiVector,
|
||||
makeEmptyTable,
|
||||
tableFromIPC,
|
||||
} from "./arrow";
|
||||
|
||||
@@ -84,6 +87,16 @@ export interface OptimizeOptions {
|
||||
* tbl.optimize({cleanupOlderThan: new Date()});
|
||||
*/
|
||||
cleanupOlderThan: Date;
|
||||
/**
|
||||
* Because they may be part of an in-progress transaction, files newer than
|
||||
* 7 days old are not deleted by default. If you are sure that there are no
|
||||
* in-progress transactions, then you can set this to true to delete all
|
||||
* files older than `cleanupOlderThan`.
|
||||
*
|
||||
* **WARNING**: This should only be set to true if you can guarantee that
|
||||
* no other process is currently working on this dataset. Otherwise the
|
||||
* dataset could be put into a corrupted state.
|
||||
*/
|
||||
deleteUnverified: boolean;
|
||||
}
|
||||
|
||||
@@ -381,15 +394,16 @@ export abstract class Table {
|
||||
abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
|
||||
/**
|
||||
* Add new columns with defined values.
|
||||
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
|
||||
* the SQL expression to use to calculate the value of the new column. These
|
||||
* expressions will be evaluated for each row in the table, and can
|
||||
* reference existing columns in the table.
|
||||
* @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
|
||||
* - An array of objects with column names and SQL expressions to calculate values
|
||||
* - A single Arrow Field defining one column with its data type (column will be initialized with null values)
|
||||
* - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
|
||||
* - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
|
||||
* @returns {Promise<AddColumnsResult>} A promise that resolves to an object
|
||||
* containing the new version number of the table after adding the columns.
|
||||
*/
|
||||
abstract addColumns(
|
||||
newColumnTransforms: AddColumnsSql[],
|
||||
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
|
||||
): Promise<AddColumnsResult>;
|
||||
|
||||
/**
|
||||
@@ -501,19 +515,7 @@ export abstract class Table {
|
||||
* - Index: Optimizes the indices, adding new data to existing indices
|
||||
*
|
||||
*
|
||||
* Experimental API
|
||||
* ----------------
|
||||
*
|
||||
* The optimization process is undergoing active development and may change.
|
||||
* Our goal with these changes is to improve the performance of optimization and
|
||||
* reduce the complexity.
|
||||
*
|
||||
* That being said, it is essential today to run optimize if you want the best
|
||||
* performance. It should be stable and safe to use in production, but it our
|
||||
* hope that the API may be simplified (or not even need to be called) in the
|
||||
* future.
|
||||
*
|
||||
* The frequency an application shoudl call optimize is based on the frequency of
|
||||
* The frequency an application should call optimize is based on the frequency of
|
||||
* data modifications. If data is frequently added, deleted, or updated then
|
||||
* optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
* you have added or modified 100,000 or more records or run more than 20 data
|
||||
@@ -806,9 +808,40 @@ export class LocalTable extends Table {
|
||||
// TODO: Support BatchUDF
|
||||
|
||||
async addColumns(
|
||||
newColumnTransforms: AddColumnsSql[],
|
||||
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
|
||||
): Promise<AddColumnsResult> {
|
||||
return await this.inner.addColumns(newColumnTransforms);
|
||||
// Handle single Field -> convert to array of Fields
|
||||
if (newColumnTransforms instanceof Field) {
|
||||
newColumnTransforms = [newColumnTransforms];
|
||||
}
|
||||
|
||||
// Handle array of Fields -> convert to Schema
|
||||
if (
|
||||
Array.isArray(newColumnTransforms) &&
|
||||
newColumnTransforms.length > 0 &&
|
||||
newColumnTransforms[0] instanceof Field
|
||||
) {
|
||||
const fields = newColumnTransforms as Field[];
|
||||
newColumnTransforms = new Schema(fields);
|
||||
}
|
||||
|
||||
// Handle Schema -> use schema-based approach
|
||||
if (newColumnTransforms instanceof Schema) {
|
||||
const schema = newColumnTransforms;
|
||||
// Convert schema to buffer using Arrow IPC format
|
||||
const emptyTable = makeEmptyTable(schema);
|
||||
const schemaBuf = await fromTableToBuffer(emptyTable);
|
||||
return await this.inner.addColumnsWithSchema(schemaBuf);
|
||||
}
|
||||
|
||||
// Handle SQL expressions (existing functionality)
|
||||
if (Array.isArray(newColumnTransforms)) {
|
||||
return await this.inner.addColumns(
|
||||
newColumnTransforms as AddColumnsSql[],
|
||||
);
|
||||
}
|
||||
|
||||
throw new Error("Invalid input type for addColumns");
|
||||
}
|
||||
|
||||
async alterColumns(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.27.0-beta.4",
|
||||
"version": "0.27.2-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.27.0-beta.4",
|
||||
"version": "0.27.2-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.27.0-beta.4",
|
||||
"version": "0.27.2-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.27.0-beta.4",
|
||||
"version": "0.27.2-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.27.0-beta.4",
|
||||
"version": "0.27.2-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.27.0-beta.4",
|
||||
"version": "0.27.2-beta.1",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.27.0-beta.4",
|
||||
"version": "0.27.2-beta.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4289
nodejs/package-lock.json
generated
4289
nodejs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.27.0-beta.4",
|
||||
"version": "0.27.2-beta.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use lancedb::ipc::ipc_file_to_batches;
|
||||
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||
use lancedb::table::{
|
||||
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
|
||||
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
|
||||
@@ -279,6 +279,23 @@ impl Table {
|
||||
Ok(res.into())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn add_columns_with_schema(
|
||||
&self,
|
||||
schema_buf: Buffer,
|
||||
) -> napi::Result<AddColumnsResult> {
|
||||
let schema = ipc_file_to_schema(schema_buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?;
|
||||
|
||||
let transforms = NewColumnTransform::AllNulls(schema);
|
||||
let res = self
|
||||
.inner_ref()?
|
||||
.add_columns(transforms, None)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(res.into())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn alter_columns(
|
||||
&self,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.30.0-beta.5"
|
||||
current_version = "0.30.2-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.30.0-beta.5"
|
||||
version = "0.30.2-beta.1"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -23,6 +23,7 @@ lance-namespace.workspace = true
|
||||
lance-namespace-impls.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
log.workspace = true
|
||||
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.26", features = [
|
||||
"attributes",
|
||||
|
||||
@@ -3,10 +3,10 @@ name = "lancedb"
|
||||
# version in Cargo.toml
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"numpy",
|
||||
"deprecation>=2.1.0",
|
||||
"numpy>=1.24.0",
|
||||
"overrides>=0.7; python_version<'3.12'",
|
||||
"packaging",
|
||||
"packaging>=23.0",
|
||||
"pyarrow>=16",
|
||||
"pydantic>=1.10",
|
||||
"tqdm>=4.27.0",
|
||||
@@ -48,48 +48,48 @@ pylance = [
|
||||
"pylance>=4.0.0b7",
|
||||
]
|
||||
tests = [
|
||||
"aiohttp",
|
||||
"boto3",
|
||||
"aiohttp>=3.9.0",
|
||||
"boto3>=1.28.57",
|
||||
"pandas>=1.4",
|
||||
"pytest",
|
||||
"pytest-mock",
|
||||
"pytest-asyncio",
|
||||
"duckdb",
|
||||
"pytz",
|
||||
"pytest>=7.0",
|
||||
"pytest-mock>=3.10",
|
||||
"pytest-asyncio>=0.21",
|
||||
"duckdb>=0.9.0",
|
||||
"pytz>=2023.3",
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy",
|
||||
"pyarrow-stubs",
|
||||
"tantivy>=0.20.0",
|
||||
"pyarrow-stubs>=16.0",
|
||||
"pylance>=4.0.0b7",
|
||||
"requests",
|
||||
"requests>=2.31.0",
|
||||
"datafusion>=52,<53",
|
||||
]
|
||||
dev = [
|
||||
"ruff",
|
||||
"pre-commit",
|
||||
"pyright",
|
||||
"ruff>=0.3.0",
|
||||
"pre-commit>=3.5.0",
|
||||
"pyright>=1.1.350",
|
||||
'typing-extensions>=4.0.0; python_version < "3.11"',
|
||||
]
|
||||
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings-python"]
|
||||
clip = ["torch", "pillow", "open-clip-torch"]
|
||||
siglip = ["torch", "pillow", "transformers>=4.41.0","sentencepiece"]
|
||||
clip = ["torch", "pillow>=12.1.1", "open-clip-torch"]
|
||||
siglip = ["torch", "pillow>=12.1.1", "transformers>=4.41.0","sentencepiece"]
|
||||
embeddings = [
|
||||
"requests>=2.31.0",
|
||||
"openai>=1.6.1",
|
||||
"sentence-transformers",
|
||||
"torch",
|
||||
"pillow",
|
||||
"open-clip-torch",
|
||||
"cohere",
|
||||
"sentence-transformers>=2.2.0",
|
||||
"torch>=2.0.0",
|
||||
"pillow>=12.1.1",
|
||||
"open-clip-torch>=2.20.0",
|
||||
"cohere>=4.0",
|
||||
"colpali-engine>=0.3.10",
|
||||
"huggingface_hub",
|
||||
"InstructorEmbedding",
|
||||
"google.generativeai",
|
||||
"huggingface_hub>=0.19.0",
|
||||
"InstructorEmbedding>=1.0.1",
|
||||
"google.generativeai>=0.3.0",
|
||||
"boto3>=1.28.57",
|
||||
"awscli>=1.29.57",
|
||||
"awscli>=1.44.38",
|
||||
"botocore>=1.31.57",
|
||||
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
|
||||
"ollama>=0.3.0",
|
||||
"sentencepiece"
|
||||
"sentencepiece>=0.1.99"
|
||||
]
|
||||
azure = ["adlfs>=2024.2.0"]
|
||||
|
||||
|
||||
@@ -135,7 +135,10 @@ class Table:
|
||||
def close(self) -> None: ...
|
||||
async def schema(self) -> pa.Schema: ...
|
||||
async def add(
|
||||
self, data: pa.RecordBatchReader, mode: Literal["append", "overwrite"]
|
||||
self,
|
||||
data: pa.RecordBatchReader,
|
||||
mode: Literal["append", "overwrite"],
|
||||
progress: Optional[Any] = None,
|
||||
) -> AddResult: ...
|
||||
async def update(
|
||||
self, updates: Dict[str, str], where: Optional[str]
|
||||
@@ -166,6 +169,8 @@ class Table:
|
||||
async def checkout(self, version: Union[int, str]): ...
|
||||
async def checkout_latest(self): ...
|
||||
async def restore(self, version: Optional[Union[int, str]] = None): ...
|
||||
async def prewarm_index(self, index_name: str) -> None: ...
|
||||
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
|
||||
async def list_indices(self) -> list[IndexConfig]: ...
|
||||
async def delete(self, filter: str) -> DeleteResult: ...
|
||||
async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
|
||||
|
||||
@@ -70,7 +70,7 @@ def ensure_vector_query(
|
||||
) -> Union[List[float], List[List[float]], pa.Array, List[pa.Array]]:
|
||||
if isinstance(val, list):
|
||||
if len(val) == 0:
|
||||
return ValueError("Vector query must be a non-empty list")
|
||||
raise ValueError("Vector query must be a non-empty list")
|
||||
sample = val[0]
|
||||
else:
|
||||
if isinstance(val, float):
|
||||
@@ -83,7 +83,7 @@ def ensure_vector_query(
|
||||
return val
|
||||
if isinstance(sample, list):
|
||||
if len(sample) == 0:
|
||||
return ValueError("Vector query must be a non-empty list")
|
||||
raise ValueError("Vector query must be a non-empty list")
|
||||
if isinstance(sample[0], float):
|
||||
# val is list of list of floats
|
||||
return val
|
||||
@@ -2205,8 +2205,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
self._vector_query.select(self._columns)
|
||||
self._fts_query.select(self._columns)
|
||||
if self._where:
|
||||
self._vector_query.where(self._where, self._postfilter)
|
||||
self._fts_query.where(self._where, self._postfilter)
|
||||
self._vector_query.where(self._where, not self._postfilter)
|
||||
self._fts_query.where(self._where, not self._postfilter)
|
||||
if self._with_row_id:
|
||||
self._vector_query.with_row_id(True)
|
||||
self._fts_query.with_row_id(True)
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
from functools import cached_property
|
||||
from typing import Dict, Iterable, List, Optional, Union, Literal
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional, Union, Literal
|
||||
import warnings
|
||||
|
||||
from lancedb._lancedb import (
|
||||
@@ -35,6 +35,7 @@ import pyarrow as pa
|
||||
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
from lancedb.merge import LanceMergeInsertBuilder
|
||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||
from lancedb.table import _normalize_progress
|
||||
|
||||
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
|
||||
from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
|
||||
@@ -308,6 +309,7 @@ class RemoteTable(Table):
|
||||
mode: str = "append",
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||
) -> AddResult:
|
||||
"""Add more data to the [Table](Table). It has the same API signature as
|
||||
the OSS version.
|
||||
@@ -330,17 +332,29 @@ class RemoteTable(Table):
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
progress: bool, callable, or tqdm-like, optional
|
||||
A callback or tqdm-compatible progress bar. See
|
||||
:meth:`Table.add` for details.
|
||||
|
||||
Returns
|
||||
-------
|
||||
AddResult
|
||||
An object containing the new version number of the table after adding data.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._table.add(
|
||||
data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
progress, owns = _normalize_progress(progress)
|
||||
try:
|
||||
return LOOP.run(
|
||||
self._table.add(
|
||||
data,
|
||||
mode=mode,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
progress=progress,
|
||||
)
|
||||
)
|
||||
)
|
||||
finally:
|
||||
if owns:
|
||||
progress.close()
|
||||
|
||||
def search(
|
||||
self,
|
||||
@@ -640,6 +654,45 @@ class RemoteTable(Table):
|
||||
def drop_index(self, index_name: str):
|
||||
return LOOP.run(self._table.drop_index(index_name))
|
||||
|
||||
def prewarm_index(self, name: str) -> None:
|
||||
"""Prewarm an index in the table.
|
||||
|
||||
This is a hint to the database that the index will be accessed in the
|
||||
future and should be loaded into memory if possible. This can reduce
|
||||
cold-start latency for subsequent queries.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the index to prewarm
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_index(name))
|
||||
|
||||
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||
"""Prewarm data for the table.
|
||||
|
||||
This is a hint to the database that the given columns will be accessed
|
||||
in the future and the database should prefetch the data if possible.
|
||||
Currently only supported on remote tables.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
This operation has a large upfront cost but can speed up future queries
|
||||
that need to fetch the given columns. Large columns such as embeddings
|
||||
or binary data may not be practical to prewarm. This feature is intended
|
||||
for workloads that issue many queries against the same columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns: list of str, optional
|
||||
The columns to prewarm. If None, all columns are prewarmed.
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_data(columns))
|
||||
|
||||
def wait_for_index(
|
||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||
):
|
||||
|
||||
@@ -14,6 +14,7 @@ from functools import cached_property
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
@@ -556,6 +557,21 @@ def _table_uri(base: str, table_name: str) -> str:
|
||||
return join_uri(base, f"{table_name}.lance")
|
||||
|
||||
|
||||
def _normalize_progress(progress):
|
||||
"""Normalize a ``progress`` parameter for :meth:`Table.add`.
|
||||
|
||||
Returns ``(progress_obj, owns)`` where *owns* is True when we created a
|
||||
tqdm bar that the caller must close.
|
||||
"""
|
||||
if progress is True:
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
return tqdm(unit=" rows"), True
|
||||
if progress is False or progress is None:
|
||||
return None, False
|
||||
return progress, False
|
||||
|
||||
|
||||
class Table(ABC):
|
||||
"""
|
||||
A Table is a collection of Records in a LanceDB Database.
|
||||
@@ -974,6 +990,7 @@ class Table(ABC):
|
||||
mode: AddMode = "append",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||
) -> AddResult:
|
||||
"""Add more data to the [Table](Table).
|
||||
|
||||
@@ -995,6 +1012,29 @@ class Table(ABC):
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
progress: bool, callable, or tqdm-like, optional
|
||||
Progress reporting during the add operation. Can be:
|
||||
|
||||
- ``True`` to automatically create and display a tqdm progress
|
||||
bar (requires ``tqdm`` to be installed)::
|
||||
|
||||
table.add(data, progress=True)
|
||||
|
||||
- A **callable** that receives a dict with keys ``output_rows``,
|
||||
``output_bytes``, ``total_rows``, ``elapsed_seconds``,
|
||||
``active_tasks``, ``total_tasks``, and ``done``::
|
||||
|
||||
def on_progress(p):
|
||||
print(f"{p['output_rows']}/{p['total_rows']} rows, "
|
||||
f"{p['active_tasks']}/{p['total_tasks']} workers")
|
||||
table.add(data, progress=on_progress)
|
||||
|
||||
- A **tqdm-compatible** progress bar whose ``total`` and
|
||||
``update()`` will be called automatically. The postfix shows
|
||||
write throughput (MB/s) and active worker count::
|
||||
|
||||
with tqdm() as pbar:
|
||||
table.add(data, progress=pbar)
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -1506,22 +1546,17 @@ class Table(ABC):
|
||||
in-progress operation (e.g. appending new data) and these files will not
|
||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||
then these files will be deleted regardless of their age.
|
||||
|
||||
.. warning::
|
||||
|
||||
This should only be set to True if you can guarantee that no other
|
||||
process is currently working on this dataset. Otherwise the dataset
|
||||
could be put into a corrupted state.
|
||||
|
||||
retrain: bool, default False
|
||||
This parameter is no longer used and is deprecated.
|
||||
|
||||
Experimental API
|
||||
----------------
|
||||
|
||||
The optimization process is undergoing active development and may change.
|
||||
Our goal with these changes is to improve the performance of optimization and
|
||||
reduce the complexity.
|
||||
|
||||
That being said, it is essential today to run optimize if you want the best
|
||||
performance. It should be stable and safe to use in production, but it our
|
||||
hope that the API may be simplified (or not even need to be called) in the
|
||||
future.
|
||||
|
||||
The frequency an application shoudl call optimize is based on the frequency of
|
||||
The frequency an application should call optimize is based on the frequency of
|
||||
data modifications. If data is frequently added, deleted, or updated then
|
||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
you have added or modified 100,000 or more records or run more than 20 data
|
||||
@@ -2219,12 +2254,18 @@ class LanceTable(Table):
|
||||
|
||||
def prewarm_index(self, name: str) -> None:
|
||||
"""
|
||||
Prewarms an index in the table
|
||||
Prewarm an index in the table.
|
||||
|
||||
This loads the entire index into memory
|
||||
This is a hint to the database that the index will be accessed in the
|
||||
future and should be loaded into memory if possible. This can reduce
|
||||
cold-start latency for subsequent queries.
|
||||
|
||||
If the index does not fit into the available cache this call
|
||||
may be wasteful
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
It is generally wasteful to call this if the index does not fit into the
|
||||
available cache. Not all index types support prewarming; unsupported
|
||||
indices will silently ignore the request.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -2233,6 +2274,29 @@ class LanceTable(Table):
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_index(name))
|
||||
|
||||
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||
"""
|
||||
Prewarm data for the table.
|
||||
|
||||
This is a hint to the database that the given columns will be accessed
|
||||
in the future and the database should prefetch the data if possible.
|
||||
Currently only supported on remote tables.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
This operation has a large upfront cost but can speed up future queries
|
||||
that need to fetch the given columns. Large columns such as embeddings
|
||||
or binary data may not be practical to prewarm. This feature is intended
|
||||
for workloads that issue many queries against the same columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns: list of str, optional
|
||||
The columns to prewarm. If None, all columns are prewarmed.
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_data(columns))
|
||||
|
||||
def wait_for_index(
|
||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||
) -> None:
|
||||
@@ -2468,6 +2532,7 @@ class LanceTable(Table):
|
||||
mode: AddMode = "append",
|
||||
on_bad_vectors: OnBadVectorsType = "error",
|
||||
fill_value: float = 0.0,
|
||||
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||
) -> AddResult:
|
||||
"""Add data to the table.
|
||||
If vector columns are missing and the table
|
||||
@@ -2486,17 +2551,29 @@ class LanceTable(Table):
|
||||
One of "error", "drop", "fill", "null".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
progress: bool, callable, or tqdm-like, optional
|
||||
A callback or tqdm-compatible progress bar. See
|
||||
:meth:`Table.add` for details.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
The number of vectors in the table.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._table.add(
|
||||
data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
progress, owns = _normalize_progress(progress)
|
||||
try:
|
||||
return LOOP.run(
|
||||
self._table.add(
|
||||
data,
|
||||
mode=mode,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
progress=progress,
|
||||
)
|
||||
)
|
||||
)
|
||||
finally:
|
||||
if owns:
|
||||
progress.close()
|
||||
|
||||
def merge(
|
||||
self,
|
||||
@@ -3018,22 +3095,17 @@ class LanceTable(Table):
|
||||
in-progress operation (e.g. appending new data) and these files will not
|
||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||
then these files will be deleted regardless of their age.
|
||||
|
||||
.. warning::
|
||||
|
||||
This should only be set to True if you can guarantee that no other
|
||||
process is currently working on this dataset. Otherwise the dataset
|
||||
could be put into a corrupted state.
|
||||
|
||||
retrain: bool, default False
|
||||
This parameter is no longer used and is deprecated.
|
||||
|
||||
Experimental API
|
||||
----------------
|
||||
|
||||
The optimization process is undergoing active development and may change.
|
||||
Our goal with these changes is to improve the performance of optimization and
|
||||
reduce the complexity.
|
||||
|
||||
That being said, it is essential today to run optimize if you want the best
|
||||
performance. It should be stable and safe to use in production, but it our
|
||||
hope that the API may be simplified (or not even need to be called) in the
|
||||
future.
|
||||
|
||||
The frequency an application shoudl call optimize is based on the frequency of
|
||||
The frequency an application should call optimize is based on the frequency of
|
||||
data modifications. If data is frequently added, deleted, or updated then
|
||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
you have added or modified 100,000 or more records or run more than 20 data
|
||||
@@ -3634,19 +3706,47 @@ class AsyncTable:
|
||||
"""
|
||||
Prewarm an index in the table.
|
||||
|
||||
This is a hint to the database that the index will be accessed in the
|
||||
future and should be loaded into memory if possible. This can reduce
|
||||
cold-start latency for subsequent queries.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
It is generally wasteful to call this if the index does not fit into the
|
||||
available cache. Not all index types support prewarming; unsupported
|
||||
indices will silently ignore the request.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the index to prewarm
|
||||
|
||||
Notes
|
||||
-----
|
||||
This will load the index into memory. This may reduce the cold-start time for
|
||||
future queries. If the index does not fit in the cache then this call may be
|
||||
wasteful.
|
||||
"""
|
||||
await self._inner.prewarm_index(name)
|
||||
|
||||
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||
"""
|
||||
Prewarm data for the table.
|
||||
|
||||
This is a hint to the database that the given columns will be accessed
|
||||
in the future and the database should prefetch the data if possible.
|
||||
Currently only supported on remote tables.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
This operation has a large upfront cost but can speed up future queries
|
||||
that need to fetch the given columns. Large columns such as embeddings
|
||||
or binary data may not be practical to prewarm. This feature is intended
|
||||
for workloads that issue many queries against the same columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns: list of str, optional
|
||||
The columns to prewarm. If None, all columns are prewarmed.
|
||||
"""
|
||||
await self._inner.prewarm_data(columns)
|
||||
|
||||
async def wait_for_index(
|
||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||
) -> None:
|
||||
@@ -3722,6 +3822,7 @@ class AsyncTable:
|
||||
mode: Optional[Literal["append", "overwrite"]] = "append",
|
||||
on_bad_vectors: Optional[OnBadVectorsType] = None,
|
||||
fill_value: Optional[float] = None,
|
||||
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||
) -> AddResult:
|
||||
"""Add more data to the [Table](Table).
|
||||
|
||||
@@ -3743,6 +3844,9 @@ class AsyncTable:
|
||||
One of "error", "drop", "fill", "null".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
progress: callable or tqdm-like, optional
|
||||
A callback or tqdm-compatible progress bar. See
|
||||
:meth:`Table.add` for details.
|
||||
|
||||
"""
|
||||
schema = await self.schema()
|
||||
@@ -3766,8 +3870,9 @@ class AsyncTable:
|
||||
)
|
||||
_register_optional_converters()
|
||||
data = to_scannable(data)
|
||||
progress, owns = _normalize_progress(progress)
|
||||
try:
|
||||
return await self._inner.add(data, mode or "append")
|
||||
return await self._inner.add(data, mode or "append", progress=progress)
|
||||
except RuntimeError as e:
|
||||
if "Cast error" in str(e):
|
||||
raise ValueError(e)
|
||||
@@ -3775,6 +3880,9 @@ class AsyncTable:
|
||||
raise ValueError(e)
|
||||
else:
|
||||
raise
|
||||
finally:
|
||||
if owns:
|
||||
progress.close()
|
||||
|
||||
def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
|
||||
"""
|
||||
@@ -4573,22 +4681,17 @@ class AsyncTable:
|
||||
in-progress operation (e.g. appending new data) and these files will not
|
||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||
then these files will be deleted regardless of their age.
|
||||
|
||||
.. warning::
|
||||
|
||||
This should only be set to True if you can guarantee that no other
|
||||
process is currently working on this dataset. Otherwise the dataset
|
||||
could be put into a corrupted state.
|
||||
|
||||
retrain: bool, default False
|
||||
This parameter is no longer used and is deprecated.
|
||||
|
||||
Experimental API
|
||||
----------------
|
||||
|
||||
The optimization process is undergoing active development and may change.
|
||||
Our goal with these changes is to improve the performance of optimization and
|
||||
reduce the complexity.
|
||||
|
||||
That being said, it is essential today to run optimize if you want the best
|
||||
performance. It should be stable and safe to use in production, but it our
|
||||
hope that the API may be simplified (or not even need to be called) in the
|
||||
future.
|
||||
|
||||
The frequency an application shoudl call optimize is based on the frequency of
|
||||
The frequency an application should call optimize is based on the frequency of
|
||||
data modifications. If data is frequently added, deleted, or updated then
|
||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
you have added or modified 100,000 or more records or run more than 20 data
|
||||
@@ -4709,7 +4812,16 @@ class IndexStatistics:
|
||||
num_indexed_rows: int
|
||||
num_unindexed_rows: int
|
||||
index_type: Literal[
|
||||
"IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
|
||||
"IVF_FLAT",
|
||||
"IVF_SQ",
|
||||
"IVF_PQ",
|
||||
"IVF_RQ",
|
||||
"IVF_HNSW_SQ",
|
||||
"IVF_HNSW_PQ",
|
||||
"FTS",
|
||||
"BTREE",
|
||||
"BITMAP",
|
||||
"LABEL_LIST",
|
||||
]
|
||||
distance_type: Optional[Literal["l2", "cosine", "dot"]] = None
|
||||
num_indices: Optional[int] = None
|
||||
|
||||
@@ -177,6 +177,60 @@ async def test_analyze_plan(table: AsyncTable):
|
||||
assert "metrics=" in res
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def table_with_id(tmpdir_factory) -> Table:
|
||||
tmp_path = str(tmpdir_factory.mktemp("data"))
|
||||
db = lancedb.connect(tmp_path)
|
||||
data = pa.table(
|
||||
{
|
||||
"id": pa.array([1, 2, 3, 4], type=pa.int64()),
|
||||
"text": pa.array(["a", "b", "cat", "dog"]),
|
||||
"vector": pa.array(
|
||||
[[0.1, 0.1], [2, 2], [-0.1, -0.1], [0.5, -0.5]],
|
||||
type=pa.list_(pa.float32(), list_size=2),
|
||||
),
|
||||
}
|
||||
)
|
||||
table = db.create_table("test_with_id", data)
|
||||
table.create_fts_index("text", with_position=False, use_tantivy=False)
|
||||
return table
|
||||
|
||||
|
||||
def test_hybrid_prefilter_explain_plan(table_with_id: Table):
|
||||
"""
|
||||
Verify that the prefilter logic is not inverted in LanceHybridQueryBuilder.
|
||||
"""
|
||||
plan_prefilter = (
|
||||
table_with_id.search(query_type="hybrid")
|
||||
.vector([0.0, 0.0])
|
||||
.text("dog")
|
||||
.where("id = 1", prefilter=True)
|
||||
.limit(2)
|
||||
.explain_plan(verbose=True)
|
||||
)
|
||||
|
||||
plan_postfilter = (
|
||||
table_with_id.search(query_type="hybrid")
|
||||
.vector([0.0, 0.0])
|
||||
.text("dog")
|
||||
.where("id = 1", prefilter=False)
|
||||
.limit(2)
|
||||
.explain_plan(verbose=True)
|
||||
)
|
||||
|
||||
# prefilter=True: filter is pushed into the LanceRead scan.
|
||||
# The FTS sub-plan exposes this as "full_filter=id = Int64(1)" inside LanceRead.
|
||||
assert "full_filter=id = Int64(1)" in plan_prefilter, (
|
||||
f"Should push the filter into the scan.\nPlan:\n{plan_prefilter}"
|
||||
)
|
||||
|
||||
# prefilter=False: filter is applied as a separate FilterExec after the search.
|
||||
# The filter must NOT be embedded in the scan.
|
||||
assert "full_filter=id = Int64(1)" not in plan_postfilter, (
|
||||
f"Should NOT push the filter into the scan.\nPlan:\n{plan_postfilter}"
|
||||
)
|
||||
|
||||
|
||||
def test_normalize_scores():
|
||||
cases = [
|
||||
(pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
from datetime import timedelta
|
||||
import random
|
||||
from typing import get_args, get_type_hints
|
||||
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
@@ -22,6 +23,7 @@ from lancedb.index import (
|
||||
HnswSq,
|
||||
FTS,
|
||||
)
|
||||
from lancedb.table import IndexStatistics
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
@@ -283,3 +285,23 @@ async def test_create_index_with_binary_vectors(binary_table: AsyncTable):
|
||||
for v in range(256):
|
||||
res = await binary_table.query().nearest_to([v] * 128).to_arrow()
|
||||
assert res["id"][0].as_py() == v
|
||||
|
||||
|
||||
def test_index_statistics_index_type_lists_all_supported_values():
|
||||
expected_index_types = {
|
||||
"IVF_FLAT",
|
||||
"IVF_SQ",
|
||||
"IVF_PQ",
|
||||
"IVF_RQ",
|
||||
"IVF_HNSW_SQ",
|
||||
"IVF_HNSW_PQ",
|
||||
"FTS",
|
||||
"BTREE",
|
||||
"BITMAP",
|
||||
"LABEL_LIST",
|
||||
}
|
||||
|
||||
assert (
|
||||
set(get_args(get_type_hints(IndexStatistics)["index_type"]))
|
||||
== expected_index_types
|
||||
)
|
||||
|
||||
@@ -147,7 +147,12 @@ class TrackingNamespace(LanceNamespace):
|
||||
This simulates a credential rotation system where each call returns
|
||||
new credentials that expire after credential_expires_in_seconds.
|
||||
"""
|
||||
modified = copy.deepcopy(storage_options) if storage_options else {}
|
||||
# Start from base storage options (endpoint, region, allow_http, etc.)
|
||||
# because DirectoryNamespace returns None for storage_options from
|
||||
# describe_table/declare_table when no credential vendor is configured.
|
||||
modified = copy.deepcopy(self.base_storage_options)
|
||||
if storage_options:
|
||||
modified.update(storage_options)
|
||||
|
||||
# Increment credentials to simulate rotation
|
||||
modified["aws_access_key_id"] = f"AKID_{count}"
|
||||
|
||||
@@ -30,6 +30,7 @@ from lancedb.query import (
|
||||
PhraseQuery,
|
||||
Query,
|
||||
FullTextSearchQuery,
|
||||
ensure_vector_query,
|
||||
)
|
||||
from lancedb.rerankers.cross_encoder import CrossEncoderReranker
|
||||
from lancedb.table import AsyncTable, LanceTable
|
||||
@@ -1501,6 +1502,18 @@ def test_search_empty_table(mem_db):
|
||||
assert results == []
|
||||
|
||||
|
||||
def test_ensure_vector_query_empty_list():
|
||||
"""Regression: ensure_vector_query used to return instead of raise ValueError."""
|
||||
with pytest.raises(ValueError, match="non-empty"):
|
||||
ensure_vector_query([])
|
||||
|
||||
|
||||
def test_ensure_vector_query_nested_empty_list():
|
||||
"""Regression: ensure_vector_query used to return instead of raise ValueError."""
|
||||
with pytest.raises(ValueError, match="non-empty"):
|
||||
ensure_vector_query([[]])
|
||||
|
||||
|
||||
def test_fast_search(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
|
||||
@@ -527,6 +527,102 @@ async def test_add_async(mem_db_async: AsyncConnection):
|
||||
assert await table.count_rows() == 3
|
||||
|
||||
|
||||
def test_add_progress_callback(mem_db: DBConnection):
|
||||
table = mem_db.create_table(
|
||||
"test",
|
||||
data=[{"id": 1}, {"id": 2}],
|
||||
)
|
||||
|
||||
updates = []
|
||||
table.add([{"id": 3}, {"id": 4}], progress=lambda p: updates.append(dict(p)))
|
||||
|
||||
assert len(table) == 4
|
||||
# The done callback always fires, so we should always get at least one.
|
||||
assert len(updates) >= 1, "expected at least one progress callback"
|
||||
for p in updates:
|
||||
assert "output_rows" in p
|
||||
assert "output_bytes" in p
|
||||
assert "total_rows" in p
|
||||
assert "elapsed_seconds" in p
|
||||
assert "active_tasks" in p
|
||||
assert "total_tasks" in p
|
||||
assert "done" in p
|
||||
# The last callback should have done=True.
|
||||
assert updates[-1]["done"] is True
|
||||
|
||||
|
||||
def test_add_progress_tqdm_like(mem_db: DBConnection):
|
||||
"""Test that a tqdm-like object gets total set and update() called."""
|
||||
|
||||
class FakeBar:
|
||||
def __init__(self):
|
||||
self.total = None
|
||||
self.n = 0
|
||||
self.postfix = None
|
||||
|
||||
def update(self, n):
|
||||
self.n += n
|
||||
|
||||
def set_postfix_str(self, s):
|
||||
self.postfix = s
|
||||
|
||||
def refresh(self):
|
||||
pass
|
||||
|
||||
table = mem_db.create_table(
|
||||
"test",
|
||||
data=[{"id": 1}, {"id": 2}],
|
||||
)
|
||||
|
||||
bar = FakeBar()
|
||||
table.add([{"id": 3}, {"id": 4}], progress=bar)
|
||||
|
||||
assert len(table) == 4
|
||||
# Postfix should contain throughput and worker count
|
||||
if bar.postfix is not None:
|
||||
assert "MB/s" in bar.postfix
|
||||
assert "workers" in bar.postfix
|
||||
|
||||
|
||||
def test_add_progress_bool(mem_db: DBConnection):
|
||||
"""Test that progress=True creates and closes a tqdm bar automatically."""
|
||||
table = mem_db.create_table(
|
||||
"test",
|
||||
data=[{"id": 1}, {"id": 2}],
|
||||
)
|
||||
|
||||
table.add([{"id": 3}, {"id": 4}], progress=True)
|
||||
assert len(table) == 4
|
||||
|
||||
# progress=False should be the same as None
|
||||
table.add([{"id": 5}], progress=False)
|
||||
assert len(table) == 5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_add_progress_callback_async(mem_db_async: AsyncConnection):
|
||||
"""Progress callbacks work through the async path too."""
|
||||
table = await mem_db_async.create_table("test", data=[{"id": 1}, {"id": 2}])
|
||||
|
||||
updates = []
|
||||
await table.add([{"id": 3}, {"id": 4}], progress=lambda p: updates.append(dict(p)))
|
||||
|
||||
assert await table.count_rows() == 4
|
||||
assert len(updates) >= 1
|
||||
assert updates[-1]["done"] is True
|
||||
|
||||
|
||||
def test_add_progress_callback_error(mem_db: DBConnection):
|
||||
"""A failing callback must not prevent the write from succeeding."""
|
||||
table = mem_db.create_table("test", data=[{"id": 1}, {"id": 2}])
|
||||
|
||||
def bad_callback(p):
|
||||
raise RuntimeError("boom")
|
||||
|
||||
table.add([{"id": 3}, {"id": 4}], progress=bad_callback)
|
||||
assert len(table) == 4
|
||||
|
||||
|
||||
def test_polars(mem_db: DBConnection):
|
||||
data = {
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
|
||||
@@ -316,6 +316,19 @@ impl<'py> IntoPyObject<'py> for PySelect {
|
||||
Select::All => Ok(py.None().into_bound(py).into_any()),
|
||||
Select::Columns(columns) => Ok(columns.into_pyobject(py)?.into_any()),
|
||||
Select::Dynamic(columns) => Ok(columns.into_pyobject(py)?.into_any()),
|
||||
Select::Expr(pairs) => {
|
||||
// Serialize DataFusion Expr -> SQL string so Python sees the same
|
||||
// format as Select::Dynamic: a list of (name, sql_string) tuples.
|
||||
let sql_pairs: PyResult<Vec<(String, String)>> = pairs
|
||||
.into_iter()
|
||||
.map(|(name, expr)| {
|
||||
lancedb::expr::expr_to_sql_string(&expr)
|
||||
.map(|sql| (name, sql))
|
||||
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
|
||||
})
|
||||
.collect();
|
||||
Ok(sql_pairs?.into_pyobject(py)?.into_any())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use lancedb::table::{
|
||||
Table as LanceDbTable,
|
||||
};
|
||||
use pyo3::{
|
||||
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
||||
pyclass, pymethods,
|
||||
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
||||
@@ -299,10 +299,12 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (data, mode, progress=None))]
|
||||
pub fn add<'a>(
|
||||
self_: PyRef<'a, Self>,
|
||||
data: PyScannable,
|
||||
mode: String,
|
||||
progress: Option<Py<PyAny>>,
|
||||
) -> PyResult<Bound<'a, PyAny>> {
|
||||
let mut op = self_.inner_ref()?.add(data);
|
||||
if mode == "append" {
|
||||
@@ -312,6 +314,81 @@ impl Table {
|
||||
} else {
|
||||
return Err(PyValueError::new_err(format!("Invalid mode: {}", mode)));
|
||||
}
|
||||
if let Some(progress_obj) = progress {
|
||||
let is_callable = Python::attach(|py| progress_obj.bind(py).is_callable());
|
||||
if is_callable {
|
||||
// Callback: call with a dict of progress info.
|
||||
op = op.progress(move |p| {
|
||||
Python::attach(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
if let Err(e) = dict
|
||||
.set_item("output_rows", p.output_rows())
|
||||
.and_then(|_| dict.set_item("output_bytes", p.output_bytes()))
|
||||
.and_then(|_| dict.set_item("total_rows", p.total_rows()))
|
||||
.and_then(|_| {
|
||||
dict.set_item("elapsed_seconds", p.elapsed().as_secs_f64())
|
||||
})
|
||||
.and_then(|_| dict.set_item("active_tasks", p.active_tasks()))
|
||||
.and_then(|_| dict.set_item("total_tasks", p.total_tasks()))
|
||||
.and_then(|_| dict.set_item("done", p.done()))
|
||||
{
|
||||
log::warn!("progress dict error: {e}");
|
||||
return;
|
||||
}
|
||||
if let Err(e) = progress_obj.call1(py, (dict,)) {
|
||||
log::warn!("progress callback error: {e}");
|
||||
}
|
||||
});
|
||||
});
|
||||
} else {
|
||||
// tqdm-like: has update() method.
|
||||
let mut last_rows: usize = 0;
|
||||
let mut total_set = false;
|
||||
op = op.progress(move |p| {
|
||||
let current = p.output_rows();
|
||||
let prev = last_rows;
|
||||
last_rows = current;
|
||||
Python::attach(|py| {
|
||||
if let Some(total) = p.total_rows()
|
||||
&& !total_set
|
||||
{
|
||||
if let Err(e) = progress_obj.setattr(py, "total", total) {
|
||||
log::warn!("progress setattr error: {e}");
|
||||
}
|
||||
total_set = true;
|
||||
}
|
||||
let delta = current.saturating_sub(prev);
|
||||
if delta > 0 {
|
||||
if let Err(e) = progress_obj.call_method1(py, "update", (delta,)) {
|
||||
log::warn!("progress update error: {e}");
|
||||
}
|
||||
// Show throughput and active workers in tqdm postfix.
|
||||
let elapsed = p.elapsed().as_secs_f64();
|
||||
if elapsed > 0.0 {
|
||||
let mb_per_sec = p.output_bytes() as f64 / elapsed / 1_000_000.0;
|
||||
let postfix = format!(
|
||||
"{:.1} MB/s | {}/{} workers",
|
||||
mb_per_sec,
|
||||
p.active_tasks(),
|
||||
p.total_tasks()
|
||||
);
|
||||
if let Err(e) =
|
||||
progress_obj.call_method1(py, "set_postfix_str", (postfix,))
|
||||
{
|
||||
log::warn!("progress set_postfix_str error: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
if p.done() {
|
||||
// Force a final refresh so the bar shows completion.
|
||||
if let Err(e) = progress_obj.call_method0(py, "refresh") {
|
||||
log::warn!("progress refresh error: {e}");
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
future_into_py(self_.py(), async move {
|
||||
let result = op.execute().await.infer_error()?;
|
||||
@@ -426,6 +503,17 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn prewarm_data(
|
||||
self_: PyRef<'_, Self>,
|
||||
columns: Option<Vec<String>>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
inner.prewarm_data(columns).await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.27.0-beta.4"
|
||||
version = "0.27.2-beta.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -596,11 +596,8 @@ pub struct ConnectBuilder {
|
||||
}
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 3] = [
|
||||
("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name"),
|
||||
("AZURE_CLIENT_ID", "azure_client_id"),
|
||||
("AZURE_TENANT_ID", "azure_tenant_id"),
|
||||
];
|
||||
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
|
||||
[("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
|
||||
|
||||
impl ConnectBuilder {
|
||||
/// Create a new [`ConnectOptions`] with the given database URI.
|
||||
|
||||
@@ -339,6 +339,12 @@ impl PermutationReader {
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
Select::Expr(columns) => {
|
||||
// For Expr projections, we check if any alias is _rowid.
|
||||
// We can't validate the expression itself (it may differ from _rowid)
|
||||
// but we allow it through; the column will be included.
|
||||
Ok(columns.iter().any(|(alias, _)| alias == ROW_ID))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,6 +47,25 @@ pub enum Select {
|
||||
///
|
||||
/// See [`Query::select`] for more details and examples
|
||||
Dynamic(Vec<(String, String)>),
|
||||
/// Advanced selection using type-safe DataFusion expressions
|
||||
///
|
||||
/// Similar to [`Select::Dynamic`] but uses [`datafusion_expr::Expr`] instead of
|
||||
/// raw SQL strings. Use [`crate::expr`] helpers to build expressions:
|
||||
///
|
||||
/// ```
|
||||
/// use lancedb::expr::{col, lit};
|
||||
/// use lancedb::query::Select;
|
||||
///
|
||||
/// // SELECT id, id * 2 AS id2 FROM ...
|
||||
/// let selection = Select::expr_projection(&[
|
||||
/// ("id", col("id")),
|
||||
/// ("id2", col("id") * lit(2)),
|
||||
/// ]);
|
||||
/// ```
|
||||
///
|
||||
/// Note: For remote/server-side queries the expressions are serialized to SQL strings
|
||||
/// automatically (same as [`Select::Dynamic`]).
|
||||
Expr(Vec<(String, datafusion_expr::Expr)>),
|
||||
}
|
||||
|
||||
impl Select {
|
||||
@@ -69,6 +88,29 @@ impl Select {
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
/// Create a typed-expression projection.
|
||||
///
|
||||
/// This is a convenience method for creating a [`Select::Expr`] variant from
|
||||
/// a slice of `(name, expr)` pairs where each `expr` is a [`datafusion_expr::Expr`].
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use lancedb::expr::{col, lit};
|
||||
/// use lancedb::query::Select;
|
||||
///
|
||||
/// let selection = Select::expr_projection(&[
|
||||
/// ("id", col("id")),
|
||||
/// ("id2", col("id") * lit(2)),
|
||||
/// ]);
|
||||
/// ```
|
||||
pub fn expr_projection(columns: &[(impl AsRef<str>, datafusion_expr::Expr)]) -> Self {
|
||||
Self::Expr(
|
||||
columns
|
||||
.iter()
|
||||
.map(|(name, expr)| (name.as_ref().to_string(), expr.clone()))
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait for converting a type to a query vector
|
||||
@@ -1591,6 +1633,58 @@ mod tests {
|
||||
});
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_select_with_expr_projection() {
|
||||
// Mirrors test_select_with_transform but uses Select::Expr instead of Select::Dynamic
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let dataset_path = tmp_dir.path().join("test_expr.lance");
|
||||
let uri = dataset_path.to_str().unwrap();
|
||||
|
||||
let batches = make_non_empty_batches();
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
use crate::expr::{col, lit};
|
||||
let query = table.query().limit(10).select(Select::expr_projection(&[
|
||||
("id2", col("id") * lit(2i32)),
|
||||
("id", col("id")),
|
||||
]));
|
||||
|
||||
let schema = query.output_schema().await.unwrap();
|
||||
assert_eq!(
|
||||
schema,
|
||||
Arc::new(ArrowSchema::new(vec![
|
||||
ArrowField::new("id2", DataType::Int32, true),
|
||||
ArrowField::new("id", DataType::Int32, true),
|
||||
]))
|
||||
);
|
||||
|
||||
let result = query.execute().await;
|
||||
let mut batches = result
|
||||
.expect("should have result")
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
let batch = batches.pop().unwrap();
|
||||
|
||||
// id and id2
|
||||
assert_eq!(batch.num_columns(), 2);
|
||||
|
||||
let id: &Int32Array = batch.column_by_name("id").unwrap().as_primitive();
|
||||
let id2: &Int32Array = batch.column_by_name("id2").unwrap().as_primitive();
|
||||
|
||||
id.iter().zip(id2.iter()).for_each(|(id, id2)| {
|
||||
let id = id.unwrap();
|
||||
let id2 = id2.unwrap();
|
||||
assert_eq!(id * 2, id2);
|
||||
});
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_no_vector() {
|
||||
// TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051
|
||||
|
||||
@@ -426,14 +426,11 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
})?,
|
||||
);
|
||||
}
|
||||
if db_prefix.is_some() {
|
||||
if let Some(prefix) = db_prefix {
|
||||
headers.insert(
|
||||
HeaderName::from_static("x-lancedb-database-prefix"),
|
||||
HeaderValue::from_str(db_prefix.unwrap()).map_err(|_| Error::InvalidInput {
|
||||
message: format!(
|
||||
"non-ascii database prefix '{}' provided",
|
||||
db_prefix.unwrap()
|
||||
),
|
||||
HeaderValue::from_str(prefix).map_err(|_| Error::InvalidInput {
|
||||
message: format!("non-ascii database prefix '{}' provided", prefix),
|
||||
})?,
|
||||
);
|
||||
}
|
||||
@@ -446,23 +443,13 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
})?,
|
||||
);
|
||||
}
|
||||
// Map azure storage options to x-azure-* headers.
|
||||
// The option key uses underscores (e.g. "azure_client_id") while the
|
||||
// header uses hyphens (e.g. "x-azure-client-id").
|
||||
let azure_opts: [(&str, &str); 3] = [
|
||||
("azure_storage_account_name", "x-azure-storage-account-name"),
|
||||
("azure_client_id", "x-azure-client-id"),
|
||||
("azure_tenant_id", "x-azure-tenant-id"),
|
||||
];
|
||||
for (opt_key, header_name) in azure_opts {
|
||||
if let Some(v) = options.0.get(opt_key) {
|
||||
headers.insert(
|
||||
HeaderName::from_static(header_name),
|
||||
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
|
||||
message: format!("non-ascii value '{}' for option '{}'", v, opt_key),
|
||||
})?,
|
||||
);
|
||||
}
|
||||
if let Some(v) = options.0.get("azure_storage_account_name") {
|
||||
headers.insert(
|
||||
HeaderName::from_static("x-azure-storage-account-name"),
|
||||
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
|
||||
message: format!("non-ascii storage account name '{}' provided", db_name),
|
||||
})?,
|
||||
);
|
||||
}
|
||||
|
||||
for (key, value) in &config.extra_headers {
|
||||
@@ -1085,34 +1072,4 @@ mod tests {
|
||||
_ => panic!("Expected Runtime error"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_default_headers_azure_opts() {
|
||||
let mut opts = HashMap::new();
|
||||
opts.insert(
|
||||
"azure_storage_account_name".to_string(),
|
||||
"myaccount".to_string(),
|
||||
);
|
||||
opts.insert("azure_client_id".to_string(), "my-client-id".to_string());
|
||||
opts.insert("azure_tenant_id".to_string(), "my-tenant-id".to_string());
|
||||
let remote_opts = RemoteOptions::new(opts);
|
||||
|
||||
let headers = RestfulLanceDbClient::<Sender>::default_headers(
|
||||
"test-key",
|
||||
"us-east-1",
|
||||
"testdb",
|
||||
false,
|
||||
&remote_opts,
|
||||
None,
|
||||
&ClientConfig::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
headers.get("x-azure-storage-account-name").unwrap(),
|
||||
"myaccount"
|
||||
);
|
||||
assert_eq!(headers.get("x-azure-client-id").unwrap(), "my-client-id");
|
||||
assert_eq!(headers.get("x-azure-tenant-id").unwrap(), "my-tenant-id");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,6 +72,10 @@ impl ServerVersion {
|
||||
pub fn support_structural_fts(&self) -> bool {
|
||||
self.0 >= semver::Version::new(0, 3, 0)
|
||||
}
|
||||
|
||||
pub fn support_multipart_write(&self) -> bool {
|
||||
self.0 >= semver::Version::new(0, 4, 0)
|
||||
}
|
||||
}
|
||||
|
||||
pub const OPT_REMOTE_PREFIX: &str = "remote_database_";
|
||||
@@ -778,12 +782,7 @@ impl RemoteOptions {
|
||||
|
||||
impl From<StorageOptions> for RemoteOptions {
|
||||
fn from(options: StorageOptions) -> Self {
|
||||
let supported_opts = vec![
|
||||
"account_name",
|
||||
"azure_storage_account_name",
|
||||
"azure_client_id",
|
||||
"azure_tenant_id",
|
||||
];
|
||||
let supported_opts = vec!["account_name", "azure_storage_account_name"];
|
||||
let mut filtered = HashMap::new();
|
||||
for opt in supported_opts {
|
||||
if let Some(v) = options.0.get(opt) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -11,10 +11,14 @@ use arrow_ipc::CompressionType;
|
||||
use datafusion_common::{DataFusionError, Result as DataFusionResult};
|
||||
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
|
||||
use datafusion_physical_expr::EquivalenceProperties;
|
||||
use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet};
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
|
||||
use datafusion_physical_plan::{
|
||||
DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties,
|
||||
};
|
||||
use futures::StreamExt;
|
||||
use http::header::CONTENT_TYPE;
|
||||
use lance::io::exec::utils::InstrumentedRecordBatchStreamAdapter;
|
||||
|
||||
use crate::Error;
|
||||
use crate::remote::ARROW_STREAM_CONTENT_TYPE;
|
||||
@@ -22,13 +26,16 @@ use crate::remote::client::{HttpSend, RestfulLanceDbClient, Sender};
|
||||
use crate::remote::table::RemoteTable;
|
||||
use crate::table::AddResult;
|
||||
use crate::table::datafusion::insert::COUNT_SCHEMA;
|
||||
use crate::table::write_progress::WriteProgressTracker;
|
||||
|
||||
/// ExecutionPlan for inserting data into a remote LanceDB table.
|
||||
///
|
||||
/// This plan:
|
||||
/// 1. Requires single partition (no parallel remote inserts yet)
|
||||
/// 2. Streams data as Arrow IPC to `/v1/table/{id}/insert/` endpoint
|
||||
/// 3. Stores AddResult for retrieval after execution
|
||||
/// Streams data as Arrow IPC to `/v1/table/{id}/insert/` endpoint.
|
||||
///
|
||||
/// When `upload_id` is set, inserts are staged as part of a multipart write
|
||||
/// session and the plan supports multiple partitions for parallel uploads.
|
||||
/// Without `upload_id`, the plan requires a single partition and commits
|
||||
/// immediately.
|
||||
#[derive(Debug)]
|
||||
pub struct RemoteInsertExec<S: HttpSend = Sender> {
|
||||
table_name: String,
|
||||
@@ -38,21 +45,69 @@ pub struct RemoteInsertExec<S: HttpSend = Sender> {
|
||||
overwrite: bool,
|
||||
properties: PlanProperties,
|
||||
add_result: Arc<Mutex<Option<AddResult>>>,
|
||||
metrics: ExecutionPlanMetricsSet,
|
||||
upload_id: Option<String>,
|
||||
tracker: Option<Arc<WriteProgressTracker>>,
|
||||
}
|
||||
|
||||
impl<S: HttpSend + 'static> RemoteInsertExec<S> {
|
||||
/// Create a new RemoteInsertExec.
|
||||
/// Create a new single-partition RemoteInsertExec.
|
||||
pub fn new(
|
||||
table_name: String,
|
||||
identifier: String,
|
||||
client: RestfulLanceDbClient<S>,
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
overwrite: bool,
|
||||
tracker: Option<Arc<WriteProgressTracker>>,
|
||||
) -> Self {
|
||||
Self::new_inner(
|
||||
table_name, identifier, client, input, overwrite, None, tracker,
|
||||
)
|
||||
}
|
||||
|
||||
/// Create a multi-partition RemoteInsertExec for use with multipart writes.
|
||||
///
|
||||
/// Each partition's insert is staged under the given `upload_id` without
|
||||
/// committing. The caller is responsible for calling the complete (or abort)
|
||||
/// endpoint after all partitions finish.
|
||||
pub fn new_multipart(
|
||||
table_name: String,
|
||||
identifier: String,
|
||||
client: RestfulLanceDbClient<S>,
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
overwrite: bool,
|
||||
upload_id: String,
|
||||
tracker: Option<Arc<WriteProgressTracker>>,
|
||||
) -> Self {
|
||||
Self::new_inner(
|
||||
table_name,
|
||||
identifier,
|
||||
client,
|
||||
input,
|
||||
overwrite,
|
||||
Some(upload_id),
|
||||
tracker,
|
||||
)
|
||||
}
|
||||
|
||||
fn new_inner(
|
||||
table_name: String,
|
||||
identifier: String,
|
||||
client: RestfulLanceDbClient<S>,
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
overwrite: bool,
|
||||
upload_id: Option<String>,
|
||||
tracker: Option<Arc<WriteProgressTracker>>,
|
||||
) -> Self {
|
||||
let num_partitions = if upload_id.is_some() {
|
||||
input.output_partitioning().partition_count()
|
||||
} else {
|
||||
1
|
||||
};
|
||||
let schema = COUNT_SCHEMA.clone();
|
||||
let properties = PlanProperties::new(
|
||||
EquivalenceProperties::new(schema),
|
||||
datafusion_physical_plan::Partitioning::UnknownPartitioning(1),
|
||||
datafusion_physical_plan::Partitioning::UnknownPartitioning(num_partitions),
|
||||
datafusion_physical_plan::execution_plan::EmissionType::Final,
|
||||
datafusion_physical_plan::execution_plan::Boundedness::Bounded,
|
||||
);
|
||||
@@ -65,6 +120,9 @@ impl<S: HttpSend + 'static> RemoteInsertExec<S> {
|
||||
overwrite,
|
||||
properties,
|
||||
add_result: Arc::new(Mutex::new(None)),
|
||||
metrics: ExecutionPlanMetricsSet::new(),
|
||||
upload_id,
|
||||
tracker,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,6 +141,7 @@ impl<S: HttpSend + 'static> RemoteInsertExec<S> {
|
||||
fn stream_as_http_body(
|
||||
data: SendableRecordBatchStream,
|
||||
error_tx: tokio::sync::oneshot::Sender<DataFusionError>,
|
||||
tracker: Option<Arc<WriteProgressTracker>>,
|
||||
) -> DataFusionResult<reqwest::Body> {
|
||||
let options = arrow_ipc::writer::IpcWriteOptions::default()
|
||||
.try_with_compression(Some(CompressionType::LZ4_FRAME))?;
|
||||
@@ -94,37 +153,46 @@ impl<S: HttpSend + 'static> RemoteInsertExec<S> {
|
||||
|
||||
let stream = futures::stream::try_unfold(
|
||||
(data, writer, Some(error_tx), false),
|
||||
move |(mut data, mut writer, error_tx, finished)| async move {
|
||||
if finished {
|
||||
return Ok(None);
|
||||
}
|
||||
match data.next().await {
|
||||
Some(Ok(batch)) => {
|
||||
writer
|
||||
.write(&batch)
|
||||
.map_err(|e| std::io::Error::other(e.to_string()))?;
|
||||
let buffer = std::mem::take(writer.get_mut());
|
||||
Ok(Some((buffer, (data, writer, error_tx, false))))
|
||||
move |(mut data, mut writer, error_tx, finished)| {
|
||||
let tracker = tracker.clone();
|
||||
async move {
|
||||
if finished {
|
||||
return Ok(None);
|
||||
}
|
||||
Some(Err(e)) => {
|
||||
// Send the original error through the channel before
|
||||
// returning a generic error to reqwest.
|
||||
if let Some(tx) = error_tx {
|
||||
let _ = tx.send(e);
|
||||
match data.next().await {
|
||||
Some(Ok(batch)) => {
|
||||
writer
|
||||
.write(&batch)
|
||||
.map_err(|e| std::io::Error::other(e.to_string()))?;
|
||||
let buffer = std::mem::take(writer.get_mut());
|
||||
if let Some(ref t) = tracker {
|
||||
t.record_bytes(buffer.len());
|
||||
}
|
||||
Ok(Some((buffer, (data, writer, error_tx, false))))
|
||||
}
|
||||
Err(std::io::Error::other(
|
||||
"input stream error (see error channel)",
|
||||
))
|
||||
}
|
||||
None => {
|
||||
writer
|
||||
.finish()
|
||||
.map_err(|e| std::io::Error::other(e.to_string()))?;
|
||||
let buffer = std::mem::take(writer.get_mut());
|
||||
if buffer.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some((buffer, (data, writer, None, true))))
|
||||
Some(Err(e)) => {
|
||||
// Send the original error through the channel before
|
||||
// returning a generic error to reqwest.
|
||||
if let Some(tx) = error_tx {
|
||||
let _ = tx.send(e);
|
||||
}
|
||||
Err(std::io::Error::other(
|
||||
"input stream error (see error channel)",
|
||||
))
|
||||
}
|
||||
None => {
|
||||
writer
|
||||
.finish()
|
||||
.map_err(|e| std::io::Error::other(e.to_string()))?;
|
||||
let buffer = std::mem::take(writer.get_mut());
|
||||
if buffer.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
if let Some(ref t) = tracker {
|
||||
t.record_bytes(buffer.len());
|
||||
}
|
||||
Ok(Some((buffer, (data, writer, None, true))))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -174,8 +242,11 @@ impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
|
||||
}
|
||||
|
||||
fn required_input_distribution(&self) -> Vec<datafusion_physical_plan::Distribution> {
|
||||
// Until we have a separate commit endpoint, we need to do all inserts in a single partition
|
||||
vec![datafusion_physical_plan::Distribution::SinglePartition]
|
||||
if self.upload_id.is_some() {
|
||||
vec![datafusion_physical_plan::Distribution::UnspecifiedDistribution]
|
||||
} else {
|
||||
vec![datafusion_physical_plan::Distribution::SinglePartition]
|
||||
}
|
||||
}
|
||||
|
||||
fn benefits_from_input_partitioning(&self) -> Vec<bool> {
|
||||
@@ -191,12 +262,14 @@ impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
|
||||
"RemoteInsertExec requires exactly one child".to_string(),
|
||||
));
|
||||
}
|
||||
Ok(Arc::new(Self::new(
|
||||
Ok(Arc::new(Self::new_inner(
|
||||
self.table_name.clone(),
|
||||
self.identifier.clone(),
|
||||
self.client.clone(),
|
||||
children[0].clone(),
|
||||
self.overwrite,
|
||||
self.upload_id.clone(),
|
||||
self.tracker.clone(),
|
||||
)))
|
||||
}
|
||||
|
||||
@@ -205,18 +278,29 @@ impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
|
||||
partition: usize,
|
||||
context: Arc<TaskContext>,
|
||||
) -> DataFusionResult<SendableRecordBatchStream> {
|
||||
if partition != 0 {
|
||||
if self.upload_id.is_none() && partition != 0 {
|
||||
return Err(DataFusionError::Internal(
|
||||
"RemoteInsertExec only supports single partition execution".to_string(),
|
||||
"RemoteInsertExec only supports single partition execution without upload_id"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let input_stream = self.input.execute(0, context)?;
|
||||
let input_stream = self.input.execute(partition, context)?;
|
||||
let input_schema = input_stream.schema();
|
||||
let input_stream: SendableRecordBatchStream =
|
||||
Box::pin(InstrumentedRecordBatchStreamAdapter::new(
|
||||
input_schema,
|
||||
input_stream,
|
||||
partition,
|
||||
&self.metrics,
|
||||
));
|
||||
let client = self.client.clone();
|
||||
let identifier = self.identifier.clone();
|
||||
let overwrite = self.overwrite;
|
||||
let add_result = self.add_result.clone();
|
||||
let table_name = self.table_name.clone();
|
||||
let upload_id = self.upload_id.clone();
|
||||
let tracker = self.tracker.clone();
|
||||
|
||||
let stream = futures::stream::once(async move {
|
||||
let mut request = client
|
||||
@@ -226,9 +310,12 @@ impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
|
||||
if overwrite {
|
||||
request = request.query(&[("mode", "overwrite")]);
|
||||
}
|
||||
if let Some(ref uid) = upload_id {
|
||||
request = request.query(&[("upload_id", uid.as_str())]);
|
||||
}
|
||||
|
||||
let (error_tx, mut error_rx) = tokio::sync::oneshot::channel();
|
||||
let body = Self::stream_as_http_body(input_stream, error_tx)?;
|
||||
let body = Self::stream_as_http_body(input_stream, error_tx, tracker)?;
|
||||
let request = request.body(body);
|
||||
|
||||
let result: DataFusionResult<(String, _)> = async {
|
||||
@@ -262,32 +349,43 @@ impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
|
||||
|
||||
let (request_id, response) = result?;
|
||||
|
||||
let body_text = response.text().await.map_err(|e| {
|
||||
DataFusionError::External(Box::new(Error::Http {
|
||||
source: Box::new(e),
|
||||
request_id: request_id.clone(),
|
||||
status_code: None,
|
||||
}))
|
||||
})?;
|
||||
|
||||
let parsed_result = if body_text.trim().is_empty() {
|
||||
// Backward compatible with old servers
|
||||
AddResult { version: 0 }
|
||||
} else {
|
||||
serde_json::from_str(&body_text).map_err(|e| {
|
||||
// For multipart writes, the staging response is not the final
|
||||
// version. Only parse AddResult for non-multipart inserts.
|
||||
if upload_id.is_none() {
|
||||
let body_text = response.text().await.map_err(|e| {
|
||||
DataFusionError::External(Box::new(Error::Http {
|
||||
source: format!("Failed to parse add response: {}", e).into(),
|
||||
source: Box::new(e),
|
||||
request_id: request_id.clone(),
|
||||
status_code: None,
|
||||
}))
|
||||
})?
|
||||
};
|
||||
})?;
|
||||
|
||||
let parsed_result = if body_text.trim().is_empty() {
|
||||
// Backward compatible with old servers
|
||||
AddResult { version: 0 }
|
||||
} else {
|
||||
serde_json::from_str(&body_text).map_err(|e| {
|
||||
DataFusionError::External(Box::new(Error::Http {
|
||||
source: format!("Failed to parse add response: {}", e).into(),
|
||||
request_id: request_id.clone(),
|
||||
status_code: None,
|
||||
}))
|
||||
})?
|
||||
};
|
||||
|
||||
{
|
||||
let mut res_lock = add_result.lock().map_err(|_| {
|
||||
DataFusionError::Execution("Failed to acquire lock for add_result".to_string())
|
||||
})?;
|
||||
*res_lock = Some(parsed_result);
|
||||
} else {
|
||||
// We don't use the body in this case, but we should still consume it.
|
||||
let _ = response.bytes().await.map_err(|e| {
|
||||
DataFusionError::External(Box::new(Error::Http {
|
||||
source: Box::new(e),
|
||||
request_id: request_id.clone(),
|
||||
status_code: None,
|
||||
}))
|
||||
})?;
|
||||
}
|
||||
|
||||
// Return a single batch with count 0 (actual count is tracked in add_result)
|
||||
@@ -301,6 +399,10 @@ impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
|
||||
stream,
|
||||
)))
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metrics.clone_inner())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -74,7 +74,10 @@ pub mod optimize;
|
||||
pub mod query;
|
||||
pub mod schema_evolution;
|
||||
pub mod update;
|
||||
pub mod write_progress;
|
||||
use crate::index::waiter::wait_for_index;
|
||||
#[cfg(feature = "remote")]
|
||||
pub(crate) use add_data::PreprocessingOutput;
|
||||
pub use add_data::{AddDataBuilder, AddDataMode, AddResult, NaNVectorBehavior};
|
||||
pub use chrono::Duration;
|
||||
pub use delete::DeleteResult;
|
||||
@@ -277,8 +280,13 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
||||
async fn list_indices(&self) -> Result<Vec<IndexConfig>>;
|
||||
/// Drop an index from the table.
|
||||
async fn drop_index(&self, name: &str) -> Result<()>;
|
||||
/// Prewarm an index in the table
|
||||
/// Prewarm an index in the table.
|
||||
async fn prewarm_index(&self, name: &str) -> Result<()>;
|
||||
/// Prewarm data for the table.
|
||||
///
|
||||
/// Currently only supported on remote tables.
|
||||
/// If `columns` is `None`, all columns are prewarmed.
|
||||
async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()>;
|
||||
/// Get statistics about the index.
|
||||
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>>;
|
||||
/// Merge insert new records into the table.
|
||||
@@ -435,6 +443,34 @@ mod test_utils {
|
||||
embedding_registry: Arc::new(MemoryRegistry::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_handler_version_and_config<T>(
|
||||
name: impl Into<String>,
|
||||
version: semver::Version,
|
||||
handler: impl Fn(reqwest::Request) -> http::Response<T> + Clone + Send + Sync + 'static,
|
||||
config: crate::remote::ClientConfig,
|
||||
) -> Self
|
||||
where
|
||||
T: Into<reqwest::Body>,
|
||||
{
|
||||
let inner = Arc::new(
|
||||
crate::remote::table::RemoteTable::new_mock_with_version_and_config(
|
||||
name.into(),
|
||||
handler.clone(),
|
||||
Some(version),
|
||||
config.clone(),
|
||||
),
|
||||
);
|
||||
let database = Arc::new(crate::remote::db::RemoteDatabase::new_mock_with_config(
|
||||
handler, config,
|
||||
));
|
||||
Self {
|
||||
inner,
|
||||
database: Some(database),
|
||||
// Registry is unused.
|
||||
embedding_registry: Arc::new(MemoryRegistry::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -946,17 +982,7 @@ impl Table {
|
||||
/// * Prune: Removes old versions of the dataset
|
||||
/// * Index: Optimizes the indices, adding new data to existing indices
|
||||
///
|
||||
/// <section class="warning">Experimental API</section>
|
||||
///
|
||||
/// The optimization process is undergoing active development and may change.
|
||||
/// Our goal with these changes is to improve the performance of optimization and
|
||||
/// reduce the complexity.
|
||||
///
|
||||
/// That being said, it is essential today to run optimize if you want the best
|
||||
/// performance. It should be stable and safe to use in production, but it our
|
||||
/// hope that the API may be simplified (or not even need to be called) in the future.
|
||||
///
|
||||
/// The frequency an application shoudl call optimize is based on the frequency of
|
||||
/// The frequency an application should call optimize is based on the frequency of
|
||||
/// data modifications. If data is frequently added, deleted, or updated then
|
||||
/// optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
/// you have added or modified 100,000 or more records or run more than 20 data
|
||||
@@ -1123,22 +1149,45 @@ impl Table {
|
||||
self.inner.drop_index(name).await
|
||||
}
|
||||
|
||||
/// Prewarm an index in the table
|
||||
/// Prewarm an index in the table.
|
||||
///
|
||||
/// This is a hint to fully load the index into memory. It can be used to
|
||||
/// avoid cold starts
|
||||
/// This is a hint to the database that the index will be accessed in the
|
||||
/// future and should be loaded into memory if possible. This can reduce
|
||||
/// cold-start latency for subsequent queries.
|
||||
///
|
||||
/// This call initiates prewarming and returns once the request is accepted.
|
||||
/// It is idempotent and safe to call from multiple clients concurrently.
|
||||
///
|
||||
/// It is generally wasteful to call this if the index does not fit into the
|
||||
/// available cache.
|
||||
///
|
||||
/// Note: This function is not yet supported on all indices, in which case it
|
||||
/// may do nothing.
|
||||
/// available cache. Not all index types support prewarming; unsupported
|
||||
/// indices will silently ignore the request.
|
||||
///
|
||||
/// Use [`Self::list_indices()`] to find the names of the indices.
|
||||
pub async fn prewarm_index(&self, name: &str) -> Result<()> {
|
||||
self.inner.prewarm_index(name).await
|
||||
}
|
||||
|
||||
/// Prewarm data for the table.
|
||||
///
|
||||
/// This is a hint to the database that the given columns will be accessed in
|
||||
/// the future and the database should prefetch the data if possible. This
|
||||
/// can reduce cold-start latency for subsequent queries. Currently only
|
||||
/// supported on remote tables.
|
||||
///
|
||||
/// This call initiates prewarming and returns once the request is accepted.
|
||||
/// It is idempotent and safe to call from multiple clients concurrently —
|
||||
/// calling it on already-prewarmed columns is a no-op on the server.
|
||||
///
|
||||
/// This operation has a large upfront cost but can speed up future queries
|
||||
/// that need to fetch the given columns. Large columns such as embeddings
|
||||
/// or binary data may not be practical to prewarm. This feature is intended
|
||||
/// for workloads that issue many queries against the same columns.
|
||||
///
|
||||
/// If `columns` is `None`, all columns are prewarmed.
|
||||
pub async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
|
||||
self.inner.prewarm_data(columns).await
|
||||
}
|
||||
|
||||
/// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
|
||||
/// are not fully indexed within the timeout.
|
||||
pub async fn wait_for_index(
|
||||
@@ -2180,21 +2229,26 @@ impl BaseTable for NativeTable {
|
||||
|
||||
let table_schema = Schema::from(&ds.schema().clone());
|
||||
|
||||
// Peek at the first batch to estimate a good partition count for
|
||||
// write parallelism.
|
||||
let mut peeked = PeekedScannable::new(add.data);
|
||||
let num_partitions = if let Some(first_batch) = peeked.peek().await {
|
||||
let max_partitions = lance_core::utils::tokio::get_num_compute_intensive_cpus();
|
||||
estimate_write_partitions(
|
||||
first_batch.get_array_memory_size(),
|
||||
first_batch.num_rows(),
|
||||
peeked.num_rows(),
|
||||
max_partitions,
|
||||
)
|
||||
let num_partitions = if let Some(parallelism) = add.write_parallelism {
|
||||
parallelism
|
||||
} else {
|
||||
1
|
||||
// Peek at the first batch to estimate a good partition count for
|
||||
// write parallelism.
|
||||
let mut peeked = PeekedScannable::new(add.data);
|
||||
let n = if let Some(first_batch) = peeked.peek().await {
|
||||
let max_partitions = lance_core::utils::tokio::get_num_compute_intensive_cpus();
|
||||
estimate_write_partitions(
|
||||
first_batch.get_array_memory_size(),
|
||||
first_batch.num_rows(),
|
||||
peeked.num_rows(),
|
||||
max_partitions,
|
||||
)
|
||||
} else {
|
||||
1
|
||||
};
|
||||
add.data = Box::new(peeked);
|
||||
n
|
||||
};
|
||||
add.data = Box::new(peeked);
|
||||
|
||||
let output = add.into_plan(&table_schema, &table_def)?;
|
||||
|
||||
@@ -2223,13 +2277,21 @@ impl BaseTable for NativeTable {
|
||||
|
||||
let insert_exec = Arc::new(InsertExec::new(ds_wrapper.clone(), ds, plan, lance_params));
|
||||
|
||||
let tracker_for_tasks = output.tracker.clone();
|
||||
if let Some(ref t) = tracker_for_tasks {
|
||||
t.set_total_tasks(num_partitions);
|
||||
}
|
||||
let _finish = write_progress::FinishOnDrop(output.tracker);
|
||||
|
||||
// Execute all partitions in parallel.
|
||||
let task_ctx = Arc::new(TaskContext::default());
|
||||
let handles = FuturesUnordered::new();
|
||||
for partition in 0..num_partitions {
|
||||
let exec = insert_exec.clone();
|
||||
let ctx = task_ctx.clone();
|
||||
let tracker = tracker_for_tasks.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
let _guard = tracker.as_ref().map(|t| t.track_task());
|
||||
let mut stream = exec
|
||||
.execute(partition, ctx)
|
||||
.map_err(|e| -> Error { e.into() })?;
|
||||
@@ -2290,6 +2352,12 @@ impl BaseTable for NativeTable {
|
||||
Ok(dataset.prewarm_index(index_name).await?)
|
||||
}
|
||||
|
||||
async fn prewarm_data(&self, _columns: Option<Vec<String>>) -> Result<()> {
|
||||
Err(Error::NotSupported {
|
||||
message: "prewarm_data is currently only supported on remote tables.".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
|
||||
// Delegate to the submodule implementation
|
||||
update::execute_update(self, update).await
|
||||
|
||||
@@ -13,6 +13,9 @@ use crate::embeddings::EmbeddingRegistry;
|
||||
use crate::table::datafusion::cast::cast_to_table_schema;
|
||||
use crate::table::datafusion::reject_nan::reject_nan_vectors;
|
||||
use crate::table::datafusion::scannable_exec::ScannableExec;
|
||||
use crate::table::write_progress::ProgressCallback;
|
||||
use crate::table::write_progress::WriteProgress;
|
||||
use crate::table::write_progress::WriteProgressTracker;
|
||||
use crate::{Error, Result};
|
||||
|
||||
use super::{BaseTable, TableDefinition, WriteOptions};
|
||||
@@ -52,6 +55,8 @@ pub struct AddDataBuilder {
|
||||
pub(crate) write_options: WriteOptions,
|
||||
pub(crate) on_nan_vectors: NaNVectorBehavior,
|
||||
pub(crate) embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
|
||||
pub(crate) progress_callback: Option<ProgressCallback>,
|
||||
pub(crate) write_parallelism: Option<usize>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for AddDataBuilder {
|
||||
@@ -77,6 +82,8 @@ impl AddDataBuilder {
|
||||
write_options: WriteOptions::default(),
|
||||
on_nan_vectors: NaNVectorBehavior::default(),
|
||||
embedding_registry,
|
||||
progress_callback: None,
|
||||
write_parallelism: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,7 +108,43 @@ impl AddDataBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a callback to receive progress updates during the add operation.
|
||||
///
|
||||
/// The callback is invoked once per batch written, and once more with
|
||||
/// [`WriteProgress::done`] set to `true` when the write completes.
|
||||
///
|
||||
/// ```
|
||||
/// # use lancedb::Table;
|
||||
/// # async fn example(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let batch = arrow_array::record_batch!(("id", Int32, [1, 2, 3])).unwrap();
|
||||
/// table.add(batch)
|
||||
/// .progress(|p| println!("{}/{:?} rows", p.output_rows(), p.total_rows()))
|
||||
/// .execute()
|
||||
/// .await?;
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn progress(mut self, callback: impl FnMut(&WriteProgress) + Send + 'static) -> Self {
|
||||
self.progress_callback = Some(Arc::new(std::sync::Mutex::new(callback)));
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the number of parallel write streams.
|
||||
///
|
||||
/// By default, the number of streams is estimated from the data size.
|
||||
/// Setting this to `1` disables parallel writes.
|
||||
pub fn write_parallelism(mut self, parallelism: usize) -> Self {
|
||||
self.write_parallelism = Some(parallelism);
|
||||
self
|
||||
}
|
||||
|
||||
pub async fn execute(self) -> Result<AddResult> {
|
||||
if self.write_parallelism.map(|p| p == 0).unwrap_or(false) {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "write_parallelism must be greater than 0".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
self.parent.clone().add(self).await
|
||||
}
|
||||
|
||||
@@ -130,8 +173,11 @@ impl AddDataBuilder {
|
||||
scannable_with_embeddings(self.data, table_def, self.embedding_registry.as_ref())?;
|
||||
|
||||
let rescannable = self.data.rescannable();
|
||||
let tracker = self
|
||||
.progress_callback
|
||||
.map(|cb| Arc::new(WriteProgressTracker::new(cb, self.data.num_rows())));
|
||||
let plan: Arc<dyn datafusion_physical_plan::ExecutionPlan> =
|
||||
Arc::new(ScannableExec::new(self.data));
|
||||
Arc::new(ScannableExec::new(self.data, tracker.clone()));
|
||||
// Skip casting when overwriting — the input schema replaces the table schema.
|
||||
let plan = if overwrite {
|
||||
plan
|
||||
@@ -149,6 +195,7 @@ impl AddDataBuilder {
|
||||
rescannable,
|
||||
write_options: self.write_options,
|
||||
mode: self.mode,
|
||||
tracker,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -161,6 +208,7 @@ pub struct PreprocessingOutput {
|
||||
pub rescannable: bool,
|
||||
pub write_options: WriteOptions,
|
||||
pub mode: AddDataMode,
|
||||
pub tracker: Option<Arc<WriteProgressTracker>>,
|
||||
}
|
||||
|
||||
/// Check that the input schema is valid for insert.
|
||||
|
||||
@@ -12,13 +12,16 @@ use datafusion_common::{DataFusionError, Result as DataFusionResult};
|
||||
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
|
||||
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
|
||||
use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
|
||||
use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet};
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use datafusion_physical_plan::{
|
||||
DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use lance::Dataset;
|
||||
use lance::dataset::transaction::{Operation, Transaction};
|
||||
use lance::dataset::{CommitBuilder, InsertBuilder, WriteParams};
|
||||
use lance::io::exec::utils::InstrumentedRecordBatchStreamAdapter;
|
||||
use lance_table::format::Fragment;
|
||||
|
||||
use crate::table::dataset::DatasetConsistencyWrapper;
|
||||
@@ -80,6 +83,7 @@ pub struct InsertExec {
|
||||
write_params: WriteParams,
|
||||
properties: PlanProperties,
|
||||
partial_transactions: Arc<Mutex<Vec<Transaction>>>,
|
||||
metrics: ExecutionPlanMetricsSet,
|
||||
}
|
||||
|
||||
impl InsertExec {
|
||||
@@ -105,6 +109,7 @@ impl InsertExec {
|
||||
write_params,
|
||||
properties,
|
||||
partial_transactions: Arc::new(Mutex::new(Vec::with_capacity(num_partitions))),
|
||||
metrics: ExecutionPlanMetricsSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -176,6 +181,19 @@ impl ExecutionPlan for InsertExec {
|
||||
let total_partitions = self.input.output_partitioning().partition_count();
|
||||
let ds_wrapper = self.ds_wrapper.clone();
|
||||
|
||||
let output_bytes = MetricBuilder::new(&self.metrics).output_bytes(partition);
|
||||
let input_schema = input_stream.schema();
|
||||
let input_stream: SendableRecordBatchStream =
|
||||
Box::pin(InstrumentedRecordBatchStreamAdapter::new(
|
||||
input_schema,
|
||||
input_stream.map_ok(move |batch| {
|
||||
output_bytes.add(batch.get_array_memory_size());
|
||||
batch
|
||||
}),
|
||||
partition,
|
||||
&self.metrics,
|
||||
));
|
||||
|
||||
let stream = futures::stream::once(async move {
|
||||
let transaction = InsertBuilder::new(dataset.clone())
|
||||
.with_params(&write_params)
|
||||
@@ -215,6 +233,10 @@ impl ExecutionPlan for InsertExec {
|
||||
stream,
|
||||
)))
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metrics.clone_inner())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -7,17 +7,21 @@ use std::sync::{Arc, Mutex};
|
||||
use datafusion_common::{DataFusionError, Result as DFResult, Statistics, stats::Precision};
|
||||
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
|
||||
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use datafusion_physical_plan::{
|
||||
DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, execution_plan::EmissionType,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
|
||||
use crate::table::write_progress::WriteProgressTracker;
|
||||
use crate::{arrow::SendableRecordBatchStreamExt, data::scannable::Scannable};
|
||||
|
||||
pub struct ScannableExec {
|
||||
// We don't require Scannable to by Sync, so we wrap it in a Mutex to allow safe concurrent access.
|
||||
pub(crate) struct ScannableExec {
|
||||
// We don't require Scannable to be Sync, so we wrap it in a Mutex to allow safe concurrent access.
|
||||
source: Mutex<Box<dyn Scannable>>,
|
||||
num_rows: Option<usize>,
|
||||
properties: PlanProperties,
|
||||
tracker: Option<Arc<WriteProgressTracker>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ScannableExec {
|
||||
@@ -30,7 +34,7 @@ impl std::fmt::Debug for ScannableExec {
|
||||
}
|
||||
|
||||
impl ScannableExec {
|
||||
pub fn new(source: Box<dyn Scannable>) -> Self {
|
||||
pub fn new(source: Box<dyn Scannable>, tracker: Option<Arc<WriteProgressTracker>>) -> Self {
|
||||
let schema = source.schema();
|
||||
let eq_properties = EquivalenceProperties::new(schema);
|
||||
let properties = PlanProperties::new(
|
||||
@@ -46,6 +50,7 @@ impl ScannableExec {
|
||||
source,
|
||||
num_rows,
|
||||
properties,
|
||||
tracker,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -102,7 +107,18 @@ impl ExecutionPlan for ScannableExec {
|
||||
Err(poison) => poison.into_inner().scan_as_stream(),
|
||||
};
|
||||
|
||||
Ok(stream.into_df_stream())
|
||||
let tracker = self.tracker.clone();
|
||||
let stream = stream.into_df_stream().map_ok(move |batch| {
|
||||
if let Some(ref t) = tracker {
|
||||
t.record_batch(batch.num_rows(), batch.get_array_memory_size());
|
||||
}
|
||||
batch
|
||||
});
|
||||
|
||||
Ok(Box::pin(RecordBatchStreamAdapter::new(
|
||||
self.schema(),
|
||||
stream,
|
||||
)))
|
||||
}
|
||||
|
||||
fn partition_statistics(&self, _partition: Option<usize>) -> DFResult<Statistics> {
|
||||
|
||||
@@ -64,6 +64,9 @@ pub enum OptimizeAction {
|
||||
older_than: Option<Duration>,
|
||||
/// Because they may be part of an in-progress transaction, files newer than 7 days old are not deleted by default.
|
||||
/// If you are sure that there are no in-progress transactions, then you can set this to True to delete all files older than `older_than`.
|
||||
///
|
||||
/// **WARNING**: This should only be set to true if you can guarantee that no other process is
|
||||
/// currently working on this dataset. Otherwise the dataset could be put into a corrupted state.
|
||||
delete_unverified: Option<bool>,
|
||||
/// If true, an error will be returned if there are any old versions that are still tagged.
|
||||
error_if_tagged_old_versions: Option<bool>,
|
||||
@@ -117,6 +120,10 @@ pub(crate) async fn optimize_indices(table: &NativeTable, options: &OptimizeOpti
|
||||
/// If you are sure that there are no in-progress transactions, then you
|
||||
/// can set this to True to delete all files older than `older_than`.
|
||||
///
|
||||
/// **WARNING**: This should only be set to true if you can guarantee that
|
||||
/// no other process is currently working on this dataset. Otherwise the
|
||||
/// dataset could be put into a corrupted state.
|
||||
///
|
||||
/// This calls into [lance::dataset::Dataset::cleanup_old_versions] and
|
||||
/// returns the result.
|
||||
pub(crate) async fn cleanup_old_versions(
|
||||
|
||||
@@ -186,6 +186,13 @@ pub async fn create_plan(
|
||||
Select::Dynamic(ref select_with_transform) => {
|
||||
scanner.project_with_transform(select_with_transform.as_slice())?;
|
||||
}
|
||||
Select::Expr(ref expr_pairs) => {
|
||||
let sql_pairs: crate::Result<Vec<(String, String)>> = expr_pairs
|
||||
.iter()
|
||||
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
|
||||
.collect();
|
||||
scanner.project_with_transform(sql_pairs?.as_slice())?;
|
||||
}
|
||||
Select::All => {}
|
||||
}
|
||||
|
||||
@@ -340,6 +347,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
Select::Expr(pairs) => {
|
||||
let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
|
||||
.iter()
|
||||
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
|
||||
.collect();
|
||||
let sql_pairs = sql_pairs?;
|
||||
Some(Box::new(QueryTableRequestColumns {
|
||||
column_names: None,
|
||||
column_aliases: Some(sql_pairs.into_iter().collect()),
|
||||
}))
|
||||
}
|
||||
};
|
||||
|
||||
// Check for unsupported features
|
||||
@@ -411,6 +429,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
Select::Expr(pairs) => {
|
||||
let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
|
||||
.iter()
|
||||
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
|
||||
.collect();
|
||||
let sql_pairs = sql_pairs?;
|
||||
Some(Box::new(QueryTableRequestColumns {
|
||||
column_names: None,
|
||||
column_aliases: Some(sql_pairs.into_iter().collect()),
|
||||
}))
|
||||
}
|
||||
};
|
||||
|
||||
// Handle full text search if present
|
||||
|
||||
379
rust/lancedb/src/table/write_progress.rs
Normal file
379
rust/lancedb/src/table/write_progress.rs
Normal file
@@ -0,0 +1,379 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
//! Progress monitoring for write operations.
|
||||
//!
|
||||
//! You can add a callback to process progress in [`crate::table::AddDataBuilder::progress`].
|
||||
//! [`WriteProgress`] is the struct passed to the callback.
|
||||
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Progress snapshot for a write operation.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct WriteProgress {
|
||||
// These are private and only accessible via getters, to make it easy to add
|
||||
// new fields without breaking existing callbacks.
|
||||
elapsed: Duration,
|
||||
output_rows: usize,
|
||||
output_bytes: usize,
|
||||
total_rows: Option<usize>,
|
||||
active_tasks: usize,
|
||||
total_tasks: usize,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
impl WriteProgress {
|
||||
/// Wall-clock time since monitoring started.
|
||||
pub fn elapsed(&self) -> Duration {
|
||||
self.elapsed
|
||||
}
|
||||
|
||||
/// Number of rows written so far.
|
||||
pub fn output_rows(&self) -> usize {
|
||||
self.output_rows
|
||||
}
|
||||
|
||||
/// Number of bytes written so far.
|
||||
pub fn output_bytes(&self) -> usize {
|
||||
self.output_bytes
|
||||
}
|
||||
|
||||
/// Total rows expected.
|
||||
///
|
||||
/// Populated when the input source reports a row count (e.g. a
|
||||
/// [`arrow_array::RecordBatch`]). Always `Some` when [`WriteProgress::done`]
|
||||
/// is `true` — falling back to the actual number of rows written.
|
||||
pub fn total_rows(&self) -> Option<usize> {
|
||||
self.total_rows
|
||||
}
|
||||
|
||||
/// Number of parallel write tasks currently in flight.
|
||||
pub fn active_tasks(&self) -> usize {
|
||||
self.active_tasks
|
||||
}
|
||||
|
||||
/// Total number of parallel write tasks (i.e. the write parallelism).
|
||||
pub fn total_tasks(&self) -> usize {
|
||||
self.total_tasks
|
||||
}
|
||||
|
||||
/// Whether the write operation has completed.
|
||||
///
|
||||
/// The final callback always has `done = true`. Callers can use this to
|
||||
/// finalize progress bars or perform cleanup.
|
||||
pub fn done(&self) -> bool {
|
||||
self.done
|
||||
}
|
||||
}
|
||||
|
||||
/// Callback type for progress updates.
|
||||
///
|
||||
/// Callbacks are serialized by the tracker and are never invoked reentrantly,
|
||||
/// so `FnMut` is safe to use here.
|
||||
pub type ProgressCallback = Arc<Mutex<dyn FnMut(&WriteProgress) + Send>>;
|
||||
|
||||
/// Tracks progress of a write operation and invokes a [`ProgressCallback`].
|
||||
///
|
||||
/// Call [`WriteProgressTracker::record_batch`] for each batch written.
|
||||
/// Call [`WriteProgressTracker::finish`] once after all data is written.
|
||||
///
|
||||
/// The callback is never invoked reentrantly: all state updates and callback
|
||||
/// invocations are serialized behind a single lock.
|
||||
impl std::fmt::Debug for WriteProgressTracker {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("WriteProgressTracker")
|
||||
.field("total_rows", &self.total_rows)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct WriteProgressTracker {
|
||||
rows_and_bytes: std::sync::Mutex<(usize, usize)>,
|
||||
/// Wire bytes tracked separately by the insert layer. When set (> 0),
|
||||
/// this takes precedence over the in-memory bytes from `rows_and_bytes`.
|
||||
wire_bytes: AtomicUsize,
|
||||
active_tasks: Arc<AtomicUsize>,
|
||||
total_tasks: AtomicUsize,
|
||||
start: Instant,
|
||||
/// Known total rows from the input source, if available.
|
||||
total_rows: Option<usize>,
|
||||
callback: ProgressCallback,
|
||||
}
|
||||
|
||||
impl WriteProgressTracker {
|
||||
pub fn new(callback: ProgressCallback, total_rows: Option<usize>) -> Self {
|
||||
Self {
|
||||
rows_and_bytes: std::sync::Mutex::new((0, 0)),
|
||||
wire_bytes: AtomicUsize::new(0),
|
||||
active_tasks: Arc::new(AtomicUsize::new(0)),
|
||||
total_tasks: AtomicUsize::new(1),
|
||||
start: Instant::now(),
|
||||
total_rows,
|
||||
callback,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the total number of parallel write tasks (the write parallelism).
|
||||
pub fn set_total_tasks(&self, n: usize) {
|
||||
self.total_tasks.store(n, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Increment the active task count. Returns a guard that decrements on drop.
|
||||
pub fn track_task(&self) -> ActiveTaskGuard {
|
||||
self.active_tasks.fetch_add(1, Ordering::Relaxed);
|
||||
ActiveTaskGuard(self.active_tasks.clone())
|
||||
}
|
||||
|
||||
/// Record a batch of rows passing through the scan node.
|
||||
pub fn record_batch(&self, rows: usize, bytes: usize) {
|
||||
// Lock order: callback first, then rows_and_bytes. This is the only
|
||||
// order used anywhere, so deadlocks cannot occur.
|
||||
let mut cb = self.callback.lock().unwrap();
|
||||
let mut guard = self.rows_and_bytes.lock().unwrap();
|
||||
guard.0 += rows;
|
||||
guard.1 += bytes;
|
||||
let progress = self.snapshot(guard.0, guard.1, false);
|
||||
drop(guard);
|
||||
cb(&progress);
|
||||
}
|
||||
|
||||
/// Record wire bytes from the insert layer (e.g. IPC-encoded bytes for
|
||||
/// remote writes). When wire bytes are recorded, they take precedence over
|
||||
/// the in-memory Arrow bytes tracked by [`record_batch`].
|
||||
pub fn record_bytes(&self, bytes: usize) {
|
||||
self.wire_bytes.fetch_add(bytes, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Emit the final progress callback indicating the write is complete.
|
||||
///
|
||||
/// `total_rows` is always `Some` on the final callback: it uses the known
|
||||
/// total if available, or falls back to the number of rows actually written.
|
||||
pub fn finish(&self) {
|
||||
let mut cb = self.callback.lock().unwrap();
|
||||
let guard = self.rows_and_bytes.lock().unwrap();
|
||||
let mut snap = self.snapshot(guard.0, guard.1, true);
|
||||
snap.total_rows = Some(self.total_rows.unwrap_or(guard.0));
|
||||
drop(guard);
|
||||
cb(&snap);
|
||||
}
|
||||
|
||||
fn snapshot(&self, rows: usize, in_memory_bytes: usize, done: bool) -> WriteProgress {
|
||||
let wire = self.wire_bytes.load(Ordering::Relaxed);
|
||||
// Prefer wire bytes (actual I/O size) when the insert layer is
|
||||
// tracking them; fall back to in-memory Arrow size otherwise.
|
||||
// TODO: for local writes, track actual bytes written by Lance
|
||||
// instead of using in-memory Arrow size as a proxy.
|
||||
let output_bytes = if wire > 0 { wire } else { in_memory_bytes };
|
||||
WriteProgress {
|
||||
elapsed: self.start.elapsed(),
|
||||
output_rows: rows,
|
||||
output_bytes,
|
||||
total_rows: self.total_rows,
|
||||
active_tasks: self.active_tasks.load(Ordering::Relaxed),
|
||||
total_tasks: self.total_tasks.load(Ordering::Relaxed),
|
||||
done,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// RAII guard that decrements the active task count when dropped.
|
||||
pub(crate) struct ActiveTaskGuard(Arc<AtomicUsize>);
|
||||
|
||||
impl Drop for ActiveTaskGuard {
|
||||
fn drop(&mut self) {
|
||||
self.0.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// RAII guard that calls [`WriteProgressTracker::finish`] on drop.
|
||||
///
|
||||
/// This ensures the final `done=true` callback fires even if the write
|
||||
/// errors or the future is cancelled.
|
||||
pub(crate) struct FinishOnDrop(pub Option<Arc<WriteProgressTracker>>);
|
||||
|
||||
impl Drop for FinishOnDrop {
|
||||
fn drop(&mut self) {
|
||||
if let Some(t) = self.0.take() {
|
||||
t.finish();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use arrow_array::record_batch;
|
||||
|
||||
use crate::connect;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_progress_monitor_fires_callback() {
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
|
||||
let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap();
|
||||
let table = db
|
||||
.create_table("progress_test", batch)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let callback_count = Arc::new(AtomicUsize::new(0));
|
||||
let last_rows = Arc::new(AtomicUsize::new(0));
|
||||
let max_active = Arc::new(AtomicUsize::new(0));
|
||||
let last_total_tasks = Arc::new(AtomicUsize::new(0));
|
||||
let cb_count = callback_count.clone();
|
||||
let cb_rows = last_rows.clone();
|
||||
let cb_active = max_active.clone();
|
||||
let cb_total_tasks = last_total_tasks.clone();
|
||||
|
||||
let new_data = record_batch!(("id", Int32, [4, 5, 6])).unwrap();
|
||||
table
|
||||
.add(new_data)
|
||||
.progress(move |p| {
|
||||
cb_count.fetch_add(1, Ordering::SeqCst);
|
||||
cb_rows.store(p.output_rows(), Ordering::SeqCst);
|
||||
cb_active.fetch_max(p.active_tasks(), Ordering::SeqCst);
|
||||
cb_total_tasks.store(p.total_tasks(), Ordering::SeqCst);
|
||||
})
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 6);
|
||||
assert!(callback_count.load(Ordering::SeqCst) >= 1);
|
||||
// Progress tracks the newly inserted rows, not the total table size.
|
||||
assert_eq!(last_rows.load(Ordering::SeqCst), 3);
|
||||
// At least one callback should have seen an active task.
|
||||
assert!(max_active.load(Ordering::SeqCst) >= 1);
|
||||
// total_tasks should reflect the write parallelism.
|
||||
assert!(last_total_tasks.load(Ordering::SeqCst) >= 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_progress_done_fires_at_end() {
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap();
|
||||
let table = db
|
||||
.create_table("progress_done", batch)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let seen_done = Arc::new(std::sync::Mutex::new(Vec::<bool>::new()));
|
||||
let seen = seen_done.clone();
|
||||
|
||||
let new_data = record_batch!(("id", Int32, [4, 5, 6])).unwrap();
|
||||
table
|
||||
.add(new_data)
|
||||
.progress(move |p| {
|
||||
seen.lock().unwrap().push(p.done());
|
||||
})
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let done_flags = seen_done.lock().unwrap();
|
||||
assert!(!done_flags.is_empty(), "at least one callback must fire");
|
||||
// Only the last callback should have done=true.
|
||||
let last = *done_flags.last().unwrap();
|
||||
assert!(last, "last callback must have done=true");
|
||||
// All earlier callbacks should have done=false.
|
||||
for &d in done_flags.iter().rev().skip(1) {
|
||||
assert!(!d, "non-final callbacks must have done=false");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_progress_total_rows_known() {
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
|
||||
let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap();
|
||||
let table = db
|
||||
.create_table("total_known", batch)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let seen_total = Arc::new(std::sync::Mutex::new(Vec::new()));
|
||||
let seen = seen_total.clone();
|
||||
|
||||
// RecordBatch implements Scannable with num_rows() -> Some(3)
|
||||
let new_data = record_batch!(("id", Int32, [4, 5, 6])).unwrap();
|
||||
table
|
||||
.add(new_data)
|
||||
.progress(move |p| {
|
||||
seen.lock().unwrap().push(p.total_rows());
|
||||
})
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let totals = seen_total.lock().unwrap();
|
||||
// All callbacks (including done) should have total_rows = Some(3)
|
||||
assert!(
|
||||
totals.contains(&Some(3)),
|
||||
"expected total_rows=Some(3) in at least one callback, got: {:?}",
|
||||
*totals
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_progress_total_rows_unknown() {
|
||||
use arrow_array::RecordBatchIterator;
|
||||
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
|
||||
let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap();
|
||||
let table = db
|
||||
.create_table("total_unknown", batch)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let seen_total = Arc::new(std::sync::Mutex::new(Vec::new()));
|
||||
let seen = seen_total.clone();
|
||||
|
||||
// RecordBatchReader does not provide num_rows, so total_rows should be
|
||||
// None in intermediate callbacks but always Some on the done callback.
|
||||
let schema = arrow_schema::Schema::new(vec![arrow_schema::Field::new(
|
||||
"id",
|
||||
arrow_schema::DataType::Int32,
|
||||
false,
|
||||
)]);
|
||||
let new_data: Box<dyn arrow_array::RecordBatchReader + Send> =
|
||||
Box::new(RecordBatchIterator::new(
|
||||
vec![Ok(record_batch!(("id", Int32, [4, 5, 6])).unwrap())],
|
||||
Arc::new(schema),
|
||||
));
|
||||
table
|
||||
.add(new_data)
|
||||
.progress(move |p| {
|
||||
seen.lock().unwrap().push((p.total_rows(), p.done()));
|
||||
})
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let entries = seen_total.lock().unwrap();
|
||||
assert!(!entries.is_empty(), "at least one callback must fire");
|
||||
for (total, done) in entries.iter() {
|
||||
if *done {
|
||||
assert!(
|
||||
total.is_some(),
|
||||
"done callback must have total_rows set, got: {:?}",
|
||||
total
|
||||
);
|
||||
} else {
|
||||
assert_eq!(
|
||||
*total, None,
|
||||
"intermediate callback must have total_rows=None, got: {:?}",
|
||||
total
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user