Compare commits

...

10 Commits

Author SHA1 Message Date
Lance Release
79a1cd60ee Bump version: 0.25.2-beta.0 → 0.25.2-beta.1 2025-09-30 19:30:39 +00:00
Colin Patrick McCabe
88807a59a4 fix: have CI download from ci-support-binaries (#2692)
Have CI download from ci-support-binaries to fix the build.
2025-09-30 11:54:43 -07:00
Jack Ye
e0e7e01ea8 fix: inflated release size due to lance-namespace transitive dependency (#2691)
Fixed the issue on lance-namespace side to avoid pinning to a specific
lance version. This should fix the issue of the increased release
artifact size and build time.
2025-09-30 11:18:32 -07:00
Ayush Chaurasia
a416ebc11d fix: use correct nodejs path for ci (#2689) 2025-09-30 14:18:42 +05:30
Ayush Chaurasia
f941054baf docs: fix doc deployment and remove recipes workflow trigger (#2688) 2025-09-30 13:10:39 +05:30
Ayush Chaurasia
1a81c46505 docs: transition to new docs (#2681) 2025-09-29 11:37:08 +05:30
Colin Patrick McCabe
82b25a71e9 feat: add support for test_remote_connections (#2666)
Add a new test feature which allows for running the lancedb tests
against a remote server. Convert over a few tests in src/connection.rs
as a proof of concept.

To make local development easier, the remote tests can be run locally
from a Makefile. This file can also be used to run the feature tests,
with a single invocation of 'make'. (The feature tests require bringing
up a docker compose environment.)
2025-09-26 11:24:43 -07:00
Jack Ye
13c613d45f chore: upgrade lance to v0.37.1-beta.1 (#2682) 2025-09-25 23:12:09 -07:00
Weston Pace
e07389a36c feat: allow bitmap indexes on large-string, binary, large-binary, and bitmap (#2678)
The underlying `pylance` already supported this, it was just blocked out
by an over-eager validation function

Closes #1981
2025-09-25 09:46:42 -07:00
Lance Release
e7e9e80b1d Bump version: 0.22.1 → 0.22.2-beta.0 2025-09-24 22:54:54 +00:00
39 changed files with 1113 additions and 1578 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.22.1" current_version = "0.22.2-beta.0"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -56,8 +56,9 @@ jobs:
with: with:
node-version: 20 node-version: 20
cache: 'npm' cache: 'npm'
cache-dependency-path: docs/package-lock.json
- name: Install node dependencies - name: Install node dependencies
working-directory: node working-directory: nodejs
run: | run: |
sudo apt update sudo apt update
sudo apt install -y protobuf-compiler libssl-dev sudo apt install -y protobuf-compiler libssl-dev

View File

@@ -96,6 +96,7 @@ jobs:
# Need up-to-date compilers for kernels # Need up-to-date compilers for kernels
CC: clang-18 CC: clang-18
CXX: clang++-18 CXX: clang++-18
GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
@@ -117,15 +118,14 @@ jobs:
sudo chmod 600 /swapfile sudo chmod 600 /swapfile
sudo mkswap /swapfile sudo mkswap /swapfile
sudo swapon /swapfile sudo swapon /swapfile
- name: Start S3 integration test environment
working-directory: .
run: docker compose up --detach --wait
- name: Build - name: Build
run: cargo build --all-features --tests --locked --examples run: cargo build --all-features --tests --locked --examples
- name: Run tests - name: Run feature tests
run: cargo test --all-features --locked run: make -C ./lancedb feature-tests
- name: Run examples - name: Run examples
run: cargo run --example simple --locked run: cargo run --example simple --locked
- name: Run remote tests
run: make -C ./lancedb remote-tests
macos: macos:
timeout-minutes: 30 timeout-minutes: 30

View File

@@ -1,26 +0,0 @@
name: Trigger vectordb-recipers workflow
on:
push:
branches: [ main ]
pull_request:
paths:
- .github/workflows/trigger-vectordb-recipes.yml
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Trigger vectordb-recipes workflow
uses: actions/github-script@v6
with:
github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
script: |
const result = await github.rest.actions.createWorkflowDispatch({
owner: 'lancedb',
repo: 'vectordb-recipes',
workflow_id: 'examples-test.yml',
ref: 'main'
});
console.log(result);

2117
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,15 +15,15 @@ categories = ["database-implementations"]
rust-version = "1.78.0" rust-version = "1.78.0"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"] } lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.37.0", default-features = false } lance-io = { "version" = "=0.37.0", default-features = false, "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-index = "=0.37.0" lance-index = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = "=0.37.0" lance-linalg = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-table = "=0.37.0" lance-table = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = "=0.37.0" lance-testing = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = "=0.37.0" lance-datafusion = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = "=0.37.0" lance-encoding = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace = "0.0.15" lance-namespace = "0.0.16"
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false } arrow = { version = "55.1", optional = false }
arrow-array = "55.1" arrow-array = "55.1"
@@ -31,7 +31,6 @@ arrow-data = "55.1"
arrow-ipc = "55.1" arrow-ipc = "55.1"
arrow-ord = "55.1" arrow-ord = "55.1"
arrow-schema = "55.1" arrow-schema = "55.1"
arrow-arith = "55.1"
arrow-cast = "55.1" arrow-cast = "55.1"
async-trait = "0" async-trait = "0"
datafusion = { version = "49.0", default-features = false } datafusion = { version = "49.0", default-features = false }
@@ -52,7 +51,6 @@ pin-project = "1.0.7"
snafu = "0.8" snafu = "0.8"
url = "2" url = "2"
num-traits = "0.2" num-traits = "0.2"
rand = "0.9"
regex = "1.10" regex = "1.10"
lazy_static = "1" lazy_static = "1"
semver = "1.0.25" semver = "1.0.25"
@@ -60,7 +58,16 @@ crunchy = "0.2.4"
# Temporary pins to work around downstream issues # Temporary pins to work around downstream issues
# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b # https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
chrono = "=0.4.41" chrono = "=0.4.41"
# https://github.com/RustCrypto/formats/issues/1684
base64ct = "=1.6.0"
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306 # Workaround for: https://github.com/Lokathor/bytemuck/issues/306
bytemuck_derive = ">=1.8.1, <1.9.0" bytemuck_derive = ">=1.8.1, <1.9.0"
[patch.crates-io]
# Force to use the same lance version as the rest of the project to avoid duplicate dependencies
lance = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }

View File

@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Launch a local lancedb server for tests. The single positional argument
# (${1}) is passed through as the server's data directory.
#
# NOTE(review): RUST_LOG=info presumably sets the server's log verbosity;
# confirm the server honours this variable.
export RUST_LOG=info
# `exec` replaces this shell with the server process, so a signal sent to the
# PID of this script reaches the server directly.
# NOTE(review): port 0 presumably asks the OS for an ephemeral port, with the
# chosen port reported in the server's output; confirm against the CLI docs.
exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"

18
ci/run_with_docker_compose.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# A script for running the given command together with a docker compose environment.
#
# Bring down the docker compose project named "fixture". Installed as an EXIT
# trap below, so it runs whenever this script exits.
tear_down() {
docker compose -p fixture down
}
trap tear_down EXIT
# `set +xe` turns xtrace and errexit OFF.
# NOTE(review): if the intent was to enable tracing / fail-fast, this should be
# `set -xe` -- confirm with the author.
set +xe
# Bring up the "fixture" compose project and wait for it to report ready; abort
# (triggering tear_down via the EXIT trap) if that fails.
# NOTE(review): nothing here cleans up a pre-existing setup before `up`; only
# the EXIT trap calls `down`.
docker compose -p fixture up --detach --wait || exit 1
# Run the command given on this script's command line.
"${@}"

68
ci/run_with_test_connection.sh Executable file
View File

@@ -0,0 +1,68 @@
#!/usr/bin/env bash
#
# A script for running the given command together with the lancedb cli.
#
# Print the given error message to stderr and abort the script with status 1.
#
# Arguments:
#   $* - the message to report.
die() {
    # Report the caller's message (not the previous command's exit status) and
    # keep diagnostics off stdout.
    echo "$*" >&2
    exit 1
}
# Abort the script (via die) unless the named command can be found.
#
# Arguments:
#   $1 - name of the command to look up.
check_command_exists() {
    # `local` keeps the name out of the global scope; `command -v` is a shell
    # builtin, so the check does not itself depend on `which` being installed.
    local cmd="${1}"
    command -v "${cmd}" &> /dev/null || \
        die "Unable to locate command: ${cmd}. Did you install it?"
}
# Fetch and unpack the lancedb CLI into the current directory unless a
# ./lancedb binary is already present.
if [[ ! -e ./lancedb ]]; then
    # Both download paths below use the GitHub CLI.
    check_command_exists gh
    # `-n` with a default expansion (instead of `-v`) works on bash < 4.2 and
    # treats an empty token the same as an unset one.
    if [[ -n "${SOPHON_READ_TOKEN:-}" ]]; then
        # Encrypted CI build: download it, then decrypt with the token.
        INPUT="lancedb-linux-x64"
        TARGET="${INPUT}.tar.gz"
        gh release \
            --repo lancedb/lancedb \
            download ci-support-binaries \
            --pattern "${INPUT}" \
            || die "failed to fetch cli."
        check_command_exists openssl
        openssl enc -aes-256-cbc \
            -d -pbkdf2 \
            -pass "env:SOPHON_READ_TOKEN" \
            -in "${INPUT}" \
            -out "${TARGET}" \
            || die "openssl failed"
    else
        # Pick the release artifact matching this machine's OS and CPU.
        ARCH="x64"
        # Query the machine type once so that both OS branches can use it.
        UNAME=$(uname -m)
        if [[ $OSTYPE == 'darwin'* ]]; then
            if [[ $UNAME == 'arm64' ]]; then
                ARCH='arm64'
            fi
            OS_NAME="macos"
        elif [[ $OSTYPE == 'linux'* ]]; then
            if [[ $UNAME == 'aarch64' ]]; then
                ARCH='arm64'
            fi
            OS_NAME="linux"
        else
            die "unknown OSTYPE: $OSTYPE"
        fi

        TARGET="lancedb-${OS_NAME}-${ARCH}.tar.gz"
        gh release \
            --repo lancedb/sophon \
            download lancedb-cli-v0.0.3 \
            --pattern "${TARGET}" \
            || die "failed to fetch cli."
    fi

    check_command_exists tar
    tar xvf "${TARGET}" || die "tar failed."
    [[ -e ./lancedb ]] || die "failed to extract lancedb."
fi

# Export the path of the launcher script that sits next to this script, then
# run the command given on the command line.
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"

"${@}"

View File

@@ -117,7 +117,7 @@ def update_cargo_toml(line_updater):
lance_line = "" lance_line = ""
is_parsing_lance_line = False is_parsing_lance_line = False
for line in lines: for line in lines:
if line.startswith("lance"): if line.startswith("lance") and not line.startswith("lance-namespace"):
# Check if this is a single-line or multi-line entry # Check if this is a single-line or multi-line entry
# Single-line entries either: # Single-line entries either:
# 1. End with } (complete inline table) # 1. End with } (complete inline table)

View File

@@ -70,6 +70,22 @@ plugins:
- mkdocs-jupyter - mkdocs-jupyter
- render_swagger: - render_swagger:
allow_arbitrary_locations: true allow_arbitrary_locations: true
- redirects:
redirect_maps:
# Redirect the home page and other top-level markdown files for maximum SEO benefit.
# Other sub-pages are handled by the injected JS in overrides/partials/header.html.
'index.md': 'https://lancedb.com/docs/'
'guides/tables.md': 'https://lancedb.com/docs/tables/'
'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
'basic.md': 'https://lancedb.com/docs/quickstart/'
'faq.md': 'https://lancedb.com/docs/faq/'
'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
'integrations.md': 'https://lancedb.com/docs/integrations/'
'examples.md': 'https://lancedb.com/docs/tutorials/'
'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
markdown_extensions: markdown_extensions:
- admonition - admonition

View File

@@ -19,7 +19,13 @@
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE. IN THE SOFTWARE.
--> -->
<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">
<p style="margin: 0; font-size: 1.1em;">
<strong>This documentation site is deprecated.</strong>
Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">
lancedb.com/docs</a> for the latest information.
</p>
</div>
{% set class = "md-header" %} {% set class = "md-header" %}
{% if "navigation.tabs.sticky" in features %} {% if "navigation.tabs.sticky" in features %}
{% set class = class ~ " md-header--shadow md-header--lifted" %} {% set class = class ~ " md-header--shadow md-header--lifted" %}
@@ -174,3 +180,76 @@
{% endif %} {% endif %}
{% endif %} {% endif %}
</header> </header>
<script>
(function() {
function checkPathAndRedirect() {
var banner = document.getElementById('deprecation-banner');
if (document.querySelector('meta[http-equiv="refresh"]')) {
return; // The redirects plugin is already handling this page.
}
var currentPath = window.location.pathname;
var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
? currentPath.slice(0, -1)
: currentPath;
// These are the ONLY paths that should remain on the old site
var apiPaths = [
'/lancedb/python',
'/lancedb/javascript',
'/lancedb/js',
'/lancedb/api_reference'
];
var isApiPage = apiPaths.some(function(apiPath) {
return cleanPath.startsWith(apiPath);
});
if (isApiPage) {
if (banner) {
banner.style.display = 'none';
}
} else {
if (banner) {
banner.style.display = 'block';
}
// Add noindex meta tag to prevent indexing of old docs for seo
var noindexMeta = document.createElement('meta');
noindexMeta.setAttribute('name', 'robots');
noindexMeta.setAttribute('content', 'noindex, follow');
document.head.appendChild(noindexMeta);
// Add canonical link to point to the new docs to reward new site for seo
var canonicalLink = document.createElement('link');
canonicalLink.setAttribute('rel', 'canonical');
canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
document.head.appendChild(canonicalLink);
window.location.replace('https://lancedb.com/docs');
}
}
// Run the check only if doc is ready. This makes sure we catch the initial load
// and redirect.
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
} else {
checkPathAndRedirect();
}
// Use an interval to handle subsequent navigation clicks.
var lastPath = window.location.pathname;
setInterval(function() {
if (window.location.pathname !== lastPath) {
lastPath = window.location.pathname;
checkPathAndRedirect();
}
}, 2000); // keeping it 2 second to make it easy for user to understand
// what's happening
})();
</script>

View File

@@ -5,3 +5,4 @@ mkdocstrings[python]==0.25.2
griffe griffe
mkdocs-render-swagger-plugin mkdocs-render-swagger-plugin
pydantic pydantic
mkdocs-redirects

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.22.1-final.0</version> <version>0.22.2-beta.0</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.22.1-final.0</version> <version>0.22.2-beta.0</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.22.1-final.0</version> <version>0.22.2-beta.0</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>${project.artifactId}</name> <name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description> <description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "lancedb-nodejs" name = "lancedb-nodejs"
edition.workspace = true edition.workspace = true
version = "0.22.1" version = "0.22.2-beta.0"
license.workspace = true license.workspace = true
description.workspace = true description.workspace = true
repository.workspace = true repository.workspace = true

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.1", "version": "0.22.2-beta.0",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.1", "version": "0.22.2-beta.0",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.1", "version": "0.22.2-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-musl", "name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.1", "version": "0.22.2-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node", "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.1", "version": "0.22.2-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-musl", "name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.1", "version": "0.22.2-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node", "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-arm64-msvc", "name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.1", "version": "0.22.2-beta.0",
"os": [ "os": [
"win32" "win32"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.1", "version": "0.22.2-beta.0",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{ {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.22.1", "version": "0.22.2-beta.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.22.1", "version": "0.22.2-beta.0",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"

View File

@@ -11,7 +11,7 @@
"ann" "ann"
], ],
"private": false, "private": false,
"version": "0.22.1", "version": "0.22.2-beta.0",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.25.2-beta.0" current_version = "0.25.2-beta.1"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.25.2-beta.0" version = "0.25.2-beta.1"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true

View File

@@ -10,7 +10,7 @@ dependencies = [
"pyarrow>=16", "pyarrow>=16",
"pydantic>=1.10", "pydantic>=1.10",
"tqdm>=4.27.0", "tqdm>=4.27.0",
"lance-namespace==0.0.6" "lance-namespace>=0.0.16"
] ]
description = "lancedb" description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }] authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]

View File

@@ -35,6 +35,8 @@ async def some_table(db_async):
"tags": [ "tags": [
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS) [f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
], ],
"is_active": [random.choice([True, False]) for _ in range(NROWS)],
"data": [random.randbytes(random.randint(0, 128)) for _ in range(NROWS)],
} }
) )
return await db_async.create_table( return await db_async.create_table(
@@ -99,10 +101,17 @@ async def test_create_fixed_size_binary_index(some_table: AsyncTable):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_create_bitmap_index(some_table: AsyncTable): async def test_create_bitmap_index(some_table: AsyncTable):
await some_table.create_index("id", config=Bitmap()) await some_table.create_index("id", config=Bitmap())
await some_table.create_index("is_active", config=Bitmap())
await some_table.create_index("data", config=Bitmap())
indices = await some_table.list_indices() indices = await some_table.list_indices()
assert str(indices) == '[Index(Bitmap, columns=["id"], name="id_idx")]' assert len(indices) == 3
indices = await some_table.list_indices() assert indices[0].index_type == "Bitmap"
assert len(indices) == 1 assert indices[0].columns == ["id"]
assert indices[1].index_type == "Bitmap"
assert indices[1].columns == ["is_active"]
assert indices[2].index_type == "Bitmap"
assert indices[2].columns == ["data"]
index_name = indices[0].name index_name = indices[0].name
stats = await some_table.index_stats(index_name) stats = await some_table.index_stats(index_name)
assert stats.index_type == "BITMAP" assert stats.index_type == "BITMAP"
@@ -111,6 +120,11 @@ async def test_create_bitmap_index(some_table: AsyncTable):
assert stats.num_unindexed_rows == 0 assert stats.num_unindexed_rows == 0
assert stats.num_indices == 1 assert stats.num_indices == 1
assert (
"ScalarIndexQuery"
in await some_table.query().where("is_active = TRUE").explain_plan()
)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_create_label_list_index(some_table: AsyncTable): async def test_create_label_list_index(some_table: AsyncTable):

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.22.1" version = "0.22.2-beta.0"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
@@ -82,6 +82,7 @@ crunchy.workspace = true
bytemuck_derive.workspace = true bytemuck_derive.workspace = true
[dev-dependencies] [dev-dependencies]
anyhow = "1"
tempfile = "3.5.0" tempfile = "3.5.0"
rand = { version = "0.9", features = ["small_rng"] } rand = { version = "0.9", features = ["small_rng"] }
random_word = { version = "0.4.3", features = ["en"] } random_word = { version = "0.4.3", features = ["en"] }

19
rust/lancedb/Makefile Normal file
View File

@@ -0,0 +1,19 @@
#
# Makefile for running tests.
#
# Run all tests.
all-tests: feature-tests remote-tests
# Declared phony like the targets below, so a file named "all-tests" cannot
# make this target look up to date.
.PHONY: all-tests

# Run tests for every feature. This requires using docker compose to set up
# the environment.
feature-tests:
	../../ci/run_with_docker_compose.sh \
		cargo test --all-features --tests --locked --examples
.PHONY: feature-tests

# Run tests against remote endpoints.
remote-tests:
	../../ci/run_with_test_connection.sh \
		cargo test --features remote --locked
.PHONY: remote-tests

View File

@@ -1170,6 +1170,7 @@ mod tests {
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig}; use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
use crate::query::QueryBase; use crate::query::QueryBase;
use crate::query::{ExecutableQuery, QueryExecutionOptions}; use crate::query::{ExecutableQuery, QueryExecutionOptions};
use crate::test_connection::test_utils::new_test_connection;
use arrow::compute::concat_batches; use arrow::compute::concat_batches;
use arrow_array::RecordBatchReader; use arrow_array::RecordBatchReader;
use arrow_schema::{DataType, Field, Schema}; use arrow_schema::{DataType, Field, Schema};
@@ -1185,11 +1186,8 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_connect() { async fn test_connect() {
let tmp_dir = tempdir().unwrap(); let tc = new_test_connection().await.unwrap();
let uri = tmp_dir.path().to_str().unwrap(); assert_eq!(tc.connection.uri, tc.uri);
let db = connect(uri).execute().await.unwrap();
assert_eq!(db.uri, uri);
} }
#[cfg(not(windows))] #[cfg(not(windows))]
@@ -1255,16 +1253,10 @@ mod tests {
assert_eq!(tables, names[..7]); assert_eq!(tables, names[..7]);
} }
#[tokio::test]
async fn test_connect_s3() {
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
}
#[tokio::test] #[tokio::test]
async fn test_open_table() { async fn test_open_table() {
let tmp_dir = tempdir().unwrap(); let tc = new_test_connection().await.unwrap();
let uri = tmp_dir.path().to_str().unwrap(); let db = tc.connection;
let db = connect(uri).execute().await.unwrap();
assert_eq!(db.table_names().execute().await.unwrap().len(), 0); assert_eq!(db.table_names().execute().await.unwrap().len(), 0);
// open non-exist table // open non-exist table

View File

@@ -728,7 +728,7 @@ impl Database for ListingDatabase {
let target_uri = self.table_uri(&request.target_table_name)?; let target_uri = self.table_uri(&request.target_table_name)?;
source_dataset source_dataset
.shallow_clone(&target_uri, version_ref, storage_params) .shallow_clone(&target_uri, version_ref, Some(storage_params))
.await .await
.map_err(|e| Error::Lance { source: e })?; .map_err(|e| Error::Lance { source: e })?;

View File

@@ -206,6 +206,7 @@ pub mod query;
pub mod remote; pub mod remote;
pub mod rerankers; pub mod rerankers;
pub mod table; pub mod table;
pub mod test_connection;
pub mod utils; pub mod utils;
use std::fmt::Display; use std::fmt::Display;

View File

@@ -2760,6 +2760,7 @@ mod tests {
RecordBatchReader, StringArray, TimestampMillisecondArray, TimestampNanosecondArray, RecordBatchReader, StringArray, TimestampMillisecondArray, TimestampNanosecondArray,
UInt32Array, UInt32Array,
}; };
use arrow_array::{BinaryArray, LargeBinaryArray};
use arrow_data::ArrayDataBuilder; use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType, Field, Schema, TimeUnit}; use arrow_schema::{DataType, Field, Schema, TimeUnit};
use futures::TryStreamExt; use futures::TryStreamExt;
@@ -3725,6 +3726,10 @@ mod tests {
let schema = Arc::new(Schema::new(vec![ let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, false), Field::new("id", DataType::Int32, false),
Field::new("category", DataType::Utf8, true), Field::new("category", DataType::Utf8, true),
Field::new("large_category", DataType::LargeUtf8, true),
Field::new("is_active", DataType::Boolean, true),
Field::new("data", DataType::Binary, true),
Field::new("large_data", DataType::LargeBinary, true),
])); ]));
let batch = RecordBatch::try_new( let batch = RecordBatch::try_new(
@@ -3734,6 +3739,16 @@ mod tests {
Arc::new(StringArray::from_iter_values( Arc::new(StringArray::from_iter_values(
(0..100).map(|i| format!("category_{}", i % 5)), (0..100).map(|i| format!("category_{}", i % 5)),
)), )),
Arc::new(LargeStringArray::from_iter_values(
(0..100).map(|i| format!("large_category_{}", i % 5)),
)),
Arc::new(BooleanArray::from_iter((0..100).map(|i| Some(i % 2 == 0)))),
Arc::new(BinaryArray::from_iter_values(
(0_u32..100).map(|i| i.to_le_bytes()),
)),
Arc::new(LargeBinaryArray::from_iter_values(
(0_u32..100).map(|i| i.to_le_bytes()),
)),
], ],
) )
.unwrap(); .unwrap();
@@ -3754,12 +3769,58 @@ mod tests {
.await .await
.unwrap(); .unwrap();
// Create bitmap index on the "is_active" column
table
.create_index(&["is_active"], Index::Bitmap(Default::default()))
.execute()
.await
.unwrap();
// Create bitmap index on the "data" column
table
.create_index(&["data"], Index::Bitmap(Default::default()))
.execute()
.await
.unwrap();
// Create bitmap index on the "large_data" column
table
.create_index(&["large_data"], Index::Bitmap(Default::default()))
.execute()
.await
.unwrap();
// Create bitmap index on the "large_category" column
table
.create_index(&["large_category"], Index::Bitmap(Default::default()))
.execute()
.await
.unwrap();
// Verify the index was created // Verify the index was created
let index_configs = table.list_indices().await.unwrap(); let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1); assert_eq!(index_configs.len(), 5);
let index = index_configs.into_iter().next().unwrap();
let mut configs_iter = index_configs.into_iter();
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap); assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["category".to_string()]); assert_eq!(index.columns, vec!["category".to_string()]);
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["is_active".to_string()]);
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["data".to_string()]);
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["large_data".to_string()]);
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["large_category".to_string()]);
} }
#[tokio::test] #[tokio::test]

View File

@@ -0,0 +1,126 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
//! Functions for testing connections.
#[cfg(test)]
pub mod test_utils {
use regex::Regex;
use std::env;
use std::io::{BufRead, BufReader};
use std::process::{Child, ChildStdout, Command, Stdio};
use crate::{connect, Connection};
use anyhow::{bail, Result};
use tempfile::{tempdir, TempDir};
pub struct TestConnection {
pub uri: String,
pub connection: Connection,
_temp_dir: Option<TempDir>,
_process: Option<TestProcess>,
}
struct TestProcess {
child: Child,
}
impl Drop for TestProcess {
#[allow(unused_must_use)]
fn drop(&mut self) {
self.child.kill();
}
}
pub async fn new_test_connection() -> Result<TestConnection> {
match env::var("CREATE_LANCEDB_TEST_CONNECTION_SCRIPT") {
Ok(script_path) => new_remote_connection(&script_path).await,
Err(_e) => new_local_connection().await,
}
}
async fn new_remote_connection(script_path: &str) -> Result<TestConnection> {
let temp_dir = tempdir()?;
let data_path = temp_dir.path().to_str().unwrap().to_string();
let child_result = Command::new(script_path)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.arg(data_path.clone())
.spawn();
if child_result.is_err() {
bail!(format!(
"Unable to run {}: {:?}",
script_path,
child_result.err()
));
}
let mut process = TestProcess {
child: child_result.unwrap(),
};
let stdout = BufReader::new(process.child.stdout.take().unwrap());
let port = read_process_port(stdout)?;
let uri = "db://test";
let host_override = format!("http://localhost:{}", port);
let connection = create_new_connection(uri, &host_override).await?;
Ok(TestConnection {
uri: uri.to_string(),
connection,
_temp_dir: Some(temp_dir),
_process: Some(process),
})
}
fn read_process_port(mut stdout: BufReader<ChildStdout>) -> Result<String> {
let mut line = String::new();
let re = Regex::new(r"Query node now listening on 0.0.0.0:(.*)").unwrap();
loop {
let result = stdout.read_line(&mut line);
if let Err(err) = result {
bail!(format!(
"read_process_port: error while reading from process output: {}",
err
));
} else if result.unwrap() == 0 {
bail!("read_process_port: hit EOF before reading port from process output.");
}
if re.is_match(&line) {
let caps = re.captures(&line).unwrap();
return Ok(caps[1].to_string());
}
}
}
#[cfg(feature = "remote")]
async fn create_new_connection(
uri: &str,
host_override: &str,
) -> crate::error::Result<Connection> {
connect(uri)
.region("us-east-1")
.api_key("sk_localtest")
.host_override(host_override)
.execute()
.await
}
#[cfg(not(feature = "remote"))]
async fn create_new_connection(
_uri: &str,
_host_override: &str,
) -> crate::error::Result<Connection> {
panic!("remote feature not supported");
}
async fn new_local_connection() -> Result<TestConnection> {
let temp_dir = tempdir()?;
let uri = temp_dir.path().to_str().unwrap();
let connection = connect(uri).execute().await?;
Ok(TestConnection {
uri: uri.to_string(),
connection,
_temp_dir: Some(temp_dir),
_process: None,
})
}
}

View File

@@ -195,7 +195,15 @@ pub fn supported_btree_data_type(dtype: &DataType) -> bool {
} }
pub fn supported_bitmap_data_type(dtype: &DataType) -> bool { pub fn supported_bitmap_data_type(dtype: &DataType) -> bool {
dtype.is_integer() || matches!(dtype, DataType::Utf8) dtype.is_integer()
|| matches!(
dtype,
DataType::Utf8
| DataType::LargeUtf8
| DataType::Binary
| DataType::LargeBinary
| DataType::Boolean
)
} }
pub fn supported_label_list_data_type(dtype: &DataType) -> bool { pub fn supported_label_list_data_type(dtype: &DataType) -> bool {