Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
8c64498c72 Bump version: 0.22.4-beta.1 → 0.22.4-beta.2 2025-11-19 20:25:21 +00:00
66 changed files with 404 additions and 1459 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.4-beta.3"
current_version = "0.22.4-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -19,7 +19,7 @@ rustflags = [
"-Wclippy::string_add_assign",
"-Wclippy::string_add",
"-Wclippy::string_lit_as_bytes",
"-Wclippy::implicit_clone",
"-Wclippy::string_to_string",
"-Wclippy::use_self",
"-Dclippy::cargo",
"-Dclippy::dbg_macro",

View File

@@ -18,6 +18,6 @@ body:
label: Link
description: >
Provide a link to the existing documentation, if applicable.
placeholder: ex. https://lancedb.com/docs/tables/...
placeholder: ex. https://lancedb.github.io/lancedb/guides/tables/...
validations:
required: false

View File

@@ -31,7 +31,7 @@ runs:
with:
command: build
working-directory: python
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
target: x86_64-unknown-linux-gnu
manylinux: ${{ inputs.manylinux }}
args: ${{ inputs.args }}
@@ -46,7 +46,7 @@ runs:
with:
command: build
working-directory: python
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
target: aarch64-unknown-linux-gnu
manylinux: ${{ inputs.manylinux }}
args: ${{ inputs.args }}

View File

@@ -22,5 +22,5 @@ runs:
command: build
# TODO: pass through interpreter
args: ${{ inputs.args }}
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
working-directory: python

View File

@@ -26,7 +26,7 @@ runs:
with:
command: build
args: ${{ inputs.args }}
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
working-directory: python
- uses: actions/upload-artifact@v4
with:

View File

@@ -1,9 +1,6 @@
name: Codex Update Lance Dependency
on:
pull_request:
paths:
- '.github/workflows/codex-update-lance-dependency.yml'
workflow_call:
inputs:
tag:
@@ -14,8 +11,7 @@ on:
inputs:
tag:
description: "Tag name from Lance"
required: false
default: "v1.0.0-rc.1"
required: true
type: string
permissions:
@@ -29,7 +25,7 @@ jobs:
steps:
- name: Show inputs
run: |
echo "tag = ${{ inputs.tag || 'v1.0.0-rc.1' }}"
echo "tag = ${{ inputs.tag }}"
- name: Checkout Repo LanceDB
uses: actions/checkout@v4
@@ -69,7 +65,7 @@ jobs:
- name: Run Codex to update Lance dependency
env:
TAG: ${{ inputs.tag || 'v1.0.0-rc.1' }}
TAG: ${{ inputs.tag }}
GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
OPENAI_API_KEY: ${{ secrets.CODEX_TOKEN }}
@@ -102,50 +98,3 @@ jobs:
printenv OPENAI_API_KEY | codex login --with-api-key
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
- name: Debug token access
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
echo "=== Checking authenticated user ==="
gh api user --jq '.login' || echo "Failed to get user info"
echo ""
echo "=== Listing repos in lancedb org that token can access ==="
gh repo list lancedb --limit 50 --json name,visibility --jq '.[] | "\(.name) (\(.visibility))"' || echo "Failed to list repos"
echo ""
echo "=== Checking if sophon repo exists and is accessible ==="
gh repo view lancedb/sophon --json name,visibility 2>&1 || echo "Cannot access lancedb/sophon"
echo ""
echo "=== Checking token scopes ==="
gh api -i user 2>&1 | grep -i "x-oauth-scopes" || echo "Could not determine token scopes"
- name: Trigger sophon dependency update
env:
TAG: ${{ inputs.tag || 'v1.0.0-rc.1' }}
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
VERSION="${TAG#refs/tags/}"
VERSION="${VERSION#v}"
LANCEDB_BRANCH="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
echo "Triggering sophon workflow with:"
echo " lance_ref: ${TAG#refs/tags/}"
echo " lancedb_ref: ${LANCEDB_BRANCH}"
gh workflow run codex-bump-lancedb-lance.yml \
--repo lancedb/sophon \
-f lance_ref="${TAG#refs/tags/}" \
-f lancedb_ref="${LANCEDB_BRANCH}"
- name: Show latest sophon workflow run
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
echo "Latest sophon workflow run:"
gh run list --repo lancedb/sophon --workflow codex-bump-lancedb-lance.yml --limit 1 --json databaseId,url,displayTitle

View File

@@ -24,7 +24,7 @@ env:
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
# CI builds are faster with incremental disabled.
CARGO_INCREMENTAL: "0"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
jobs:
# Single deploy job since we're just deploying
@@ -50,8 +50,8 @@ jobs:
- name: Build Python
working-directory: python
run: |
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
- name: Set up node
uses: actions/setup-node@v3
with:

View File

@@ -59,4 +59,4 @@ jobs:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,url,displayTitle
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,htmlUrl,displayTitle

View File

@@ -97,6 +97,12 @@ jobs:
fail-fast: false
matrix:
settings:
- target: x86_64-apple-darwin
host: macos-latest
features: ","
pre_build: |-
brew install protobuf
rustup target add x86_64-apple-darwin
- target: aarch64-apple-darwin
host: macos-latest
features: fp16kernels

View File

@@ -11,7 +11,7 @@ on:
- Cargo.toml # Change in dependency frequently breaks builds
env:
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
jobs:
linux:
@@ -64,6 +64,8 @@ jobs:
strategy:
matrix:
config:
- target: x86_64-apple-darwin
runner: macos-13
- target: aarch64-apple-darwin
runner: warp-macos-14-arm64-6x
env:

View File

@@ -18,7 +18,7 @@ env:
# Color output for pytest is off by default.
PYTEST_ADDOPTS: "--color=yes"
FORCE_COLOR: "1"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
RUST_BACKTRACE: "1"
jobs:
@@ -79,7 +79,7 @@ jobs:
doctest:
name: "Doctest"
timeout-minutes: 30
runs-on: ubuntu-2404-8x-x64
runs-on: "ubuntu-24.04"
defaults:
run:
shell: bash
@@ -100,7 +100,7 @@ jobs:
sudo apt install -y protobuf-compiler
- name: Install
run: |
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
pip install tantivy
pip install mlx
- name: Doctest
@@ -143,9 +143,16 @@ jobs:
- name: Delete wheels
run: rm -rf target/wheels
platform:
name: "Mac"
name: "Mac: ${{ matrix.config.name }}"
timeout-minutes: 30
runs-on: macos-14
strategy:
matrix:
config:
- name: x86
runner: macos-13
- name: Arm
runner: macos-14
runs-on: "${{ matrix.config.runner }}"
defaults:
run:
shell: bash
@@ -219,7 +226,7 @@ jobs:
run: |
pip install "pydantic<2"
pip install pyarrow==16
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
pip install tantivy
- name: Run tests
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests

View File

@@ -15,7 +15,7 @@ runs:
- name: Install lancedb
shell: bash
run: |
pip3 install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
pip3 install --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
- name: Setup localstack for integration tests
if: ${{ inputs.integration == 'true' }}
shell: bash

View File

@@ -122,7 +122,7 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
mac-runner: ["macos-14", "macos-15"]
mac-runner: ["macos-13", "macos-14"]
runs-on: "${{ matrix.mac-runner }}"
defaults:
run:

771
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=1.0.0-beta.16", default-features = false, "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.0-beta.16", default-features = false, "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.0-beta.16", default-features = false, "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=1.0.0-beta.3", default-features = false, "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.0-beta.3", default-features = false, "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.0-beta.3", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }

View File

@@ -15,7 +15,7 @@
# **The Multimodal AI Lakehouse**
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.com/docs) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.github.io/lancedb/) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
**The ultimate multimodal data platform for AI/ML applications.**

View File

@@ -1,8 +1,8 @@
# LanceDB Documentation
LanceDB docs are available at [lancedb.com/docs](https://lancedb.com/docs).
LanceDB docs are deployed to https://lancedb.github.io/lancedb/.
The SDK docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
Docs is built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
unreleased features.

View File

@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
console.log(results);
```
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
## Development

View File

@@ -147,7 +147,7 @@ A new PermutationBuilder instance
#### Example
```ts
builder.splitCalculated({ calculation: "user_id % 3" });
builder.splitCalculated("user_id % 3");
```
***

View File

@@ -89,4 +89,4 @@ optional storageOptions: Record<string, string>;
(For LanceDB OSS only): configuration for object storage.
The available options are described at https://lancedb.com/docs/storage/
The available options are described at https://lancedb.github.io/lancedb/guides/storage/

View File

@@ -97,4 +97,4 @@ Configuration for object storage.
Options already set on the connection will be inherited by the table,
but can be overridden here.
The available options are described at https://lancedb.com/docs/storage/
The available options are described at https://lancedb.github.io/lancedb/guides/storage/

View File

@@ -42,4 +42,4 @@ Configuration for object storage.
Options already set on the connection will be inherited by the table,
but can be overridden here.
The available options are described at https://lancedb.com/docs/storage/
The available options are described at https://lancedb.github.io/lancedb/guides/storage/

View File

@@ -30,12 +30,6 @@ is also an [asynchronous API client](#connections-asynchronous).
::: lancedb.table.Table
::: lancedb.table.FragmentStatistics
::: lancedb.table.FragmentSummaryStats
::: lancedb.table.Tags
## Querying (Synchronous)
::: lancedb.query.Query
@@ -64,14 +58,6 @@ is also an [asynchronous API client](#connections-asynchronous).
::: lancedb.embeddings.open_clip.OpenClipEmbeddings
## Remote configuration
::: lancedb.remote.ClientConfig
::: lancedb.remote.TimeoutConfig
::: lancedb.remote.RetryConfig
## Context
::: lancedb.context.contextualize
@@ -129,8 +115,6 @@ Table hold your actual data as a collection of records / rows.
::: lancedb.table.AsyncTable
::: lancedb.table.AsyncTags
## Indices (Asynchronous)
Indices can be created on a table to speed up queries. This section
@@ -152,8 +136,6 @@ lists the indices that LanceDb supports.
::: lancedb.index.IvfFlat
::: lancedb.table.IndexStatistics
## Querying (Asynchronous)
Queries allow you to return data from your database. Basic queries can be

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.3</version>
<version>0.22.4-beta.2</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.3</version>
<version>0.22.4-beta.2</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.3</version>
<version>0.22.4-beta.2</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.22.4-beta.3"
version = "0.22.4-beta.2"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -30,7 +30,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
console.log(results);
```
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
## Development

View File

@@ -42,7 +42,7 @@ export interface CreateTableOptions {
* Options already set on the connection will be inherited by the table,
* but can be overridden here.
*
* The available options are described at https://lancedb.com/docs/storage/
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
*/
storageOptions?: Record<string, string>;
@@ -78,7 +78,7 @@ export interface OpenTableOptions {
* Options already set on the connection will be inherited by the table,
* but can be overridden here.
*
* The available options are described at https://lancedb.com/docs/storage/
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
*/
storageOptions?: Record<string, string>;
/**

View File

@@ -118,7 +118,7 @@ export class PermutationBuilder {
* @returns A new PermutationBuilder instance
* @example
* ```ts
* builder.splitCalculated({ calculation: "user_id % 3" });
* builder.splitCalculated("user_id % 3");
* ```
*/
splitCalculated(options: SplitCalculatedOptions): PermutationBuilder {

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.22.4-beta.3",
"version": "0.22.4-beta.2",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -35,7 +35,7 @@ pub struct ConnectionOptions {
pub read_consistency_interval: Option<f64>,
/// (For LanceDB OSS only): configuration for object storage.
///
/// The available options are described at https://lancedb.com/docs/storage/
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
pub storage_options: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): the session to use for this connection. Holds
/// shared caches and other session-specific state.

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.25.4-beta.3"
current_version = "0.25.4-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.25.4-beta.3"
version = "0.25.4-beta.2"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -1,11 +1,11 @@
PIP_EXTRA_INDEX_URL ?= https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/
PIP_EXTRA_INDEX_URL ?= https://pypi.fury.io/lancedb/
help: ## Show this help.
@sed -ne '/@sed/!s/## //p' $(MAKEFILE_LIST)
.PHONY: develop
develop: ## Install the package in development mode.
PIP_EXTRA_INDEX_URL="$(PIP_EXTRA_INDEX_URL)" maturin develop --extras tests,dev,embeddings
PIP_EXTRA_INDEX_URL=$(PIP_EXTRA_INDEX_URL) maturin develop --extras tests,dev,embeddings
.PHONY: format
format: ## Format the code.

View File

@@ -10,7 +10,7 @@ dependencies = [
"pyarrow>=16",
"pydantic>=1.10",
"tqdm>=4.27.0",
"lance-namespace>=0.2.1"
"lance-namespace>=0.0.21"
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
pylance = [
"pylance>=1.0.0b14",
"pylance>=0.25",
]
tests = [
"aiohttp",
@@ -59,7 +59,7 @@ tests = [
"polars>=0.19, <=1.3.0",
"tantivy",
"pyarrow-stubs",
"pylance>=1.0.0b14",
"pylance>=1.0.0b4",
"requests",
"datafusion",
]

View File

@@ -72,7 +72,7 @@ def connect(
default configuration is used.
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://lancedb.com/docs/storage/>
<https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
@@ -174,7 +174,7 @@ async def connect_async(
default configuration is used.
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://lancedb.com/docs/storage/>
<https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure

View File

@@ -26,7 +26,7 @@ class Connection(object):
async def close(self): ...
async def list_namespaces(
self,
namespace: Optional[List[str]],
namespace: List[str],
page_token: Optional[str],
limit: Optional[int],
) -> List[str]: ...
@@ -34,7 +34,7 @@ class Connection(object):
async def drop_namespace(self, namespace: List[str]) -> None: ...
async def table_names(
self,
namespace: Optional[List[str]],
namespace: List[str],
start_after: Optional[str],
limit: Optional[int],
) -> list[str]: ...
@@ -43,7 +43,7 @@ class Connection(object):
name: str,
mode: str,
data: pa.RecordBatchReader,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
location: Optional[str] = None,
@@ -53,7 +53,7 @@ class Connection(object):
name: str,
mode: str,
schema: pa.Schema,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
location: Optional[str] = None,
@@ -61,7 +61,7 @@ class Connection(object):
async def open_table(
self,
name: str,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
@@ -71,7 +71,7 @@ class Connection(object):
self,
target_table_name: str,
source_uri: str,
target_namespace: Optional[List[str]] = None,
target_namespace: List[str] = [],
source_version: Optional[int] = None,
source_tag: Optional[str] = None,
is_shallow: bool = True,
@@ -80,13 +80,11 @@ class Connection(object):
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
) -> None: ...
async def drop_table(
self, name: str, namespace: Optional[List[str]] = None
) -> None: ...
async def drop_all_tables(self, namespace: Optional[List[str]] = None) -> None: ...
async def drop_table(self, name: str, namespace: List[str] = []) -> None: ...
async def drop_all_tables(self, namespace: List[str] = []) -> None: ...
class Table:
def name(self) -> str: ...

View File

@@ -96,7 +96,7 @@ def data_to_reader(
f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
"pyarrow Table/RecordBatch, or Pydantic models. "
"See https://lancedb.com/docs/tables/ for examples."
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
)

View File

@@ -54,7 +54,7 @@ class DBConnection(EnforceOverrides):
def list_namespaces(
self,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
) -> Iterable[str]:
@@ -75,8 +75,6 @@ class DBConnection(EnforceOverrides):
Iterable of str
List of immediate child namespace names
"""
if namespace is None:
namespace = []
return []
def create_namespace(self, namespace: List[str]) -> None:
@@ -109,7 +107,7 @@ class DBConnection(EnforceOverrides):
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
) -> Iterable[str]:
"""List all tables in this database, in sorted order
@@ -144,7 +142,7 @@ class DBConnection(EnforceOverrides):
fill_value: float = 0.0,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
data_storage_version: Optional[str] = None,
@@ -193,11 +191,7 @@ class DBConnection(EnforceOverrides):
Additional options for the storage backend. Options already set on the
connection will be inherited by the table, but can be overridden here.
See available options at
<https://lancedb.com/docs/storage/>
To enable stable row IDs (row IDs remain stable after compaction,
update, delete, and merges), set `new_table_enable_stable_row_ids`
to `"true"` in storage_options when connecting to the database.
<https://lancedb.github.io/lancedb/guides/storage/>
data_storage_version: optional, str, default "stable"
Deprecated. Set `storage_options` when connecting to the database and set
`new_table_data_storage_version` in the options.
@@ -314,7 +308,7 @@ class DBConnection(EnforceOverrides):
self,
name: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
@@ -345,7 +339,7 @@ class DBConnection(EnforceOverrides):
Additional options for the storage backend. Options already set on the
connection will be inherited by the table, but can be overridden here.
See available options at
<https://lancedb.com/docs/storage/>
<https://lancedb.github.io/lancedb/guides/storage/>
Returns
-------
@@ -353,7 +347,7 @@ class DBConnection(EnforceOverrides):
"""
raise NotImplementedError
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
def drop_table(self, name: str, namespace: List[str] = []):
"""Drop a table from the database.
Parameters
@@ -364,16 +358,14 @@ class DBConnection(EnforceOverrides):
The namespace to drop the table from.
Empty list represents root namespace.
"""
if namespace is None:
namespace = []
raise NotImplementedError
def rename_table(
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
"""Rename a table in the database.
@@ -390,10 +382,6 @@ class DBConnection(EnforceOverrides):
The namespace to move the table to.
If not specified, defaults to the same as cur_namespace.
"""
if cur_namespace is None:
cur_namespace = []
if new_namespace is None:
new_namespace = []
raise NotImplementedError
def drop_database(self):
@@ -403,7 +391,7 @@ class DBConnection(EnforceOverrides):
"""
raise NotImplementedError
def drop_all_tables(self, namespace: Optional[List[str]] = None):
def drop_all_tables(self, namespace: List[str] = []):
"""
Drop all tables from the database
@@ -413,8 +401,6 @@ class DBConnection(EnforceOverrides):
The namespace to drop all tables from.
None or empty list represents root namespace.
"""
if namespace is None:
namespace = []
raise NotImplementedError
@property
@@ -555,7 +541,7 @@ class LanceDBConnection(DBConnection):
@override
def list_namespaces(
self,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
) -> Iterable[str]:
@@ -576,8 +562,6 @@ class LanceDBConnection(DBConnection):
Iterable of str
List of immediate child namespace names
"""
if namespace is None:
namespace = []
return LOOP.run(
self._conn.list_namespaces(
namespace=namespace, page_token=page_token, limit=limit
@@ -612,7 +596,7 @@ class LanceDBConnection(DBConnection):
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
) -> Iterable[str]:
"""Get the names of all tables in the database. The names are sorted.
@@ -630,8 +614,6 @@ class LanceDBConnection(DBConnection):
Iterator of str.
A list of table names.
"""
if namespace is None:
namespace = []
return LOOP.run(
self._conn.table_names(
namespace=namespace, start_after=page_token, limit=limit
@@ -656,7 +638,7 @@ class LanceDBConnection(DBConnection):
fill_value: float = 0.0,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
data_storage_version: Optional[str] = None,
@@ -673,8 +655,6 @@ class LanceDBConnection(DBConnection):
---
DBConnection.create_table
"""
if namespace is None:
namespace = []
if mode.lower() not in ["create", "overwrite"]:
raise ValueError("mode must be either 'create' or 'overwrite'")
validate_table_name(name)
@@ -700,7 +680,7 @@ class LanceDBConnection(DBConnection):
self,
name: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
@@ -718,8 +698,6 @@ class LanceDBConnection(DBConnection):
-------
A LanceTable object representing the table.
"""
if namespace is None:
namespace = []
if index_cache_size is not None:
import warnings
@@ -745,7 +723,7 @@ class LanceDBConnection(DBConnection):
target_table_name: str,
source_uri: str,
*,
target_namespace: Optional[List[str]] = None,
target_namespace: List[str] = [],
source_version: Optional[int] = None,
source_tag: Optional[str] = None,
is_shallow: bool = True,
@@ -778,8 +756,6 @@ class LanceDBConnection(DBConnection):
-------
A LanceTable object representing the cloned table.
"""
if target_namespace is None:
target_namespace = []
LOOP.run(
self._conn.clone_table(
target_table_name,
@@ -800,7 +776,7 @@ class LanceDBConnection(DBConnection):
def drop_table(
self,
name: str,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
ignore_missing: bool = False,
):
"""Drop a table from the database.
@@ -814,8 +790,6 @@ class LanceDBConnection(DBConnection):
ignore_missing: bool, default False
If True, ignore if the table does not exist.
"""
if namespace is None:
namespace = []
LOOP.run(
self._conn.drop_table(
name, namespace=namespace, ignore_missing=ignore_missing
@@ -823,9 +797,7 @@ class LanceDBConnection(DBConnection):
)
@override
def drop_all_tables(self, namespace: Optional[List[str]] = None):
if namespace is None:
namespace = []
def drop_all_tables(self, namespace: List[str] = []):
LOOP.run(self._conn.drop_all_tables(namespace=namespace))
@override
@@ -833,8 +805,8 @@ class LanceDBConnection(DBConnection):
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
"""Rename a table in the database.
@@ -849,10 +821,6 @@ class LanceDBConnection(DBConnection):
new_namespace: List[str], optional
The namespace to move the table to.
"""
if cur_namespace is None:
cur_namespace = []
if new_namespace is None:
new_namespace = []
LOOP.run(
self._conn.rename_table(
cur_name,
@@ -942,7 +910,7 @@ class AsyncConnection(object):
async def list_namespaces(
self,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
) -> Iterable[str]:
@@ -963,8 +931,6 @@ class AsyncConnection(object):
Iterable of str
List of immediate child namespace names (not full paths)
"""
if namespace is None:
namespace = []
return await self._inner.list_namespaces(
namespace=namespace, page_token=page_token, limit=limit
)
@@ -992,7 +958,7 @@ class AsyncConnection(object):
async def table_names(
self,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
start_after: Optional[str] = None,
limit: Optional[int] = None,
) -> Iterable[str]:
@@ -1016,8 +982,6 @@ class AsyncConnection(object):
-------
Iterable of str
"""
if namespace is None:
namespace = []
return await self._inner.table_names(
namespace=namespace, start_after=start_after, limit=limit
)
@@ -1034,7 +998,7 @@ class AsyncConnection(object):
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
location: Optional[str] = None,
) -> AsyncTable:
@@ -1081,11 +1045,7 @@ class AsyncConnection(object):
Additional options for the storage backend. Options already set on the
connection will be inherited by the table, but can be overridden here.
See available options at
<https://lancedb.com/docs/storage/>
To enable stable row IDs (row IDs remain stable after compaction,
update, delete, and merges), set `new_table_enable_stable_row_ids`
to `"true"` in storage_options when connecting to the database.
<https://lancedb.github.io/lancedb/guides/storage/>
Returns
-------
@@ -1195,8 +1155,6 @@ class AsyncConnection(object):
... await db.create_table("table4", make_batches(), schema=schema)
>>> asyncio.run(iterable_example())
"""
if namespace is None:
namespace = []
metadata = None
if embedding_functions is not None:
@@ -1254,7 +1212,7 @@ class AsyncConnection(object):
self,
name: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
@@ -1273,7 +1231,7 @@ class AsyncConnection(object):
Additional options for the storage backend. Options already set on the
connection will be inherited by the table, but can be overridden here.
See available options at
<https://lancedb.com/docs/storage/>
<https://lancedb.github.io/lancedb/guides/storage/>
index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
@@ -1296,8 +1254,6 @@ class AsyncConnection(object):
-------
A LanceTable object representing the table.
"""
if namespace is None:
namespace = []
table = await self._inner.open_table(
name,
namespace=namespace,
@@ -1313,7 +1269,7 @@ class AsyncConnection(object):
target_table_name: str,
source_uri: str,
*,
target_namespace: Optional[List[str]] = None,
target_namespace: List[str] = [],
source_version: Optional[int] = None,
source_tag: Optional[str] = None,
is_shallow: bool = True,
@@ -1346,8 +1302,6 @@ class AsyncConnection(object):
-------
An AsyncTable object representing the cloned table.
"""
if target_namespace is None:
target_namespace = []
table = await self._inner.clone_table(
target_table_name,
source_uri,
@@ -1362,8 +1316,8 @@ class AsyncConnection(object):
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
"""Rename a table in the database.
@@ -1380,10 +1334,6 @@ class AsyncConnection(object):
The namespace to move the table to.
If not specified, defaults to the same as cur_namespace.
"""
if cur_namespace is None:
cur_namespace = []
if new_namespace is None:
new_namespace = []
await self._inner.rename_table(
cur_name, new_name, cur_namespace=cur_namespace, new_namespace=new_namespace
)
@@ -1392,7 +1342,7 @@ class AsyncConnection(object):
self,
name: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
ignore_missing: bool = False,
):
"""Drop a table from the database.
@@ -1407,8 +1357,6 @@ class AsyncConnection(object):
ignore_missing: bool, default False
If True, ignore if the table does not exist.
"""
if namespace is None:
namespace = []
try:
await self._inner.drop_table(name, namespace=namespace)
except ValueError as e:
@@ -1417,7 +1365,7 @@ class AsyncConnection(object):
if f"Table '{name}' was not found" not in str(e):
raise e
async def drop_all_tables(self, namespace: Optional[List[str]] = None):
async def drop_all_tables(self, namespace: List[str] = []):
"""Drop all tables from the database.
Parameters
@@ -1426,8 +1374,6 @@ class AsyncConnection(object):
The namespace to drop all tables from.
None or empty list represents root namespace.
"""
if namespace is None:
namespace = []
await self._inner.drop_all_tables(namespace=namespace)
@deprecation.deprecated(

View File

@@ -609,19 +609,9 @@ class IvfPq:
class IvfRq:
"""Describes an IVF RQ Index
IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
and organizes them into IVF partitions.
The compression scheme is called RabitQ quantization. Each dimension is
quantized into a small number of bits. The parameters `num_bits` and
`num_partitions` control this process, providing a tradeoff between
index size (and thus search speed) and index accuracy.
The partitioning process is called IVF and the `num_partitions` parameter
controls how many groups to create.
Note that training an IVF RQ index on a large dataset is a slow operation
and currently is also a memory intensive operation.
IVF-RQ (Residual Quantization) stores a compressed copy of each vector using
residual quantization and organizes them into IVF partitions. Parameters
largely mirror IVF-PQ for consistency.
Attributes
----------
@@ -638,7 +628,7 @@ class IvfRq:
Number of IVF partitions to create.
num_bits: int, default 1
Number of bits to encode each dimension in the RabitQ codebook.
Number of bits to encode each dimension.
max_iterations: int, default 50
Max iterations to train kmeans when computing IVF partitions.

View File

@@ -127,17 +127,13 @@ class LanceNamespaceStorageOptionsProvider(StorageOptionsProvider):
Examples
--------
Create a provider and fetch storage options::
from lance_namespace import connect as namespace_connect
# Connect to namespace (requires a running namespace server)
namespace = namespace_connect("rest", {"uri": "https://..."})
provider = LanceNamespaceStorageOptionsProvider(
namespace=namespace,
table_id=["my_namespace", "my_table"]
)
options = provider.fetch_storage_options()
>>> from lance_namespace import connect as namespace_connect
>>> namespace = namespace_connect("rest", {"url": "https://..."})
>>> provider = LanceNamespaceStorageOptionsProvider(
... namespace=namespace,
... table_id=["my_namespace", "my_table"]
... )
>>> options = provider.fetch_storage_options()
"""
def __init__(self, namespace: LanceNamespace, table_id: List[str]):
@@ -239,10 +235,8 @@ class LanceNamespaceDBConnection(DBConnection):
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
) -> Iterable[str]:
if namespace is None:
namespace = []
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
response = self._ns.list_tables(request)
return response.tables if response.tables else []
@@ -259,14 +253,12 @@ class LanceNamespaceDBConnection(DBConnection):
fill_value: float = 0.0,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
data_storage_version: Optional[str] = None,
enable_v2_manifest_paths: Optional[bool] = None,
) -> Table:
if namespace is None:
namespace = []
if mode.lower() not in ["create", "overwrite"]:
raise ValueError("mode must be either 'create' or 'overwrite'")
validate_table_name(name)
@@ -355,13 +347,11 @@ class LanceNamespaceDBConnection(DBConnection):
self,
name: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
) -> Table:
if namespace is None:
namespace = []
table_id = namespace + [name]
request = DescribeTableRequest(id=table_id)
response = self._ns.describe_table(request)
@@ -391,10 +381,8 @@ class LanceNamespaceDBConnection(DBConnection):
)
@override
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
def drop_table(self, name: str, namespace: List[str] = []):
# Use namespace drop_table directly
if namespace is None:
namespace = []
table_id = namespace + [name]
request = DropTableRequest(id=table_id)
self._ns.drop_table(request)
@@ -404,13 +392,9 @@ class LanceNamespaceDBConnection(DBConnection):
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
if cur_namespace is None:
cur_namespace = []
if new_namespace is None:
new_namespace = []
raise NotImplementedError(
"rename_table is not supported for namespace connections"
)
@@ -422,16 +406,14 @@ class LanceNamespaceDBConnection(DBConnection):
)
@override
def drop_all_tables(self, namespace: Optional[List[str]] = None):
if namespace is None:
namespace = []
def drop_all_tables(self, namespace: List[str] = []):
for table_name in self.table_names(namespace=namespace):
self.drop_table(table_name, namespace=namespace)
@override
def list_namespaces(
self,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
) -> Iterable[str]:
@@ -453,8 +435,6 @@ class LanceNamespaceDBConnection(DBConnection):
Iterable[str]
Names of child namespaces.
"""
if namespace is None:
namespace = []
request = ListNamespacesRequest(
id=namespace, page_token=page_token, limit=limit
)
@@ -492,15 +472,13 @@ class LanceNamespaceDBConnection(DBConnection):
name: str,
table_uri: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
) -> LanceTable:
# Open a table directly from a URI using the location parameter
# Note: storage_options should already be merged by the caller
if namespace is None:
namespace = []
temp_conn = LanceDBConnection(
table_uri, # Use the table location as the connection URI
read_consistency_interval=self.read_consistency_interval,
@@ -561,11 +539,9 @@ class AsyncLanceNamespaceDBConnection:
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
) -> Iterable[str]:
"""List table names in the namespace."""
if namespace is None:
namespace = []
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
response = self._ns.list_tables(request)
return response.tables if response.tables else []
@@ -581,15 +557,13 @@ class AsyncLanceNamespaceDBConnection:
fill_value: float = 0.0,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
data_storage_version: Optional[str] = None,
enable_v2_manifest_paths: Optional[bool] = None,
) -> AsyncTable:
"""Create a new table in the namespace."""
if namespace is None:
namespace = []
if mode.lower() not in ["create", "overwrite"]:
raise ValueError("mode must be either 'create' or 'overwrite'")
validate_table_name(name)
@@ -681,14 +655,12 @@ class AsyncLanceNamespaceDBConnection:
self,
name: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
) -> AsyncTable:
"""Open an existing table from the namespace."""
if namespace is None:
namespace = []
table_id = namespace + [name]
request = DescribeTableRequest(id=table_id)
response = self._ns.describe_table(request)
@@ -729,10 +701,8 @@ class AsyncLanceNamespaceDBConnection:
lance_table = await asyncio.to_thread(_open_table)
return lance_table._table
async def drop_table(self, name: str, namespace: Optional[List[str]] = None):
async def drop_table(self, name: str, namespace: List[str] = []):
"""Drop a table from the namespace."""
if namespace is None:
namespace = []
table_id = namespace + [name]
request = DropTableRequest(id=table_id)
self._ns.drop_table(request)
@@ -741,14 +711,10 @@ class AsyncLanceNamespaceDBConnection:
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
"""Rename is not supported for namespace connections."""
if cur_namespace is None:
cur_namespace = []
if new_namespace is None:
new_namespace = []
raise NotImplementedError(
"rename_table is not supported for namespace connections"
)
@@ -759,17 +725,15 @@ class AsyncLanceNamespaceDBConnection:
"drop_database is deprecated, use drop_all_tables instead"
)
async def drop_all_tables(self, namespace: Optional[List[str]] = None):
async def drop_all_tables(self, namespace: List[str] = []):
"""Drop all tables in the namespace."""
if namespace is None:
namespace = []
table_names = await self.table_names(namespace=namespace)
for table_name in table_names:
await self.drop_table(table_name, namespace=namespace)
async def list_namespaces(
self,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
) -> Iterable[str]:
@@ -791,8 +755,6 @@ class AsyncLanceNamespaceDBConnection:
Iterable[str]
Names of child namespaces.
"""
if namespace is None:
namespace = []
request = ListNamespacesRequest(
id=namespace, page_token=page_token, limit=limit
)

View File

@@ -883,7 +883,7 @@ class LanceQueryBuilder(ABC):
----------
where: str
The where clause which is a valid SQL where clause. See
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
`Lance filter pushdown <https://lancedb.github.io/lance/read_and_write.html#filter-push-down>`_
for valid SQL expressions.
prefilter: bool, default True
If True, apply the filter before vector search, otherwise the
@@ -1356,7 +1356,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
----------
where: str
The where clause which is a valid SQL where clause. See
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
`Lance filter pushdown <https://lancedb.github.io/lance/read_and_write.html#filter-push-down>`_
for valid SQL expressions.
prefilter: bool, default True
If True, apply the filter before vector search, otherwise the
@@ -1495,7 +1495,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
if self._phrase_query:
if isinstance(query, str):
if not query.startswith('"') or not query.endswith('"'):
self._query = f'"{query}"'
query = f'"{query}"'
elif isinstance(query, FullTextQuery) and not isinstance(
query, PhraseQuery
):
@@ -2429,8 +2429,9 @@ class AsyncQueryBase(object):
>>> from lancedb import connect_async
>>> async def doctest_example():
... conn = await connect_async("./.lancedb")
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
... plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
... query = [100, 100]
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
... print(plan)
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
@@ -2439,7 +2440,6 @@ class AsyncQueryBase(object):
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
<BLANKLINE>
Parameters
----------
@@ -3141,9 +3141,10 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
>>> from lancedb.index import FTS
>>> async def doctest_example():
... conn = await connect_async("./.lancedb")
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0], "text": "hello world"}])
... table = await conn.create_table("my_table", [{"vector": [99, 99], "text": "hello world"}])
... await table.create_index("text", config=FTS(with_position=False))
... plan = await table.query().nearest_to([1.0, 2.0]).nearest_to_text("hello").explain_plan(True)
... query = [100, 100]
... plan = await table.query().nearest_to([1, 2]).nearest_to_text("hello").explain_plan(True)
... print(plan)
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Vector Search Plan:
@@ -3417,8 +3418,9 @@ class BaseQueryBuilder(object):
>>> from lancedb import connect_async
>>> async def doctest_example():
... conn = await connect_async("./.lancedb")
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
... plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
... query = [100, 100]
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
... print(plan)
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
@@ -3427,7 +3429,6 @@ class BaseQueryBuilder(object):
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
<BLANKLINE>
Parameters
----------

View File

@@ -104,7 +104,7 @@ class RemoteDBConnection(DBConnection):
@override
def list_namespaces(
self,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
) -> Iterable[str]:
@@ -125,8 +125,6 @@ class RemoteDBConnection(DBConnection):
Iterable of str
List of immediate child namespace names
"""
if namespace is None:
namespace = []
return LOOP.run(
self._conn.list_namespaces(
namespace=namespace, page_token=page_token, limit=limit
@@ -161,7 +159,7 @@ class RemoteDBConnection(DBConnection):
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
) -> Iterable[str]:
"""List the names of all tables in the database.
@@ -179,8 +177,6 @@ class RemoteDBConnection(DBConnection):
-------
An iterator of table names.
"""
if namespace is None:
namespace = []
return LOOP.run(
self._conn.table_names(
namespace=namespace, start_after=page_token, limit=limit
@@ -192,7 +188,7 @@ class RemoteDBConnection(DBConnection):
self,
name: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
index_cache_size: Optional[int] = None,
) -> Table:
@@ -212,8 +208,6 @@ class RemoteDBConnection(DBConnection):
"""
from .table import RemoteTable
if namespace is None:
namespace = []
if index_cache_size is not None:
logging.info(
"index_cache_size is ignored in LanceDb Cloud"
@@ -228,7 +222,7 @@ class RemoteDBConnection(DBConnection):
target_table_name: str,
source_uri: str,
*,
target_namespace: Optional[List[str]] = None,
target_namespace: List[str] = [],
source_version: Optional[int] = None,
source_tag: Optional[str] = None,
is_shallow: bool = True,
@@ -258,8 +252,6 @@ class RemoteDBConnection(DBConnection):
"""
from .table import RemoteTable
if target_namespace is None:
target_namespace = []
table = LOOP.run(
self._conn.clone_table(
target_table_name,
@@ -283,7 +275,7 @@ class RemoteDBConnection(DBConnection):
mode: Optional[str] = None,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
) -> Table:
"""Create a [Table][lancedb.table.Table] in the database.
@@ -380,8 +372,6 @@ class RemoteDBConnection(DBConnection):
LanceTable(table4)
"""
if namespace is None:
namespace = []
validate_table_name(name)
if embedding_functions is not None:
logging.warning(
@@ -406,7 +396,7 @@ class RemoteDBConnection(DBConnection):
return RemoteTable(table, self.db_name)
@override
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
def drop_table(self, name: str, namespace: List[str] = []):
"""Drop a table from the database.
Parameters
@@ -417,8 +407,6 @@ class RemoteDBConnection(DBConnection):
The namespace to drop the table from.
None or empty list represents root namespace.
"""
if namespace is None:
namespace = []
LOOP.run(self._conn.drop_table(name, namespace=namespace))
@override
@@ -426,8 +414,8 @@ class RemoteDBConnection(DBConnection):
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
"""Rename a table in the database.
@@ -438,10 +426,6 @@ class RemoteDBConnection(DBConnection):
new_name: str
The new name of the table.
"""
if cur_namespace is None:
cur_namespace = []
if new_namespace is None:
new_namespace = []
LOOP.run(
self._conn.rename_table(
cur_name,

View File

@@ -652,17 +652,6 @@ class RemoteTable(Table):
"migrate_v2_manifest_paths() is not supported on the LanceDB Cloud"
)
def head(self, n=5) -> pa.Table:
"""
Return the first `n` rows of the table.
Parameters
----------
n: int, default 5
The number of rows to return.
"""
return LOOP.run(self._table.query().limit(n).to_arrow())
def add_index(tbl: pa.Table, i: int) -> pa.Table:
return tbl.add_column(

View File

@@ -178,7 +178,7 @@ def _into_pyarrow_reader(
f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
"pyarrow Table/RecordBatch, or Pydantic models. "
"See https://lancedb.com/docs/tables/ for examples."
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
)
@@ -1018,7 +1018,7 @@ class Table(ABC):
... .when_not_matched_insert_all() \\
... .execute(new_data)
>>> res
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0, num_attempts=1)
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
>>> # The order of new rows is non-deterministic since we use
>>> # a hash-join as part of this operation and so we sort here
>>> table.to_arrow().sort_by("a").to_pandas()
@@ -1708,15 +1708,13 @@ class LanceTable(Table):
connection: "LanceDBConnection",
name: str,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
location: Optional[str] = None,
_async: AsyncTable = None,
):
if namespace is None:
namespace = []
self._conn = connection
self._namespace = namespace
self._location = location # Store location for use in _dataset_path
@@ -1768,14 +1766,12 @@ class LanceTable(Table):
db,
name,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
location: Optional[str] = None,
):
if namespace is None:
namespace = []
tbl = cls(
db,
name,
@@ -2627,7 +2623,7 @@ class LanceTable(Table):
fill_value: float = 0.0,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
*,
namespace: Optional[List[str]] = None,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str | bool]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
data_storage_version: Optional[str] = None,
@@ -2687,8 +2683,6 @@ class LanceTable(Table):
Deprecated. Set `storage_options` when connecting to the database and set
`new_table_enable_v2_manifest_paths` in the options.
"""
if namespace is None:
namespace = []
self = cls.__new__(cls)
self._conn = db
self._namespace = namespace
@@ -3634,7 +3628,7 @@ class AsyncTable:
... .when_not_matched_insert_all() \\
... .execute(new_data)
>>> res
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0, num_attempts=1)
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
>>> # The order of new rows is non-deterministic since we use
>>> # a hash-join as part of this operation and so we sort here
>>> table.to_arrow().sort_by("a").to_pandas()

View File

@@ -441,150 +441,6 @@ async def test_create_table_v2_manifest_paths_async(tmp_path):
assert re.match(r"\d{20}\.manifest", manifest)
@pytest.mark.asyncio
async def test_create_table_stable_row_ids_via_storage_options(tmp_path):
"""Test stable_row_ids via storage_options at connect time."""
import lance
# Connect with stable row IDs enabled as default for new tables
db_with = await lancedb.connect_async(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
)
# Connect without stable row IDs (default)
db_without = await lancedb.connect_async(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
)
# Create table using connection with stable row IDs enabled
await db_with.create_table(
"with_stable_via_opts",
data=[{"id": i} for i in range(10)],
)
lance_ds_with = lance.dataset(tmp_path / "with_stable_via_opts.lance")
fragments_with = lance_ds_with.get_fragments()
assert len(fragments_with) > 0
assert fragments_with[0].metadata.row_id_meta is not None
# Create table using connection without stable row IDs
await db_without.create_table(
"without_stable_via_opts",
data=[{"id": i} for i in range(10)],
)
lance_ds_without = lance.dataset(tmp_path / "without_stable_via_opts.lance")
fragments_without = lance_ds_without.get_fragments()
assert len(fragments_without) > 0
assert fragments_without[0].metadata.row_id_meta is None
def test_create_table_stable_row_ids_via_storage_options_sync(tmp_path):
"""Test that enable_stable_row_ids can be set via storage_options (sync API)."""
# Connect with stable row IDs enabled as default for new tables
db_with = lancedb.connect(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
)
# Connect without stable row IDs (default)
db_without = lancedb.connect(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
)
# Create table using connection with stable row IDs enabled
tbl_with = db_with.create_table(
"with_stable_sync",
data=[{"id": i} for i in range(10)],
)
lance_ds_with = tbl_with.to_lance()
fragments_with = lance_ds_with.get_fragments()
assert len(fragments_with) > 0
assert fragments_with[0].metadata.row_id_meta is not None
# Create table using connection without stable row IDs
tbl_without = db_without.create_table(
"without_stable_sync",
data=[{"id": i} for i in range(10)],
)
lance_ds_without = tbl_without.to_lance()
fragments_without = lance_ds_without.get_fragments()
assert len(fragments_without) > 0
assert fragments_without[0].metadata.row_id_meta is None
@pytest.mark.asyncio
async def test_create_table_stable_row_ids_table_level_override(tmp_path):
"""Test that stable_row_ids can be enabled/disabled at create_table level."""
import lance
# Connect without any stable row ID setting
db_default = await lancedb.connect_async(tmp_path)
# Connect with stable row IDs enabled at connection level
db_with_stable = await lancedb.connect_async(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
)
# Case 1: No connection setting, enable at table level
await db_default.create_table(
"table_level_enabled",
data=[{"id": i} for i in range(10)],
storage_options={"new_table_enable_stable_row_ids": "true"},
)
lance_ds = lance.dataset(tmp_path / "table_level_enabled.lance")
fragments = lance_ds.get_fragments()
assert len(fragments) > 0
assert fragments[0].metadata.row_id_meta is not None, (
"Table should have stable row IDs when enabled at table level"
)
# Case 2: Connection has stable row IDs, override with false at table level
await db_with_stable.create_table(
"table_level_disabled",
data=[{"id": i} for i in range(10)],
storage_options={"new_table_enable_stable_row_ids": "false"},
)
lance_ds = lance.dataset(tmp_path / "table_level_disabled.lance")
fragments = lance_ds.get_fragments()
assert len(fragments) > 0
assert fragments[0].metadata.row_id_meta is None, (
"Table should NOT have stable row IDs when disabled at table level"
)
def test_create_table_stable_row_ids_table_level_override_sync(tmp_path):
"""Test that stable_row_ids can be enabled/disabled at create_table level (sync)."""
# Connect without any stable row ID setting
db_default = lancedb.connect(tmp_path)
# Connect with stable row IDs enabled at connection level
db_with_stable = lancedb.connect(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
)
# Case 1: No connection setting, enable at table level
tbl = db_default.create_table(
"table_level_enabled_sync",
data=[{"id": i} for i in range(10)],
storage_options={"new_table_enable_stable_row_ids": "true"},
)
lance_ds = tbl.to_lance()
fragments = lance_ds.get_fragments()
assert len(fragments) > 0
assert fragments[0].metadata.row_id_meta is not None, (
"Table should have stable row IDs when enabled at table level"
)
# Case 2: Connection has stable row IDs, override with false at table level
tbl = db_with_stable.create_table(
"table_level_disabled_sync",
data=[{"id": i} for i in range(10)],
storage_options={"new_table_enable_stable_row_ids": "false"},
)
lance_ds = tbl.to_lance()
fragments = lance_ds.get_fragments()
assert len(fragments) > 0
assert fragments[0].metadata.row_id_meta is None, (
"Table should NOT have stable row IDs when disabled at table level"
)
def test_open_table_sync(tmp_db: lancedb.DBConnection):
tmp_db.create_table("test", data=[{"id": 0}])
assert tmp_db.open_table("test").count_rows() == 1

View File

@@ -325,18 +325,11 @@ def test_search_fts_phrase_query(table):
pass
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
results = table.search("puppy").limit(100).to_list()
# Test with quotation marks
phrase_results = table.search('"puppy runs"').limit(100).to_list()
assert len(results) > len(phrase_results)
assert len(phrase_results) > 0
# Test with .phrase_query()
phrase_results = table.search("puppy runs").phrase_query().limit(100).to_list()
assert len(results) > len(phrase_results)
assert len(phrase_results) > 0
# Test with PhraseQuery()
# Test with a query
phrase_results = (
table.search(PhraseQuery("puppy runs", "text")).limit(100).to_list()
)

View File

@@ -546,22 +546,6 @@ def query_test_table(query_handler, *, server_version=Version("0.1.0")):
yield table
def test_head():
def handler(body):
assert body == {
"k": 5,
"prefilter": True,
"vector": [],
"version": None,
}
return pa.table({"id": [1, 2, 3]})
with query_test_table(handler) as table:
data = table.head(5)
assert data == pa.table({"id": [1, 2, 3]})
def test_query_sync_minimal():
def handler(body):
assert body == {

View File

@@ -1487,7 +1487,7 @@ def setup_hybrid_search_table(db: DBConnection, embedding_func):
table.add([{"text": p} for p in phrases])
# Create a fts index
table.create_fts_index("text", with_position=True)
table.create_fts_index("text")
return table, MyTable, emb

View File

@@ -690,7 +690,7 @@ impl FTSQuery {
}
pub fn get_query(&self) -> String {
self.fts_query.query.query().clone()
self.fts_query.query.query().to_owned()
}
pub fn to_query_request(&self) -> PyQueryRequest {

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.22.4-beta.3"
version = "0.22.4-beta.2"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true
@@ -105,12 +105,12 @@ test-log = "0.2"
[features]
default = ["aws", "gcs", "azure", "dynamodb", "oss"]
aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
aws = ["lance/aws", "lance-io/aws"]
oss = ["lance/oss", "lance-io/oss"]
gcs = ["lance/gcp", "lance-io/gcp"]
azure = ["lance/azure", "lance-io/azure"]
dynamodb = ["lance/dynamodb", "aws"]
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest"]
remote = ["dep:reqwest", "dep:http"]
fp16kernels = ["lance-linalg/fp16kernels"]
s3-test = []
bedrock = ["dep:aws-sdk-bedrockruntime"]

View File

@@ -239,7 +239,7 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// Options already set on the connection will be inherited by the table,
/// but can be overridden here.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let store_options = self
.request
@@ -259,7 +259,7 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// Options already set on the connection will be inherited by the table,
/// but can be overridden here.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_options(
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
@@ -442,7 +442,7 @@ impl OpenTableBuilder {
/// Options already set on the connection will be inherited by the table,
/// but can be overridden here.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let storage_options = self
.request
@@ -461,7 +461,7 @@ impl OpenTableBuilder {
/// Options already set on the connection will be inherited by the table,
/// but can be overridden here.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_options(
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
@@ -959,7 +959,7 @@ impl ConnectBuilder {
/// Set an option for the storage layer.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.request.options.insert(key.into(), value.into());
self
@@ -967,7 +967,7 @@ impl ConnectBuilder {
/// Set multiple options for the storage layer.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_options(
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
@@ -1102,7 +1102,7 @@ impl ConnectNamespaceBuilder {
/// Set an option for the storage layer.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.storage_options.insert(key.into(), value.into());
self
@@ -1110,7 +1110,7 @@ impl ConnectNamespaceBuilder {
/// Set multiple options for the storage layer.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_options(
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,

View File

@@ -35,7 +35,6 @@ pub const LANCE_FILE_EXTENSION: &str = "lance";
pub const OPT_NEW_TABLE_STORAGE_VERSION: &str = "new_table_data_storage_version";
pub const OPT_NEW_TABLE_V2_MANIFEST_PATHS: &str = "new_table_enable_v2_manifest_paths";
pub const OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS: &str = "new_table_enable_stable_row_ids";
/// Controls how new tables should be created
#[derive(Clone, Debug, Default)]
@@ -49,12 +48,6 @@ pub struct NewTableConfig {
/// V2 manifest paths are more efficient than V2 manifest paths but are not
/// supported by old clients.
pub enable_v2_manifest_paths: Option<bool>,
/// Whether to enable stable row IDs for new tables
///
/// When enabled, row IDs remain stable after compaction, update, delete,
/// and merges. This is useful for materialized views and other use cases
/// that need to track source rows across these operations.
pub enable_stable_row_ids: Option<bool>,
}
/// Options specific to the listing database
@@ -67,7 +60,7 @@ pub struct ListingDatabaseOptions {
/// These are used to create/list tables and they are inherited by all tables
/// opened by this database.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub storage_options: HashMap<String, String>,
}
@@ -94,14 +87,6 @@ impl ListingDatabaseOptions {
})
})
.transpose()?,
enable_stable_row_ids: map
.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS)
.map(|s| {
s.parse::<bool>().map_err(|_| Error::InvalidInput {
message: format!("enable_stable_row_ids must be a boolean, received {}", s),
})
})
.transpose()?,
};
// We just assume that any options that are not new table config options are storage options
let storage_options = map
@@ -109,7 +94,6 @@ impl ListingDatabaseOptions {
.filter(|(key, _)| {
key.as_str() != OPT_NEW_TABLE_STORAGE_VERSION
&& key.as_str() != OPT_NEW_TABLE_V2_MANIFEST_PATHS
&& key.as_str() != OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS
})
.map(|(key, value)| (key.clone(), value.clone()))
.collect();
@@ -134,12 +118,6 @@ impl DatabaseOptions for ListingDatabaseOptions {
enable_v2_manifest_paths.to_string(),
);
}
if let Some(enable_stable_row_ids) = self.new_table_config.enable_stable_row_ids {
map.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
enable_stable_row_ids.to_string(),
);
}
}
}
@@ -179,7 +157,7 @@ impl ListingDatabaseOptionsBuilder {
/// Set an option for the storage layer.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.options
.storage_options
@@ -189,7 +167,7 @@ impl ListingDatabaseOptionsBuilder {
/// Set multiple options for the storage layer.
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
pub fn storage_options(
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
@@ -497,7 +475,7 @@ impl ListingDatabase {
// this error is not lance::Error::DatasetNotFound, as the method
// `remove_dir_all` may be used to remove something not be a dataset
lance::Error::NotFound { .. } => Error::TableNotFound {
name: name.clone(),
name: name.to_owned(),
source: Box::new(err),
},
_ => Error::from(err),
@@ -519,7 +497,7 @@ impl ListingDatabase {
fn extract_storage_overrides(
&self,
request: &CreateTableRequest,
) -> Result<(Option<LanceFileVersion>, Option<bool>, Option<bool>)> {
) -> Result<(Option<LanceFileVersion>, Option<bool>)> {
let storage_options = request
.write_options
.lance_write_params
@@ -540,19 +518,7 @@ impl ListingDatabase {
message: "enable_v2_manifest_paths must be a boolean".to_string(),
})?;
let stable_row_ids_override = storage_options
.and_then(|opts| opts.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS))
.map(|s| s.parse::<bool>())
.transpose()
.map_err(|_| Error::InvalidInput {
message: "enable_stable_row_ids must be a boolean".to_string(),
})?;
Ok((
storage_version_override,
v2_manifest_override,
stable_row_ids_override,
))
Ok((storage_version_override, v2_manifest_override))
}
/// Prepare write parameters for table creation
@@ -561,7 +527,6 @@ impl ListingDatabase {
request: &CreateTableRequest,
storage_version_override: Option<LanceFileVersion>,
v2_manifest_override: Option<bool>,
stable_row_ids_override: Option<bool>,
) -> lance::dataset::WriteParams {
let mut write_params = request
.write_options
@@ -606,13 +571,6 @@ impl ListingDatabase {
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
}
// Apply enable_stable_row_ids: table-level override takes precedence over connection config
if let Some(enable_stable_row_ids) =
stable_row_ids_override.or(self.new_table_config.enable_stable_row_ids)
{
write_params.enable_stable_row_ids = enable_stable_row_ids;
}
if matches!(&request.mode, CreateTableMode::Overwrite) {
write_params.mode = WriteMode::Overwrite;
}
@@ -748,15 +706,11 @@ impl Database for ListingDatabase {
.clone()
.unwrap_or_else(|| self.table_uri(&request.name).unwrap());
let (storage_version_override, v2_manifest_override, stable_row_ids_override) =
let (storage_version_override, v2_manifest_override) =
self.extract_storage_overrides(&request)?;
let write_params = self.prepare_write_params(
&request,
storage_version_override,
v2_manifest_override,
stable_row_ids_override,
);
let write_params =
self.prepare_write_params(&request, storage_version_override, v2_manifest_override);
let data_schema = request.data.arrow_schema();
@@ -967,7 +921,7 @@ impl Database for ListingDatabase {
mod tests {
use super::*;
use crate::connection::ConnectRequest;
use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest, WriteOptions};
use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest};
use crate::table::{Table, TableDefinition};
use arrow_array::{Int32Array, RecordBatch, StringArray};
use arrow_schema::{DataType, Field, Schema};
@@ -1667,267 +1621,4 @@ mod tests {
// Cloned table should have all 8 rows from the latest version
assert_eq!(cloned_table.count_rows(None).await.unwrap(), 8);
}
#[tokio::test]
async fn test_create_table_with_stable_row_ids_connection_level() {
let tempdir = tempdir().unwrap();
let uri = tempdir.path().to_str().unwrap();
// Create database with stable row IDs enabled at connection level
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"true".to_string(),
);
let request = ConnectRequest {
uri: uri.to_string(),
#[cfg(feature = "remote")]
client_config: Default::default(),
options,
read_consistency_interval: None,
session: None,
};
let db = ListingDatabase::connect_with_options(&request)
.await
.unwrap();
// Verify the config was parsed correctly
assert_eq!(db.new_table_config.enable_stable_row_ids, Some(true));
// Create a table - it should inherit the stable row IDs setting
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
let reader = Box::new(arrow_array::RecordBatchIterator::new(
vec![Ok(batch)],
schema.clone(),
));
let table = db
.create_table(CreateTableRequest {
name: "test_stable".to_string(),
namespace: vec![],
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
// Verify table was created successfully
assert_eq!(table.count_rows(None).await.unwrap(), 3);
}
#[tokio::test]
async fn test_create_table_with_stable_row_ids_table_level() {
let (_tempdir, db) = setup_database().await;
// Verify connection has no stable row IDs config
assert_eq!(db.new_table_config.enable_stable_row_ids, None);
// Create a table with stable row IDs enabled at table level via storage_options
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
let reader = Box::new(arrow_array::RecordBatchIterator::new(
vec![Ok(batch)],
schema.clone(),
));
let mut storage_options = HashMap::new();
storage_options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"true".to_string(),
);
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options: Some(storage_options),
..Default::default()
}),
..Default::default()
}),
};
let table = db
.create_table(CreateTableRequest {
name: "test_stable_table_level".to_string(),
namespace: vec![],
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options,
location: None,
})
.await
.unwrap();
// Verify table was created successfully
assert_eq!(table.count_rows(None).await.unwrap(), 3);
}
#[tokio::test]
async fn test_create_table_stable_row_ids_table_overrides_connection() {
let tempdir = tempdir().unwrap();
let uri = tempdir.path().to_str().unwrap();
// Create database with stable row IDs enabled at connection level
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"true".to_string(),
);
let request = ConnectRequest {
uri: uri.to_string(),
#[cfg(feature = "remote")]
client_config: Default::default(),
options,
read_consistency_interval: None,
session: None,
};
let db = ListingDatabase::connect_with_options(&request)
.await
.unwrap();
assert_eq!(db.new_table_config.enable_stable_row_ids, Some(true));
// Create table with stable row IDs disabled at table level (overrides connection)
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
let reader = Box::new(arrow_array::RecordBatchIterator::new(
vec![Ok(batch)],
schema.clone(),
));
let mut storage_options = HashMap::new();
storage_options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"false".to_string(),
);
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options: Some(storage_options),
..Default::default()
}),
..Default::default()
}),
};
let table = db
.create_table(CreateTableRequest {
name: "test_override".to_string(),
namespace: vec![],
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options,
location: None,
})
.await
.unwrap();
// Verify table was created successfully
assert_eq!(table.count_rows(None).await.unwrap(), 3);
}
#[tokio::test]
async fn test_stable_row_ids_invalid_value() {
let tempdir = tempdir().unwrap();
let uri = tempdir.path().to_str().unwrap();
// Try to create database with invalid stable row IDs value
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"not_a_boolean".to_string(),
);
let request = ConnectRequest {
uri: uri.to_string(),
#[cfg(feature = "remote")]
client_config: Default::default(),
options,
read_consistency_interval: None,
session: None,
};
let result = ListingDatabase::connect_with_options(&request).await;
assert!(result.is_err());
assert!(matches!(
result.unwrap_err(),
Error::InvalidInput { message } if message.contains("enable_stable_row_ids must be a boolean")
));
}
#[test]
fn test_stable_row_ids_config_serialization() {
// Test that ListingDatabaseOptions correctly serializes stable_row_ids
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"true".to_string(),
);
// Parse the options
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
assert_eq!(
db_options.new_table_config.enable_stable_row_ids,
Some(true)
);
// Serialize back to map
let mut serialized = HashMap::new();
db_options.serialize_into_map(&mut serialized);
assert_eq!(
serialized.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS),
Some(&"true".to_string())
);
}
#[test]
fn test_stable_row_ids_config_parse_false() {
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"false".to_string(),
);
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
assert_eq!(
db_options.new_table_config.enable_stable_row_ids,
Some(false)
);
}
#[test]
fn test_stable_row_ids_config_not_set() {
let options = HashMap::new();
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
assert_eq!(db_options.new_table_config.enable_stable_row_ids, None);
}
}

View File

@@ -71,7 +71,7 @@
//! It treats [`FixedSizeList<Float16/Float32>`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html)
//! columns as vector columns.
//!
//! For more details, please refer to the [LanceDB documentation](https://lancedb.com/docs).
//! For more details, please refer to [LanceDB documentation](https://lancedb.github.io/lancedb/).
//!
//! #### Create a table
//!

View File

@@ -90,7 +90,7 @@ pub struct RemoteDatabaseOptions {
pub host_override: Option<String>,
/// Storage options configure the storage layer (e.g. S3, GCS, Azure, etc.)
///
/// See available options at <https://lancedb.com/docs/storage/>
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
///
/// These options are only used for LanceDB Enterprise and only a subset of options
/// are supported.