Compare commits

..

9 Commits

Author SHA1 Message Date
Lance Release
d11819c90c Bump version: 0.22.0-beta.10 → 0.22.0-beta.11 2025-04-25 05:01:57 +00:00
BubbleCal
9b902272f1 fix: sync hybrid search ignores the distance range params (#2356)
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Added support for distance range filtering in hybrid vector queries,
allowing users to specify lower and upper bounds for search results.

- **Tests**
- Introduced new tests to validate distance range filtering and
reranking in both synchronous and asynchronous hybrid query scenarios.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-04-25 13:01:22 +08:00
Will Jones
8c0622fa2c fix: remote limit to avoid "Limit must be non-negative" (#2354)
To workaround this issue: https://github.com/lancedb/lancedb/issues/2211

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **Bug Fixes**
- Improved handling of large query parameters to prevent potential
overflow issues when using the "k" parameter in queries.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-04-24 15:04:06 -07:00
Philip Meier
2191f948c3 fix: add missing pydantic model config compat (#2316)
Fixes #2315.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **Refactor**
- Enhanced query processing to maintain smooth functionality across
different dependency versions, ensuring improved stability and
performance.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-04-22 14:46:10 -07:00
Will Jones
acc3b03004 ci: fix docs deploy (#2351)
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **Chores**
- Improved CI workflow for documentation builds by optimizing Rust build
settings and updating the runner environment.
  - Fixed a typo in a workflow step name.
- Streamlined caching steps to reduce redundancy and improve efficiency.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-04-22 13:55:34 -07:00
Lance Release
7f091b8c8e Updating package-lock.json 2025-04-22 19:16:43 +00:00
Lance Release
c19bdd9a24 Updating package-lock.json 2025-04-22 18:24:16 +00:00
Lance Release
dad0ff5cd2 Updating package-lock.json 2025-04-22 18:23:59 +00:00
Lance Release
a705621067 Bump version: 0.19.0-beta.9 → 0.19.0-beta.10 2025-04-22 18:23:39 +00:00
26 changed files with 141 additions and 61 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.19.0-beta.9"
current_version = "0.19.0-beta.10"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -18,17 +18,24 @@ concurrency:
group: "pages"
cancel-in-progress: true
env:
# This reduces the disk space needed for the build
RUSTFLAGS: "-C debuginfo=0"
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
# CI builds are faster with incremental disabled.
CARGO_INCREMENTAL: "0"
jobs:
# Single deploy job since we're just deploying
build:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: buildjet-8vcpu-ubuntu-2204
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install dependecies needed for ubuntu
- name: Install dependencies needed for ubuntu
run: |
sudo apt install -y protobuf-compiler libssl-dev
rustup update && rustup default
@@ -38,6 +45,7 @@ jobs:
python-version: "3.10"
cache: "pip"
cache-dependency-path: "docs/requirements.txt"
- uses: Swatinem/rust-cache@v2
- name: Build Python
working-directory: python
run: |
@@ -49,7 +57,6 @@ jobs:
node-version: 20
cache: 'npm'
cache-dependency-path: node/package-lock.json
- uses: Swatinem/rust-cache@v2
- name: Install node dependencies
working-directory: node
run: |

8
Cargo.lock generated
View File

@@ -4136,7 +4136,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.19.0-beta.9"
version = "0.19.0-beta.10"
dependencies = [
"arrow",
"arrow-array",
@@ -4223,7 +4223,7 @@ dependencies = [
[[package]]
name = "lancedb-node"
version = "0.19.0-beta.9"
version = "0.19.0-beta.10"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4248,7 +4248,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.19.0-beta.9"
version = "0.19.0-beta.10"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4266,7 +4266,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.22.0-beta.9"
version = "0.22.0-beta.10"
dependencies = [
"arrow",
"env_logger",

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.19.0-beta.9</version>
<version>0.19.0-beta.10</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.19.0-beta.9</version>
<version>0.19.0-beta.10</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>

44
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.9",
"@lancedb/vectordb-darwin-x64": "0.19.0-beta.9",
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.9",
"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.9",
"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.9"
"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.10",
"@lancedb/vectordb-darwin-x64": "0.19.0-beta.10",
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.10",
"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.10",
"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.10"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.19.0-beta.9",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.19.0-beta.9.tgz",
"integrity": "sha512-Sc4FFWT7ck7U2BKXnFl5EdPsb2ay8yOX3AqpIYbhenhpAkzLBS8NPUztuhybFoPeLSWKVUtwp9NGAxNyL9lgrQ==",
"version": "0.19.0-beta.10",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.19.0-beta.10.tgz",
"integrity": "sha512-4PvsrE+hJ+AqFY33yHIEVET2ayv0pzpiWqCnMQ5OdpakQZvfykmp9ykc5KI80VuWAlniJDYuW+fju3z8/wiUHQ==",
"cpu": [
"arm64"
],
@@ -340,9 +340,9 @@
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.19.0-beta.9",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.19.0-beta.9.tgz",
"integrity": "sha512-KiYzFFX7Hvq/OvsA7VZt0xfGRW4eoDcLwgWdXxgsNzB6X6d3eupUqkrTKe3KlHrnm170s5fwSFrJHIU/NPoA/A==",
"version": "0.19.0-beta.10",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.19.0-beta.10.tgz",
"integrity": "sha512-YHXHkD4mmIry+KMoTX7Qts5Ea9fG0DklywIiF8TS7h/9XbXLG74lf+GUy2Eh/s1wKLd4LtRh2SbHpOtZoOH4lA==",
"cpu": [
"x64"
],
@@ -353,9 +353,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.19.0-beta.9",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.19.0-beta.9.tgz",
"integrity": "sha512-TCK/PEqvoVa/oxYHWsjpB+BaQ/KDfwOI76grbezAEDGYS76USAnNb8KB8bDnnadOu9/i4Zsha0Bk0fsHfDSSpg==",
"version": "0.19.0-beta.10",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.19.0-beta.10.tgz",
"integrity": "sha512-c019rw30N25WIXnkhAwJ4QkpUcUJqbkGay3RiR3vTmGQ5YOZWw5V5g/v2y7APcv+ZlZfJ4YgDjFH8wqtiECNJQ==",
"cpu": [
"arm64"
],
@@ -366,9 +366,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.19.0-beta.9",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.19.0-beta.9.tgz",
"integrity": "sha512-Ti7Z2Am5ziYZmQBpLfVwGWe+zl11xed084Gv6WFcYfyLz6FJ/0+CEMVx1wIsoZ1gq6tvH0vgZLYxsMF5C5xVnw==",
"version": "0.19.0-beta.10",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.19.0-beta.10.tgz",
"integrity": "sha512-xnbC6rqpuJDv2q6xNBKrrocNOTcM4z6+8Zi7wP+Sb+WXvLzkR7hm7ZS0gyeExRknEEd91imhL/ZuAxEq1892YA==",
"cpu": [
"x64"
],
@@ -379,9 +379,9 @@
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.19.0-beta.9",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.19.0-beta.9.tgz",
"integrity": "sha512-MUSECQGbpRi3qbG6CsLJAInT0ZIMpr/RBhEx5CUlwZiEIWaUc/CQMoNJh7W+1s5Cd3UMBgkMpgmfj21Qd9fsHw==",
"version": "0.19.0-beta.10",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.19.0-beta.10.tgz",
"integrity": "sha512-bUDKvY4tmMEeBpzPfRu2lVo+07nRGzUoUm60WzfvRpa/Y6rwjcCCRuuTOvfTcrnbGYN/kw5yoUu8ZDsZ7mT77Q==",
"cpu": [
"x64"
],

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
@@ -89,10 +89,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.19.0-beta.9",
"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.9",
"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.9",
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.9",
"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.9"
"@lancedb/vectordb-darwin-x64": "0.19.0-beta.10",
"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.10",
"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.10",
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.10",
"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.10"
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.19.0-beta.9"
version = "0.19.0-beta.10"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.19.0-beta.9",
"version": "0.19.0-beta.10",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.0-beta.10"
current_version = "0.22.0-beta.11"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.22.0-beta.10"
version = "0.22.0-beta.11"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -28,6 +28,8 @@ import pyarrow.compute as pc
import pyarrow.fs as pa_fs
import pydantic
from lancedb.pydantic import PYDANTIC_VERSION
from . import __version__
from .arrow import AsyncRecordBatchReader
from .dependencies import pandas as pd
@@ -498,10 +500,14 @@ class Query(pydantic.BaseModel):
)
return query
class Config:
# This tells pydantic to allow custom types (needed for the `vector` query since
# pa.Array wouln't be allowed otherwise)
arbitrary_types_allowed = True
# This tells pydantic to allow custom types (needed for the `vector` query since
# pa.Array wouln't be allowed otherwise)
if PYDANTIC_VERSION.major < 2: # Pydantic 1.x compat
class Config:
arbitrary_types_allowed = True
else:
model_config = {"arbitrary_types_allowed": True}
class LanceQueryBuilder(ABC):
@@ -1586,6 +1592,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
self._refine_factor = None
self._distance_type = None
self._phrase_query = None
self._lower_bound = None
self._upper_bound = None
def _validate_query(self, query, vector=None, text=None):
if query is not None and (vector is not None or text is not None):
@@ -1665,6 +1673,10 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
self._vector_query.ef(self._ef)
if self._bypass_vector_index:
self._vector_query.bypass_vector_index()
if self._lower_bound or self._upper_bound:
self._vector_query.distance_range(
lower_bound=self._lower_bound, upper_bound=self._upper_bound
)
if self._reranker is None:
self._reranker = RRFReranker()

View File

@@ -4,13 +4,32 @@
import lancedb
from lancedb.query import LanceHybridQueryBuilder
from lancedb.rerankers.rrf import RRFReranker
import pyarrow as pa
import pyarrow.compute as pc
import pytest
import pytest_asyncio
from lancedb.index import FTS
from lancedb.table import AsyncTable
from lancedb.table import AsyncTable, Table
@pytest.fixture
def sync_table(tmpdir_factory) -> Table:
tmp_path = str(tmpdir_factory.mktemp("data"))
db = lancedb.connect(tmp_path)
data = pa.table(
{
"text": pa.array(["a", "b", "cat", "dog"]),
"vector": pa.array(
[[0.1, 0.1], [2, 2], [-0.1, -0.1], [0.5, -0.5]],
type=pa.list_(pa.float32(), list_size=2),
),
}
)
table = db.create_table("test", data)
table.create_fts_index("text", with_position=False, use_tantivy=False)
return table
@pytest_asyncio.fixture
@@ -102,6 +121,42 @@ async def test_async_hybrid_query_default_limit(table: AsyncTable):
assert texts.count("a") == 1
def test_hybrid_query_distance_range(sync_table: Table):
reranker = RRFReranker(return_score="all")
result = (
sync_table.search(query_type="hybrid")
.vector([0.0, 0.4])
.text("cat and dog")
.distance_range(lower_bound=0.2, upper_bound=0.5)
.rerank(reranker)
.limit(2)
.to_arrow()
)
assert len(result) == 2
print(result)
for dist in result["_distance"]:
if dist.is_valid:
assert 0.2 <= dist.as_py() <= 0.5
@pytest.mark.asyncio
async def test_hybrid_query_distance_range_async(table: AsyncTable):
reranker = RRFReranker(return_score="all")
result = await (
table.query()
.nearest_to([0.0, 0.4])
.nearest_to_text("cat and dog")
.distance_range(lower_bound=0.2, upper_bound=0.5)
.rerank(reranker)
.limit(2)
.to_arrow()
)
assert len(result) == 2
for dist in result["_distance"]:
if dist.is_valid:
assert 0.2 <= dist.as_py() <= 0.5
@pytest.mark.asyncio
async def test_explain_plan(table: AsyncTable):
plan = await (

View File

@@ -652,6 +652,11 @@ impl HybridQuery {
self.inner_vec.bypass_vector_index();
}
#[pyo3(signature = (lower_bound=None, upper_bound=None))]
pub fn distance_range(&mut self, lower_bound: Option<f32>, upper_bound: Option<f32>) {
self.inner_vec.distance_range(lower_bound, upper_bound);
}
pub fn to_vector_query(&mut self) -> PyResult<VectorQuery> {
Ok(VectorQuery {
inner: self.inner_vec.inner.clone(),

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.19.0-beta.9"
version = "0.19.0-beta.10"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.19.0-beta.9"
version = "0.19.0-beta.10"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -224,7 +224,8 @@ impl<S: HttpSend> RemoteTable<S> {
}
// Server requires k.
let limit = params.limit.unwrap_or(usize::MAX);
// use isize::MAX as usize to avoid overflow: https://github.com/lancedb/lancedb/issues/2211
let limit = params.limit.unwrap_or(isize::MAX as usize);
body["k"] = serde_json::Value::Number(serde_json::Number::from(limit));
if let Some(filter) = &params.filter {
@@ -1616,7 +1617,7 @@ mod tests {
let body = request.body().unwrap().as_bytes().unwrap();
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
let expected_body = serde_json::json!({
"k": usize::MAX,
"k": isize::MAX as usize,
"prefilter": true,
"vector": [], // Empty vector means no vector query.
"version": null,