Compare commits

...

12 Commits

Author SHA1 Message Date
Lance Release
1f41101897 Bump version: 0.14.0 → 0.14.1-beta.0 2024-10-17 18:58:45 +00:00
Will Jones
99e4db0d6a feat(rust): allow add_embedding on create_empty_table (#1754)
Fixes https://github.com/lancedb/lancedb/issues/1750
2024-10-17 11:58:15 -07:00
Will Jones
46486d4d22 fix: list_indices can handle fts indexes (#1753)
Fixes #1752
2024-10-16 10:39:40 -07:00
Weston Pace
f43cb8bba1 feat: upgrade lance to 0.18.3 (#1748) 2024-10-16 00:48:31 -07:00
James Wu
38eb05f297 fix(python): remove dependency on retry package (#1749)
## user story

fixes https://github.com/lancedb/lancedb/issues/1480

https://github.com/invl/retry has not had an update in 8 years, and one of
its sub-dependencies via requirements.txt
(https://github.com/pytest-dev/py) is no longer maintained and has a
high-severity vulnerability (CVE-2022-42969).

`retry` is only used by a single function in the Python codebase: the
helper function `with_embeddings`, which was created for an
older tutorial (https://github.com/lancedb/lancedb/pull/12) [but is now
deprecated](https://lancedb.github.io/lancedb/embeddings/legacy/).

## changes

I backported a limited subset of the functionality of the `@retry()`
decorator directly into lancedb so that we no longer have a dependency
on the `retry` package.

## tests

```
/Users/james/src/lancedb/python $ ruff check .
All checks passed!
/Users/james/src/lancedb/python $ pytest python/tests/test_embeddings.py
python/tests/test_embeddings.py .......s....                                                                                                                        [100%]
================================================================ 11 passed, 1 skipped, 2 warnings in 7.08s ================================================================
```
2024-10-15 15:13:57 -07:00
Ryan Green
679a70231e feat: allow fast_search on python remote table (#1747)
Add `fast_search` parameter to query builder and remote table to support
skipping flat search in remote search
2024-10-14 14:39:54 -06:00
Dominik Weckmüller
e7b56b7b2a docs: add permanent link chain icon to headings without impacting SEO (#1746)
I noticed that there are no permanent links in the docs. I adapted the
current best solution from
https://github.com/squidfunk/mkdocs-material/discussions/3535. It adds a
GitHub-like chain icon to the left of each heading (right on mobile) and
does not impact SEO, unlike the default solution, whose pilcrow character `¶`
might show up in Google search results.

<img alt="image"
src="https://user-images.githubusercontent.com/182589/153004627-6df3f8e9-c747-4f43-bd62-a8dabaa96c3f.gif">
2024-10-14 11:58:23 -07:00
Olzhas Alexandrov
5ccd0edec2 docs: clarify infrastructure requirements for S3 Express One Zone (#1745) 2024-10-11 14:06:28 -06:00
Will Jones
9c74c435e0 ci: update package lock (#1740) 2024-10-09 15:14:08 -06:00
Lance Release
6de53ce393 Updating package-lock.json 2024-10-09 18:54:29 +00:00
Lance Release
9f42fbba96 Bump version: 0.11.0-beta.2 → 0.11.0 2024-10-09 18:54:09 +00:00
Lance Release
d892f7a622 Bump version: 0.11.0-beta.1 → 0.11.0-beta.2 2024-10-09 18:54:04 +00:00
30 changed files with 227 additions and 100 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.11.0-beta.1"
current_version = "0.11.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -20,13 +20,15 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
[workspace.dependencies]
lance = { "version" = "=0.18.2", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.18.2" }
lance-linalg = { "version" = "=0.18.2" }
lance-table = { "version" = "=0.18.2" }
lance-testing = { "version" = "=0.18.2" }
lance-datafusion = { "version" = "=0.18.2" }
lance-encoding = { "version" = "=0.18.2" }
lance = { "version" = "=0.18.3", "features" = [
"dynamodb",
], git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-index = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-linalg = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-table = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-testing = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-datafusion = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-encoding = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
# Note that this one does not include pyarrow
arrow = { version = "52.2", optional = false }
arrow-array = "52.2"

View File

@@ -90,6 +90,9 @@ markdown_extensions:
- pymdownx.emoji:
emoji_index: !!python/name:material.extensions.emoji.twemoji
emoji_generator: !!python/name:material.extensions.emoji.to_svg
- markdown.extensions.toc:
baselevel: 1
permalink: ""
nav:
- Home:

View File

@@ -498,7 +498,7 @@ This can also be done with the ``AWS_ENDPOINT`` and ``AWS_DEFAULT_REGION`` envir
#### S3 Express
LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional configuration. Also, S3 Express endpoints only support connecting from an EC2 instance within the same region.
LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional infrastructure configuration for the compute service, such as EC2 or Lambda. Please refer to [Networking requirements for S3 Express One Zone](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-networking.html).
To configure LanceDB to use an S3 Express endpoint, you must set the storage option `s3_express`. The bucket name in your table URI should **include the suffix**.

View File

@@ -39,4 +39,46 @@
height: 1.2rem;
margin-top: -.1rem;
}
}
}
/* remove pilcrow as permanent link and add chain icon similar to github https://github.com/squidfunk/mkdocs-material/discussions/3535 */
.headerlink {
--permalink-size: 16px; /* for font-relative sizes, 0.6em is a good choice */
--permalink-spacing: 4px;
width: calc(var(--permalink-size) + var(--permalink-spacing));
height: var(--permalink-size);
vertical-align: middle;
background-color: var(--md-default-fg-color--lighter);
background-size: var(--permalink-size);
mask-size: var(--permalink-size);
-webkit-mask-size: var(--permalink-size);
mask-repeat: no-repeat;
-webkit-mask-repeat: no-repeat;
visibility: visible;
mask-image: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg>');
-webkit-mask-image: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg>');
}
[id]:target .headerlink {
background-color: var(--md-typeset-a-color);
}
.headerlink:hover {
background-color: var(--md-accent-fg-color) !important;
}
@media screen and (min-width: 76.25em) {
h1, h2, h3, h4, h5, h6 {
display: flex;
align-items: center;
flex-direction: row;
column-gap: 0.2em; /* fixes spaces in titles */
}
.headerlink {
order: -1;
margin-left: calc(var(--permalink-size) * -1 - var(--permalink-spacing)) !important;
}
}

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.11.0-beta.1</version>
<version>0.11.0-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.11.0-beta.1</version>
<version>0.11.0-final.0</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>

44
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.11.0-beta.1",
"@lancedb/vectordb-darwin-x64": "0.11.0-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.11.0-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.11.0-beta.1"
"@lancedb/vectordb-darwin-arm64": "0.11.0",
"@lancedb/vectordb-darwin-x64": "0.11.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0",
"@lancedb/vectordb-linux-x64-gnu": "0.11.0",
"@lancedb/vectordb-win32-x64-msvc": "0.11.0"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.11.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.11.0-beta.1.tgz",
"integrity": "sha512-qKQbFJwstMQEO2MVkkipyDxmH3/KafkuC4xfU8LjMtZ98ZGTQIW47t/OyftiUXYWcjsVxeXI3l2m9MCozFOdhg==",
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.11.0.tgz",
"integrity": "sha512-ffI9sLdlJ0L0FjKVy5QpznRTgVaEGL2INJVcJauuzsYY2aOC3weNfE+v58n/cm9I/NulTdu1BemwzFpESoZf5A==",
"cpu": [
"arm64"
],
@@ -339,9 +339,9 @@
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.11.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.11.0-beta.1.tgz",
"integrity": "sha512-245Q5hjQKljczBcDLbiq3N5fmUaY2zFRHoW6SBxOziQwyMphhLDSTNkAYkc3JnrQvf6dMolVYWigOsRVCFj56A==",
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.11.0.tgz",
"integrity": "sha512-sMGKVmTj7Gt1z+1Sy24toCV8UgcQkX0ljQU1QunVEzJvoP9yah/DN5rw5Ozxiv8Obk6Pz3BMZYqV3BPmL9AiAg==",
"cpu": [
"x64"
],
@@ -351,9 +351,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.11.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.11.0-beta.1.tgz",
"integrity": "sha512-B4z6sx4X6uqGDnQm3zL5mL47Agn4X4spf/nlxtrUWEfiOAyp9Iw465UQMmrbnodi+4k/BNjCNZNMFSjMOSsrcA==",
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.11.0.tgz",
"integrity": "sha512-BQTiTbvJfNKEye9FRomItlFcbOoYCV8frBrQMIfli4q9GECwgBmXQaWP+rEZZrdqfG0DivTQJ0YSSHgAy3wCcA==",
"cpu": [
"arm64"
],
@@ -363,9 +363,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.11.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.11.0-beta.1.tgz",
"integrity": "sha512-0vWcPqpe3to78bYkc+3XWZToRu6TMrhLJAxC9cnV5d9GMuN1VbDoLqD8QPRWkoEr9Nk7jdIRKEBUwfq5yGOFLw==",
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.11.0.tgz",
"integrity": "sha512-+RHu6YY311N21ZBM8OYbBFNuW+rqq0AC7Vp5eBvWKTOeNIf1Lz2vFAKhDPOgJt+ROoT/nzKRbksIEeIvnYQJNw==",
"cpu": [
"x64"
],
@@ -375,9 +375,9 @@
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.11.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.11.0-beta.1.tgz",
"integrity": "sha512-jU/+w2TfA4HKOZkib1UP4kIpaLgu+88S/t+Ccde67w/4qQuP0uAixTAls1WE4mtlf6pOnG0A1ILTY98nVkIQ3A==",
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.11.0.tgz",
"integrity": "sha512-IWdhJdiYIkJW+njNlRVNGG1bnGlQs+Wbrjy/NIZhVxch2yCj9gknZqWCuSHNR43a2oAdUY/kXgWL2AKEOfK1CQ==",
"cpu": [
"x64"
],

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -88,10 +88,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.11.0-beta.1",
"@lancedb/vectordb-darwin-x64": "0.11.0-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.11.0-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.11.0-beta.1"
"@lancedb/vectordb-darwin-arm64": "0.11.0",
"@lancedb/vectordb-darwin-x64": "0.11.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0",
"@lancedb/vectordb-linux-x64-gnu": "0.11.0",
"@lancedb/vectordb-win32-x64-msvc": "0.11.0"
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.11.0-beta.1"
version = "0.11.0"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.11.0-beta.1",
"version": "0.11.0",
"cpu": [
"x64",
"arm64"

View File

@@ -10,7 +10,7 @@
"vector database",
"ann"
],
"version": "0.11.0-beta.1",
"version": "0.11.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.14.0"
current_version = "0.14.1-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.14.0"
version = "0.14.1-beta.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -3,9 +3,8 @@ name = "lancedb"
# version in Cargo.toml
dependencies = [
"deprecation",
"pylance==0.18.2",
"pylance==0.18.3-beta.2",
"requests>=2.31.0",
"retry>=0.9.2",
"tqdm>=4.27.0",
"pydantic>=1.10",
"attrs>=21.3.0",

View File

@@ -21,14 +21,35 @@ import time
import urllib.error
import weakref
import logging
from functools import wraps
from typing import Callable, List, Union
import numpy as np
import pyarrow as pa
from lance.vector import vec_to_table
from retry import retry
from ..util import deprecated, safe_import_pandas
# ruff: noqa: PERF203
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
    """Build a decorator that re-invokes a function when it raises.

    The decorated function is called up to ``tries`` times. Any
    ``Exception`` raised by an attempt other than the last one is swallowed
    and followed by a pause; the exception from the last attempt propagates
    to the caller unchanged.

    Parameters
    ----------
    tries: int
        Maximum number of attempts.
    delay: number
        Base pause, in seconds, before the second attempt.
    max_delay: number
        Upper bound, in seconds, on any single pause.
    backoff: number
        Multiplier applied to ``delay`` once per failed attempt.
    jitter: number
        Fixed number of seconds added to every pause before capping.
    """

    def wrapper(fn):
        @wraps(fn)
        def wrapped(*args, **kwargs):
            for attempt in range(tries):
                try:
                    return fn(*args, **kwargs)
                except Exception:
                    out_of_attempts = attempt + 1 == tries
                    if out_of_attempts:
                        # Last attempt: surface the original exception.
                        raise
                    # Exponential backoff plus jitter, capped at max_delay.
                    uncapped = delay * (backoff**attempt) + jitter
                    time.sleep(min(uncapped, max_delay))

        return wrapped

    return wrapper
pd = safe_import_pandas()
DATA = Union[pa.Table, "pd.DataFrame"]

View File

@@ -88,6 +88,11 @@ class Query(pydantic.BaseModel):
tuning advice.
offset: int
The offset to start fetching results from
fast_search: bool
Skip a flat search of unindexed data. This will improve
search performance but search results will not include unindexed data.
- *default False*.
"""
vector_column: Optional[str] = None
@@ -124,6 +129,8 @@ class Query(pydantic.BaseModel):
offset: int = 0
fast_search: bool = False
class LanceQueryBuilder(ABC):
"""An abstract query builder. Subclasses are defined for vector search,
@@ -139,6 +146,7 @@ class LanceQueryBuilder(ABC):
vector_column_name: str,
ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [],
fast_search: bool = False,
) -> LanceQueryBuilder:
"""
Create a query builder based on the given query and query type.
@@ -155,6 +163,8 @@ class LanceQueryBuilder(ABC):
If "auto", the query type is inferred based on the query.
vector_column_name: str
The name of the vector column to use for vector search.
fast_search: bool
Skip flat search of unindexed data.
"""
# Check hybrid search first as it supports empty query pattern
if query_type == "hybrid":
@@ -196,7 +206,9 @@ class LanceQueryBuilder(ABC):
else:
raise TypeError(f"Unsupported query type: {type(query)}")
return LanceVectorQueryBuilder(table, query, vector_column_name, str_query)
return LanceVectorQueryBuilder(
table, query, vector_column_name, str_query, fast_search
)
@classmethod
def _resolve_query(cls, table, query, query_type, vector_column_name):
@@ -565,6 +577,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str,
str_query: Optional[str] = None,
fast_search: bool = False,
):
super().__init__(table)
self._query = query
@@ -575,6 +588,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._prefilter = False
self._reranker = None
self._str_query = str_query
self._fast_search = fast_search
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use.
@@ -675,6 +689,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
vector_column=self._vector_column,
with_row_id=self._with_row_id,
offset=self._offset,
fast_search=self._fast_search,
)
result_set = self._table._execute_query(query, batch_size)
if self._reranker is not None:

View File

@@ -50,6 +50,8 @@ class VectorQuery(BaseModel):
vector_column: str = VECTOR_COLUMN_NAME
fast_search: bool = False
@attrs.define
class VectorQueryResult:

View File

@@ -270,6 +270,7 @@ class RemoteTable(Table):
vector_column_name: Optional[str] = None,
query_type="auto",
fts_columns: Optional[Union[str, List[str]]] = None,
fast_search: bool = False,
) -> LanceVectorQueryBuilder:
"""Create a search query to find the nearest neighbors
of the given query vector. We currently support [vector search][search]
@@ -314,6 +315,12 @@ class RemoteTable(Table):
- If the table has multiple vector columns then the *vector_column_name*
needs to be specified. Otherwise, an error is raised.
fast_search: bool, optional
Skip a flat search of unindexed data. This may improve
search performance but search results will not include unindexed data.
- *default False*.
Returns
-------
LanceQueryBuilder
@@ -343,6 +350,7 @@ class RemoteTable(Table):
query_type,
vector_column_name=vector_column_name,
fts_columns=fts_columns,
fast_search=fast_search,
)
def _execute_query(

View File

@@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Union
from unittest.mock import MagicMock, patch
import lance
import lancedb
@@ -25,6 +26,7 @@ from lancedb.embeddings import (
)
from lancedb.embeddings.base import TextEmbeddingFunction
from lancedb.embeddings.registry import get_registry, register
from lancedb.embeddings.utils import retry
from lancedb.pydantic import LanceModel, Vector
@@ -225,3 +227,12 @@ def test_embedding_function_safe_model_dump(embedding_type):
f"{embedding_type}: Private attribute '{key}' "
f"is present in dumped model"
)
@patch("time.sleep")
def test_retry(mock_sleep):
    """A callable that fails nine times then succeeds is retried to success.

    ``time.sleep`` is patched so the pauses between attempts cost no time.
    """
    # Nine failures followed by one successful return value.
    outcomes = [Exception] * 9 + ["result"]
    flaky = MagicMock(side_effect=outcomes)

    retrying = retry()(flaky)
    value = retrying()

    # One pause per failed attempt.
    assert mock_sleep.call_count == 9
    assert value == "result"

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.11.0-beta.1"
version = "0.11.0"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.11.0-beta.1"
version = "0.11.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -196,22 +196,6 @@ impl<T: IntoArrow> CreateTableBuilder<true, T> {
};
Ok((data, builder))
}
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
// Early verification of the embedding name
let embedding_func = self
.parent
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;
self.embeddings.push((definition, embedding_func));
Ok(self)
}
}
// Builder methods that only apply when we do not have initial data
@@ -329,6 +313,26 @@ impl<const HAS_DATA: bool, T: IntoArrow> CreateTableBuilder<HAS_DATA, T> {
};
self
}
/// Add an embedding definition to the table.
///
/// The `embedding_name` must match the name of an embedding function that
/// was previously registered with the connection's [`EmbeddingRegistry`].
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
// Early verification of the embedding name
let embedding_func = self
.parent
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;
self.embeddings.push((definition, embedding_func));
Ok(self)
}
}
#[derive(Clone, Debug)]

View File

@@ -144,7 +144,7 @@ impl std::str::FromStr for IndexType {
"BTREE" => Ok(Self::BTree),
"BITMAP" => Ok(Self::Bitmap),
"LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList),
"FTS" => Ok(Self::FTS),
"FTS" | "INVERTED" => Ok(Self::FTS),
"IVF_PQ" => Ok(Self::IvfPq),
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),

View File

@@ -2110,7 +2110,6 @@ mod tests {
use arrow_schema::{DataType, Field, Schema, TimeUnit};
use futures::TryStreamExt;
use lance::dataset::{Dataset, WriteMode};
use lance::index::DatasetIndexInternalExt;
use lance::io::{ObjectStoreParams, WrappingObjectStore};
use rand::Rng;
use tempfile::tempdir;
@@ -3002,22 +3001,8 @@ mod tests {
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
// assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["category".to_string()]);
// For now, just open the index to verify its type
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
let indices = lance_dataset
.load_indices_by_name(&index.name)
.await
.unwrap();
let index_meta = &indices[0];
let idx = lance_dataset
.open_scalar_index("category", &index_meta.uuid.to_string())
.await
.unwrap();
assert_eq!(idx.index_type(), IndexType::Bitmap);
}
#[tokio::test]
@@ -3086,22 +3071,57 @@ mod tests {
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
// assert_eq!(index.index_type, crate::index::IndexType::LabelList);
assert_eq!(index.index_type, crate::index::IndexType::LabelList);
assert_eq!(index.columns, vec!["tags".to_string()]);
}
// For now, just open the index to verify its type
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
let indices = lance_dataset
.load_indices_by_name(&index.name)
#[tokio::test]
async fn test_create_inverted_index() {
let tmp_dir = tempdir().unwrap();
let uri = tmp_dir.path().to_str().unwrap();
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
const WORDS: [&str; 3] = ["cat", "dog", "fish"];
let mut text_builder = StringBuilder::new();
let num_rows = 120;
for i in 0..num_rows {
text_builder.append_value(WORDS[i % 3]);
}
let text = Arc::new(text_builder.finish());
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("text", DataType::Utf8, true),
]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(Int32Array::from_iter_values(0..num_rows as i32)),
text,
],
)
.unwrap();
let table = conn
.create_table(
"test_bitmap",
RecordBatchIterator::new(vec![Ok(batch.clone())], batch.schema()),
)
.execute()
.await
.unwrap();
let index_meta = &indices[0];
let idx = lance_dataset
.open_scalar_index("tags", &index_meta.uuid.to_string())
table
.create_index(&["text"], Index::FTS(Default::default()))
.execute()
.await
.unwrap();
assert_eq!(idx.index_type(), IndexType::LabelList);
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::FTS);
assert_eq!(index.columns, vec!["text".to_string()]);
assert_eq!(index.name, "text_idx");
}
#[tokio::test]