mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 13:59:58 +00:00
Compare commits
12 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1f41101897 | ||
|
|
99e4db0d6a | ||
|
|
46486d4d22 | ||
|
|
f43cb8bba1 | ||
|
|
38eb05f297 | ||
|
|
679a70231e | ||
|
|
e7b56b7b2a | ||
|
|
5ccd0edec2 | ||
|
|
9c74c435e0 | ||
|
|
6de53ce393 | ||
|
|
9f42fbba96 | ||
|
|
d892f7a622 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.11.0-beta.1"
|
||||
current_version = "0.11.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
16
Cargo.toml
16
Cargo.toml
@@ -20,13 +20,15 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||
categories = ["database-implementations"]
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.18.2", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.18.2" }
|
||||
lance-linalg = { "version" = "=0.18.2" }
|
||||
lance-table = { "version" = "=0.18.2" }
|
||||
lance-testing = { "version" = "=0.18.2" }
|
||||
lance-datafusion = { "version" = "=0.18.2" }
|
||||
lance-encoding = { "version" = "=0.18.2" }
|
||||
lance = { "version" = "=0.18.3", "features" = [
|
||||
"dynamodb",
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-index = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-linalg = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-table = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-testing = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-datafusion = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-encoding = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "52.2", optional = false }
|
||||
arrow-array = "52.2"
|
||||
|
||||
@@ -90,6 +90,9 @@ markdown_extensions:
|
||||
- pymdownx.emoji:
|
||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- markdown.extensions.toc:
|
||||
baselevel: 1
|
||||
permalink: ""
|
||||
|
||||
nav:
|
||||
- Home:
|
||||
|
||||
@@ -498,7 +498,7 @@ This can also be done with the ``AWS_ENDPOINT`` and ``AWS_DEFAULT_REGION`` envir
|
||||
|
||||
#### S3 Express
|
||||
|
||||
LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional configuration. Also, S3 Express endpoints only support connecting from an EC2 instance within the same region.
|
||||
LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional infrastructure configuration for the compute service, such as EC2 or Lambda. Please refer to [Networking requirements for S3 Express One Zone](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-networking.html).
|
||||
|
||||
To configure LanceDB to use an S3 Express endpoint, you must set the storage option `s3_express`. The bucket name in your table URI should **include the suffix**.
|
||||
|
||||
|
||||
@@ -39,4 +39,46 @@
|
||||
height: 1.2rem;
|
||||
margin-top: -.1rem;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* remove pilcrow as permanent link and add chain icon similar to github https://github.com/squidfunk/mkdocs-material/discussions/3535 */
|
||||
|
||||
.headerlink {
|
||||
--permalink-size: 16px; /* for font-relative sizes, 0.6em is a good choice */
|
||||
--permalink-spacing: 4px;
|
||||
|
||||
width: calc(var(--permalink-size) + var(--permalink-spacing));
|
||||
height: var(--permalink-size);
|
||||
vertical-align: middle;
|
||||
background-color: var(--md-default-fg-color--lighter);
|
||||
background-size: var(--permalink-size);
|
||||
mask-size: var(--permalink-size);
|
||||
-webkit-mask-size: var(--permalink-size);
|
||||
mask-repeat: no-repeat;
|
||||
-webkit-mask-repeat: no-repeat;
|
||||
visibility: visible;
|
||||
mask-image: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg>');
|
||||
-webkit-mask-image: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg>');
|
||||
}
|
||||
|
||||
[id]:target .headerlink {
|
||||
background-color: var(--md-typeset-a-color);
|
||||
}
|
||||
|
||||
.headerlink:hover {
|
||||
background-color: var(--md-accent-fg-color) !important;
|
||||
}
|
||||
|
||||
@media screen and (min-width: 76.25em) {
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
flex-direction: row;
|
||||
column-gap: 0.2em; /* fixes spaces in titles */
|
||||
}
|
||||
|
||||
.headerlink {
|
||||
order: -1;
|
||||
margin-left: calc(var(--permalink-size) * -1 - var(--permalink-spacing)) !important;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.11.0-beta.1</version>
|
||||
<version>0.11.0-final.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.11.0-beta.1</version>
|
||||
<version>0.11.0-final.0</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<name>LanceDB Parent</name>
|
||||
|
||||
44
node/package-lock.json
generated
44
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -52,11 +52,11 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.11.0-beta.1",
|
||||
"@lancedb/vectordb-darwin-x64": "0.11.0-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.11.0-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.11.0-beta.1"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.11.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.11.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.11.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.11.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
@@ -327,9 +327,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.11.0-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.11.0-beta.1.tgz",
|
||||
"integrity": "sha512-qKQbFJwstMQEO2MVkkipyDxmH3/KafkuC4xfU8LjMtZ98ZGTQIW47t/OyftiUXYWcjsVxeXI3l2m9MCozFOdhg==",
|
||||
"version": "0.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.11.0.tgz",
|
||||
"integrity": "sha512-ffI9sLdlJ0L0FjKVy5QpznRTgVaEGL2INJVcJauuzsYY2aOC3weNfE+v58n/cm9I/NulTdu1BemwzFpESoZf5A==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -339,9 +339,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.11.0-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.11.0-beta.1.tgz",
|
||||
"integrity": "sha512-245Q5hjQKljczBcDLbiq3N5fmUaY2zFRHoW6SBxOziQwyMphhLDSTNkAYkc3JnrQvf6dMolVYWigOsRVCFj56A==",
|
||||
"version": "0.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.11.0.tgz",
|
||||
"integrity": "sha512-sMGKVmTj7Gt1z+1Sy24toCV8UgcQkX0ljQU1QunVEzJvoP9yah/DN5rw5Ozxiv8Obk6Pz3BMZYqV3BPmL9AiAg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -351,9 +351,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.11.0-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.11.0-beta.1.tgz",
|
||||
"integrity": "sha512-B4z6sx4X6uqGDnQm3zL5mL47Agn4X4spf/nlxtrUWEfiOAyp9Iw465UQMmrbnodi+4k/BNjCNZNMFSjMOSsrcA==",
|
||||
"version": "0.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.11.0.tgz",
|
||||
"integrity": "sha512-BQTiTbvJfNKEye9FRomItlFcbOoYCV8frBrQMIfli4q9GECwgBmXQaWP+rEZZrdqfG0DivTQJ0YSSHgAy3wCcA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -363,9 +363,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.11.0-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.11.0-beta.1.tgz",
|
||||
"integrity": "sha512-0vWcPqpe3to78bYkc+3XWZToRu6TMrhLJAxC9cnV5d9GMuN1VbDoLqD8QPRWkoEr9Nk7jdIRKEBUwfq5yGOFLw==",
|
||||
"version": "0.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.11.0.tgz",
|
||||
"integrity": "sha512-+RHu6YY311N21ZBM8OYbBFNuW+rqq0AC7Vp5eBvWKTOeNIf1Lz2vFAKhDPOgJt+ROoT/nzKRbksIEeIvnYQJNw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -375,9 +375,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.11.0-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.11.0-beta.1.tgz",
|
||||
"integrity": "sha512-jU/+w2TfA4HKOZkib1UP4kIpaLgu+88S/t+Ccde67w/4qQuP0uAixTAls1WE4mtlf6pOnG0A1ILTY98nVkIQ3A==",
|
||||
"version": "0.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.11.0.tgz",
|
||||
"integrity": "sha512-IWdhJdiYIkJW+njNlRVNGG1bnGlQs+Wbrjy/NIZhVxch2yCj9gknZqWCuSHNR43a2oAdUY/kXgWL2AKEOfK1CQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -88,10 +88,10 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.11.0-beta.1",
|
||||
"@lancedb/vectordb-darwin-x64": "0.11.0-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.11.0-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.11.0-beta.1"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.11.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.11.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.11.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.11.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.11.0-beta.1"
|
||||
version = "0.11.0"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"vector database",
|
||||
"ann"
|
||||
],
|
||||
"version": "0.11.0-beta.1",
|
||||
"version": "0.11.0",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.14.0"
|
||||
current_version = "0.14.1-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.14.0"
|
||||
version = "0.14.1-beta.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -3,9 +3,8 @@ name = "lancedb"
|
||||
# version in Cargo.toml
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.18.2",
|
||||
"pylance==0.18.3-beta.2",
|
||||
"requests>=2.31.0",
|
||||
"retry>=0.9.2",
|
||||
"tqdm>=4.27.0",
|
||||
"pydantic>=1.10",
|
||||
"attrs>=21.3.0",
|
||||
|
||||
@@ -21,14 +21,35 @@ import time
|
||||
import urllib.error
|
||||
import weakref
|
||||
import logging
|
||||
from functools import wraps
|
||||
from typing import Callable, List, Union
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
from lance.vector import vec_to_table
|
||||
from retry import retry
|
||||
|
||||
from ..util import deprecated, safe_import_pandas
|
||||
|
||||
|
||||
# ruff: noqa: PERF203
|
||||
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
|
||||
def wrapper(fn):
|
||||
@wraps(fn)
|
||||
def wrapped(*args, **kwargs):
|
||||
for i in range(tries):
|
||||
try:
|
||||
return fn(*args, **kwargs)
|
||||
except Exception:
|
||||
if i + 1 == tries:
|
||||
raise
|
||||
else:
|
||||
sleep = min(delay * (backoff**i) + jitter, max_delay)
|
||||
time.sleep(sleep)
|
||||
|
||||
return wrapped
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
pd = safe_import_pandas()
|
||||
|
||||
DATA = Union[pa.Table, "pd.DataFrame"]
|
||||
|
||||
@@ -88,6 +88,11 @@ class Query(pydantic.BaseModel):
|
||||
tuning advice.
|
||||
offset: int
|
||||
The offset to start fetching results from
|
||||
fast_search: bool
|
||||
Skip a flat search of unindexed data. This will improve
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
- *default False*.
|
||||
"""
|
||||
|
||||
vector_column: Optional[str] = None
|
||||
@@ -124,6 +129,8 @@ class Query(pydantic.BaseModel):
|
||||
|
||||
offset: int = 0
|
||||
|
||||
fast_search: bool = False
|
||||
|
||||
|
||||
class LanceQueryBuilder(ABC):
|
||||
"""An abstract query builder. Subclasses are defined for vector search,
|
||||
@@ -139,6 +146,7 @@ class LanceQueryBuilder(ABC):
|
||||
vector_column_name: str,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Union[str, List[str]] = [],
|
||||
fast_search: bool = False,
|
||||
) -> LanceQueryBuilder:
|
||||
"""
|
||||
Create a query builder based on the given query and query type.
|
||||
@@ -155,6 +163,8 @@ class LanceQueryBuilder(ABC):
|
||||
If "auto", the query type is inferred based on the query.
|
||||
vector_column_name: str
|
||||
The name of the vector column to use for vector search.
|
||||
fast_search: bool
|
||||
Skip flat search of unindexed data.
|
||||
"""
|
||||
# Check hybrid search first as it supports empty query pattern
|
||||
if query_type == "hybrid":
|
||||
@@ -196,7 +206,9 @@ class LanceQueryBuilder(ABC):
|
||||
else:
|
||||
raise TypeError(f"Unsupported query type: {type(query)}")
|
||||
|
||||
return LanceVectorQueryBuilder(table, query, vector_column_name, str_query)
|
||||
return LanceVectorQueryBuilder(
|
||||
table, query, vector_column_name, str_query, fast_search
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _resolve_query(cls, table, query, query_type, vector_column_name):
|
||||
@@ -565,6 +577,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
query: Union[np.ndarray, list, "PIL.Image.Image"],
|
||||
vector_column: str,
|
||||
str_query: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
):
|
||||
super().__init__(table)
|
||||
self._query = query
|
||||
@@ -575,6 +588,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
self._prefilter = False
|
||||
self._reranker = None
|
||||
self._str_query = str_query
|
||||
self._fast_search = fast_search
|
||||
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
@@ -675,6 +689,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
vector_column=self._vector_column,
|
||||
with_row_id=self._with_row_id,
|
||||
offset=self._offset,
|
||||
fast_search=self._fast_search,
|
||||
)
|
||||
result_set = self._table._execute_query(query, batch_size)
|
||||
if self._reranker is not None:
|
||||
|
||||
@@ -50,6 +50,8 @@ class VectorQuery(BaseModel):
|
||||
|
||||
vector_column: str = VECTOR_COLUMN_NAME
|
||||
|
||||
fast_search: bool = False
|
||||
|
||||
|
||||
@attrs.define
|
||||
class VectorQueryResult:
|
||||
|
||||
@@ -270,6 +270,7 @@ class RemoteTable(Table):
|
||||
vector_column_name: Optional[str] = None,
|
||||
query_type="auto",
|
||||
fts_columns: Optional[Union[str, List[str]]] = None,
|
||||
fast_search: bool = False,
|
||||
) -> LanceVectorQueryBuilder:
|
||||
"""Create a search query to find the nearest neighbors
|
||||
of the given query vector. We currently support [vector search][search]
|
||||
@@ -314,6 +315,12 @@ class RemoteTable(Table):
|
||||
- If the table has multiple vector columns then the *vector_column_name*
|
||||
needs to be specified. Otherwise, an error is raised.
|
||||
|
||||
fast_search: bool, optional
|
||||
Skip a flat search of unindexed data. This may improve
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
- *default False*.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
@@ -343,6 +350,7 @@ class RemoteTable(Table):
|
||||
query_type,
|
||||
vector_column_name=vector_column_name,
|
||||
fts_columns=fts_columns,
|
||||
fast_search=fast_search,
|
||||
)
|
||||
|
||||
def _execute_query(
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import List, Union
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import lance
|
||||
import lancedb
|
||||
@@ -25,6 +26,7 @@ from lancedb.embeddings import (
|
||||
)
|
||||
from lancedb.embeddings.base import TextEmbeddingFunction
|
||||
from lancedb.embeddings.registry import get_registry, register
|
||||
from lancedb.embeddings.utils import retry
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
|
||||
|
||||
@@ -225,3 +227,12 @@ def test_embedding_function_safe_model_dump(embedding_type):
|
||||
f"{embedding_type}: Private attribute '{key}' "
|
||||
f"is present in dumped model"
|
||||
)
|
||||
|
||||
|
||||
@patch("time.sleep")
|
||||
def test_retry(mock_sleep):
|
||||
test_function = MagicMock(side_effect=[Exception] * 9 + ["result"])
|
||||
test_function = retry()(test_function)
|
||||
result = test_function()
|
||||
assert mock_sleep.call_count == 9
|
||||
assert result == "result"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-node"
|
||||
version = "0.11.0-beta.1"
|
||||
version = "0.11.0"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.11.0-beta.1"
|
||||
version = "0.11.0"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -196,22 +196,6 @@ impl<T: IntoArrow> CreateTableBuilder<true, T> {
|
||||
};
|
||||
Ok((data, builder))
|
||||
}
|
||||
|
||||
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
|
||||
// Early verification of the embedding name
|
||||
let embedding_func = self
|
||||
.parent
|
||||
.embedding_registry()
|
||||
.get(&definition.embedding_name)
|
||||
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
|
||||
name: definition.embedding_name.clone(),
|
||||
reason: "No embedding function found in the connection's embedding_registry"
|
||||
.to_string(),
|
||||
})?;
|
||||
|
||||
self.embeddings.push((definition, embedding_func));
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
// Builder methods that only apply when we do not have initial data
|
||||
@@ -329,6 +313,26 @@ impl<const HAS_DATA: bool, T: IntoArrow> CreateTableBuilder<HAS_DATA, T> {
|
||||
};
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an embedding definition to the table.
|
||||
///
|
||||
/// The `embedding_name` must match the name of an embedding function that
|
||||
/// was previously registered with the connection's [`EmbeddingRegistry`].
|
||||
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
|
||||
// Early verification of the embedding name
|
||||
let embedding_func = self
|
||||
.parent
|
||||
.embedding_registry()
|
||||
.get(&definition.embedding_name)
|
||||
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
|
||||
name: definition.embedding_name.clone(),
|
||||
reason: "No embedding function found in the connection's embedding_registry"
|
||||
.to_string(),
|
||||
})?;
|
||||
|
||||
self.embeddings.push((definition, embedding_func));
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -144,7 +144,7 @@ impl std::str::FromStr for IndexType {
|
||||
"BTREE" => Ok(Self::BTree),
|
||||
"BITMAP" => Ok(Self::Bitmap),
|
||||
"LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList),
|
||||
"FTS" => Ok(Self::FTS),
|
||||
"FTS" | "INVERTED" => Ok(Self::FTS),
|
||||
"IVF_PQ" => Ok(Self::IvfPq),
|
||||
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
|
||||
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
|
||||
|
||||
@@ -2110,7 +2110,6 @@ mod tests {
|
||||
use arrow_schema::{DataType, Field, Schema, TimeUnit};
|
||||
use futures::TryStreamExt;
|
||||
use lance::dataset::{Dataset, WriteMode};
|
||||
use lance::index::DatasetIndexInternalExt;
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
use rand::Rng;
|
||||
use tempfile::tempdir;
|
||||
@@ -3002,22 +3001,8 @@ mod tests {
|
||||
let index_configs = table.list_indices().await.unwrap();
|
||||
assert_eq!(index_configs.len(), 1);
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
|
||||
// assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.columns, vec!["category".to_string()]);
|
||||
|
||||
// For now, just open the index to verify its type
|
||||
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
|
||||
let indices = lance_dataset
|
||||
.load_indices_by_name(&index.name)
|
||||
.await
|
||||
.unwrap();
|
||||
let index_meta = &indices[0];
|
||||
let idx = lance_dataset
|
||||
.open_scalar_index("category", &index_meta.uuid.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(idx.index_type(), IndexType::Bitmap);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -3086,22 +3071,57 @@ mod tests {
|
||||
let index_configs = table.list_indices().await.unwrap();
|
||||
assert_eq!(index_configs.len(), 1);
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
|
||||
// assert_eq!(index.index_type, crate::index::IndexType::LabelList);
|
||||
assert_eq!(index.index_type, crate::index::IndexType::LabelList);
|
||||
assert_eq!(index.columns, vec!["tags".to_string()]);
|
||||
}
|
||||
|
||||
// For now, just open the index to verify its type
|
||||
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
|
||||
let indices = lance_dataset
|
||||
.load_indices_by_name(&index.name)
|
||||
#[tokio::test]
|
||||
async fn test_create_inverted_index() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
|
||||
const WORDS: [&str; 3] = ["cat", "dog", "fish"];
|
||||
let mut text_builder = StringBuilder::new();
|
||||
let num_rows = 120;
|
||||
for i in 0..num_rows {
|
||||
text_builder.append_value(WORDS[i % 3]);
|
||||
}
|
||||
let text = Arc::new(text_builder.finish());
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("text", DataType::Utf8, true),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..num_rows as i32)),
|
||||
text,
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = conn
|
||||
.create_table(
|
||||
"test_bitmap",
|
||||
RecordBatchIterator::new(vec![Ok(batch.clone())], batch.schema()),
|
||||
)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let index_meta = &indices[0];
|
||||
let idx = lance_dataset
|
||||
.open_scalar_index("tags", &index_meta.uuid.to_string())
|
||||
|
||||
table
|
||||
.create_index(&["text"], Index::FTS(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(idx.index_type(), IndexType::LabelList);
|
||||
let index_configs = table.list_indices().await.unwrap();
|
||||
assert_eq!(index_configs.len(), 1);
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::FTS);
|
||||
assert_eq!(index.columns, vec!["text".to_string()]);
|
||||
assert_eq!(index.name, "text_idx");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
Reference in New Issue
Block a user