Compare commits

...

12 Commits

Author SHA1 Message Date
Lance Release
1f41101897 Bump version: 0.14.0 → 0.14.1-beta.0 2024-10-17 18:58:45 +00:00
Will Jones
99e4db0d6a feat(rust): allow add_embedding on create_empty_table (#1754)
Fixes https://github.com/lancedb/lancedb/issues/1750
2024-10-17 11:58:15 -07:00
Will Jones
46486d4d22 fix: list_indices can handle fts indexes (#1753)
Fixes #1752
2024-10-16 10:39:40 -07:00
Weston Pace
f43cb8bba1 feat: upgrade lance to 0.18.3 (#1748) 2024-10-16 00:48:31 -07:00
James Wu
38eb05f297 fix(python): remove dependency on retry package (#1749)
## user story

fixes https://github.com/lancedb/lancedb/issues/1480

https://github.com/invl/retry has not had an update in 8 years, and one of
its sub-dependencies via requirements.txt
(https://github.com/pytest-dev/py) is no longer maintained and has a
high-severity vulnerability (CVE-2022-42969).

`retry` is only used in one place in the Python codebase: the helper
function `with_embeddings`, which was created for an
older tutorial (https://github.com/lancedb/lancedb/pull/12) [but is now
deprecated](https://lancedb.github.io/lancedb/embeddings/legacy/).

## changes

I backported a limited subset of the `@retry()` decorator's functionality
directly into lancedb so that we no longer depend on
the `retry` package.

## tests

```
/Users/james/src/lancedb/python $ ruff check .
All checks passed!
/Users/james/src/lancedb/python $ pytest python/tests/test_embeddings.py
python/tests/test_embeddings.py .......s....                                                                                                                        [100%]
================================================================ 11 passed, 1 skipped, 2 warnings in 7.08s ================================================================
```
2024-10-15 15:13:57 -07:00
Ryan Green
679a70231e feat: allow fast_search on python remote table (#1747)
Add a `fast_search` parameter to the query builder and remote table to support
skipping the flat search in remote search.
2024-10-14 14:39:54 -06:00
Dominik Weckmüller
e7b56b7b2a docs: add permanent link chain icon to headings without impacting SEO (#1746)
I noticed that there are no permanent links in the docs, so I adapted the
current best solution from
https://github.com/squidfunk/mkdocs-material/discussions/3535. It adds a
GitHub-like chain icon to the left of each heading (right on mobile) and,
unlike the default solution with the pilcrow character `¶`, does not impact SEO:
the pilcrow might show up in Google search results.

<img alt="image"
src="https://user-images.githubusercontent.com/182589/153004627-6df3f8e9-c747-4f43-bd62-a8dabaa96c3f.gif">
2024-10-14 11:58:23 -07:00
Olzhas Alexandrov
5ccd0edec2 docs: clarify infrastructure requirements for S3 Express One Zone (#1745) 2024-10-11 14:06:28 -06:00
Will Jones
9c74c435e0 ci: update package lock (#1740) 2024-10-09 15:14:08 -06:00
Lance Release
6de53ce393 Updating package-lock.json 2024-10-09 18:54:29 +00:00
Lance Release
9f42fbba96 Bump version: 0.11.0-beta.2 → 0.11.0 2024-10-09 18:54:09 +00:00
Lance Release
d892f7a622 Bump version: 0.11.0-beta.1 → 0.11.0-beta.2 2024-10-09 18:54:04 +00:00
30 changed files with 227 additions and 100 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.11.0-beta.1" current_version = "0.11.0"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -20,13 +20,15 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"] categories = ["database-implementations"]
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.18.2", "features" = ["dynamodb"] } lance = { "version" = "=0.18.3", "features" = [
lance-index = { "version" = "=0.18.2" } "dynamodb",
lance-linalg = { "version" = "=0.18.2" } ], git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-table = { "version" = "=0.18.2" } lance-index = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-testing = { "version" = "=0.18.2" } lance-linalg = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-datafusion = { "version" = "=0.18.2" } lance-table = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-encoding = { "version" = "=0.18.2" } lance-testing = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-datafusion = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-encoding = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "52.2", optional = false } arrow = { version = "52.2", optional = false }
arrow-array = "52.2" arrow-array = "52.2"

View File

@@ -90,6 +90,9 @@ markdown_extensions:
- pymdownx.emoji: - pymdownx.emoji:
emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_index: !!python/name:material.extensions.emoji.twemoji
emoji_generator: !!python/name:material.extensions.emoji.to_svg emoji_generator: !!python/name:material.extensions.emoji.to_svg
- markdown.extensions.toc:
baselevel: 1
permalink: ""
nav: nav:
- Home: - Home:

View File

@@ -498,7 +498,7 @@ This can also be done with the ``AWS_ENDPOINT`` and ``AWS_DEFAULT_REGION`` envir
#### S3 Express #### S3 Express
LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional configuration. Also, S3 Express endpoints only support connecting from an EC2 instance within the same region. LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional infrastructure configuration for the compute service, such as EC2 or Lambda. Please refer to [Networking requirements for S3 Express One Zone](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-networking.html).
To configure LanceDB to use an S3 Express endpoint, you must set the storage option `s3_express`. The bucket name in your table URI should **include the suffix**. To configure LanceDB to use an S3 Express endpoint, you must set the storage option `s3_express`. The bucket name in your table URI should **include the suffix**.

View File

@@ -40,3 +40,45 @@
margin-top: -.1rem; margin-top: -.1rem;
} }
} }
/* remove pilcrow as permanent link and add chain icon similar to github https://github.com/squidfunk/mkdocs-material/discussions/3535 */
.headerlink {
--permalink-size: 16px; /* for font-relative sizes, 0.6em is a good choice */
--permalink-spacing: 4px;
width: calc(var(--permalink-size) + var(--permalink-spacing));
height: var(--permalink-size);
vertical-align: middle;
background-color: var(--md-default-fg-color--lighter);
background-size: var(--permalink-size);
mask-size: var(--permalink-size);
-webkit-mask-size: var(--permalink-size);
mask-repeat: no-repeat;
-webkit-mask-repeat: no-repeat;
visibility: visible;
mask-image: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg>');
-webkit-mask-image: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg>');
}
[id]:target .headerlink {
background-color: var(--md-typeset-a-color);
}
.headerlink:hover {
background-color: var(--md-accent-fg-color) !important;
}
@media screen and (min-width: 76.25em) {
h1, h2, h3, h4, h5, h6 {
display: flex;
align-items: center;
flex-direction: row;
column-gap: 0.2em; /* fixes spaces in titles */
}
.headerlink {
order: -1;
margin-left: calc(var(--permalink-size) * -1 - var(--permalink-spacing)) !important;
}
}

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.11.0-beta.1</version> <version>0.11.0-final.0</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.11.0-beta.1</version> <version>0.11.0-final.0</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>LanceDB Parent</name> <name>LanceDB Parent</name>

44
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.11.0-beta.1", "version": "0.11.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "vectordb", "name": "vectordb",
"version": "0.11.0-beta.1", "version": "0.11.0",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0" "uuid": "^9.0.0"
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.11.0-beta.1", "@lancedb/vectordb-darwin-arm64": "0.11.0",
"@lancedb/vectordb-darwin-x64": "0.11.0-beta.1", "@lancedb/vectordb-darwin-x64": "0.11.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0-beta.1", "@lancedb/vectordb-linux-arm64-gnu": "0.11.0",
"@lancedb/vectordb-linux-x64-gnu": "0.11.0-beta.1", "@lancedb/vectordb-linux-x64-gnu": "0.11.0",
"@lancedb/vectordb-win32-x64-msvc": "0.11.0-beta.1" "@lancedb/vectordb-win32-x64-msvc": "0.11.0"
}, },
"peerDependencies": { "peerDependencies": {
"@apache-arrow/ts": "^14.0.2", "@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
} }
}, },
"node_modules/@lancedb/vectordb-darwin-arm64": { "node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.11.0-beta.1", "version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.11.0-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.11.0.tgz",
"integrity": "sha512-qKQbFJwstMQEO2MVkkipyDxmH3/KafkuC4xfU8LjMtZ98ZGTQIW47t/OyftiUXYWcjsVxeXI3l2m9MCozFOdhg==", "integrity": "sha512-ffI9sLdlJ0L0FjKVy5QpznRTgVaEGL2INJVcJauuzsYY2aOC3weNfE+v58n/cm9I/NulTdu1BemwzFpESoZf5A==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -339,9 +339,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-darwin-x64": { "node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.11.0-beta.1", "version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.11.0-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.11.0.tgz",
"integrity": "sha512-245Q5hjQKljczBcDLbiq3N5fmUaY2zFRHoW6SBxOziQwyMphhLDSTNkAYkc3JnrQvf6dMolVYWigOsRVCFj56A==", "integrity": "sha512-sMGKVmTj7Gt1z+1Sy24toCV8UgcQkX0ljQU1QunVEzJvoP9yah/DN5rw5Ozxiv8Obk6Pz3BMZYqV3BPmL9AiAg==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -351,9 +351,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-arm64-gnu": { "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.11.0-beta.1", "version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.11.0-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.11.0.tgz",
"integrity": "sha512-B4z6sx4X6uqGDnQm3zL5mL47Agn4X4spf/nlxtrUWEfiOAyp9Iw465UQMmrbnodi+4k/BNjCNZNMFSjMOSsrcA==", "integrity": "sha512-BQTiTbvJfNKEye9FRomItlFcbOoYCV8frBrQMIfli4q9GECwgBmXQaWP+rEZZrdqfG0DivTQJ0YSSHgAy3wCcA==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -363,9 +363,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-x64-gnu": { "node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.11.0-beta.1", "version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.11.0-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.11.0.tgz",
"integrity": "sha512-0vWcPqpe3to78bYkc+3XWZToRu6TMrhLJAxC9cnV5d9GMuN1VbDoLqD8QPRWkoEr9Nk7jdIRKEBUwfq5yGOFLw==", "integrity": "sha512-+RHu6YY311N21ZBM8OYbBFNuW+rqq0AC7Vp5eBvWKTOeNIf1Lz2vFAKhDPOgJt+ROoT/nzKRbksIEeIvnYQJNw==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -375,9 +375,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-win32-x64-msvc": { "node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.11.0-beta.1", "version": "0.11.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.11.0-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.11.0.tgz",
"integrity": "sha512-jU/+w2TfA4HKOZkib1UP4kIpaLgu+88S/t+Ccde67w/4qQuP0uAixTAls1WE4mtlf6pOnG0A1ILTY98nVkIQ3A==", "integrity": "sha512-IWdhJdiYIkJW+njNlRVNGG1bnGlQs+Wbrjy/NIZhVxch2yCj9gknZqWCuSHNR43a2oAdUY/kXgWL2AKEOfK1CQ==",
"cpu": [ "cpu": [
"x64" "x64"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.11.0-beta.1", "version": "0.11.0",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",
@@ -88,10 +88,10 @@
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.11.0-beta.1", "@lancedb/vectordb-darwin-arm64": "0.11.0",
"@lancedb/vectordb-darwin-x64": "0.11.0-beta.1", "@lancedb/vectordb-darwin-x64": "0.11.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.11.0-beta.1", "@lancedb/vectordb-linux-arm64-gnu": "0.11.0",
"@lancedb/vectordb-linux-x64-gnu": "0.11.0-beta.1", "@lancedb/vectordb-linux-x64-gnu": "0.11.0",
"@lancedb/vectordb-win32-x64-msvc": "0.11.0-beta.1" "@lancedb/vectordb-win32-x64-msvc": "0.11.0"
} }
} }

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "lancedb-nodejs" name = "lancedb-nodejs"
edition.workspace = true edition.workspace = true
version = "0.11.0-beta.1" version = "0.11.0"
license.workspace = true license.workspace = true
description.workspace = true description.workspace = true
repository.workspace = true repository.workspace = true

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.11.0-beta.1", "version": "0.11.0",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.11.0-beta.1", "version": "0.11.0",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.11.0-beta.1", "version": "0.11.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.11.0-beta.1", "version": "0.11.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.11.0-beta.1", "version": "0.11.0",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{ {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.11.0-beta.1", "version": "0.11.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.11.0-beta.1", "version": "0.11.0",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"

View File

@@ -10,7 +10,7 @@
"vector database", "vector database",
"ann" "ann"
], ],
"version": "0.11.0-beta.1", "version": "0.11.0",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.14.0" current_version = "0.14.1-beta.0"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.14.0" version = "0.14.1-beta.0"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true

View File

@@ -3,9 +3,8 @@ name = "lancedb"
# version in Cargo.toml # version in Cargo.toml
dependencies = [ dependencies = [
"deprecation", "deprecation",
"pylance==0.18.2", "pylance==0.18.3-beta.2",
"requests>=2.31.0", "requests>=2.31.0",
"retry>=0.9.2",
"tqdm>=4.27.0", "tqdm>=4.27.0",
"pydantic>=1.10", "pydantic>=1.10",
"attrs>=21.3.0", "attrs>=21.3.0",

View File

@@ -21,14 +21,35 @@ import time
import urllib.error import urllib.error
import weakref import weakref
import logging import logging
from functools import wraps
from typing import Callable, List, Union from typing import Callable, List, Union
import numpy as np import numpy as np
import pyarrow as pa import pyarrow as pa
from lance.vector import vec_to_table from lance.vector import vec_to_table
from retry import retry
from ..util import deprecated, safe_import_pandas from ..util import deprecated, safe_import_pandas
# ruff: noqa: PERF203
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
def wrapper(fn):
@wraps(fn)
def wrapped(*args, **kwargs):
for i in range(tries):
try:
return fn(*args, **kwargs)
except Exception:
if i + 1 == tries:
raise
else:
sleep = min(delay * (backoff**i) + jitter, max_delay)
time.sleep(sleep)
return wrapped
return wrapper
pd = safe_import_pandas() pd = safe_import_pandas()
DATA = Union[pa.Table, "pd.DataFrame"] DATA = Union[pa.Table, "pd.DataFrame"]

View File

@@ -88,6 +88,11 @@ class Query(pydantic.BaseModel):
tuning advice. tuning advice.
offset: int offset: int
The offset to start fetching results from The offset to start fetching results from
fast_search: bool
Skip a flat search of unindexed data. This will improve
search performance but search results will not include unindexed data.
- *default False*.
""" """
vector_column: Optional[str] = None vector_column: Optional[str] = None
@@ -124,6 +129,8 @@ class Query(pydantic.BaseModel):
offset: int = 0 offset: int = 0
fast_search: bool = False
class LanceQueryBuilder(ABC): class LanceQueryBuilder(ABC):
"""An abstract query builder. Subclasses are defined for vector search, """An abstract query builder. Subclasses are defined for vector search,
@@ -139,6 +146,7 @@ class LanceQueryBuilder(ABC):
vector_column_name: str, vector_column_name: str,
ordering_field_name: Optional[str] = None, ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [], fts_columns: Union[str, List[str]] = [],
fast_search: bool = False,
) -> LanceQueryBuilder: ) -> LanceQueryBuilder:
""" """
Create a query builder based on the given query and query type. Create a query builder based on the given query and query type.
@@ -155,6 +163,8 @@ class LanceQueryBuilder(ABC):
If "auto", the query type is inferred based on the query. If "auto", the query type is inferred based on the query.
vector_column_name: str vector_column_name: str
The name of the vector column to use for vector search. The name of the vector column to use for vector search.
fast_search: bool
Skip flat search of unindexed data.
""" """
# Check hybrid search first as it supports empty query pattern # Check hybrid search first as it supports empty query pattern
if query_type == "hybrid": if query_type == "hybrid":
@@ -196,7 +206,9 @@ class LanceQueryBuilder(ABC):
else: else:
raise TypeError(f"Unsupported query type: {type(query)}") raise TypeError(f"Unsupported query type: {type(query)}")
return LanceVectorQueryBuilder(table, query, vector_column_name, str_query) return LanceVectorQueryBuilder(
table, query, vector_column_name, str_query, fast_search
)
@classmethod @classmethod
def _resolve_query(cls, table, query, query_type, vector_column_name): def _resolve_query(cls, table, query, query_type, vector_column_name):
@@ -565,6 +577,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
query: Union[np.ndarray, list, "PIL.Image.Image"], query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str, vector_column: str,
str_query: Optional[str] = None, str_query: Optional[str] = None,
fast_search: bool = False,
): ):
super().__init__(table) super().__init__(table)
self._query = query self._query = query
@@ -575,6 +588,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._prefilter = False self._prefilter = False
self._reranker = None self._reranker = None
self._str_query = str_query self._str_query = str_query
self._fast_search = fast_search
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder: def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use. """Set the distance metric to use.
@@ -675,6 +689,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
vector_column=self._vector_column, vector_column=self._vector_column,
with_row_id=self._with_row_id, with_row_id=self._with_row_id,
offset=self._offset, offset=self._offset,
fast_search=self._fast_search,
) )
result_set = self._table._execute_query(query, batch_size) result_set = self._table._execute_query(query, batch_size)
if self._reranker is not None: if self._reranker is not None:

View File

@@ -50,6 +50,8 @@ class VectorQuery(BaseModel):
vector_column: str = VECTOR_COLUMN_NAME vector_column: str = VECTOR_COLUMN_NAME
fast_search: bool = False
@attrs.define @attrs.define
class VectorQueryResult: class VectorQueryResult:

View File

@@ -270,6 +270,7 @@ class RemoteTable(Table):
vector_column_name: Optional[str] = None, vector_column_name: Optional[str] = None,
query_type="auto", query_type="auto",
fts_columns: Optional[Union[str, List[str]]] = None, fts_columns: Optional[Union[str, List[str]]] = None,
fast_search: bool = False,
) -> LanceVectorQueryBuilder: ) -> LanceVectorQueryBuilder:
"""Create a search query to find the nearest neighbors """Create a search query to find the nearest neighbors
of the given query vector. We currently support [vector search][search] of the given query vector. We currently support [vector search][search]
@@ -314,6 +315,12 @@ class RemoteTable(Table):
- If the table has multiple vector columns then the *vector_column_name* - If the table has multiple vector columns then the *vector_column_name*
needs to be specified. Otherwise, an error is raised. needs to be specified. Otherwise, an error is raised.
fast_search: bool, optional
Skip a flat search of unindexed data. This may improve
search performance but search results will not include unindexed data.
- *default False*.
Returns Returns
------- -------
LanceQueryBuilder LanceQueryBuilder
@@ -343,6 +350,7 @@ class RemoteTable(Table):
query_type, query_type,
vector_column_name=vector_column_name, vector_column_name=vector_column_name,
fts_columns=fts_columns, fts_columns=fts_columns,
fast_search=fast_search,
) )
def _execute_query( def _execute_query(

View File

@@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import List, Union from typing import List, Union
from unittest.mock import MagicMock, patch
import lance import lance
import lancedb import lancedb
@@ -25,6 +26,7 @@ from lancedb.embeddings import (
) )
from lancedb.embeddings.base import TextEmbeddingFunction from lancedb.embeddings.base import TextEmbeddingFunction
from lancedb.embeddings.registry import get_registry, register from lancedb.embeddings.registry import get_registry, register
from lancedb.embeddings.utils import retry
from lancedb.pydantic import LanceModel, Vector from lancedb.pydantic import LanceModel, Vector
@@ -225,3 +227,12 @@ def test_embedding_function_safe_model_dump(embedding_type):
f"{embedding_type}: Private attribute '{key}' " f"{embedding_type}: Private attribute '{key}' "
f"is present in dumped model" f"is present in dumped model"
) )
@patch("time.sleep")
def test_retry(mock_sleep):
test_function = MagicMock(side_effect=[Exception] * 9 + ["result"])
test_function = retry()(test_function)
result = test_function()
assert mock_sleep.call_count == 9
assert result == "result"

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-node" name = "lancedb-node"
version = "0.11.0-beta.1" version = "0.11.0"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
edition.workspace = true edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.11.0-beta.1" version = "0.11.0"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true

View File

@@ -196,22 +196,6 @@ impl<T: IntoArrow> CreateTableBuilder<true, T> {
}; };
Ok((data, builder)) Ok((data, builder))
} }
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
// Early verification of the embedding name
let embedding_func = self
.parent
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;
self.embeddings.push((definition, embedding_func));
Ok(self)
}
} }
// Builder methods that only apply when we do not have initial data // Builder methods that only apply when we do not have initial data
@@ -329,6 +313,26 @@ impl<const HAS_DATA: bool, T: IntoArrow> CreateTableBuilder<HAS_DATA, T> {
}; };
self self
} }
/// Add an embedding definition to the table.
///
/// The `embedding_name` must match the name of an embedding function that
/// was previously registered with the connection's [`EmbeddingRegistry`].
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
// Early verification of the embedding name
let embedding_func = self
.parent
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;
self.embeddings.push((definition, embedding_func));
Ok(self)
}
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]

View File

@@ -144,7 +144,7 @@ impl std::str::FromStr for IndexType {
"BTREE" => Ok(Self::BTree), "BTREE" => Ok(Self::BTree),
"BITMAP" => Ok(Self::Bitmap), "BITMAP" => Ok(Self::Bitmap),
"LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList), "LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList),
"FTS" => Ok(Self::FTS), "FTS" | "INVERTED" => Ok(Self::FTS),
"IVF_PQ" => Ok(Self::IvfPq), "IVF_PQ" => Ok(Self::IvfPq),
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq), "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq), "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),

View File

@@ -2110,7 +2110,6 @@ mod tests {
use arrow_schema::{DataType, Field, Schema, TimeUnit}; use arrow_schema::{DataType, Field, Schema, TimeUnit};
use futures::TryStreamExt; use futures::TryStreamExt;
use lance::dataset::{Dataset, WriteMode}; use lance::dataset::{Dataset, WriteMode};
use lance::index::DatasetIndexInternalExt;
use lance::io::{ObjectStoreParams, WrappingObjectStore}; use lance::io::{ObjectStoreParams, WrappingObjectStore};
use rand::Rng; use rand::Rng;
use tempfile::tempdir; use tempfile::tempdir;
@@ -3002,22 +3001,8 @@ mod tests {
let index_configs = table.list_indices().await.unwrap(); let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1); assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap(); let index = index_configs.into_iter().next().unwrap();
// TODO: Fix via https://github.com/lancedb/lance/issues/2039 assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
// assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["category".to_string()]); assert_eq!(index.columns, vec!["category".to_string()]);
// For now, just open the index to verify its type
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
let indices = lance_dataset
.load_indices_by_name(&index.name)
.await
.unwrap();
let index_meta = &indices[0];
let idx = lance_dataset
.open_scalar_index("category", &index_meta.uuid.to_string())
.await
.unwrap();
assert_eq!(idx.index_type(), IndexType::Bitmap);
} }
#[tokio::test] #[tokio::test]
@@ -3086,22 +3071,57 @@ mod tests {
let index_configs = table.list_indices().await.unwrap(); let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1); assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap(); let index = index_configs.into_iter().next().unwrap();
// TODO: Fix via https://github.com/lancedb/lance/issues/2039 assert_eq!(index.index_type, crate::index::IndexType::LabelList);
// assert_eq!(index.index_type, crate::index::IndexType::LabelList);
assert_eq!(index.columns, vec!["tags".to_string()]); assert_eq!(index.columns, vec!["tags".to_string()]);
}
// For now, just open the index to verify its type #[tokio::test]
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap(); async fn test_create_inverted_index() {
let indices = lance_dataset let tmp_dir = tempdir().unwrap();
.load_indices_by_name(&index.name) let uri = tmp_dir.path().to_str().unwrap();
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
const WORDS: [&str; 3] = ["cat", "dog", "fish"];
let mut text_builder = StringBuilder::new();
let num_rows = 120;
for i in 0..num_rows {
text_builder.append_value(WORDS[i % 3]);
}
let text = Arc::new(text_builder.finish());
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("text", DataType::Utf8, true),
]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(Int32Array::from_iter_values(0..num_rows as i32)),
text,
],
)
.unwrap();
let table = conn
.create_table(
"test_bitmap",
RecordBatchIterator::new(vec![Ok(batch.clone())], batch.schema()),
)
.execute()
.await .await
.unwrap(); .unwrap();
let index_meta = &indices[0];
let idx = lance_dataset table
.open_scalar_index("tags", &index_meta.uuid.to_string()) .create_index(&["text"], Index::FTS(Default::default()))
.execute()
.await .await
.unwrap(); .unwrap();
assert_eq!(idx.index_type(), IndexType::LabelList); let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::FTS);
assert_eq!(index.columns, vec!["text".to_string()]);
assert_eq!(index.name, "text_idx");
} }
#[tokio::test] #[tokio::test]