Compare commits

...

30 Commits

Author SHA1 Message Date
Lance Release
a33a0670f6 Bump version: 0.19.1-beta.2 → 0.19.1-beta.3 2025-02-20 03:37:27 +00:00
BubbleCal
14c9ff46d1 feat: support multivector on remote table (#2045)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-20 11:34:51 +08:00
Lei Xu
1865f7decf fix: support optional nested pydantic model (#2130)
Closes #2129
2025-02-17 20:43:13 -08:00
BubbleCal
a608621476 test: query with dist range and new rows (#2126)
We found a bug where the flat KNN plan node's stats were not in the same
order as the fields in the schema, which caused an error when querying with
a distance range and new unindexed rows.

This has been fixed in lance, so this test is added to verify that it works.

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-17 12:57:45 +08:00
BubbleCal
00514999ff feat: upgrade lance to 0.23.1-beta.4 (#2121)
This also upgrades object_store to 0.11.0 and snafu to 0.8.

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-16 14:53:26 +08:00
Lance Release
b3b597fef6 Updating package-lock.json 2025-02-13 04:40:10 +00:00
Lance Release
bf17144591 Updating package-lock.json 2025-02-13 04:39:54 +00:00
Lance Release
09e110525f Bump version: 0.16.1-beta.1 → 0.16.1-beta.2 2025-02-13 04:39:38 +00:00
Lance Release
40f0dbb64d Bump version: 0.19.1-beta.1 → 0.19.1-beta.2 2025-02-13 04:39:19 +00:00
BubbleCal
3b19e96ae7 fix: panic when field id doesn't equal to field index (#2116)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-13 12:38:35 +08:00
Will Jones
78a17ad54c chore: improve dev instructions for Python (#2088)
Closes #2042
2025-02-12 14:08:52 -08:00
Lance Release
a8e6b491e2 Updating package-lock.json 2025-02-11 22:05:54 +00:00
Lance Release
cea541ca46 Updating package-lock.json 2025-02-11 20:56:22 +00:00
Lance Release
873ffc1042 Updating package-lock.json 2025-02-11 20:56:05 +00:00
Lance Release
83273ad997 Bump version: 0.16.1-beta.0 → 0.16.1-beta.1 2025-02-11 20:55:43 +00:00
Lance Release
d18d63c69d Bump version: 0.19.1-beta.0 → 0.19.1-beta.1 2025-02-11 20:55:23 +00:00
LuQQiu
c3e865e8d0 fix: fix index out of bound in load indices (#2108)
panicked at 'index out of bounds: the len is 24 but the index is
25':Lancedb/rust/lancedb/src/index/vector.rs:26\n

Calling load_indices() on the old manifest while using the newer manifest to
get column names could result in an index-out-of-bounds panic if some columns
were removed in the new version.
This change reduces the possibility of an index-out-of-bounds access but
does not fully remove it.
It would be better if lance provided the column names directly, so that no
extra calls are needed to get them, but that requires modifying the public APIs.
2025-02-11 12:54:11 -08:00
Weston Pace
a7755cb313 docs: standardize node example prints (#2080)
Minor cleanup to help debug future CI failures
2025-02-11 08:26:29 -08:00
BubbleCal
3490f3456f chore: upgrade lance to 0.23.1-beta.2 (#2109) 2025-02-11 23:57:56 +08:00
Lance Release
0a1d0693e1 Updating package-lock.json 2025-02-07 20:06:22 +00:00
Lance Release
fd330b4b4b Updating package-lock.json 2025-02-07 19:28:01 +00:00
Lance Release
d4e9fc08e0 Updating package-lock.json 2025-02-07 19:27:44 +00:00
Lance Release
3626f2f5e1 Bump version: 0.16.0 → 0.16.1-beta.0 2025-02-07 19:27:26 +00:00
Lance Release
e64712cfa5 Bump version: 0.19.0 → 0.19.1-beta.0 2025-02-07 19:27:07 +00:00
Wyatt Alt
3e3118f85c feat: update lance dependency to 0.23.1-beta.1 (#2102) 2025-02-07 10:56:01 -08:00
Lance Release
592598a333 Updating package-lock.json 2025-02-07 18:50:53 +00:00
Lance Release
5ad21341c9 Updating package-lock.json 2025-02-07 17:34:04 +00:00
Lance Release
6e08caa091 Updating package-lock.json 2025-02-07 17:33:48 +00:00
Lance Release
7e259d8b0f Bump version: 0.16.0-beta.0 → 0.16.0 2025-02-07 17:33:13 +00:00
Lance Release
e84f747464 Bump version: 0.15.1-beta.3 → 0.16.0-beta.0 2025-02-07 17:33:08 +00:00
35 changed files with 477 additions and 567 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.15.1-beta.3"
current_version = "0.16.1-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

499
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -21,16 +21,16 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.23.0", "features" = [
lance = { "version" = "=0.23.1", "features" = [
"dynamodb",
]}
lance-io = "=0.23.0"
lance-index = "=0.23.0"
lance-linalg = "=0.23.0"
lance-table = "=0.23.0"
lance-testing = "=0.23.0"
lance-datafusion = "=0.23.0"
lance-encoding = "=0.23.0"
], git = "https://github.com/lancedb/lance.git", tag = "v0.23.1-beta.4"}
lance-io = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-index = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-linalg = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-table = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-testing = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-datafusion = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-encoding = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
# Note that this one does not include pyarrow
arrow = { version = "53.2", optional = false }
arrow-array = "53.2"
@@ -55,9 +55,9 @@ half = { "version" = "=2.4.1", default-features = false, features = [
futures = "0"
log = "0.4"
moka = { version = "0.12", features = ["future"] }
object_store = "0.10.2"
object_store = "0.11.0"
pin-project = "1.0.7"
snafu = "0.7.4"
snafu = "0.8"
url = "2"
num-traits = "0.2"
rand = "0.8"

View File

@@ -3,6 +3,7 @@ import * as vectordb from "vectordb";
// --8<-- [end:import]
(async () => {
console.log("ann_indexes.ts: start");
// --8<-- [start:ingest]
const db = await vectordb.connect("data/sample-lancedb");
@@ -49,5 +50,5 @@ import * as vectordb from "vectordb";
.execute();
// --8<-- [end:search3]
console.log("Ann indexes: done");
console.log("ann_indexes.ts: done");
})();

View File

@@ -107,7 +107,6 @@ const example = async () => {
// --8<-- [start:search]
const query = await tbl.search([100, 100]).limit(2).execute();
// --8<-- [end:search]
console.log(query);
// --8<-- [start:delete]
await tbl.delete('item = "fizz"');
@@ -119,8 +118,9 @@ const example = async () => {
};
async function main() {
console.log("basic_legacy.ts: start");
await example();
console.log("Basic example: done");
console.log("basic_legacy.ts: done");
}
main();

View File

@@ -20,6 +20,7 @@ async function setup() {
}
async () => {
console.log("search_legacy.ts: start");
await setup();
// --8<-- [start:search1]
@@ -37,5 +38,5 @@ async () => {
.execute();
// --8<-- [end:search2]
console.log("search: done");
console.log("search_legacy.ts: done");
};

View File

@@ -1,6 +1,7 @@
import * as vectordb from "vectordb";
(async () => {
console.log("sql_legacy.ts: start");
const db = await vectordb.connect("data/sample-lancedb");
let data = [];
@@ -34,5 +35,5 @@ import * as vectordb from "vectordb";
await tbl.filter("id = 10").limit(10).execute();
// --8<-- [end:sql_search]
console.log("SQL search: done");
console.log("sql_legacy.ts: done");
})();

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.15.1-beta.3</version>
<version>0.16.1-beta.2</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.15.1-beta.3</version>
<version>0.16.1-beta.2</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>

124
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"cpu": [
"x64",
"arm64"
@@ -52,14 +52,14 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.15.1-beta.3",
"@lancedb/vectordb-darwin-x64": "0.15.1-beta.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.3",
"@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.3",
"@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.3",
"@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.3",
"@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.3",
"@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.3"
"@lancedb/vectordb-darwin-arm64": "0.16.1-beta.2",
"@lancedb/vectordb-darwin-x64": "0.16.1-beta.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.2",
"@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.2",
"@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.2",
"@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.2",
"@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.2",
"@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.2"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -329,110 +329,6 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.15.1-beta.3.tgz",
"integrity": "sha512-2GinbODdSsUc+zJQ4BFZPsdraPWHJpDpGf7CsZIqfokwxIRnzVzFfQy+SZhmNhKzFkmtW21yWw6wrJ4FgS7Qtw==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.15.1-beta.3.tgz",
"integrity": "sha512-nRp5eN6yvx5kvfDEQuh3EHCmwjVNCIm7dXoV6BasepFkOoaHHmjKSIUFW7HjtJOfdFbb+r8UjBJx4cN6Jh2iFg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.15.1-beta.3.tgz",
"integrity": "sha512-JOyD7Nt3RSfHGWNQjHbZMHsIw1cVWPySxbtDmDqk5QH5IfgDNZLiz/sNbROuQkNvc5SsC6wUmhBUwWBETzW7/g==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-musl": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.15.1-beta.3.tgz",
"integrity": "sha512-4jTHl1i/4e7wP2U7RMjHr87/gsGJ9tfRJ4ljQIfV+LkA7ROMd/TA5XSnvPesQCDjPNRI4wAyb/BmK18V96VqBg==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.15.1-beta.3.tgz",
"integrity": "sha512-odrNqB/bGL+sweZi6ed9sKft/H5/bca/tDVG/Y39xCJ6swPWxXQK2Zpn7EjqbccI2p2zkrhKcOUBO/bEkOqQng==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-musl": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.15.1-beta.3.tgz",
"integrity": "sha512-Zml4KgQWzkkMBHZiD30Gs3N56BT5xO01efwO/Q2qB7JKw5Vy9pa6SgFf9woBvKFQRY73fiKqafy+BmGHTgozNg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-arm64-msvc": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.15.1-beta.3.tgz",
"integrity": "sha512-3BWkK+8JP+js/KoTad7bm26NTR5pq2tvXJkrFB0eaFfsIuUXebS+LIBF22f39He2WMpq3YojT0bMnYxp8qvRkQ==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.15.1-beta.3.tgz",
"integrity": "sha512-jr8SEisYAX7pQHIbxIDJPkANmxWh5Yohm8ELbMgu76IvLI7bsS7sB9ID+kcj1SiS5m4V6OG2BO1FrEYbPLZ6Dg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"
]
},
"node_modules/@neon-rs/cli": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
@@ -92,13 +92,13 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.15.1-beta.3",
"@lancedb/vectordb-darwin-arm64": "0.15.1-beta.3",
"@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.3",
"@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.3",
"@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.3",
"@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.3",
"@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.3"
"@lancedb/vectordb-darwin-x64": "0.16.1-beta.2",
"@lancedb/vectordb-darwin-arm64": "0.16.1-beta.2",
"@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.2",
"@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.2",
"@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.2",
"@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.2",
"@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.2"
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.15.1-beta.3"
version = "0.16.1-beta.2"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.15.1-beta.3",
"version": "0.16.1-beta.2",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.19.0"
current_version = "0.19.1-beta.3"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.19.0"
version = "0.19.1-beta.3"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -29,4 +29,4 @@ doctest: ## Run documentation tests.
.PHONY: test
test: ## Run tests.
pytest python/tests -vv --durations=10 -m "not slow"
pytest python/tests -vv --durations=10 -m "not slow and not s3_test"

View File

@@ -199,18 +199,29 @@ else:
]
def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
    """Convert a single (non-generic) annotation to an Arrow DataType.

    Classes deriving from ``pydantic.BaseModel`` become an Arrow struct built
    from the model's fields; classes deriving from ``FixedSizeListMixin``
    become a fixed-size list. Everything else is delegated to
    ``_py_type_to_arrow_type``.
    """
    # Non-class annotations cannot be passed to issubclass(); delegate them.
    if not inspect.isclass(tp):
        return _py_type_to_arrow_type(tp, field)

    if issubclass(tp, pydantic.BaseModel):
        # Nested model -> struct
        return pa.struct(_pydantic_model_to_fields(tp))

    if issubclass(tp, FixedSizeListMixin):
        return pa.list_(tp.value_arrow_type(), tp.dim())

    return _py_type_to_arrow_type(tp, field)
def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
"""Convert a Pydantic FieldInfo to Arrow DataType"""
if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
origin = field.annotation.__origin__
args = field.annotation.__args__
if origin is list:
child = args[0]
return pa.list_(_py_type_to_arrow_type(child, field))
elif origin == Union:
if len(args) == 2 and args[1] is type(None):
return _py_type_to_arrow_type(args[0], field)
return _pydantic_type_to_arrow_type(args[0], field)
elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
args = field.annotation.__args__
if len(args) == 2:
@@ -218,14 +229,7 @@ def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
if typ is type(None):
continue
return _py_type_to_arrow_type(typ, field)
elif inspect.isclass(field.annotation):
if issubclass(field.annotation, pydantic.BaseModel):
# Struct
fields = _pydantic_model_to_fields(field.annotation)
return pa.struct(fields)
elif issubclass(field.annotation, FixedSizeListMixin):
return pa.list_(field.annotation.value_arrow_type(), field.annotation.dim())
return _py_type_to_arrow_type(field.annotation, field)
return _pydantic_type_to_arrow_type(field.annotation, field)
def is_nullable(field: FieldInfo) -> bool:

View File

@@ -341,6 +341,7 @@ def test_add_optional_vector(tmp_path):
assert not (np.abs(tbl.to_pandas()["vector"][0]) < 1e-6).all()
@pytest.mark.slow
@pytest.mark.parametrize(
"embedding_type",
[

View File

@@ -10,6 +10,7 @@ import pyarrow as pa
import pydantic
import pytest
from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema
from pydantic import BaseModel
from pydantic import Field
@@ -252,3 +253,104 @@ def test_lance_model():
t = TestModel()
assert t == TestModel(vec=[0.0] * 16, li=[1, 2, 3])
def test_optional_nested_model():
    """Nested pydantic models, required or Optional, must map to Arrow structs.

    The ``payload`` field of ``MessageEvent`` is checked against a hand-built
    struct type; Optional members are expected to be nullable.
    """

    class WAMedia(BaseModel):
        url: str
        mimetype: str
        filename: Optional[str]
        error: Optional[str]
        data: bytes

    class WALocation(BaseModel):
        description: Optional[str]
        latitude: str
        longitude: str

    class ReplyToMessage(BaseModel):
        id: str
        participant: str
        body: str

    class Message(BaseModel):
        id: str
        timestamp: int
        from_: str
        fromMe: bool
        to: str
        body: str
        hasMedia: Optional[bool]
        media: WAMedia
        mediaUrl: Optional[str]
        ack: Optional[int]
        ackName: Optional[str]
        author: Optional[str]
        location: Optional[WALocation]
        vCards: Optional[List[str]]
        replyTo: Optional[ReplyToMessage]

    class AnyEvent(LanceModel):
        id: str
        session: str
        metadata: Optional[str] = None
        engine: str
        event: str

    class MessageEvent(AnyEvent):
        payload: Message

    # Expected Arrow types of the nested models, built bottom-up.
    media_type = pa.struct(
        [
            pa.field("url", pa.utf8(), False),
            pa.field("mimetype", pa.utf8(), False),
            pa.field("filename", pa.utf8(), True),
            pa.field("error", pa.utf8(), True),
            pa.field("data", pa.binary(), False),
        ]
    )
    location_type = pa.struct(
        [
            pa.field("description", pa.utf8(), True),
            pa.field("latitude", pa.utf8(), False),
            pa.field("longitude", pa.utf8(), False),
        ]
    )
    reply_to_type = pa.struct(
        [
            pa.field("id", pa.utf8(), False),
            pa.field("participant", pa.utf8(), False),
            pa.field("body", pa.utf8(), False),
        ]
    )
    expected_payload_type = pa.struct(
        [
            pa.field("id", pa.utf8(), False),
            pa.field("timestamp", pa.int64(), False),
            pa.field("from_", pa.utf8(), False),
            pa.field("fromMe", pa.bool_(), False),
            pa.field("to", pa.utf8(), False),
            pa.field("body", pa.utf8(), False),
            pa.field("hasMedia", pa.bool_(), True),
            pa.field("media", media_type, False),
            pa.field("mediaUrl", pa.utf8(), True),
            pa.field("ack", pa.int64(), True),
            pa.field("ackName", pa.utf8(), True),
            pa.field("author", pa.utf8(), True),
            pa.field("location", location_type, True),  # Optional
            pa.field("vCards", pa.list_(pa.utf8()), True),
            pa.field("replyTo", reply_to_type, True),
        ]
    )

    event_schema = pydantic_to_schema(MessageEvent)
    payload_field = event_schema.field("payload")
    assert payload_field.type == expected_payload_type

View File

@@ -232,6 +232,71 @@ async def test_distance_range_async(table_async: AsyncTable):
assert res["_distance"].to_pylist() == [min_dist, max_dist]
@pytest.mark.asyncio
async def test_distance_range_with_new_rows_async():
    """Distance-range queries must work when indexed and unindexed rows mix.

    An IVF_PQ index is built on the initial rows, then extra rows are appended
    without re-indexing, and each distance-range bound is checked against the
    distances returned by an unbounded query.
    """
    conn = await lancedb.connect_async(
        "memory://", read_consistency_interval=timedelta(seconds=0)
    )
    data = pa.table(
        {
            "vector": pa.FixedShapeTensorArray.from_numpy_ndarray(
                np.random.rand(256, 2)
            ),
        }
    )
    table = await conn.create_table("test", data)
    # The table comes from an async connection, so create_index returns a
    # coroutine. It must be awaited, otherwise the index is never built and
    # the queries below never hit the ANN path.
    await table.create_index(
        "vector", config=IvfPq(num_partitions=1, num_sub_vectors=2)
    )

    q = [0, 0]
    rs = await table.query().nearest_to(q).to_arrow()
    dists = rs["_distance"].to_pylist()
    min_dist = dists[0]
    max_dist = dists[-1]

    # append more rows so that execution plan would be mixed with ANN & Flat KNN
    new_data = pa.table(
        {
            "vector": pa.FixedShapeTensorArray.from_numpy_ndarray(np.random.rand(4, 2)),
        }
    )
    await table.add(new_data)

    # Nothing should be strictly closer than the closest known result.
    res = (
        await table.query()
        .nearest_to(q)
        .distance_range(upper_bound=min_dist)
        .to_arrow()
    )
    assert len(res) == 0

    res = (
        await table.query()
        .nearest_to(q)
        .distance_range(lower_bound=max_dist)
        .to_arrow()
    )
    for dist in res["_distance"].to_pylist():
        assert dist >= max_dist

    res = (
        await table.query()
        .nearest_to(q)
        .distance_range(upper_bound=max_dist)
        .to_arrow()
    )
    for dist in res["_distance"].to_pylist():
        assert dist < max_dist

    res = (
        await table.query()
        .nearest_to(q)
        .distance_range(lower_bound=min_dist)
        .to_arrow()
    )
    for dist in res["_distance"].to_pylist():
        assert dist >= min_dist
@pytest.mark.parametrize(
"multivec_table", [pa.float16(), pa.float32(), pa.float64()], indirect=True
)

View File

@@ -32,15 +32,16 @@ def make_mock_http_handler(handler):
@contextlib.contextmanager
def mock_lancedb_connection(handler):
with http.server.HTTPServer(
("localhost", 8080), make_mock_http_handler(handler)
("localhost", 0), make_mock_http_handler(handler)
) as server:
port = server.server_address[1]
handle = threading.Thread(target=server.serve_forever)
handle.start()
db = lancedb.connect(
"db://dev",
api_key="fake",
host_override="http://localhost:8080",
host_override=f"http://localhost:{port}",
client_config={
"retry_config": {"retries": 2},
"timeout_config": {
@@ -59,15 +60,16 @@ def mock_lancedb_connection(handler):
@contextlib.asynccontextmanager
async def mock_lancedb_connection_async(handler, **client_config):
with http.server.HTTPServer(
("localhost", 8080), make_mock_http_handler(handler)
("localhost", 0), make_mock_http_handler(handler)
) as server:
port = server.server_address[1]
handle = threading.Thread(target=server.serve_forever)
handle.start()
db = await lancedb.connect_async(
"db://dev",
api_key="fake",
host_override="http://localhost:8080",
host_override=f"http://localhost:{port}",
client_config={
"retry_config": {"retries": 2},
"timeout_config": {

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.15.1-beta.3"
version = "0.16.1-beta.2"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.15.1-beta.3"
version = "0.16.1-beta.2"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -23,7 +23,19 @@ impl VectorIndex {
let fields = index
.fields
.iter()
.map(|i| manifest.schema.fields[*i as usize].name.clone())
.map(|field_id| {
manifest
.schema
.field_by_id(*field_id)
.unwrap_or_else(|| {
panic!(
"field {field_id} of index {} must exist in schema",
index.name
)
})
.name
.clone()
})
.collect();
Self {
columns: fields,

View File

@@ -10,7 +10,7 @@ use crate::index::IndexStatistics;
use crate::query::{QueryRequest, Select, VectorQueryRequest};
use crate::table::{AddDataMode, AnyQuery, Filter};
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
use crate::{DistanceType, Error, Table};
use crate::{DistanceType, Error};
use arrow_array::RecordBatchReader;
use arrow_ipc::reader::FileReader;
use arrow_schema::{DataType, SchemaRef};
@@ -24,7 +24,7 @@ use http::StatusCode;
use lance::arrow::json::{JsonDataType, JsonSchema};
use lance::dataset::scanner::DatasetRecordBatchStream;
use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
use lance_datafusion::exec::{execute_plan, OneShotExec};
use lance_datafusion::exec::OneShotExec;
use serde::{Deserialize, Serialize};
use tokio::sync::RwLock;
@@ -205,10 +205,10 @@ impl<S: HttpSend> RemoteTable<S> {
}
fn apply_vector_query_params(
mut body: serde_json::Value,
body: &mut serde_json::Value,
query: &VectorQueryRequest,
) -> Result<Vec<serde_json::Value>> {
Self::apply_query_params(&mut body, &query.base)?;
) -> Result<()> {
Self::apply_query_params(body, &query.base)?;
// Apply general parameters, before we dispatch based on number of query vectors.
body["prefilter"] = query.base.prefilter.into();
@@ -254,22 +254,21 @@ impl<S: HttpSend> RemoteTable<S> {
0 => {
// Server takes empty vector, not null or undefined.
body["vector"] = serde_json::Value::Array(Vec::new());
Ok(vec![body])
}
1 => {
body["vector"] = vector_to_json(&query.query_vector[0])?;
Ok(vec![body])
}
_ => {
let mut bodies = Vec::with_capacity(query.query_vector.len());
for vector in &query.query_vector {
let mut body = body.clone();
body["vector"] = vector_to_json(vector)?;
bodies.push(body);
}
Ok(bodies)
let vectors = query
.query_vector
.iter()
.map(vector_to_json)
.collect::<Result<Vec<_>>>()?;
body["vector"] = serde_json::Value::Array(vectors);
}
}
Ok(())
}
async fn check_mutable(&self) -> Result<()> {
@@ -294,7 +293,7 @@ impl<S: HttpSend> RemoteTable<S> {
&self,
query: &AnyQuery,
_options: QueryExecutionOptions,
) -> Result<Vec<Pin<Box<dyn RecordBatchStream + Send>>>> {
) -> Result<Pin<Box<dyn RecordBatchStream + Send>>> {
let request = self.client.post(&format!("/v1/table/{}/query/", self.name));
let version = self.current_version().await;
@@ -305,28 +304,16 @@ impl<S: HttpSend> RemoteTable<S> {
Self::apply_query_params(&mut body, query)?;
// Empty vector can be passed if no vector search is performed.
body["vector"] = serde_json::Value::Array(Vec::new());
let request = request.json(&body);
let (request_id, response) = self.client.send(request, true).await?;
let stream = self.read_arrow_stream(&request_id, response).await?;
Ok(vec![stream])
}
AnyQuery::VectorQuery(query) => {
let bodies = Self::apply_vector_query_params(body, query)?;
let mut futures = Vec::with_capacity(bodies.len());
for body in bodies {
let request = request.try_clone().unwrap().json(&body);
let future = async move {
let (request_id, response) = self.client.send(request, true).await?;
self.read_arrow_stream(&request_id, response).await
};
futures.push(future);
}
futures::future::try_join_all(futures).await
Self::apply_vector_query_params(&mut body, query)?;
}
}
let request = request.json(&body);
let (request_id, response) = self.client.send(request, true).await?;
let stream = self.read_arrow_stream(&request_id, response).await?;
Ok(stream)
}
}
@@ -498,18 +485,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
query: &AnyQuery,
options: QueryExecutionOptions,
) -> Result<Arc<dyn ExecutionPlan>> {
let streams = self.execute_query(query, options).await?;
if streams.len() == 1 {
let stream = streams.into_iter().next().unwrap();
Ok(Arc::new(OneShotExec::new(stream)))
} else {
let stream_execs = streams
.into_iter()
.map(|stream| Arc::new(OneShotExec::new(stream)) as Arc<dyn ExecutionPlan>)
.collect();
Table::multi_vector_plan(stream_execs)
}
let stream = self.execute_query(query, options).await?;
Ok(Arc::new(OneShotExec::new(stream)))
}
async fn query(
@@ -517,24 +494,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
query: &AnyQuery,
_options: QueryExecutionOptions,
) -> Result<DatasetRecordBatchStream> {
let streams = self.execute_query(query, _options).await?;
if streams.len() == 1 {
Ok(DatasetRecordBatchStream::new(
streams.into_iter().next().unwrap(),
))
} else {
let stream_execs = streams
.into_iter()
.map(|stream| Arc::new(OneShotExec::new(stream)) as Arc<dyn ExecutionPlan>)
.collect();
let plan = Table::multi_vector_plan(stream_execs)?;
Ok(DatasetRecordBatchStream::new(execute_plan(
plan,
Default::default(),
)?))
}
let stream = self.execute_query(query, _options).await?;
Ok(DatasetRecordBatchStream::new(stream))
}
async fn update(&self, update: UpdateBuilder) -> Result<u64> {
self.check_mutable().await?;
@@ -1500,9 +1461,21 @@ mod tests {
request.headers().get("Content-Type").unwrap(),
JSON_CONTENT_TYPE
);
let body: serde_json::Value =
serde_json::from_slice(request.body().unwrap().as_bytes().unwrap()).unwrap();
let query_vectors = body["vector"].as_array().unwrap();
assert_eq!(query_vectors.len(), 2);
assert_eq!(query_vectors[0].as_array().unwrap().len(), 3);
assert_eq!(query_vectors[1].as_array().unwrap().len(), 3);
let data = RecordBatch::try_new(
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
Arc::new(Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("query_index", DataType::Int32, false),
])),
vec![
Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])),
Arc::new(Int32Array::from(vec![0, 0, 0, 1, 1, 1])),
],
)
.unwrap();
let response_body = write_ipc_file(&data);
@@ -1519,8 +1492,6 @@ mod tests {
.unwrap()
.add_query_vector(vec![0.4, 0.5, 0.6])
.unwrap();
let plan = query.explain_plan(true).await.unwrap();
assert!(plan.contains("UnionExec"), "Plan: {}", plan);
let results = query
.execute()

View File

@@ -1380,10 +1380,11 @@ impl NativeTable {
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
let dataset = self.dataset.get().await?;
let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?;
let mf = dataset.manifest();
let indices = dataset.load_indices().await?;
Ok(indices
.iter()
.map(|i| VectorIndex::new_from_format(&(mf.0), i))
.map(|i| VectorIndex::new_from_format(mf, i))
.collect())
}
@@ -1995,8 +1996,8 @@ impl BaseTable for NativeTable {
};
let ds_ref = self.dataset.get().await?;
let mut column = query.column.clone();
let schema = ds_ref.schema();
let mut column = query.column.clone();
let mut query_vector = query.query_vector.first().cloned();
if query.query_vector.len() > 1 {