Compare commits

...

15 Commits

Author SHA1 Message Date
Lance Release
a33a0670f6 Bump version: 0.19.1-beta.2 → 0.19.1-beta.3 2025-02-20 03:37:27 +00:00
BubbleCal
14c9ff46d1 feat: support multivector on remote table (#2045)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-20 11:34:51 +08:00
Lei Xu
1865f7decf fix: support optional nested pydantic model (#2130)
Closes #2129
2025-02-17 20:43:13 -08:00
BubbleCal
a608621476 test: query with dist range and new rows (#2126)
We found a bug where the flat KNN plan node's stats were not in the same
order as the fields in the schema, which caused an error when querying with
a distance range over new unindexed rows.

This has been fixed in lance, so add this test to verify that it works.

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-17 12:57:45 +08:00
BubbleCal
00514999ff feat: upgrade lance to 0.23.1-beta.4 (#2121)
This also upgrades object_store to 0.11.0 and snafu to 0.8.

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-16 14:53:26 +08:00
Lance Release
b3b597fef6 Updating package-lock.json 2025-02-13 04:40:10 +00:00
Lance Release
bf17144591 Updating package-lock.json 2025-02-13 04:39:54 +00:00
Lance Release
09e110525f Bump version: 0.16.1-beta.1 → 0.16.1-beta.2 2025-02-13 04:39:38 +00:00
Lance Release
40f0dbb64d Bump version: 0.19.1-beta.1 → 0.19.1-beta.2 2025-02-13 04:39:19 +00:00
BubbleCal
3b19e96ae7 fix: panic when field id doesn't equal to field index (#2116)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-13 12:38:35 +08:00
Will Jones
78a17ad54c chore: improve dev instructions for Python (#2088)
Closes #2042
2025-02-12 14:08:52 -08:00
Lance Release
a8e6b491e2 Updating package-lock.json 2025-02-11 22:05:54 +00:00
Lance Release
cea541ca46 Updating package-lock.json 2025-02-11 20:56:22 +00:00
Lance Release
873ffc1042 Updating package-lock.json 2025-02-11 20:56:05 +00:00
Lance Release
83273ad997 Bump version: 0.16.1-beta.0 → 0.16.1-beta.1 2025-02-11 20:55:43 +00:00
31 changed files with 453 additions and 535 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.16.1-beta.0" current_version = "0.16.1-beta.2"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

463
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -23,14 +23,14 @@ rust-version = "1.78.0"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.23.1", "features" = [ lance = { "version" = "=0.23.1", "features" = [
"dynamodb", "dynamodb",
], git = "https://github.com/lancedb/lance.git", tag = "v0.23.1-beta.2"} ], git = "https://github.com/lancedb/lance.git", tag = "v0.23.1-beta.4"}
lance-io = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"} lance-io = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-index = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"} lance-index = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-linalg = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"} lance-linalg = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-table = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"} lance-table = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-testing = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"} lance-testing = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-datafusion = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"} lance-datafusion = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
lance-encoding = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"} lance-encoding = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "53.2", optional = false } arrow = { version = "53.2", optional = false }
arrow-array = "53.2" arrow-array = "53.2"
@@ -55,9 +55,9 @@ half = { "version" = "=2.4.1", default-features = false, features = [
futures = "0" futures = "0"
log = "0.4" log = "0.4"
moka = { version = "0.12", features = ["future"] } moka = { version = "0.12", features = ["future"] }
object_store = "0.10.2" object_store = "0.11.0"
pin-project = "1.0.7" pin-project = "1.0.7"
snafu = "0.7.4" snafu = "0.8"
url = "2" url = "2"
num-traits = "0.2" num-traits = "0.2"
rand = "0.8" rand = "0.8"

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.16.1-beta.0</version> <version>0.16.1-beta.2</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.16.1-beta.0</version> <version>0.16.1-beta.2</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>LanceDB Parent</name> <name>LanceDB Parent</name>

124
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "vectordb", "name": "vectordb",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"
@@ -52,14 +52,14 @@
"uuid": "^9.0.0" "uuid": "^9.0.0"
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.16.1-beta.0", "@lancedb/vectordb-darwin-arm64": "0.16.1-beta.2",
"@lancedb/vectordb-darwin-x64": "0.16.1-beta.0", "@lancedb/vectordb-darwin-x64": "0.16.1-beta.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.0", "@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.2",
"@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.0", "@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.2",
"@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.0", "@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.2",
"@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.0", "@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.2",
"@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.0", "@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.2",
"@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.0" "@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.2"
}, },
"peerDependencies": { "peerDependencies": {
"@apache-arrow/ts": "^14.0.2", "@apache-arrow/ts": "^14.0.2",
@@ -329,110 +329,6 @@
"@jridgewell/sourcemap-codec": "^1.4.10" "@jridgewell/sourcemap-codec": "^1.4.10"
} }
}, },
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.16.1-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.16.1-beta.0.tgz",
"integrity": "sha512-wfPwpPB0ml6hjnyrMzaF/jKuq2oKt8w8sY9B6ZxmTh2JBCij+vvNUtDIxGkInTPr+EYVcQdhdtzkkzTBX5GQLg==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.16.1-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.16.1-beta.0.tgz",
"integrity": "sha512-cTRH2q2x48UOJ5n+dBYJr3gH5gDhq/aUbu281Ie0wHpNtlVwFXOrYdAlqhEmDPe3Pj61iua/mcnyWficsUD1gg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.16.1-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.16.1-beta.0.tgz",
"integrity": "sha512-NYHCM/Bcfvv11tGRzgpqQFqjAJsQrMLdNXy9r+qRmyNvAs482jK5VOoD3ckN4XisbxLB04APZSIz+s/edNCeBQ==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-musl": {
"version": "0.16.1-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.16.1-beta.0.tgz",
"integrity": "sha512-GU3sWKvYZ0YVJ+TCBarWfIy7skVU1otx78um81q+8NtX3b1bxKa15LDWWEU6ToXZbEat72OmIZRJ+AIgjdRcoQ==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.16.1-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.16.1-beta.0.tgz",
"integrity": "sha512-Pjj5SH33jIL6CKfNJfW9ISDEpJI/VOhhKx1QN50ndvAjUFm2xH9i654wPX2QzyZx7Qb/+5jTKnAuYSc8Ev/Fyg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-musl": {
"version": "0.16.1-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.16.1-beta.0.tgz",
"integrity": "sha512-rIGTxcUtRiP3wVp7icND5XtevSmr5RFglednyDqpd+Ly6Raek1LoIrPHFuVnpVUDZuq4Hu7/sgX/yT4OB7kIIA==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-arm64-msvc": {
"version": "0.16.1-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.16.1-beta.0.tgz",
"integrity": "sha512-rho0bc+LW9UHM7ZX0Rc6T2pM1hgHFvvhp5nt+4MF7wq/9N3JR6AiMwg1p0nLKu3yjjLkBjRGu+uxjgG/IqdPpw==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.16.1-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.16.1-beta.0.tgz",
"integrity": "sha512-KJ+qKh/dEmisnZJxbc9ocuwyQdyyQpivXyoAsjEc+JGbGP2gTMOw05q900q2eaLB8TQMeQQ4QGnu4gQZGTuxzg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"
]
},
"node_modules/@neon-rs/cli": { "node_modules/@neon-rs/cli": {
"version": "0.0.160", "version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"private": false, "private": false,
"main": "dist/index.js", "main": "dist/index.js",
@@ -92,13 +92,13 @@
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.16.1-beta.0", "@lancedb/vectordb-darwin-x64": "0.16.1-beta.2",
"@lancedb/vectordb-darwin-arm64": "0.16.1-beta.0", "@lancedb/vectordb-darwin-arm64": "0.16.1-beta.2",
"@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.0", "@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.0", "@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.2",
"@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.0", "@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.2",
"@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.0", "@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.2",
"@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.0", "@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.2",
"@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.0" "@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.2"
} }
} }

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "lancedb-nodejs" name = "lancedb-nodejs"
edition.workspace = true edition.workspace = true
version = "0.16.1-beta.0" version = "0.16.1-beta.2"
license.workspace = true license.workspace = true
description.workspace = true description.workspace = true
repository.workspace = true repository.workspace = true

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-musl", "name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node", "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-musl", "name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node", "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-arm64-msvc", "name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"os": [ "os": [
"win32" "win32"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{ {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"

View File

@@ -11,7 +11,7 @@
"ann" "ann"
], ],
"private": false, "private": false,
"version": "0.16.1-beta.0", "version": "0.16.1-beta.2",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.19.1-beta.1" current_version = "0.19.1-beta.3"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.19.1-beta.1" version = "0.19.1-beta.3"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true

View File

@@ -29,4 +29,4 @@ doctest: ## Run documentation tests.
.PHONY: test .PHONY: test
test: ## Run tests. test: ## Run tests.
pytest python/tests -vv --durations=10 -m "not slow" pytest python/tests -vv --durations=10 -m "not slow and not s3_test"

View File

@@ -199,18 +199,29 @@ else:
] ]
def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
if inspect.isclass(tp):
if issubclass(tp, pydantic.BaseModel):
# Struct
fields = _pydantic_model_to_fields(tp)
return pa.struct(fields)
if issubclass(tp, FixedSizeListMixin):
return pa.list_(tp.value_arrow_type(), tp.dim())
return _py_type_to_arrow_type(tp, field)
def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType: def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
"""Convert a Pydantic FieldInfo to Arrow DataType""" """Convert a Pydantic FieldInfo to Arrow DataType"""
if isinstance(field.annotation, (_GenericAlias, GenericAlias)): if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
origin = field.annotation.__origin__ origin = field.annotation.__origin__
args = field.annotation.__args__ args = field.annotation.__args__
if origin is list: if origin is list:
child = args[0] child = args[0]
return pa.list_(_py_type_to_arrow_type(child, field)) return pa.list_(_py_type_to_arrow_type(child, field))
elif origin == Union: elif origin == Union:
if len(args) == 2 and args[1] is type(None): if len(args) == 2 and args[1] is type(None):
return _py_type_to_arrow_type(args[0], field) return _pydantic_type_to_arrow_type(args[0], field)
elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType): elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
args = field.annotation.__args__ args = field.annotation.__args__
if len(args) == 2: if len(args) == 2:
@@ -218,14 +229,7 @@ def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
if typ is type(None): if typ is type(None):
continue continue
return _py_type_to_arrow_type(typ, field) return _py_type_to_arrow_type(typ, field)
elif inspect.isclass(field.annotation): return _pydantic_type_to_arrow_type(field.annotation, field)
if issubclass(field.annotation, pydantic.BaseModel):
# Struct
fields = _pydantic_model_to_fields(field.annotation)
return pa.struct(fields)
elif issubclass(field.annotation, FixedSizeListMixin):
return pa.list_(field.annotation.value_arrow_type(), field.annotation.dim())
return _py_type_to_arrow_type(field.annotation, field)
def is_nullable(field: FieldInfo) -> bool: def is_nullable(field: FieldInfo) -> bool:

View File

@@ -341,6 +341,7 @@ def test_add_optional_vector(tmp_path):
assert not (np.abs(tbl.to_pandas()["vector"][0]) < 1e-6).all() assert not (np.abs(tbl.to_pandas()["vector"][0]) < 1e-6).all()
@pytest.mark.slow
@pytest.mark.parametrize( @pytest.mark.parametrize(
"embedding_type", "embedding_type",
[ [

View File

@@ -10,6 +10,7 @@ import pyarrow as pa
import pydantic import pydantic
import pytest import pytest
from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema
from pydantic import BaseModel
from pydantic import Field from pydantic import Field
@@ -252,3 +253,104 @@ def test_lance_model():
t = TestModel() t = TestModel()
assert t == TestModel(vec=[0.0] * 16, li=[1, 2, 3]) assert t == TestModel(vec=[0.0] * 16, li=[1, 2, 3])
def test_optional_nested_model():
class WAMedia(BaseModel):
url: str
mimetype: str
filename: Optional[str]
error: Optional[str]
data: bytes
class WALocation(BaseModel):
description: Optional[str]
latitude: str
longitude: str
class ReplyToMessage(BaseModel):
id: str
participant: str
body: str
class Message(BaseModel):
id: str
timestamp: int
from_: str
fromMe: bool
to: str
body: str
hasMedia: Optional[bool]
media: WAMedia
mediaUrl: Optional[str]
ack: Optional[int]
ackName: Optional[str]
author: Optional[str]
location: Optional[WALocation]
vCards: Optional[List[str]]
replyTo: Optional[ReplyToMessage]
class AnyEvent(LanceModel):
id: str
session: str
metadata: Optional[str] = None
engine: str
event: str
class MessageEvent(AnyEvent):
payload: Message
schema = pydantic_to_schema(MessageEvent)
payload = schema.field("payload")
assert payload.type == pa.struct(
[
pa.field("id", pa.utf8(), False),
pa.field("timestamp", pa.int64(), False),
pa.field("from_", pa.utf8(), False),
pa.field("fromMe", pa.bool_(), False),
pa.field("to", pa.utf8(), False),
pa.field("body", pa.utf8(), False),
pa.field("hasMedia", pa.bool_(), True),
pa.field(
"media",
pa.struct(
[
pa.field("url", pa.utf8(), False),
pa.field("mimetype", pa.utf8(), False),
pa.field("filename", pa.utf8(), True),
pa.field("error", pa.utf8(), True),
pa.field("data", pa.binary(), False),
]
),
False,
),
pa.field("mediaUrl", pa.utf8(), True),
pa.field("ack", pa.int64(), True),
pa.field("ackName", pa.utf8(), True),
pa.field("author", pa.utf8(), True),
pa.field(
"location",
pa.struct(
[
pa.field("description", pa.utf8(), True),
pa.field("latitude", pa.utf8(), False),
pa.field("longitude", pa.utf8(), False),
]
),
True, # Optional
),
pa.field("vCards", pa.list_(pa.utf8()), True),
pa.field(
"replyTo",
pa.struct(
[
pa.field("id", pa.utf8(), False),
pa.field("participant", pa.utf8(), False),
pa.field("body", pa.utf8(), False),
]
),
True,
),
]
)

View File

@@ -232,6 +232,71 @@ async def test_distance_range_async(table_async: AsyncTable):
assert res["_distance"].to_pylist() == [min_dist, max_dist] assert res["_distance"].to_pylist() == [min_dist, max_dist]
@pytest.mark.asyncio
async def test_distance_range_with_new_rows_async():
conn = await lancedb.connect_async(
"memory://", read_consistency_interval=timedelta(seconds=0)
)
data = pa.table(
{
"vector": pa.FixedShapeTensorArray.from_numpy_ndarray(
np.random.rand(256, 2)
),
}
)
table = await conn.create_table("test", data)
table.create_index("vector", config=IvfPq(num_partitions=1, num_sub_vectors=2))
q = [0, 0]
rs = await table.query().nearest_to(q).to_arrow()
dists = rs["_distance"].to_pylist()
min_dist = dists[0]
max_dist = dists[-1]
# append more rows so that execution plan would be mixed with ANN & Flat KNN
new_data = pa.table(
{
"vector": pa.FixedShapeTensorArray.from_numpy_ndarray(np.random.rand(4, 2)),
}
)
await table.add(new_data)
res = (
await table.query()
.nearest_to(q)
.distance_range(upper_bound=min_dist)
.to_arrow()
)
assert len(res) == 0
res = (
await table.query()
.nearest_to(q)
.distance_range(lower_bound=max_dist)
.to_arrow()
)
for dist in res["_distance"].to_pylist():
assert dist >= max_dist
res = (
await table.query()
.nearest_to(q)
.distance_range(upper_bound=max_dist)
.to_arrow()
)
for dist in res["_distance"].to_pylist():
assert dist < max_dist
res = (
await table.query()
.nearest_to(q)
.distance_range(lower_bound=min_dist)
.to_arrow()
)
for dist in res["_distance"].to_pylist():
assert dist >= min_dist
@pytest.mark.parametrize( @pytest.mark.parametrize(
"multivec_table", [pa.float16(), pa.float32(), pa.float64()], indirect=True "multivec_table", [pa.float16(), pa.float32(), pa.float64()], indirect=True
) )

View File

@@ -32,15 +32,16 @@ def make_mock_http_handler(handler):
@contextlib.contextmanager @contextlib.contextmanager
def mock_lancedb_connection(handler): def mock_lancedb_connection(handler):
with http.server.HTTPServer( with http.server.HTTPServer(
("localhost", 8080), make_mock_http_handler(handler) ("localhost", 0), make_mock_http_handler(handler)
) as server: ) as server:
port = server.server_address[1]
handle = threading.Thread(target=server.serve_forever) handle = threading.Thread(target=server.serve_forever)
handle.start() handle.start()
db = lancedb.connect( db = lancedb.connect(
"db://dev", "db://dev",
api_key="fake", api_key="fake",
host_override="http://localhost:8080", host_override=f"http://localhost:{port}",
client_config={ client_config={
"retry_config": {"retries": 2}, "retry_config": {"retries": 2},
"timeout_config": { "timeout_config": {
@@ -59,15 +60,16 @@ def mock_lancedb_connection(handler):
@contextlib.asynccontextmanager @contextlib.asynccontextmanager
async def mock_lancedb_connection_async(handler, **client_config): async def mock_lancedb_connection_async(handler, **client_config):
with http.server.HTTPServer( with http.server.HTTPServer(
("localhost", 8080), make_mock_http_handler(handler) ("localhost", 0), make_mock_http_handler(handler)
) as server: ) as server:
port = server.server_address[1]
handle = threading.Thread(target=server.serve_forever) handle = threading.Thread(target=server.serve_forever)
handle.start() handle.start()
db = await lancedb.connect_async( db = await lancedb.connect_async(
"db://dev", "db://dev",
api_key="fake", api_key="fake",
host_override="http://localhost:8080", host_override=f"http://localhost:{port}",
client_config={ client_config={
"retry_config": {"retries": 2}, "retry_config": {"retries": 2},
"timeout_config": { "timeout_config": {

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-node" name = "lancedb-node"
version = "0.16.1-beta.0" version = "0.16.1-beta.2"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
edition.workspace = true edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.16.1-beta.0" version = "0.16.1-beta.2"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true

View File

@@ -23,7 +23,19 @@ impl VectorIndex {
let fields = index let fields = index
.fields .fields
.iter() .iter()
.map(|i| manifest.schema.fields[*i as usize].name.clone()) .map(|field_id| {
manifest
.schema
.field_by_id(*field_id)
.unwrap_or_else(|| {
panic!(
"field {field_id} of index {} must exist in schema",
index.name
)
})
.name
.clone()
})
.collect(); .collect();
Self { Self {
columns: fields, columns: fields,

View File

@@ -10,7 +10,7 @@ use crate::index::IndexStatistics;
use crate::query::{QueryRequest, Select, VectorQueryRequest}; use crate::query::{QueryRequest, Select, VectorQueryRequest};
use crate::table::{AddDataMode, AnyQuery, Filter}; use crate::table::{AddDataMode, AnyQuery, Filter};
use crate::utils::{supported_btree_data_type, supported_vector_data_type}; use crate::utils::{supported_btree_data_type, supported_vector_data_type};
use crate::{DistanceType, Error, Table}; use crate::{DistanceType, Error};
use arrow_array::RecordBatchReader; use arrow_array::RecordBatchReader;
use arrow_ipc::reader::FileReader; use arrow_ipc::reader::FileReader;
use arrow_schema::{DataType, SchemaRef}; use arrow_schema::{DataType, SchemaRef};
@@ -24,7 +24,7 @@ use http::StatusCode;
use lance::arrow::json::{JsonDataType, JsonSchema}; use lance::arrow::json::{JsonDataType, JsonSchema};
use lance::dataset::scanner::DatasetRecordBatchStream; use lance::dataset::scanner::DatasetRecordBatchStream;
use lance::dataset::{ColumnAlteration, NewColumnTransform, Version}; use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
use lance_datafusion::exec::{execute_plan, OneShotExec}; use lance_datafusion::exec::OneShotExec;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tokio::sync::RwLock; use tokio::sync::RwLock;
@@ -205,10 +205,10 @@ impl<S: HttpSend> RemoteTable<S> {
} }
fn apply_vector_query_params( fn apply_vector_query_params(
mut body: serde_json::Value, body: &mut serde_json::Value,
query: &VectorQueryRequest, query: &VectorQueryRequest,
) -> Result<Vec<serde_json::Value>> { ) -> Result<()> {
Self::apply_query_params(&mut body, &query.base)?; Self::apply_query_params(body, &query.base)?;
// Apply general parameters, before we dispatch based on number of query vectors. // Apply general parameters, before we dispatch based on number of query vectors.
body["prefilter"] = query.base.prefilter.into(); body["prefilter"] = query.base.prefilter.into();
@@ -254,22 +254,21 @@ impl<S: HttpSend> RemoteTable<S> {
0 => { 0 => {
// Server takes empty vector, not null or undefined. // Server takes empty vector, not null or undefined.
body["vector"] = serde_json::Value::Array(Vec::new()); body["vector"] = serde_json::Value::Array(Vec::new());
Ok(vec![body])
} }
1 => { 1 => {
body["vector"] = vector_to_json(&query.query_vector[0])?; body["vector"] = vector_to_json(&query.query_vector[0])?;
Ok(vec![body])
} }
_ => { _ => {
let mut bodies = Vec::with_capacity(query.query_vector.len()); let vectors = query
for vector in &query.query_vector { .query_vector
let mut body = body.clone(); .iter()
body["vector"] = vector_to_json(vector)?; .map(vector_to_json)
bodies.push(body); .collect::<Result<Vec<_>>>()?;
} body["vector"] = serde_json::Value::Array(vectors);
Ok(bodies)
} }
} }
Ok(())
} }
async fn check_mutable(&self) -> Result<()> { async fn check_mutable(&self) -> Result<()> {
@@ -294,7 +293,7 @@ impl<S: HttpSend> RemoteTable<S> {
&self, &self,
query: &AnyQuery, query: &AnyQuery,
_options: QueryExecutionOptions, _options: QueryExecutionOptions,
) -> Result<Vec<Pin<Box<dyn RecordBatchStream + Send>>>> { ) -> Result<Pin<Box<dyn RecordBatchStream + Send>>> {
let request = self.client.post(&format!("/v1/table/{}/query/", self.name)); let request = self.client.post(&format!("/v1/table/{}/query/", self.name));
let version = self.current_version().await; let version = self.current_version().await;
@@ -305,28 +304,16 @@ impl<S: HttpSend> RemoteTable<S> {
Self::apply_query_params(&mut body, query)?; Self::apply_query_params(&mut body, query)?;
// Empty vector can be passed if no vector search is performed. // Empty vector can be passed if no vector search is performed.
body["vector"] = serde_json::Value::Array(Vec::new()); body["vector"] = serde_json::Value::Array(Vec::new());
let request = request.json(&body);
let (request_id, response) = self.client.send(request, true).await?;
let stream = self.read_arrow_stream(&request_id, response).await?;
Ok(vec![stream])
} }
AnyQuery::VectorQuery(query) => { AnyQuery::VectorQuery(query) => {
let bodies = Self::apply_vector_query_params(body, query)?; Self::apply_vector_query_params(&mut body, query)?;
let mut futures = Vec::with_capacity(bodies.len());
for body in bodies {
let request = request.try_clone().unwrap().json(&body);
let future = async move {
let (request_id, response) = self.client.send(request, true).await?;
self.read_arrow_stream(&request_id, response).await
};
futures.push(future);
}
futures::future::try_join_all(futures).await
} }
} }
let request = request.json(&body);
let (request_id, response) = self.client.send(request, true).await?;
let stream = self.read_arrow_stream(&request_id, response).await?;
Ok(stream)
} }
} }
@@ -498,18 +485,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
query: &AnyQuery, query: &AnyQuery,
options: QueryExecutionOptions, options: QueryExecutionOptions,
) -> Result<Arc<dyn ExecutionPlan>> { ) -> Result<Arc<dyn ExecutionPlan>> {
let streams = self.execute_query(query, options).await?; let stream = self.execute_query(query, options).await?;
Ok(Arc::new(OneShotExec::new(stream)))
if streams.len() == 1 {
let stream = streams.into_iter().next().unwrap();
Ok(Arc::new(OneShotExec::new(stream)))
} else {
let stream_execs = streams
.into_iter()
.map(|stream| Arc::new(OneShotExec::new(stream)) as Arc<dyn ExecutionPlan>)
.collect();
Table::multi_vector_plan(stream_execs)
}
} }
async fn query( async fn query(
@@ -517,24 +494,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
query: &AnyQuery, query: &AnyQuery,
_options: QueryExecutionOptions, _options: QueryExecutionOptions,
) -> Result<DatasetRecordBatchStream> { ) -> Result<DatasetRecordBatchStream> {
let streams = self.execute_query(query, _options).await?; let stream = self.execute_query(query, _options).await?;
Ok(DatasetRecordBatchStream::new(stream))
if streams.len() == 1 {
Ok(DatasetRecordBatchStream::new(
streams.into_iter().next().unwrap(),
))
} else {
let stream_execs = streams
.into_iter()
.map(|stream| Arc::new(OneShotExec::new(stream)) as Arc<dyn ExecutionPlan>)
.collect();
let plan = Table::multi_vector_plan(stream_execs)?;
Ok(DatasetRecordBatchStream::new(execute_plan(
plan,
Default::default(),
)?))
}
} }
async fn update(&self, update: UpdateBuilder) -> Result<u64> { async fn update(&self, update: UpdateBuilder) -> Result<u64> {
self.check_mutable().await?; self.check_mutable().await?;
@@ -1500,9 +1461,21 @@ mod tests {
request.headers().get("Content-Type").unwrap(), request.headers().get("Content-Type").unwrap(),
JSON_CONTENT_TYPE JSON_CONTENT_TYPE
); );
let body: serde_json::Value =
serde_json::from_slice(request.body().unwrap().as_bytes().unwrap()).unwrap();
let query_vectors = body["vector"].as_array().unwrap();
assert_eq!(query_vectors.len(), 2);
assert_eq!(query_vectors[0].as_array().unwrap().len(), 3);
assert_eq!(query_vectors[1].as_array().unwrap().len(), 3);
let data = RecordBatch::try_new( let data = RecordBatch::try_new(
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])), Arc::new(Schema::new(vec![
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], Field::new("a", DataType::Int32, false),
Field::new("query_index", DataType::Int32, false),
])),
vec![
Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])),
Arc::new(Int32Array::from(vec![0, 0, 0, 1, 1, 1])),
],
) )
.unwrap(); .unwrap();
let response_body = write_ipc_file(&data); let response_body = write_ipc_file(&data);
@@ -1519,8 +1492,6 @@ mod tests {
.unwrap() .unwrap()
.add_query_vector(vec![0.4, 0.5, 0.6]) .add_query_vector(vec![0.4, 0.5, 0.6])
.unwrap(); .unwrap();
let plan = query.explain_plan(true).await.unwrap();
assert!(plan.contains("UnionExec"), "Plan: {}", plan);
let results = query let results = query
.execute() .execute()

View File

@@ -1996,8 +1996,8 @@ impl BaseTable for NativeTable {
}; };
let ds_ref = self.dataset.get().await?; let ds_ref = self.dataset.get().await?;
let mut column = query.column.clone();
let schema = ds_ref.schema(); let schema = ds_ref.schema();
let mut column = query.column.clone();
let mut query_vector = query.query_vector.first().cloned(); let mut query_vector = query.query_vector.first().cloned();
if query.query_vector.len() > 1 { if query.query_vector.len() > 1 {